loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

commit 75a5908e2126a7bebde85ab1f235e7ce8077116d
parent 1eb57dbc55dbab68cb3aac08106b5d265ef83bb6
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Tue,  7 Apr 2026 19:21:30 +0200

Add OpenRouter provider with Qwen 3.6 Plus via litellm proxy

Requires a litellm proxy running on localhost:4000 to translate
Anthropic API requests into OpenAI API requests for OpenRouter.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M dashboard/src/lib/colors.ts | 3 +++
M grid.yaml | 18 ++++++++++++++++--
M harness/lib/compute_grid.py | 2 ++
M harness/run.py | 4 +++-
4 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/dashboard/src/lib/colors.ts b/dashboard/src/lib/colors.ts @@ -22,6 +22,9 @@ export const MODEL_COLORS: Record<string, string> = { "glm-4.7": "hsl(220 50% 65%)", // steel blue "glm-5.1": "hsl(15 65% 65%)", // coral + // Qwen (OpenRouter) + "qwen-3.6-plus": "hsl(270 40% 68%)", // lavender + // Future slots "slot-7": "hsl(270 40% 68%)", // lavender "slot-8": "hsl(60 50% 62%)", // olive diff --git a/grid.yaml b/grid.yaml @@ -9,7 +9,7 @@ defaults: axes: model: - values: ["haiku-4.5", "sonnet-4.6", "opus-4.6", "glm-4.5-air", "glm-4.7", "glm-5.1"] + values: ["haiku-4.5", "sonnet-4.6", "opus-4.6", "glm-4.5-air", "glm-4.7", "glm-5.1", "qwen-3.6-plus"] effort: values: [high, max] prompt_style: @@ -53,7 +53,7 @@ axes: renderer: values: ["none", "canvas", "svg", "dom", "webgl"] provider: - values: ["anthropic", "zai"] + values: ["anthropic", "zai", "openrouter"] providers: anthropic: @@ -66,6 +66,11 @@ providers: base_url: "https://api.z.ai/api/anthropic" api_key_env: "ZAI_API_KEY" models: ["glm-4.5-air", "glm-4.7", "glm-5.1"] + openrouter: + base_url: "http://localhost:4000" + auth_token: "dummy" + cli_model_map: + "qwen-3.6-plus": "openrouter/qwen/qwen3.6-plus:free" exclusions: # Haiku does not support extended thinking @@ -88,6 +93,15 @@ exclusions: - when: { provider: zai, model: "haiku-4.5" } - when: { provider: zai, model: "sonnet-4.6" } - when: { provider: zai, model: "opus-4.6" } + - when: { provider: zai, model: "qwen-3.6-plus" } + # OpenRouter models only with openrouter + - when: { provider: anthropic, model: "qwen-3.6-plus" } + - when: { provider: openrouter, model: "haiku-4.5" } + - when: { provider: openrouter, model: "sonnet-4.6" } + - when: { provider: openrouter, model: "opus-4.6" } + - when: { provider: openrouter, model: "glm-4.5-air" } + - when: { provider: openrouter, model: "glm-4.7" } + - when: { provider: openrouter, model: "glm-5.1" } tasks: - tetris diff --git a/harness/lib/compute_grid.py b/harness/lib/compute_grid.py @@ -72,7 +72,9 @@ 
VALUE_ABBREV = { "haiku-4.5": "haiku45", "sonnet-4.6": "sonnet46", "opus-4.6": "opus46", + "qwen-3.6-plus": "qwen36p", "anthropic": "anth", + "openrouter": "or", } diff --git a/harness/run.py b/harness/run.py @@ -310,7 +310,9 @@ def invoke_claude(cell: dict, workspace: Path, run_dir: Path, project_dir: Path, run_env["ANTHROPIC_BASE_URL"] = provider_config["base_url"] else: run_env.pop("ANTHROPIC_BASE_URL", None) - if provider_config.get("api_key_env"): + if provider_config.get("auth_token"): + run_env["ANTHROPIC_AUTH_TOKEN"] = provider_config["auth_token"] + elif provider_config.get("api_key_env"): key = os.environ.get(provider_config["api_key_env"]) if key: run_env["ANTHROPIC_AUTH_TOKEN"] = key

Impressum · Datenschutz