Add minimax-m2.7 and kimi-k2.5 via OpenRouter - loop-benchmarking - Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.

commit e59ff443edb659c9d21d3fef8d708bd29176f827
parent 7a1efd6efd6f2649b557db27eb8af49fd4795d6e
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Wed,  8 Apr 2026 07:09:29 +0200

Add minimax-m2.7 and kimi-k2.5 via OpenRouter

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M dashboard/src/lib/colors.ts  | 5 +++++
M grid.yaml  | 8 +++++++-
M harness/lib/compute_grid.py  | 2 ++

3 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/dashboard/src/lib/colors.ts b/dashboard/src/lib/colors.ts
@@ -28,6 +28,11 @@ export const MODEL_COLORS: Record<string, string> = {
   // Google
   "gemma-4-26b":  "hsl(60 50% 62%)",   // olive
 
+  // MiniMax
+  "minimax-m2.7": "hsl(340 50% 65%)",  // rose
+  // Moonshot
+  "kimi-k2.5":    "hsl(170 40% 55%)",  // teal
+
   // Future slots
   "slot-7":      "hsl(270 40% 68%)",  // lavender
   "slot-8":      "hsl(60 50% 62%)",   // olive
diff --git a/grid.yaml b/grid.yaml
@@ -9,7 +9,7 @@ defaults:
 
 axes:
   model:
-    values: ["haiku-4.5", "sonnet-4.6", "opus-4.6", "glm-4.5-air", "glm-4.7", "glm-5.1", "qwen-3.6-plus", "gemma-4-26b"]
+    values: ["haiku-4.5", "sonnet-4.6", "opus-4.6", "glm-4.5-air", "glm-4.7", "glm-5.1", "qwen-3.6-plus", "gemma-4-26b", "minimax-m2.7", "kimi-k2.5"]
   effort:
     values: [high, max]
   prompt_style:
@@ -72,6 +72,8 @@ providers:
     cli_model_map:
       "qwen-3.6-plus": "openrouter/qwen/qwen3.6-plus:free"
       "gemma-4-26b": "openrouter/google/gemma-4-26b-a4b-it"
+      "minimax-m2.7": "openrouter/minimax/minimax-m2.7"
+      "kimi-k2.5": "openrouter/moonshotai/kimi-k2.5"
 
 exclusions:
   # Haiku does not support extended thinking
@@ -98,7 +100,11 @@ exclusions:
   # OpenRouter models only with openrouter
   - when: { provider: anthropic, model: "qwen-3.6-plus" }
   - when: { provider: anthropic, model: "gemma-4-26b" }
+  - when: { provider: anthropic, model: "minimax-m2.7" }
+  - when: { provider: anthropic, model: "kimi-k2.5" }
   - when: { provider: zai, model: "gemma-4-26b" }
+  - when: { provider: zai, model: "minimax-m2.7" }
+  - when: { provider: zai, model: "kimi-k2.5" }
   - when: { provider: openrouter, model: "haiku-4.5" }
   - when: { provider: openrouter, model: "sonnet-4.6" }
   - when: { provider: openrouter, model: "opus-4.6" }
diff --git a/harness/lib/compute_grid.py b/harness/lib/compute_grid.py
@@ -74,6 +74,8 @@ VALUE_ABBREV = {
     "opus-4.6": "opus46",
     "qwen-3.6-plus": "qwen36p",
     "gemma-4-26b": "gemma426b",
+    "minimax-m2.7": "mmx27",
+    "kimi-k2.5": "kimi25",
     "anthropic": "anth",
     "openrouter": "or",
 }

	loop-benchmarking Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
	git clone https://git.shiptheloop.com/loop-benchmarking.git
	Log | Files | Refs | README

M	dashboard/src/lib/colors.ts	|	5	+++++
M	grid.yaml	|	8	+++++++-
M	harness/lib/compute_grid.py	|	2	++