commit 6b806ab1e52180e1c3fdf0cefba9d1c9e4abd7a9
parent 1cfbc6fbd0a1ee476d350fd22f9cb179ad50df71
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Mon, 6 Apr 2026 19:08:24 +0200
Use actual_model in cell_ids and dashboard display
Cell IDs now show the real model name (e.g. glm-4.5-air instead of haiku)
when using non-anthropic providers. The dashboard displays actual_model,
falling back to model when actual_model is missing or empty, in the grid
table, charts, and sort order. Migrated 2 existing zai runs.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
5 files changed, 42 insertions(+), 10 deletions(-)
diff --git a/dashboard/src/components/Charts.tsx b/dashboard/src/components/Charts.tsx
@@ -94,7 +94,7 @@ function aggregateCells(runs: Run[]): CellAggregate[] {
const cellId = run.meta.cell_id;
if (!byCell[cellId]) {
byCell[cellId] = {
- model: run.meta.model,
+ model: run.meta.actual_model || run.meta.model,
task: run.meta.task,
scores: [],
costs: [],
diff --git a/dashboard/src/components/Grid.tsx b/dashboard/src/components/Grid.tsx
@@ -31,7 +31,7 @@ function formatRunId(run: Run): React.ReactNode {
<span style={{ display: "inline-flex", gap: "4px", alignItems: "center", flexWrap: "wrap" }}>
<span className="badge badge-neutral" style={{ fontSize: "0.7rem" }}>{m.task}</span>
<span style={{ color: "var(--text-muted)", fontSize: "0.7rem" }}>
- {m.model} {m.prompt_style} {m.language}
+ {m.actual_model || m.model} {m.prompt_style} {m.language}
</span>
</span>
);
@@ -48,7 +48,7 @@ type SortKey = "task" | "model" | "effort" | "prompt" | "lang" | "score" | "cost
function getSortValue(run: Run, key: SortKey): string | number {
switch (key) {
case "task": return run.meta.task;
- case "model": return run.meta.model;
+ case "model": return run.meta.actual_model || run.meta.model;
case "effort": return run.meta.effort;
case "prompt": return run.meta.prompt_style;
case "lang": return run.meta.language;
@@ -246,7 +246,7 @@ export default function Grid({ runs, axisValues, tasks }: GridProps) {
return (budget > 0 && cost >= budget * 0.95) || r.meta.exit_code === 124;
}) && <span style={{ color: "var(--yellow)", marginLeft: "4px", fontSize: "0.65rem" }} title="Budget or time limit reached">!</span>}
</td>
- <td><span className="badge badge-neutral">{g.meta.model}</span></td>
+ <td><span className="badge badge-neutral">{g.meta.actual_model || g.meta.model}</span></td>
<td>{g.meta.effort}</td>
<td>{g.meta.prompt_style}</td>
<td>{g.meta.language}</td>
@@ -287,7 +287,7 @@ export default function Grid({ runs, axisValues, tasks }: GridProps) {
</a>
</td>
<td>{run.meta.task}</td>
- <td><span className="badge badge-neutral">{run.meta.model}</span></td>
+ <td><span className="badge badge-neutral">{run.meta.actual_model || run.meta.model}</span></td>
<td>{run.meta.effort}</td>
<td>{run.meta.prompt_style}</td>
<td>{run.meta.language}</td>
diff --git a/harness/lib/compute_grid.py b/harness/lib/compute_grid.py
@@ -66,6 +66,9 @@ VALUE_ABBREV = {
"lorem_25": "lor25",
"lorem_50": "lor50",
"lorem_75": "lor75",
+ "glm-4.5-air": "glm45air",
+ "glm-4.7": "glm47",
+ "anthropic": "anth",
}
@@ -146,8 +149,20 @@ def compute_cells(grid, profile_name):
if excluded:
continue
- # Build cell ID from task + abbreviated axis values (deterministic, filename-safe)
- cell_id_parts = [task] + [f"{AXIS_ABBREV.get(k, k)}={VALUE_ABBREV.get(str(cell[k]), cell[k])}" for k in axis_names]
+ # Resolve actual_model from provider config
+ provider_name = cell.get("provider", "anthropic")
+ providers_config = grid.get("providers", {})
+ model_map = (providers_config.get(provider_name) or {}).get("model_map", {})
+ actual_model = model_map.get(cell.get("model", ""), cell.get("model", ""))
+ cell["actual_model"] = actual_model
+
+ # Build cell ID using actual_model instead of model for clarity
+ cell_id_parts = [task]
+ for k in axis_names:
+ val = cell[k]
+ if k == "model":
+ val = actual_model # use resolved model name in cell_id
+ cell_id_parts.append(f"{AXIS_ABBREV.get(k, k)}={VALUE_ABBREV.get(str(val), val)}")
cell_id = "_".join(cell_id_parts)
# Resolve budget value
diff --git a/harness/lib/experiment_design.py b/harness/lib/experiment_design.py
@@ -447,10 +447,23 @@ def _is_excluded(cell, grid):
def _build_cell(task, cell, defaults, grid):
from compute_grid import AXIS_ABBREV, VALUE_ABBREV
axis_names = sorted(cell.keys())
- cell_id_parts = [task] + [f"{AXIS_ABBREV.get(k, k)}={VALUE_ABBREV.get(str(cell[k]), cell[k])}" for k in axis_names]
+
+ # Resolve actual_model from provider config
+ provider_name = cell.get("provider", "anthropic")
+ providers_config = grid.get("providers", {})
+ model_map = (providers_config.get(provider_name) or {}).get("model_map", {})
+ actual_model = model_map.get(cell.get("model", ""), cell.get("model", ""))
+
+ cell_id_parts = [task]
+ for k in axis_names:
+ val = cell[k]
+ if k == "model":
+ val = actual_model
+ cell_id_parts.append(f"{AXIS_ABBREV.get(k, k)}={VALUE_ABBREV.get(str(val), val)}")
result = dict(cell)
result["task"] = task
+ result["actual_model"] = actual_model
result["cell_id"] = "_".join(cell_id_parts)
result["runs_per_cell"] = defaults.get("runs_per_cell", 3)
result["timeout_seconds"] = defaults.get("timeout_seconds", 600)
diff --git a/harness/migrate-run-ids.py b/harness/migrate-run-ids.py
@@ -28,13 +28,17 @@ def compute_new_cell_id(meta: dict) -> str:
"task", "cell_id", "run_id", "run_number", "runs_per_cell",
"max_budget_usd", "timeout_seconds", "base_tools",
"started_at", "completed_at", "wall_time_seconds", "exit_code",
- "claude_version", "short_id", "short_cell_id",
+ "claude_version", "short_id", "short_cell_id", "actual_model",
}
axis_names = sorted(k for k in meta.keys() if k not in skip_keys)
+ actual_model = meta.get("actual_model", meta.get("model", ""))
parts = [task]
for k in axis_names:
+ val = meta[k]
+ if k == "model":
+ val = actual_model # use resolved model name
abbrev_key = AXIS_ABBREV.get(k, k)
- abbrev_val = VALUE_ABBREV.get(str(meta[k]), str(meta[k]))
+ abbrev_val = VALUE_ABBREV.get(str(val), str(val))
parts.append(f"{abbrev_key}={abbrev_val}")
return "_".join(parts)