commit 64445a2b95d17f200a03c19d19ed7294be1a9b89
parent b2013ff82112d8a2dcc2ae66b5079b774ac1e47b
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Fri, 3 Apr 2026 19:36:47 +0200
UI improvements: readable run IDs, run detail layout, config pills
- Grid: run IDs show only task + active config values, not the full string
- Run detail: stats bar at top, config as pills with inactive values
greyed out and active highlighted white, transcript left + scores right,
full-width layout, exit codes explained (0=Success, 124=Timeout, etc.)
- Transcript viewer: 80vh height instead of fixed 600px
- Harness: saves claude_version in run metadata
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
5 files changed, 343 insertions(+), 84 deletions(-)
diff --git a/dashboard/src/components/Grid.tsx b/dashboard/src/components/Grid.tsx
@@ -25,6 +25,20 @@ function formatCost(cost: number | null | undefined): string {
return "$" + cost.toFixed(2);
}
+/**
+ * Condenses a run ID into a short label for the grid.
+ *
+ * The ID is read as `<task>_<key>=<value>_..._run<N>`: the text after the
+ * last `_run` becomes `#<N>`, the segments before the first `key=value` pair
+ * form the task name, and each pair contributes only its value. Pairs whose
+ * value is exactly `off` or `none` are hidden. Segments after the task that
+ * carry no `=` are dropped.
+ *
+ * NOTE(review): the ID layout is inferred from how this function parses it;
+ * confirm against the harness code that builds run IDs.
+ *
+ * @param runId - Full run identifier.
+ * @returns `task values #N` joined by single spaces, omitting empty parts.
+ */
+function formatRunId(runId: string): string {
+  const RUN_MARKER = "_run";
+  const HIDDEN_VALUES = new Set(["off", "none"]);
+
+  // Cut at the LAST marker so a task such as "test_runner" keeps its config.
+  const markerAt = runId.lastIndexOf(RUN_MARKER);
+  const head = markerAt === -1 ? runId : runId.slice(0, markerAt);
+  const runNum =
+    markerAt === -1 ? "" : `#${runId.slice(markerAt + RUN_MARKER.length)}`;
+
+  const segments = head.split("_");
+
+  // Everything before the first key=value pair belongs to the task name, so
+  // tasks containing underscores stay intact. The first segment is always
+  // treated as (part of) the task, even if it happens to contain "=".
+  const firstPair = segments.findIndex((s) => s.includes("="));
+  const taskEnd = firstPair === -1 ? segments.length : Math.max(firstPair, 1);
+  const task = segments.slice(0, taskEnd).join("_");
+
+  const config = segments
+    .slice(taskEnd)
+    .map((s) => {
+      const eq = s.indexOf("=");
+      // Keep everything after the first "=" so values containing "=" survive.
+      return eq === -1 ? "" : s.slice(eq + 1);
+    })
+    // Exact match, so a value like "offline" is not mistaken for "off".
+    .filter((value) => value !== "" && !HIDDEN_VALUES.has(value));
+
+  return [task, ...config, runNum].filter((part) => part !== "").join(" ");
+}
+
function formatTime(seconds: number | null | undefined): string {
if (seconds === null || seconds === undefined) return "-";
if (seconds < 60) return seconds + "s";
@@ -86,10 +100,8 @@ export default function Grid({ runs, axisValues, tasks }: GridProps) {
filteredRuns.map((run) => (
<tr key={run.meta.run_id}>
<td>
- <a href={`/run/${run.meta.run_id}`}>
- {run.meta.run_id.length > 40
- ? run.meta.run_id.slice(0, 40) + "..."
- : run.meta.run_id}
+ <a href={`/run/${run.meta.run_id}`} style={{ fontSize: "0.75rem" }}>
+ {formatRunId(run.meta.run_id)}
</a>
</td>
<td>{run.meta.task}</td>
diff --git a/dashboard/src/components/RunDetail.tsx b/dashboard/src/components/RunDetail.tsx
@@ -1,16 +1,152 @@
-import type { Run } from "../lib/data";
+import type { Run, AxisName } from "../lib/data";
import TranscriptViewer from "./TranscriptViewer";
interface RunDetailProps {
run: Run;
transcriptLines: string[];
+ axisValues: Record<AxisName, string[]>;
}
-function ScoreBar({ label, score }: { label: string; score: number | null | undefined }) {
+/**
+ * Human-readable labels for process exit codes, keyed by the numeric code.
+ * ExitCodeBadge falls back to `Exit <code>` for any code not listed here.
+ */
+const EXIT_CODES: Record<number, string> = {
+ 0: "Success",
+ 1: "Error",
+ 2: "Misuse of shell command",
+ 124: "Timeout (exceeded time limit)",
+ 125: "Command failed to execute",
+ 126: "Command not executable",
+ 127: "Command not found",
+ 130: "Interrupted (SIGINT)",
+ 137: "Killed (SIGKILL / OOM)",
+ 143: "Terminated (SIGTERM)",
+};
+
+/**
+ * Axes rendered in the Configuration card, in display order.
+ * `key` is the property looked up on the run's meta and in `axisValues`;
+ * `label` is the text shown beside the pills. An axis whose meta value is
+ * missing or empty is not rendered.
+ *
+ * NOTE(review): the layout this replaces read the budget from
+ * `meta.max_budget_usd`; confirm meta also carries a `max_budget` field,
+ * otherwise the Budget row is skipped as empty.
+ */
+const AXIS_CONFIG: Array<{
+ key: string;
+ label: string;
+}> = [
+ { key: "model", label: "Model" },
+ { key: "effort", label: "Effort" },
+ { key: "prompt_style", label: "Prompt" },
+ { key: "language", label: "Language" },
+ { key: "human_language", label: "Human Lang" },
+ { key: "tool_read", label: "Read" },
+ { key: "tool_write", label: "Write" },
+ { key: "tool_edit", label: "Edit" },
+ { key: "tool_glob", label: "Glob" },
+ { key: "tool_grep", label: "Grep" },
+ { key: "linter", label: "Linter" },
+ { key: "playwright", label: "Playwright" },
+ { key: "context_file", label: "Context" },
+ { key: "sub_agents", label: "Sub-agents" },
+ { key: "web_search", label: "Web Search" },
+ { key: "max_budget", label: "Budget" },
+];
+
+/**
+ * Renders one configuration axis as a right-aligned label followed by one
+ * pill per known value. The pill equal to `activeValue` is highlighted in
+ * white; every other pill is dimmed.
+ *
+ * `axisKey` is part of the props but is not referenced when rendering.
+ */
+function ConfigPills({
+  axisKey,
+  label,
+  activeValue,
+  allValues,
+}: {
+  axisKey: string;
+  label: string;
+  activeValue: string;
+  allValues: string[];
+}) {
+  // Build the pills first so the active/inactive decision is made once each.
+  const pills = allValues.map((val) => {
+    const isActive = val === activeValue;
+    return (
+      <span
+        key={val}
+        style={{
+          padding: "2px 8px",
+          borderRadius: "4px",
+          fontSize: "0.7rem",
+          fontFamily: "var(--font-mono)",
+          background: isActive ? "rgba(255, 255, 255, 0.1)" : "transparent",
+          color: isActive ? "#fff" : "rgba(255, 255, 255, 0.2)",
+          border: isActive
+            ? "1px solid rgba(255, 255, 255, 0.3)"
+            : "1px solid rgba(255, 255, 255, 0.05)",
+        }}
+      >
+        {val}
+      </span>
+    );
+  });
+
+  return (
+    <div
+      style={{
+        display: "flex",
+        alignItems: "center",
+        gap: "8px",
+        marginBottom: "6px",
+      }}
+    >
+      <div
+        style={{
+          width: "90px",
+          fontSize: "0.7rem",
+          color: "var(--text-muted)",
+          textTransform: "uppercase",
+          letterSpacing: "0.03em",
+          textAlign: "right",
+          flexShrink: 0,
+        }}
+      >
+        {label}
+      </div>
+      <div style={{ display: "flex", gap: "4px", flexWrap: "wrap" }}>
+        {pills}
+      </div>
+    </div>
+  );
+}
+
+/**
+ * Shows a run's exit code as a large number with a short explanation
+ * underneath. Code 0 is drawn in green and every other code in red; when no
+ * code is available a muted "?" is rendered instead.
+ *
+ * @param code - Process exit code, or null/undefined when none was recorded.
+ */
+function ExitCodeBadge({ code }: { code: number | null | undefined }) {
+  // `== null` matches both null and undefined, which the prop type now admits.
+  if (code == null) {
+    return <span style={{ color: "var(--text-muted)" }}>?</span>;
+  }
+
+  // `??` so that only a missing table entry triggers the generic fallback.
+  const label = EXIT_CODES[code] ?? `Exit ${code}`;
+  const tone = code === 0 ? "var(--green)" : "var(--red)";
+
+  return (
+    <div>
+      <span
+        style={{
+          fontFamily: "var(--font-mono)",
+          fontWeight: 700,
+          fontSize: "1.75rem",
+          color: tone,
+        }}
+      >
+        {code}
+      </span>
+      <div
+        style={{
+          fontSize: "0.7rem",
+          color: tone,
+          opacity: 0.8,
+        }}
+      >
+        {label}
+      </div>
+    </div>
+  );
+}
+
+function ScoreBar({
+ label,
+ score,
+}: {
+ label: string;
+ score: number | null | undefined;
+}) {
if (score === null || score === undefined) {
return (
<div style={{ marginBottom: "8px" }}>
- <div style={{ display: "flex", justifyContent: "space-between", fontSize: "0.8rem", marginBottom: "4px" }}>
+ <div
+ style={{
+ display: "flex",
+ justifyContent: "space-between",
+ fontSize: "0.8rem",
+ marginBottom: "4px",
+ }}
+ >
<span>{label}</span>
<span style={{ color: "var(--text-muted)" }}>N/A</span>
</div>
@@ -19,61 +155,61 @@ function ScoreBar({ label, score }: { label: string; score: number | null | unde
}
const pct = Math.round(score * 100);
- const color = pct >= 70 ? "var(--green)" : pct >= 40 ? "var(--yellow)" : "var(--red)";
+ const color =
+ pct >= 70 ? "var(--green)" : pct >= 40 ? "var(--yellow)" : "var(--red)";
return (
<div style={{ marginBottom: "8px" }}>
- <div style={{ display: "flex", justifyContent: "space-between", fontSize: "0.8rem", marginBottom: "4px" }}>
+ <div
+ style={{
+ display: "flex",
+ justifyContent: "space-between",
+ fontSize: "0.8rem",
+ marginBottom: "4px",
+ }}
+ >
<span>{label}</span>
- <span style={{ fontFamily: "var(--font-mono)", fontWeight: 600, color }}>{pct}%</span>
+ <span
+ style={{ fontFamily: "var(--font-mono)", fontWeight: 600, color }}
+ >
+ {pct}%
+ </span>
</div>
- <div style={{ background: "var(--bg)", borderRadius: "4px", height: "6px", overflow: "hidden" }}>
- <div style={{ width: `${pct}%`, height: "100%", background: color, borderRadius: "4px" }} />
+ <div
+ style={{
+ background: "var(--bg)",
+ borderRadius: "4px",
+ height: "6px",
+ overflow: "hidden",
+ }}
+ >
+ <div
+ style={{
+ width: `${pct}%`,
+ height: "100%",
+ background: color,
+ borderRadius: "4px",
+ }}
+ />
</div>
</div>
);
}
-export default function RunDetail({ run, transcriptLines }: RunDetailProps) {
+export default function RunDetail({
+ run,
+ transcriptLines,
+ axisValues,
+}: RunDetailProps) {
const { meta, eval_results, claude_output } = run;
return (
<div style={{ display: "flex", flexDirection: "column", gap: "24px" }}>
- {/* Config */}
- <div className="card">
- <h3 style={{ marginBottom: "12px" }}>Configuration</h3>
- <div style={{ display: "grid", gridTemplateColumns: "repeat(auto-fill, minmax(160px, 1fr))", gap: "12px" }}>
- {[
- ["Task", meta.task],
- ["Model", meta.model],
- ["Effort", meta.effort],
- ["Prompt", meta.prompt_style],
- ["Language", meta.language],
- ["Human Lang", meta.human_language],
- ["Linter", meta.linter],
- ["Playwright", meta.playwright],
- ["Context", meta.context_file],
- ["Sub-agents", meta.sub_agents],
- ["Web Search", meta.web_search],
- ["Budget", `$${meta.max_budget_usd}`],
- ].map(([label, value]) => (
- <div key={label as string}>
- <div style={{ fontSize: "0.7rem", color: "var(--text-muted)", textTransform: "uppercase" }}>
- {label}
- </div>
- <div style={{ fontFamily: "var(--font-mono)", fontSize: "0.875rem" }}>{value}</div>
- </div>
- ))}
- </div>
- </div>
-
- {/* Metrics */}
+ {/* Top bar: stats */}
<div className="stats-grid">
<div className="stat-card">
- <div className="stat-value" style={{ color: meta.exit_code === 0 ? "var(--green)" : "var(--red)" }}>
- {meta.exit_code === 0 ? "OK" : `Exit ${meta.exit_code ?? "?"}`}
- </div>
- <div className="stat-label">Status</div>
+ <ExitCodeBadge code={meta.exit_code} />
+ <div className="stat-label">Exit Code</div>
</div>
<div className="stat-card">
<div className="stat-value">
@@ -87,7 +223,9 @@ export default function RunDetail({ run, transcriptLines }: RunDetailProps) {
</div>
<div className="stat-card">
<div className="stat-value">
- {claude_output?.total_cost_usd != null ? `$${claude_output.total_cost_usd.toFixed(2)}` : "-"}
+ {claude_output?.total_cost_usd != null
+ ? `$${claude_output.total_cost_usd.toFixed(2)}`
+ : "-"}
</div>
<div className="stat-label">Cost</div>
</div>
@@ -98,42 +236,124 @@ export default function RunDetail({ run, transcriptLines }: RunDetailProps) {
<div className="stat-card">
<div className="stat-value">
{claude_output?.usage
- ? ((claude_output.usage.input_tokens ?? 0) + (claude_output.usage.output_tokens ?? 0)).toLocaleString()
+ ? (
+ (claude_output.usage.input_tokens ?? 0) +
+ (claude_output.usage.output_tokens ?? 0)
+ ).toLocaleString()
: "-"}
</div>
<div className="stat-label">Total Tokens</div>
</div>
</div>
- {/* Scores */}
- {eval_results && (
- <div className="card">
- <h3 style={{ marginBottom: "16px" }}>Evaluation Scores</h3>
- <ScoreBar label="Overall" score={eval_results.score} />
- <ScoreBar label="Structural" score={eval_results.structural?.score} />
- <ScoreBar label="Functional" score={eval_results.functional?.score} />
- <ScoreBar label="Quality" score={eval_results.quality?.score} />
-
- {/* Check details */}
- {eval_results.structural?.checks && (
- <div style={{ marginTop: "16px" }}>
- <h4 style={{ fontSize: "0.8rem", color: "var(--text-muted)", marginBottom: "8px" }}>
+ {/* Config pills */}
+ <div className="card">
+ <h3 style={{ marginBottom: "12px" }}>Configuration</h3>
+ {AXIS_CONFIG.map(({ key, label }) => {
+ const active = String(
+ (meta as Record<string, unknown>)[key] ?? ""
+ );
+ const all = (axisValues as Record<string, string[]>)[key] || [
+ active,
+ ];
+ if (!active) return null;
+ return (
+ <ConfigPills
+ key={key}
+ axisKey={key}
+ label={label}
+ activeValue={active}
+ allValues={all}
+ />
+ );
+ })}
+ </div>
+
+ {/* Two-column: transcript left, scores right */}
+ <div
+ style={{
+ display: "grid",
+ gridTemplateColumns: "1fr 360px",
+ gap: "24px",
+ alignItems: "start",
+ }}
+ >
+ {/* Transcript */}
+ <TranscriptViewer lines={transcriptLines} />
+
+ {/* Scores sidebar */}
+ <div
+ style={{
+ display: "flex",
+ flexDirection: "column",
+ gap: "16px",
+ position: "sticky",
+ top: "16px",
+ }}
+ >
+ {eval_results && (
+ <div className="card">
+ <h3 style={{ marginBottom: "16px" }}>Scores</h3>
+ <ScoreBar label="Overall" score={eval_results.score} />
+ <ScoreBar
+ label="Structural"
+ score={eval_results.structural?.score}
+ />
+ <ScoreBar
+ label="Functional"
+ score={eval_results.functional?.score}
+ />
+ <ScoreBar label="Quality" score={eval_results.quality?.score} />
+ </div>
+ )}
+
+ {eval_results?.structural?.checks && (
+ <div className="card">
+ <h4
+ style={{
+ fontSize: "0.8rem",
+ color: "var(--text-muted)",
+ marginBottom: "8px",
+ }}
+ >
Structural Checks
</h4>
- {eval_results.structural.checks.map((check, i) => (
- <div key={i} style={{ display: "flex", gap: "8px", fontSize: "0.8rem", marginBottom: "4px" }}>
- <span>{check.pass ? "+" : "-"}</span>
- <span>{check.name}</span>
- <span style={{ color: "var(--text-muted)" }}>{check.detail}</span>
- </div>
- ))}
+ {eval_results.structural.checks.map(
+ (
+ check: { pass: boolean; name: string; detail: string },
+ i: number
+ ) => (
+ <div
+ key={i}
+ style={{
+ display: "flex",
+ gap: "8px",
+ fontSize: "0.75rem",
+ marginBottom: "4px",
+ alignItems: "baseline",
+ }}
+ >
+ <span
+ style={{
+ color: check.pass ? "var(--green)" : "var(--red)",
+ flexShrink: 0,
+ }}
+ >
+ {check.pass ? "+" : "-"}
+ </span>
+ <span style={{ fontFamily: "var(--font-mono)" }}>
+ {check.name}
+ </span>
+ <span style={{ color: "var(--text-muted)" }}>
+ {check.detail}
+ </span>
+ </div>
+ )
+ )}
</div>
)}
</div>
- )}
-
- {/* Transcript */}
- <TranscriptViewer lines={transcriptLines} />
+ </div>
</div>
);
}
diff --git a/dashboard/src/components/TranscriptViewer.tsx b/dashboard/src/components/TranscriptViewer.tsx
@@ -164,8 +164,8 @@ export default function TranscriptViewer({ lines }: TranscriptViewerProps) {
.filter(Boolean);
return (
- <div className="card" style={{ maxHeight: "600px", overflow: "auto" }}>
- <h3 style={{ marginBottom: "16px", position: "sticky", top: 0, background: "var(--bg-card)", paddingBottom: "8px" }}>
+ <div className="card" style={{ maxHeight: "80vh", overflow: "auto" }}>
+ <h3 style={{ marginBottom: "16px", position: "sticky", top: 0, background: "var(--bg-card)", paddingBottom: "8px", zIndex: 1 }}>
Transcript ({events.length} events)
</h3>
{events.map((event, i) => renderEvent(event, i))}
diff --git a/dashboard/src/pages/run/[id].astro b/dashboard/src/pages/run/[id].astro
@@ -1,30 +1,46 @@
---
import Base from "../../layouts/Base.astro";
-import { loadAllRuns, loadTranscript } from "../../lib/data";
+import { loadAllRuns, loadTranscript, getAxisValues } from "../../lib/data";
import RunDetail from "../../components/RunDetail";
+// Axis values are computed in getStaticPaths() and reach this page through
+// Astro.props, so no module-level load of all runs is needed here.
+
export function getStaticPaths() {
const runs = loadAllRuns();
+ const axisValues = getAxisValues(runs);
return runs.map((run) => ({
params: { id: run.meta.run_id },
- props: { run },
+ props: { run, axisValues },
}));
}
-const { run } = Astro.props;
+const { run, axisValues: av } = Astro.props;
const transcriptLines = loadTranscript(run.meta.run_id);
+
+// Format run ID for display
+const parts = run.meta.run_id.split("_run");
+const runNum = parts.length > 1 ? `Run #${parts[parts.length - 1]}` : "";
---
-<Base title={`Run: ${run.meta.run_id}`}>
+<Base title={`${run.meta.task} - ${run.meta.model} - ${run.meta.prompt_style}`}>
<div style="margin-bottom: 16px;">
<a href="/" style="font-size: 0.875rem;">Back to Grid</a>
</div>
- <h1 style="margin-bottom: 4px; font-size: 1.25rem; font-family: var(--font-mono);">
- {run.meta.run_id}
- </h1>
- <p style="color: var(--text-muted); margin-bottom: 24px; font-size: 0.875rem;">
- Run #{run.meta.run_number} - {run.meta.completed_at || "in progress"}
+ <div style="display: flex; align-items: baseline; gap: 12px; margin-bottom: 4px;">
+ <h1 style="font-size: 1.5rem;">{run.meta.task}</h1>
+ <span style="color: var(--text-muted); font-size: 0.875rem;">{runNum}</span>
+ </div>
+ <p style="color: var(--text-muted); margin-bottom: 24px; font-size: 0.8rem;">
+ {run.meta.completed_at || "in progress"}
</p>
- <RunDetail client:load run={run} transcriptLines={transcriptLines} />
+ <RunDetail client:load run={run} transcriptLines={transcriptLines} axisValues={av} />
</Base>
+
+<style>
+ :global(.container) {
+ max-width: none !important;
+ padding: 0 32px !important;
+ }
+</style>
diff --git a/harness/run.py b/harness/run.py
@@ -293,6 +293,16 @@ def main():
print("ERROR: claude CLI not found in PATH.")
sys.exit(1)
+ # Capture claude version for metadata
+ claude_version = "unknown"
+ try:
+ result = subprocess.run(
+ ["claude", "--version"], capture_output=True, text=True, timeout=5
+ )
+ claude_version = result.stdout.strip() or result.stderr.strip()
+ except Exception:
+ pass
+
print("=" * 40)
print("Loop Benchmarking Harness")
print("=" * 40)
@@ -354,6 +364,7 @@ def main():
**cell,
"run_id": run_id,
"run_number": run_num,
+ "claude_version": claude_version,
"started_at": datetime.now(timezone.utc).isoformat(),
}
(run_dir / "meta.json").write_text(json.dumps(meta, indent=2))