loop-benchmarking

Controlled experiments across agentic coding configurations: same task, one variable changed at a time, to show what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

commit 9407eb22b3fcbb37681131bcbdcfcee5274d8afa
parent 2cebe2c3bd66b475f9445fa4305d91d173f6c5fe
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Sun,  5 Apr 2026 23:28:50 +0200

Show detailed score breakdowns on run page

Run detail now shows full breakdowns for all eval dimensions:

Gameplay Bot: all 16 test results (pass/fail with details), renderer
type, start mechanism, controls, pieces placed, lines cleared

Quality: lint (errors/warnings), typecheck (pass/errors), bundle size

Functional: pass/fail, error messages, individual test results

These join the existing structural checks, code analysis, and agent
behavior cards for a complete picture of why each score is what it is.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M dashboard/src/components/RunDetail.tsx | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 127 insertions(+), 0 deletions(-)

diff --git a/dashboard/src/components/RunDetail.tsx b/dashboard/src/components/RunDetail.tsx @@ -292,6 +292,133 @@ export default function RunDetail({ run, transcriptLines, axisValues, contextCon })()} </div> )} + + {/* Gameplay Bot details */} + {(eval_results as Record<string, any>)?.gameplay_bot && ( + <div className="card" style={{ padding: "16px" }}> + <h4 style={{ fontSize: "0.8rem", color: "var(--text-muted)", marginBottom: "8px" }}>Gameplay Bot</h4> + {(() => { + const gb = (eval_results as Record<string, any>).gameplay_bot; + const report = gb.report; + const tests = report?.tests as Array<{ name: string; pass: boolean; detail: string }> | undefined; + const impl = report?.implementation; + const gameplay = report?.gameplay; + return ( + <div style={{ fontSize: "0.7rem", display: "flex", flexDirection: "column", gap: "3px" }}> + {tests && tests.map((t, i) => ( + <div key={i} style={{ display: "flex", gap: "6px", marginBottom: "1px" }}> + <span style={{ color: t.pass ? "var(--green)" : "var(--red)", flexShrink: 0 }}> + {t.pass ? 
"+" : "-"} + </span> + <span style={{ fontFamily: "var(--font-mono)", flexShrink: 0 }}>{t.name}</span> + <span style={{ color: "var(--text-muted)", overflow: "hidden", textOverflow: "ellipsis", whiteSpace: "nowrap" }}>{t.detail}</span> + </div> + ))} + {(impl || gameplay) && ( + <div style={{ borderTop: "1px solid var(--border)", marginTop: "6px", paddingTop: "6px", display: "flex", flexDirection: "column", gap: "3px" }}> + {impl?.renderer && <Stat label="Renderer" value={impl.renderer} />} + {impl?.start_mechanism && <Stat label="Start" value={impl.start_mechanism} />} + {impl?.controls && <Stat label="Controls" value={Object.entries(impl.controls).map(([k, v]) => `${k}:${v}`).join(", ")} />} + {gameplay?.pieces_placed != null && <Stat label="Pieces placed" value={gameplay.pieces_placed} />} + {gameplay?.lines_cleared != null && <Stat label="Lines cleared" value={gameplay.lines_cleared} />} + {gameplay?.max_score_observed != null && <Stat label="Max score" value={gameplay.max_score_observed} />} + </div> + )} + {!tests && !report && ( + <Stat label="Score" value={`${Math.round((gb.score ?? 0) * 100)}%`} /> + )} + </div> + ); + })()} + </div> + )} + + {/* Quality details */} + {eval_results?.quality && ( + <div className="card" style={{ padding: "16px" }}> + <h4 style={{ fontSize: "0.8rem", color: "var(--text-muted)", marginBottom: "8px" }}>Quality</h4> + {(() => { + const q = eval_results.quality as Record<string, any>; + const lint = q.lint; + const tc = q.typecheck; + const perf = q.performance; + return ( + <div style={{ fontSize: "0.7rem", display: "flex", flexDirection: "column", gap: "3px" }}> + {lint && ( + <> + <div style={{ display: "flex", gap: "6px", marginBottom: "1px" }}> + <span style={{ color: lint.pass ? "var(--green)" : "var(--red)", flexShrink: 0 }}> + {lint.pass ? "+" : "-"} + </span> + <span style={{ fontFamily: "var(--font-mono)" }}>lint</span> + <span style={{ color: "var(--text-muted)" }}> + {lint.errors > 0 ? 
`${lint.errors} errors` : ""} + {lint.errors > 0 && lint.warnings > 0 ? ", " : ""} + {lint.warnings > 0 ? `${lint.warnings} warnings` : ""} + {lint.errors === 0 && lint.warnings === 0 ? "clean" : ""} + </span> + </div> + </> + )} + {tc && ( + <div style={{ display: "flex", gap: "6px", marginBottom: "1px" }}> + <span style={{ color: tc.pass ? "var(--green)" : "var(--red)", flexShrink: 0 }}> + {tc.pass ? "+" : "-"} + </span> + <span style={{ fontFamily: "var(--font-mono)" }}>typecheck</span> + <span style={{ color: "var(--text-muted)" }}> + {tc.errors ? `${tc.errors} errors` : tc.pass ? "clean" : "failed"} + </span> + </div> + )} + {perf && ( + <div style={{ borderTop: "1px solid var(--border)", marginTop: "6px", paddingTop: "6px", display: "flex", flexDirection: "column", gap: "3px" }}> + <Stat label="Bundle size" value={perf.bundle_size_bytes != null ? `${(perf.bundle_size_bytes / 1024).toFixed(1)} KB` : "N/A"} /> + {perf.size_under_512kb != null && <Stat label="Under 512 KB" value={perf.size_under_512kb ? "yes" : "no"} />} + </div> + )} + </div> + ); + })()} + </div> + )} + + {/* Functional details */} + {eval_results?.functional && ( + <div className="card" style={{ padding: "16px" }}> + <h4 style={{ fontSize: "0.8rem", color: "var(--text-muted)", marginBottom: "8px" }}>Functional</h4> + {(() => { + const fn = eval_results.functional as Record<string, any>; + return ( + <div style={{ fontSize: "0.7rem", display: "flex", flexDirection: "column", gap: "3px" }}> + <div style={{ display: "flex", gap: "6px", marginBottom: "1px" }}> + <span style={{ color: fn.pass ? "var(--green)" : "var(--red)", flexShrink: 0 }}> + {fn.pass ? "+" : "-"} + </span> + <span style={{ fontFamily: "var(--font-mono)" }}> + {fn.pass ? "passed" : "failed"} + </span> + </div> + {fn.error && ( + <div style={{ color: "var(--text-muted)", fontStyle: "italic" }}>{fn.error}</div> + )} + {fn.total != null && ( + <Stat label="Tests" value={`${fn.passed ?? 
0}/${fn.total} passed`} /> + )} + {fn.tests && (fn.tests as Array<{ name: string; pass: boolean; detail?: string }>).map((t, i) => ( + <div key={i} style={{ display: "flex", gap: "6px", marginBottom: "1px" }}> + <span style={{ color: t.pass ? "var(--green)" : "var(--red)", flexShrink: 0 }}> + {t.pass ? "+" : "-"} + </span> + <span style={{ fontFamily: "var(--font-mono)" }}>{t.name}</span> + {t.detail && <span style={{ color: "var(--text-muted)" }}>{t.detail}</span>} + </div> + ))} + </div> + ); + })()} + </div> + )} </div> {/* Bottom: transcript + artifact preview */}

Impressum · Datenschutz