loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

commit 088b9fe8603b0e9272c25c577e87cdb8f5c48846
parent eb511dfb44ba548a2ec364c6005a6722e7ab2a56
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Sat,  4 Apr 2026 10:04:44 +0200

Fix BumpChart empty state, add HeatmapMatrix title

BumpChart:
- Filter dropdown to only show axes with 2+ condition values where at least one value has 2+ models with scored runs
- Smart default axis selection: use prompt_style when it is a valid axis, otherwise the first valid axis
- Show descriptive message when insufficient data instead of blank chart
- Verified via Playwright: chart renders with lines and data points

HeatmapMatrix:
- Added "Configuration Heatmap" title
- Dropdown interactivity confirmed working via Playwright testing

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M dashboard/src/components/BumpChart.tsx     | 53 ++++++++++++++++++++++++++++++++++++++++++++++++-----
M dashboard/src/components/HeatmapMatrix.tsx |  2 ++
2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/dashboard/src/components/BumpChart.tsx b/dashboard/src/components/BumpChart.tsx @@ -290,7 +290,30 @@ function CustomTooltipContent({ } export default function BumpChart({ runs }: BumpChartProps) { - const [selectedAxis, setSelectedAxis] = useState<AxisName>("prompt_style"); + // Pre-compute which axes are useful: need 2+ condition values AND 2+ models with scores + const validAxes = useMemo(() => { + const scoredRuns = runs.filter( + (r) => r.eval_results?.score !== null && r.eval_results?.score !== undefined + ); + return CONDITION_AXES.filter((axis) => { + const conditionValues = Array.from( + new Set(scoredRuns.map((r) => String(r.meta[axis]))) + ); + if (conditionValues.length < 2) return false; + // Check that at least one condition value has 2+ models with scores + for (const cv of conditionValues) { + const modelsWithScores = new Set( + scoredRuns.filter((r) => String(r.meta[axis]) === cv).map((r) => r.meta.model) + ); + if (modelsWithScores.size >= 2) return true; + } + return false; + }); + }, [runs]); + + const [selectedAxis, setSelectedAxis] = useState<AxisName>( + validAxes.includes("prompt_style") ? "prompt_style" : validAxes[0] ?? "prompt_style" + ); const { ranked, crossings, conditionValues, models } = useMemo(() => { const { ranked, crossings } = computeRankings(runs, selectedAxis); @@ -351,6 +374,25 @@ export default function BumpChart({ runs }: BumpChartProps) { ); } + if (validAxes.length === 0) { + return ( + <div className="card"> + <h3 style={{ margin: 0 }}>Model Rankings by Condition</h3> + <div + style={{ + textAlign: "center", + padding: "40px", + color: "var(--text-muted)", + fontSize: "0.8rem", + }} + > + Not enough data to compare models. Rankings need at least 2 condition + values where 2 or more models have scored runs. 
+ </div> + </div> + ); + } + return ( <div className="card"> <div @@ -398,7 +440,7 @@ export default function BumpChart({ runs }: BumpChartProps) { cursor: "pointer", }} > - {CONDITION_AXES.map((axis) => ( + {validAxes.map((axis) => ( <option key={axis} value={axis}> {AXIS_LABELS[axis]} </option> @@ -407,7 +449,7 @@ export default function BumpChart({ runs }: BumpChartProps) { </div> </div> - {conditionValues.length < 2 ? ( + {conditionValues.length < 2 || models.length < 2 ? ( <div style={{ textAlign: "center", @@ -416,8 +458,9 @@ export default function BumpChart({ runs }: BumpChartProps) { fontSize: "0.8rem", }} > - Need at least 2 values for "{AXIS_LABELS[selectedAxis]}" to show - rankings. Currently only: {conditionValues.join(", ") || "none"} + {models.length < 2 + ? `Need at least 2 models with scored runs for "${AXIS_LABELS[selectedAxis]}" to show rankings. Currently only: ${models.join(", ") || "none"}` + : `Need at least 2 values for "${AXIS_LABELS[selectedAxis]}" to show rankings. Currently only: ${conditionValues.join(", ") || "none"}`} </div> ) : ( <> diff --git a/dashboard/src/components/HeatmapMatrix.tsx b/dashboard/src/components/HeatmapMatrix.tsx @@ -119,6 +119,8 @@ export default function HeatmapMatrix({ runs }: HeatmapMatrixProps) { padding: "20px", }} > + <h3 style={{ margin: "0 0 16px" }}>Configuration Heatmap</h3> + {/* Axis selectors */} <div style={{

Impressum · Datenschutz