commit 2f6ed75dc4e056efff1ea561d015f298ff63592b
parent 76fb10ff9eca33d3209b81e8d567d35ca2689dd9
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Tue, 7 Apr 2026 18:00:18 +0200
Add n= confidence indicators to Grid page
- Box plot: dashed/dimmed for models with <3 cells; axis label n= now counts runs, tooltip shows n= runs and cell count
- Top/Bottom 10: n= per bar, row dimmed when <3 runs, dashed left border for single-run (n=1) cells
- Grid table: n= per cell row, dimmed when <3 runs
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
3 files changed, 44 insertions(+), 12 deletions(-)
diff --git a/dashboard/src/components/Charts.tsx b/dashboard/src/components/Charts.tsx
@@ -26,6 +26,7 @@ interface BoxPlotData {
q3: number;
max: number;
cellCount: number;
+ runCount: number;
scores: number[];
// Derived fields for recharts stacked bar trick
base: number; // invisible bar height = q1
@@ -161,14 +162,16 @@ function aggregateByModel(runs: Run[]): BoxPlotData[] {
return sortedEntries.map(([model, modelCells]) => {
const scores = modelCells.map((c) => Math.round(c.avgScore * 100));
+ const totalRuns = modelCells.reduce((sum, c) => sum + c.runCount, 0);
const stats = computeBoxStats(scores);
const baseModel = model;
return {
- label: `${model}|(n=${modelCells.length})`,
+ label: `${model}|(n=${totalRuns})`,
...stats,
base: stats.q1,
iqr: stats.q3 - stats.q1,
cellCount: modelCells.length,
+ runCount: totalRuns,
scores,
color: getModelColor(baseModel),
};
@@ -184,7 +187,9 @@ function BoxPlotShape(props: any) {
};
if (!payload || height === undefined) return null;
- const { min, median, max, color } = payload;
+ const { min, median, max, color, cellCount } = payload;
+ const lowN = cellCount < 3;
+ const boxOpacity = lowN ? 0.4 : 1;
// The bar is rendered from q1 (base) with height iqr (q3-q1).
// y is the top of the bar (q3 in chart coords), y+height is the bottom (q1).
const boxTop = y;
@@ -206,15 +211,15 @@ function BoxPlotShape(props: any) {
const whiskerHalfW = width * 0.3;
return (
- <g>
+ <g opacity={boxOpacity}>
{/* Whisker line: min to max */}
<line x1={centerX} y1={minY} x2={centerX} y2={maxY} stroke={SMUI.muted} strokeWidth={1} />
{/* Min whisker cap */}
<line x1={centerX - whiskerHalfW} y1={minY} x2={centerX + whiskerHalfW} y2={minY} stroke={SMUI.muted} strokeWidth={1} />
{/* Max whisker cap */}
<line x1={centerX - whiskerHalfW} y1={maxY} x2={centerX + whiskerHalfW} y2={maxY} stroke={SMUI.muted} strokeWidth={1} />
- {/* Box (IQR) */}
- <rect x={x} y={boxTop} width={width} height={Math.max(height, 1)} fill={color} fillOpacity={0.3} stroke={color} strokeWidth={1} />
+ {/* Box (IQR) -- dashed stroke when low sample size */}
+ <rect x={x} y={boxTop} width={width} height={Math.max(height, 1)} fill={color} fillOpacity={0.3} stroke={color} strokeWidth={1} strokeDasharray={lowN ? "4 2" : undefined} />
{/* Median line */}
<line x1={x} y1={medianY} x2={x + width} y2={medianY} stroke={color} strokeWidth={2} />
</g>
@@ -230,7 +235,8 @@ function ModelBoxTooltipContent({ active, payload, label }: { active?: boolean;
const d = payload[0].payload;
return (
<div style={TOOLTIP_STYLE}>
- <div style={{ marginBottom: 4, fontWeight: 600 }}>{label}</div>
+ <div style={{ marginBottom: 4, fontWeight: 600 }}>{label?.split("|")[0]}</div>
+ <div style={{ marginBottom: 4, color: SMUI.muted, fontSize: 10 }}>n={d.runCount} runs across {d.cellCount} cells</div>
<div>Max: {d.max}%</div>
<div>Q3: {Math.round(d.q3)}%</div>
<div>Median: {Math.round(d.median)}%</div>
@@ -267,7 +273,12 @@ export default function Charts({ runs }: ChartsProps) {
return (
<div className="card">
<div style={{ display: "flex", justifyContent: "space-between", alignItems: "flex-start", marginBottom: "16px", flexWrap: "wrap", gap: "8px" }}>
- <h3 style={{ margin: 0 }}>Score Distribution by Model</h3>
+ <div>
+ <h3 style={{ margin: 0 }}>Score Distribution by Model</h3>
+ <div style={{ fontSize: "10px", color: "var(--text-muted, hsl(213 14% 65%))", fontFamily: "'JetBrains Mono', monospace", marginTop: "2px" }}>
+ (n={filteredRuns.length} runs across {modelData.reduce((sum, d) => sum + d.cellCount, 0)} cells)
+ </div>
+ </div>
<ModelSelector
allModels={allModels}
selectedModels={selectedModels}
diff --git a/dashboard/src/components/Grid.tsx b/dashboard/src/components/Grid.tsx
@@ -230,6 +230,7 @@ export default function Grid({ runs, axisValues, tasks }: GridProps) {
<td>
<div style={{ fontSize: "0.75rem", display: "flex", alignItems: "center", gap: "6px" }}>
<a href={`/c/${g.runs[0]?.meta.short_cell_id || g.cell_id}`} style={{ color: "var(--accent)", fontSize: "0.65rem", textTransform: "uppercase", letterSpacing: "0.5px", opacity: 0.7 }} title="View cell detail">cell</a>
+ <span style={{ color: "var(--text-muted)", fontSize: "0.6rem", fontFamily: "var(--font-mono)", opacity: g.runs.length < 3 ? 0.5 : 0.8 }}>n={g.runs.length}</span>
{g.runs.map((r, i) => (
<span key={r.meta.run_id}>
{i > 0 && " "}
diff --git a/dashboard/src/components/TopBottomConfigs.tsx b/dashboard/src/components/TopBottomConfigs.tsx
@@ -112,6 +112,8 @@ function BarRow({
const pct = Math.round(entry.avgScore * 100);
const barWidth = maxScore > 0 ? (entry.avgScore / maxScore) * 100 : 0;
const modelColor = getModelColor(entry.model);
+ const lowN = entry.runCount < 3;
+ const singleRun = entry.runCount === 1;
return (
<div
@@ -122,6 +124,7 @@ function BarRow({
marginBottom: 3,
fontFamily: "'JetBrains Mono', monospace",
fontSize: 11,
+ opacity: lowN ? 0.4 : 1,
}}
>
{/* Bar */}
@@ -153,7 +156,7 @@ function BarRow({
left: 0,
height: "100%",
width: `${barWidth}%`,
- borderLeft: `2px solid ${accentColor}`,
+ borderLeft: singleRun ? `2px dashed ${accentColor}` : `2px solid ${accentColor}`,
boxSizing: "border-box",
}}
/>
@@ -172,6 +175,19 @@ function BarRow({
</span>
</div>
+ {/* n= indicator */}
+ <span
+ style={{
+ color: SMUI.muted,
+ fontSize: 9,
+ flexShrink: 0,
+ minWidth: 22,
+ fontFamily: "'JetBrains Mono', monospace",
+ }}
+ >
+ n={entry.runCount}
+ </span>
+
{/* Model name */}
<span
style={{
@@ -217,9 +233,9 @@ function BarRow({
}
export default function TopBottomConfigs({ runs }: TopBottomConfigsProps) {
- const { top10, bottom10 } = useMemo(() => {
+ const { top10, bottom10, totalCells, totalRuns } = useMemo(() => {
const cells = groupIntoCells(runs);
- if (cells.length === 0) return { top10: [], bottom10: [] };
+ if (cells.length === 0) return { top10: [], bottom10: [], totalCells: 0, totalRuns: 0 };
const { varyingAxes, defaults } = computeDefaults(cells);
@@ -237,8 +253,9 @@ export default function TopBottomConfigs({ runs }: TopBottomConfigsProps) {
const top10 = entries.slice(0, 10);
const bottom10 = entries.slice(-10).reverse(); // worst first (lowest at bottom)
+ const totalRuns = entries.reduce((sum, e) => sum + e.runCount, 0);
- return { top10, bottom10 };
+ return { top10, bottom10, totalCells: entries.length, totalRuns };
}, [runs]);
if (top10.length === 0) {
@@ -264,7 +281,10 @@ export default function TopBottomConfigs({ runs }: TopBottomConfigsProps) {
return (
<div className="card">
- <h3 style={{ margin: "0 0 16px 0" }}>Best & Worst Configurations</h3>
+ <h3 style={{ margin: 0 }}>Best & Worst Configurations</h3>
+ <div style={{ fontSize: "10px", color: "var(--text-muted, hsl(213 14% 65%))", fontFamily: "'JetBrains Mono', monospace", marginTop: "2px", marginBottom: "16px" }}>
+ (n={totalRuns} runs across {totalCells} cells)
+ </div>
<div style={{ display: "flex", gap: 24, flexWrap: "wrap" }}>
{/* Top 10 */}
<div style={{ flex: 1, minWidth: 200 }}>