commit 7925fb44e0bb06e82b6284b9aae10888b58a0128
parent c846bee44baf5e45fbb78d3b92494c10e718c9cb
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Tue, 7 Apr 2026 17:14:54 +0200
Add variability violin chart to Compare page
Box plot of per-cell CV% (coefficient of variation of scores), grouped by
model, with jittered dots overlaid for the individual cells. Uses the shared
color palette. Lower CV% = more consistent.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
2 files changed, 403 insertions(+), 0 deletions(-)
diff --git a/dashboard/src/components/VariabilityViolin.tsx b/dashboard/src/components/VariabilityViolin.tsx
@@ -0,0 +1,398 @@
+import { useMemo } from "react";
+import { getModelColor, modelSortOrder } from "../lib/colors";
+import {
+ ComposedChart,
+ XAxis,
+ YAxis,
+ CartesianGrid,
+ Tooltip,
+ ResponsiveContainer,
+ Scatter,
+ Cell,
+ ZAxis,
+ Bar,
+} from "recharts";
+import type { Run } from "../lib/types";
+import { groupIntoCells } from "../lib/analysis";
+
+/** Props accepted by the VariabilityViolin chart component. */
+interface VariabilityViolinProps {
+ /** Runs to analyse; they are grouped into cells before per-cell CV% is computed. */
+ runs: Run[];
+}
+
+// Theme color tokens, expressed as CSS hsl() strings, used for the chart chrome
+// in this file (tooltip background/border, grid lines, axis and caption text,
+// whisker strokes, default scatter fill). Per-model series colors do NOT come
+// from here; they are looked up with getModelColor.
+// Only surface1, border, muted and frost2 are referenced in this file.
+const SMUI = {
+ surface0: "hsl(213 16% 12%)",
+ surface1: "hsl(217 16% 15.5%)",
+ surface2: "hsl(216 15% 19%)",
+ border: "hsl(217 17% 28%)",
+ muted: "hsl(213 14% 65%)",
+ frost1: "hsl(176 25% 65%)",
+ frost2: "hsl(193 44% 67%)",
+ frost3: "hsl(210 34% 63%)",
+ frost4: "hsl(213 32% 52%)",
+ green: "hsl(92 28% 65%)",
+ yellow: "hsl(40 71% 73%)",
+ red: "hsl(355 52% 64%)",
+ purple: "hsl(311 24% 63%)",
+};
+
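+// Per-model colors and sort order are not defined here; they come from the
+// shared palette helpers getModelColor / modelSortOrder in ../lib/colors.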
+
+// Inline style for the custom tooltip container rendered by CVTooltipContent:
+// square-cornered panel on the surface1 color with a 1px border-colored outline
+// and small monospace text.
+const TOOLTIP_STYLE: React.CSSProperties = {
+ background: SMUI.surface1,
+ border: `1px solid ${SMUI.border}`,
+ borderRadius: "0",
+ fontFamily: "'JetBrains Mono', monospace",
+ fontSize: "11px",
+ padding: "8px 12px",
+};
+
+/**
+ * Box-plot summary of per-cell CV% values for one model; one of these is
+ * produced per model by computeModelCV and used as a chart data row.
+ */
+interface ModelCVData {
+ /** X-axis category key in the form `<model>|(n=<cellCount>)`; the tick renderer splits it on "|". */
+ label: string;
+ /** Model name the cells were grouped under. */
+ model: string;
+ /** Per-cell CV% values for this model, in the order the cells were encountered (not sorted). */
+ cvValues: number[];
+ /** Smallest CV% in cvValues. */
+ min: number;
+ /** First quartile (25th percentile, linearly interpolated) of cvValues. */
+ q1: number;
+ /** Median (50th percentile, linearly interpolated) of cvValues. */
+ median: number;
+ /** Third quartile (75th percentile, linearly interpolated) of cvValues. */
+ q3: number;
+ /** Largest CV% in cvValues. */
+ max: number;
+ /** Equal to q1; plotted as a transparent stacked bar so the visible box starts at q1. */
+ base: number;
+ /** Interquartile range, q3 - q1; plotted as the visible box stacked on top of `base`. */
+ iqr: number;
+ /** Series color returned by getModelColor for this model. */
+ color: string;
+ /** Number of cells that contributed a CV% value (equals cvValues.length). */
+ cellCount: number;
+}
+
+/** One dot of the scatter overlay: the CV% of a single cell, tagged with its model's category. */
+interface ScatterPoint {
+ /** Category key of the owning model; same string as ModelCVData.label. */
+ label: string;
+ /** The cell's coefficient of variation, in percent. */
+ cv: number;
+ /** Color of the owning model. */
+ color: string;
+ /** Deterministic signed horizontal offset factor, between -0.30 and +0.30 in steps of 0.06. */
+ jitter: number;
+}
+
+/**
+ * Returns the q-th quantile of an ascending-sorted array using linear
+ * interpolation between the two nearest ranks.
+ *
+ * @param sorted Values already sorted in ascending order (not re-sorted here).
+ * @param q Quantile as a fraction, e.g. 0.25 for the first quartile.
+ * @returns 0 for an empty array, the sole element for a one-element array,
+ *   otherwise the interpolated value at fractional rank q * (length - 1).
+ */
+function quantile(sorted: number[], q: number): number {
+  const count = sorted.length;
+  if (count === 0) return 0;
+  if (count === 1) return sorted[0];
+
+  // Fractional rank inside [0, count - 1]; its neighbours bracket the result.
+  const rank = q * (count - 1);
+  const below = Math.floor(rank);
+  const above = Math.ceil(rank);
+  const lower = sorted[below];
+
+  // The rank landed exactly on an element, so there is nothing to interpolate.
+  if (below === above) return lower;
+  return lower + (rank - below) * (sorted[above] - lower);
+}
+
+/**
+ * Builds the per-model box-plot statistics of score variability.
+ *
+ * Runs are grouped into cells with groupIntoCells. For every cell that has at
+ * least two runs and at least two finite scores, the coefficient of variation
+ * is computed as (population standard deviation / |mean|) * 100. The CV values
+ * are bucketed by model (`meta.actual_model` when set, otherwise `meta.model`)
+ * and each bucket is summarised as min / q1 / median / q3 / max.
+ *
+ * Cells are skipped when they have fewer than two usable scores or when the
+ * mean score is exactly 0 (CV is undefined there).
+ *
+ * @param runs Runs to analyse.
+ * @returns One entry per model that has at least one usable cell, ordered by
+ *   modelSortOrder and then alphabetically by model name.
+ */
+function computeModelCV(runs: Run[]): ModelCVData[] {
+  // A Map, not a plain object: a model named e.g. "constructor" would otherwise
+  // resolve to an inherited Object.prototype member instead of a fresh bucket.
+  const cvsByModel = new Map<string, number[]>();
+
+  for (const cell of groupIntoCells(runs)) {
+    if (cell.n < 2) continue;
+
+    // Keep finite numbers only: a single NaN/Infinity score would otherwise
+    // propagate into the CV, the sort, the quartiles and the axis maximum.
+    const scores = cell.runs
+      .map((r) => r.eval_results?.score)
+      .filter((s): s is number => typeof s === "number" && Number.isFinite(s));
+    if (scores.length < 2) continue;
+
+    const mean = scores.reduce((a, b) => a + b, 0) / scores.length;
+    if (mean === 0) continue;
+
+    // Population variance (divides by n, not n - 1).
+    const variance =
+      scores.reduce((acc, v) => acc + (v - mean) ** 2, 0) / scores.length;
+
+    // Divide by |mean| so a negative mean cannot yield a negative CV, which
+    // would fall below the chart's 0-based value axis.
+    const cv = (Math.sqrt(variance) / Math.abs(mean)) * 100;
+
+    const model = cell.meta.actual_model || cell.meta.model;
+    const bucket = cvsByModel.get(model);
+    if (bucket) {
+      bucket.push(cv);
+    } else {
+      cvsByModel.set(model, [cv]);
+    }
+  }
+
+  return Array.from(cvsByModel.entries())
+    .sort(
+      ([a], [b]) =>
+        modelSortOrder(a) - modelSortOrder(b) || a.localeCompare(b)
+    )
+    .map(([model, cvs]) => {
+      // Sort a copy so cvValues keeps the encounter order.
+      const sorted = [...cvs].sort((a, b) => a - b);
+      const q1 = quantile(sorted, 0.25);
+      const q3 = quantile(sorted, 0.75);
+      return {
+        label: `${model}|(n=${cvs.length})`,
+        model,
+        cvValues: cvs,
+        min: sorted[0],
+        q1,
+        median: quantile(sorted, 0.5),
+        q3,
+        max: sorted[sorted.length - 1],
+        // base + iqr are the two stacked bar segments: invisible 0..q1, visible q1..q3.
+        base: q1,
+        iqr: q3 - q1,
+        color: getModelColor(model),
+        cellCount: cvs.length,
+      };
+    });
+}
+
+/**
+ * Flattens the per-model CV lists into one list of scatter points.
+ *
+ * Within each model the CV values are visited in ascending order (on a copy,
+ * so the input is not mutated) and each point gets a deterministic jitter
+ * factor derived from its rank: the sign alternates (even rank positive, odd
+ * rank negative) and the magnitude cycles through 0.06, 0.12, ... 0.30.
+ *
+ * @param data Per-model summaries as produced by computeModelCV.
+ * @returns Points in model order, ascending by CV within each model.
+ */
+function buildScatterData(data: ModelCVData[]): ScatterPoint[] {
+  return data.flatMap(({ label, color, cvValues }) =>
+    cvValues
+      .slice()
+      .sort((a, b) => a - b)
+      .map((cv, rank): ScatterPoint => {
+        const direction = rank % 2 === 0 ? 1 : -1;
+        const step = (rank % 5) + 1;
+        return { label, cv, color, jitter: direction * (step * 0.06) };
+      })
+  );
+}
+
+/**
+ * Custom shape for the stacked "iqr" bar: draws one box-and-whisker glyph
+ * (min-max whisker with end caps, q1-q3 box, median line) for a model.
+ *
+ * On entry `y` is the pixel row of q3 and `y + height` the pixel row of q1,
+ * because the bar being replaced spans q1..q3 on the value axis. Every other
+ * statistic is placed by linear extrapolation from those two rows.
+ *
+ * When q1 === q3 the box has zero height and carries no scale information.
+ * In that case the scale is derived from the distance between the q1 row and
+ * the bottom of the plot area instead, so the whiskers are still drawn.
+ * NOTE(review): this relies on the charting library passing the plot-area
+ * rectangle as `background` to bar shapes, and on the value axis being linear
+ * and starting at 0 (as the YAxis domain in this file configures) - confirm
+ * against the installed Recharts version. If `background` is missing or
+ * q1 is 0, the whiskers collapse onto the box as before.
+ *
+ * @param props Shape props injected by the chart: x, y, width, height,
+ *   payload (the ModelCVData row) and, optionally, background.
+ * @returns An SVG group, or null when the payload or height is missing.
+ */
+function CVBoxPlotShape(props: any) {
+  const { x, y, width, height, payload, background } = props as {
+    x: number;
+    y: number;
+    width: number;
+    height: number;
+    payload: ModelCVData;
+    background?: { y?: number; height?: number };
+  };
+  if (!payload || height === undefined) return null;
+
+  const { min, median, max, q1, q3, color } = payload;
+  const boxTop = y;
+  const boxBottom = y + height;
+  const centerX = x + width / 2;
+
+  // Pixel row of value 0, i.e. the bottom edge of the plot area, if known.
+  const zeroY =
+    background &&
+    typeof background.y === "number" &&
+    typeof background.height === "number"
+      ? background.y + background.height
+      : undefined;
+
+  // Pixels per CV% unit. Normally read off the box itself; for a zero-height
+  // box fall back to the q1-to-baseline distance, or 0 (collapsed) if unknown.
+  let pxPerUnit = 0;
+  if (q3 !== q1) {
+    pxPerUnit = (boxBottom - boxTop) / (q3 - q1);
+  } else if (zeroY !== undefined && q1 > 0) {
+    pxPerUnit = (zeroY - boxBottom) / q1;
+  }
+
+  // SVG y grows downwards, so larger values map to smaller y.
+  const dataToY = (val: number): number => boxTop + (q3 - val) * pxPerUnit;
+
+  const minY = dataToY(min);
+  const maxY = dataToY(max);
+  const medianY = dataToY(median);
+  const whiskerHalfW = width * 0.3;
+
+  return (
+    <g>
+      {/* Whisker line: min to max */}
+      <line
+        x1={centerX}
+        y1={minY}
+        x2={centerX}
+        y2={maxY}
+        stroke={SMUI.muted}
+        strokeWidth={1}
+      />
+      {/* Min whisker cap */}
+      <line
+        x1={centerX - whiskerHalfW}
+        y1={minY}
+        x2={centerX + whiskerHalfW}
+        y2={minY}
+        stroke={SMUI.muted}
+        strokeWidth={1}
+      />
+      {/* Max whisker cap */}
+      <line
+        x1={centerX - whiskerHalfW}
+        y1={maxY}
+        x2={centerX + whiskerHalfW}
+        y2={maxY}
+        stroke={SMUI.muted}
+        strokeWidth={1}
+      />
+      {/* Box (IQR); at least 1px tall so a zero IQR is still visible */}
+      <rect
+        x={x}
+        y={boxTop}
+        width={width}
+        height={Math.max(height, 1)}
+        fill={color}
+        fillOpacity={0.3}
+        stroke={color}
+        strokeWidth={1}
+      />
+      {/* Median line */}
+      <line
+        x1={x}
+        y1={medianY}
+        x2={x + width}
+        y2={medianY}
+        stroke={color}
+        strokeWidth={2}
+      />
+    </g>
+  );
+}
+
+/**
+ * Tooltip body listing the box-plot statistics of the hovered model.
+ *
+ * Renders nothing when the tooltip is inactive, when there is no payload, or
+ * when the first payload entry has no `model` field (which is the case for
+ * scatter points, as they are not ModelCVData rows).
+ */
+function CVTooltipContent({
+  active,
+  payload,
+}: {
+  active?: boolean;
+  payload?: Array<{ payload: ModelCVData }>;
+  label?: string;
+}) {
+  if (!(active && payload && payload.length > 0)) return null;
+
+  const { model, cellCount, min, q1, median, q3, max } = payload[0].payload;
+  if (!model) return null;
+
+  // All statistics are shown with one decimal place.
+  const pct = (value: number): string => value.toFixed(1);
+
+  return (
+    <div style={TOOLTIP_STYLE}>
+      <div style={{ marginBottom: 4, fontWeight: 600 }}>{model}</div>
+      <div>Cells: {cellCount}</div>
+      <div>Max CV: {pct(max)}%</div>
+      <div>Q3: {pct(q3)}%</div>
+      <div>Median: {pct(median)}%</div>
+      <div>Q1: {pct(q1)}%</div>
+      <div>Min CV: {pct(min)}%</div>
+    </div>
+  );
+}
+
+/** Width in px of each box; jitter factors are scaled by it so dots stay inside the box. */
+const BOX_WIDTH_PX = 40;
+
+/**
+ * Scatter shape that draws one dot shifted sideways by its jitter factor.
+ *
+ * The jitter factor (at most +/-0.30) is multiplied by the box width, so the
+ * dot moves at most 12px from the category centre and stays inside the box.
+ * The radius of 4px matches the symbol area of 50 set on the ZAxis.
+ * NOTE(review): assumes the chart passes cx, cy, fill and payload to scatter
+ * shapes - confirm against the installed Recharts version.
+ */
+function JitteredDot(props: any) {
+  const { cx, cy, fill, payload } = props as {
+    cx?: number;
+    cy?: number;
+    fill?: string;
+    payload?: ScatterPoint;
+  };
+  if (typeof cx !== "number" || typeof cy !== "number") return null;
+  const dx = (payload?.jitter ?? 0) * BOX_WIDTH_PX;
+  return (
+    <circle
+      cx={cx + dx}
+      cy={cy}
+      r={4}
+      fill={payload?.color ?? fill}
+      fillOpacity={0.6}
+    />
+  );
+}
+
+/**
+ * Card showing, per model, a box plot of the per-cell score CV% with the
+ * individual cells overlaid as jittered dots.
+ *
+ * Shows a placeholder message instead of the chart when no model has a cell
+ * with enough runs to compute a CV. The value axis runs from 0 to the largest
+ * CV rounded up to the next multiple of 10, and never below 10.
+ *
+ * @param runs Runs to analyse.
+ */
+export default function VariabilityViolin({ runs }: VariabilityViolinProps) {
+  const data = useMemo(() => computeModelCV(runs), [runs]);
+  const scatterData = useMemo(() => buildScatterData(data), [data]);
+
+  if (data.length === 0) {
+    return (
+      <div
+        className="card"
+        style={{
+          textAlign: "center",
+          padding: "40px",
+          color: SMUI.muted,
+          fontFamily: "'JetBrains Mono', monospace",
+        }}
+      >
+        Not enough multi-run cells to compute variability.
+      </div>
+    );
+  }
+
+  const maxCV = Math.max(...data.map((d) => d.max), 10);
+  const yMax = Math.ceil(maxCV / 10) * 10;
+
+  return (
+    <div className="card">
+      <h3
+        style={{
+          marginBottom: "4px",
+          fontFamily: "'JetBrains Mono', monospace",
+        }}
+      >
+        Score Variability by Model (CV%)
+      </h3>
+      <p
+        style={{
+          color: SMUI.muted,
+          fontSize: "11px",
+          fontFamily: "'JetBrains Mono', monospace",
+          marginBottom: "16px",
+        }}
+      >
+        Lower = more consistent. Each dot is one cell's coefficient of
+        variation.
+      </p>
+      <ResponsiveContainer width="100%" height={320}>
+        <ComposedChart data={data} barCategoryGap="25%">
+          <CartesianGrid
+            strokeDasharray="3 3"
+            stroke={SMUI.border}
+            vertical={false}
+          />
+          {/*
+            The scatter series brings its own data in which every model label
+            repeats once per cell. Duplicated categories must be merged so
+            each model keeps a single slot shared by its box and its dots.
+          */}
+          <XAxis
+            dataKey="label"
+            allowDuplicatedCategory={false}
+            stroke={SMUI.muted}
+            tickLine={false}
+            axisLine={{ stroke: SMUI.border }}
+            interval={0}
+            tick={({ x, y, payload }: any) => {
+              // Labels are "<model>|(n=<count>)": name on the first line, count below it.
+              const [name, count] = (payload.value as string).split("|");
+              return (
+                <g>
+                  <text
+                    x={x}
+                    y={y + 12}
+                    textAnchor="middle"
+                    fill={SMUI.muted}
+                    fontSize={10}
+                    fontFamily="'JetBrains Mono', monospace"
+                  >
+                    {name}
+                  </text>
+                  <text
+                    x={x}
+                    y={y + 24}
+                    textAnchor="middle"
+                    fill={SMUI.muted}
+                    fontSize={8}
+                    fontFamily="'JetBrains Mono', monospace"
+                    opacity={0.6}
+                  >
+                    {count}
+                  </text>
+                </g>
+              );
+            }}
+            height={40}
+          />
+          <YAxis
+            stroke={SMUI.muted}
+            fontSize={11}
+            fontFamily="'JetBrains Mono', monospace"
+            domain={[0, yMax]}
+            tickLine={false}
+            axisLine={false}
+            yAxisId="cv"
+            label={{
+              value: "CV%",
+              angle: -90,
+              position: "insideLeft",
+              style: {
+                fill: SMUI.muted,
+                fontSize: 10,
+                fontFamily: "'JetBrains Mono', monospace",
+              },
+            }}
+          />
+          <Tooltip
+            content={<CVTooltipContent />}
+            cursor={{ fill: "hsl(217 17% 28% / 0.3)" }}
+          />
+          {/* Invisible base bar to push the visible box up to q1 */}
+          <Bar
+            dataKey="base"
+            stackId="box"
+            fill="transparent"
+            barSize={BOX_WIDTH_PX}
+            yAxisId="cv"
+          />
+          {/* Visible IQR box with custom shape for whiskers and median */}
+          <Bar
+            dataKey="iqr"
+            stackId="box"
+            barSize={BOX_WIDTH_PX}
+            yAxisId="cv"
+            shape={<CVBoxPlotShape />}
+          >
+            {data.map((entry) => (
+              <Cell key={entry.label} fill={entry.color} />
+            ))}
+          </Bar>
+          {/* Scatter dots for individual cell CV values; JitteredDot applies the horizontal jitter */}
+          <Scatter
+            data={scatterData}
+            dataKey="cv"
+            yAxisId="cv"
+            fill={SMUI.frost2}
+            fillOpacity={0.6}
+            shape={<JitteredDot />}
+          >
+            <ZAxis range={[50, 50]} />
+            {scatterData.map((pt, i) => (
+              <Cell key={i} fill={pt.color} fillOpacity={0.6} />
+            ))}
+          </Scatter>
+        </ComposedChart>
+      </ResponsiveContainer>
+    </div>
+  );
+}
diff --git a/dashboard/src/pages/compare.astro b/dashboard/src/pages/compare.astro
@@ -2,6 +2,7 @@
import Base from "../layouts/Base.astro";
import { loadAllRuns, getAxisValues, getTaskNames, AXIS_NAMES } from "../lib/data";
import type { Run, AxisName } from "../lib/data";
+import VariabilityViolin from "../components/VariabilityViolin";
const runs = loadAllRuns();
const axisValues = getAxisValues(runs);
@@ -198,4 +199,8 @@ for (const axis of AXIS_NAMES) {
</table>
</div>
)}
+
+ <div style="margin-top: 24px;">
+ <VariabilityViolin client:load runs={runs} />
+ </div>
</Base>