commit ae769a448ed5b0539181c89a2b3fc575989123ae
parent d7f2fbbca814d869ee1d4f46ef35b449c22ec226
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Mon, 6 Apr 2026 10:32:54 +0200
Flexible axes on scatter plots and efficiency frontier
Both ScatterPlot and EfficiencyFrontier now have dropdown selectors
for x and y axes. 10 available metrics: outcome, gameplay, quality,
code quality, structural, SonarQube, transcript, cost, turns, time.
Cell interface expanded with quality, structural, sonarqube, transcript
fields. SonarQube metric extractor added to analysis.ts.
Default axes (x vs y): cost vs outcome, turns vs outcome.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
2 files changed, 211 insertions(+), 45 deletions(-)
diff --git a/dashboard/src/components/EfficiencyFrontier.tsx b/dashboard/src/components/EfficiencyFrontier.tsx
@@ -13,8 +13,119 @@ import { groupIntoCells } from "../lib/analysis";
interface EfficiencyFrontierProps {
runs: Run[];
+ defaultX?: string; // METRIC_CONFIG key used as the initial x-axis metric; the component falls back to "cost" when omitted
+ defaultY?: string; // METRIC_CONFIG key used as the initial y-axis metric; the component falls back to "outcome" when omitted
}
+type CellMetricKey = // Cell fields that aggregateByConfig may index for an axis; each is read there as { avg, min, max }
+ | "cost"
+ | "score"
+ | "turns"
+ | "wall_time"
+ | "gameplay"
+ | "quality"
+ | "code_quality"
+ | "structural"
+ | "sonarqube"
+ | "transcript";
+
+interface MetricDef { // Display and lookup settings for one selectable axis metric
+ label: string; // text shown in the axis dropdowns and in the tooltip rows
+ cellKey: CellMetricKey; // which Cell aggregate supplies the plotted average
+ scale: number; // NOTE(review): 1 in every entry and not read in the visible hunks; confirm whether it is still needed
+ format: (v: number) => string; // renders a raw average for axis ticks, the tooltip and the frontier list
+ axisLabel: string; // axis title; also passed as the XAxis/YAxis name prop
+}
+
+const METRIC_CONFIG: Record<string, MetricDef> = { // Selectable axis metrics keyed by dropdown value; entry order becomes the dropdown order via Object.entries
+ cost: {
+ label: "Cost ($)",
+ cellKey: "cost",
+ scale: 1,
+ format: (v: number) => `$${v.toFixed(2)}`,
+ axisLabel: "Avg Cost ($)",
+ },
+ outcome: { // dropdown key "outcome" reads the Cell's "score" aggregate
+ label: "Outcome Score (%)",
+ cellKey: "score",
+ scale: 1,
+ format: (v: number) => `${(v * 100).toFixed(0)}%`, // multiplies by 100, so the stored average is presumably a 0-1 fraction
+ axisLabel: "Avg Score (%)",
+ },
+ gameplay: {
+ label: "Gameplay (%)",
+ cellKey: "gameplay",
+ scale: 1,
+ format: (v: number) => `${(v * 100).toFixed(0)}%`,
+ axisLabel: "Avg Gameplay (%)",
+ },
+ quality: {
+ label: "Quality (%)",
+ cellKey: "quality",
+ scale: 1,
+ format: (v: number) => `${(v * 100).toFixed(0)}%`,
+ axisLabel: "Avg Quality (%)",
+ },
+ code_quality: {
+ label: "Code Quality (%)",
+ cellKey: "code_quality",
+ scale: 1,
+ format: (v: number) => `${(v * 100).toFixed(0)}%`,
+ axisLabel: "Avg Code Quality (%)",
+ },
+ structural: {
+ label: "Structural (%)",
+ cellKey: "structural",
+ scale: 1,
+ format: (v: number) => `${(v * 100).toFixed(0)}%`,
+ axisLabel: "Avg Structural (%)",
+ },
+ sonarqube: {
+ label: "SonarQube (%)",
+ cellKey: "sonarqube",
+ scale: 1,
+ format: (v: number) => `${(v * 100).toFixed(0)}%`, // NOTE(review): assumes the SonarQube score is a 0-1 fraction like the others; confirm against the extractor's data
+ axisLabel: "Avg SonarQube (%)",
+ },
+ turns: {
+ label: "Turns",
+ cellKey: "turns",
+ scale: 1,
+ format: (v: number) => `${Math.round(v)}`, // averages are rounded to a whole number for display only
+ axisLabel: "Avg Turns",
+ },
+ wall_time: { // labelled in seconds; presumably the stored wall_time is already in seconds, confirm
+ label: "Time (s)",
+ cellKey: "wall_time",
+ scale: 1,
+ format: (v: number) => `${Math.round(v)}s`,
+ axisLabel: "Avg Time (s)",
+ },
+ transcript: {
+ label: "Transcript (%)",
+ cellKey: "transcript",
+ scale: 1,
+ format: (v: number) => `${(v * 100).toFixed(0)}%`,
+ axisLabel: "Avg Transcript (%)",
+ },
+};
+
+const METRIC_OPTIONS = Object.entries(METRIC_CONFIG).map(([key, conf]) => ({ // value/label pairs rendered as the options of both axis selects
+ value: key, // METRIC_CONFIG key; stored in the xMetric / yMetric state on change
+ label: conf.label,
+}));
+
+const selectStyle: React.CSSProperties = { // Inline style shared by the x-axis and y-axis select elements
+ background: "var(--surface-1, hsl(217 16% 15.5%))", // CSS custom property with a literal fallback colour
+ color: "var(--text, hsl(213 14% 80%))",
+ border: "1px solid var(--border, hsl(217 17% 28%))",
+ borderRadius: "2px",
+ fontFamily: "'JetBrains Mono', monospace", // same font family string the chart axes use
+ fontSize: "11px",
+ padding: "4px 6px",
+ cursor: "pointer",
+};
+
const MODEL_COLORS: Record<string, string> = {
haiku: "hsl(193 44% 67%)",
sonnet: "hsl(40 71% 73%)",
@@ -42,32 +153,44 @@ function getModelColor(model: string): string {
return DEFAULT_COLOR;
}
-function aggregateByConfig(runs: Run[]): ConfigPoint[] {
+function aggregateByConfig( // Groups runs into cells and emits one plot point per cell for the chosen axis metrics
+ runs: Run[],
+ xKey: CellMetricKey, // Cell aggregate whose average is plotted on the x axis
+ yKey: CellMetricKey, // Cell aggregate whose average is plotted on the y axis
+): ConfigPoint[] {
const cells = groupIntoCells(runs);
return cells
- .filter((c) => c.score.avg > 0 && c.cost.avg > 0)
- .map((c) => ({
- cell_id: c.cell_id,
- model: c.meta.model,
- avgCost: c.cost.avg,
- avgScore: c.score.avg,
- runCount: c.n,
- config: {
+ .filter((c) => { // keeps only cells whose average is strictly positive on BOTH selected axes
+ const xAgg = c[xKey] as { avg: number; min: number; max: number }; // NOTE(review): the Cell fields visible in analysis.ts already have this shape, so the cast may be redundant; confirm for cost/score/turns
+ const yAgg = c[yKey] as { avg: number; min: number; max: number };
+ return xAgg.avg > 0 && yAgg.avg > 0; // NOTE(review): a cell averaging exactly 0 on either metric is dropped from the chart; confirm that is intended for score-type metrics
+ })
+ .map((c) => {
+ const xAgg = c[xKey] as { avg: number; min: number; max: number }; // same lookups as in the filter above, repeated per surviving cell
+ const yAgg = c[yKey] as { avg: number; min: number; max: number };
+ return {
+ cell_id: c.cell_id,
model: c.meta.model,
- effort: c.meta.effort,
- prompt_style: c.meta.prompt_style,
- language: c.meta.language,
- linter: c.meta.linter,
- playwright: c.meta.playwright,
- context_file: c.meta.context_file,
- sub_agents: c.meta.sub_agents,
- web_search: c.meta.web_search,
- max_budget: c.meta.max_budget,
- },
- isFrontier: false,
- label: "",
- }));
+ avgCost: xAgg.avg, // field name is historical: it now carries the x-axis metric's average, whichever metric is selected
+ avgScore: yAgg.avg, // field name is historical: it now carries the y-axis metric's average
+ runCount: c.n,
+ config: { // copy of the cell's configuration fields taken from c.meta
+ model: c.meta.model,
+ effort: c.meta.effort,
+ prompt_style: c.meta.prompt_style,
+ language: c.meta.language,
+ linter: c.meta.linter,
+ playwright: c.meta.playwright,
+ context_file: c.meta.context_file,
+ sub_agents: c.meta.sub_agents,
+ web_search: c.meta.web_search,
+ max_budget: c.meta.max_budget,
+ },
+ isFrontier: false, // placeholder; the component overwrites it after computing the frontier
+ label: "", // placeholder; the component fills it in for frontier points only
+ };
+ });
}
function computeParetoFrontier(points: ConfigPoint[]): ConfigPoint[] {
@@ -136,9 +259,13 @@ interface TooltipPayloadEntry {
function CustomTooltip({
active,
payload,
+ xConf,
+ yConf,
}: {
active?: boolean;
payload?: TooltipPayloadEntry[];
+ xConf: MetricDef;
+ yConf: MetricDef;
}) {
if (!active || !payload || payload.length === 0) return null;
const point = payload[0]?.payload;
@@ -167,14 +294,14 @@ function CustomTooltip({
{point.cell_id.split("_").filter(s => s.includes("=")).map(s => s.replace("=", ": ")).join(" ")}
</div>
<div style={{ marginBottom: "6px" }}>
- <span style={{ color: "var(--text-muted)" }}>score: </span>
+ <span style={{ color: "var(--text-muted)" }}>{yConf.label}: </span>
<span style={{ fontWeight: 600 }}>
- {(point.avgScore * 100).toFixed(1)}%
+ {yConf.format(point.avgScore)}
</span>
</div>
<div style={{ marginBottom: "6px" }}>
- <span style={{ color: "var(--text-muted)" }}>cost: </span>
- <span style={{ fontWeight: 600 }}>${point.avgCost.toFixed(2)}</span>
+ <span style={{ color: "var(--text-muted)" }}>{xConf.label}: </span>
+ <span style={{ fontWeight: 600 }}>{xConf.format(point.avgCost)}</span>
</div>
<div style={{ marginBottom: "8px" }}>
<span style={{ color: "var(--text-muted)" }}>runs in cell: </span>
@@ -214,11 +341,20 @@ function CustomTooltip({
);
}
-export default function EfficiencyFrontier({ runs }: EfficiencyFrontierProps) {
+export default function EfficiencyFrontier({
+ runs,
+ defaultX = "cost",
+ defaultY = "outcome",
+}: EfficiencyFrontierProps) {
const [hoveredId, setHoveredId] = useState<string | null>(null);
+ const [xMetric, setXMetric] = useState(defaultX);
+ const [yMetric, setYMetric] = useState(defaultY);
+
+ const xConf = METRIC_CONFIG[xMetric] || METRIC_CONFIG.cost;
+ const yConf = METRIC_CONFIG[yMetric] || METRIC_CONFIG.outcome;
const points = useMemo(() => {
- const raw = aggregateByConfig(runs);
+ const raw = aggregateByConfig(runs, xConf.cellKey, yConf.cellKey);
const frontier = computeParetoFrontier(raw);
const frontierIds = new Set(frontier.map((p) => p.cell_id));
@@ -227,7 +363,7 @@ export default function EfficiencyFrontier({ runs }: EfficiencyFrontierProps) {
isFrontier: frontierIds.has(p.cell_id),
label: frontierIds.has(p.cell_id) ? findKeyDifference(p, raw) : "",
}));
- }, [runs]);
+ }, [runs, xMetric, yMetric]);
if (points.length === 0) {
return (
@@ -301,16 +437,38 @@ export default function EfficiencyFrontier({ runs }: EfficiencyFrontierProps) {
return (
<div className="card">
<h3 style={{ marginBottom: "4px" }}>Efficiency Frontier</h3>
- <p
+ <div
style={{
- color: "var(--text-muted)",
- fontSize: "11px",
+ display: "flex",
+ alignItems: "center",
+ gap: "8px",
marginBottom: "16px",
+ flexWrap: "wrap",
}}
>
- Cost vs score per cell (averaged across runs). Pareto frontier
- highlights cells not dominated on both axes.
- </p>
+ <select
+ value={xMetric}
+ onChange={(e) => setXMetric(e.target.value)}
+ style={selectStyle}
+ >
+ {METRIC_OPTIONS.map((opt) => (
+ <option key={opt.value} value={opt.value}>{opt.label}</option>
+ ))}
+ </select>
+ <span style={{ fontSize: "11px", color: "var(--text-muted)" }}>vs</span>
+ <select
+ value={yMetric}
+ onChange={(e) => setYMetric(e.target.value)}
+ style={selectStyle}
+ >
+ {METRIC_OPTIONS.map((opt) => (
+ <option key={opt.value} value={opt.value}>{opt.label}</option>
+ ))}
+ </select>
+ <span style={{ fontSize: "11px", color: "var(--text-muted)" }}>
+ -- Pareto frontier highlights cells not dominated on both axes.
+ </span>
+ </div>
{/* Legend */}
<div
@@ -357,13 +515,13 @@ export default function EfficiencyFrontier({ runs }: EfficiencyFrontierProps) {
<XAxis
dataKey="avgCost"
type="number"
- name="Avg Cost"
+ name={xConf.axisLabel}
stroke="var(--text-muted)"
fontSize={11}
fontFamily="'JetBrains Mono', monospace"
- tickFormatter={(v: number) => `$${v.toFixed(2)}`}
+ tickFormatter={(v: number) => xConf.format(v)}
label={{
- value: "Avg Cost ($)",
+ value: xConf.axisLabel,
position: "insideBottom",
offset: -10,
fill: "var(--text-muted)",
@@ -374,14 +532,13 @@ export default function EfficiencyFrontier({ runs }: EfficiencyFrontierProps) {
<YAxis
dataKey="avgScore"
type="number"
- name="Avg Score"
+ name={yConf.axisLabel}
stroke="var(--text-muted)"
fontSize={11}
fontFamily="'JetBrains Mono', monospace"
- domain={[0, 1]}
- tickFormatter={(v: number) => `${(v * 100).toFixed(0)}%`}
+ tickFormatter={(v: number) => yConf.format(v)}
label={{
- value: "Avg Score (%)",
+ value: yConf.axisLabel,
angle: -90,
position: "insideLeft",
offset: 0,
@@ -390,7 +547,7 @@ export default function EfficiencyFrontier({ runs }: EfficiencyFrontierProps) {
fontFamily: "'JetBrains Mono', monospace",
}}
/>
- <Tooltip content={<CustomTooltip />} cursor={false} />
+ <Tooltip content={<CustomTooltip xConf={xConf} yConf={yConf} />} cursor={false} />
{/* Non-frontier points (dimmed) */}
<Scatter
@@ -451,8 +608,8 @@ export default function EfficiencyFrontier({ runs }: EfficiencyFrontierProps) {
<span
style={{ color: "var(--text-muted)", marginLeft: "8px" }}
>
- ${point.avgCost.toFixed(2)} /{" "}
- {(point.avgScore * 100).toFixed(0)}%
+ {xConf.format(point.avgCost)} /{" "}
+ {yConf.format(point.avgScore)}
</span>
</div>
))}
diff --git a/dashboard/src/lib/analysis.ts b/dashboard/src/lib/analysis.ts
@@ -12,6 +12,10 @@ export interface Cell {
wall_time: { avg: number; min: number; max: number };
gameplay: { avg: number; min: number; max: number };
code_quality: { avg: number; min: number; max: number };
+ quality: { avg: number; min: number; max: number };
+ structural: { avg: number; min: number; max: number };
+ sonarqube: { avg: number; min: number; max: number };
+ transcript: { avg: number; min: number; max: number };
}
export interface EffectEntry {
@@ -68,6 +72,7 @@ const METRICS: Record<string, MetricExtractor> = {
structural: (r) => r.eval_results?.structural?.score ?? null,
quality: (r) => r.eval_results?.quality?.score ?? null,
transcript: (r) => (r.eval_results as Record<string, any>)?.transcript_analysis?.score ?? null,
+ sonarqube: (r) => (r.eval_results as Record<string, any>)?.sonarqube?.score ?? null,
};
function agg(values: number[]): { avg: number; min: number; max: number } {
@@ -114,6 +119,10 @@ export function groupIntoCells(runs: Run[]): Cell[] {
wall_time: agg(extractVals(METRICS.wall_time)),
gameplay: agg(extractVals(METRICS.gameplay)),
code_quality: agg(extractVals(METRICS.code_quality)),
+ quality: agg(extractVals(METRICS.quality)),
+ structural: agg(extractVals(METRICS.structural)),
+ sonarqube: agg(extractVals(METRICS.sonarqube)),
+ transcript: agg(extractVals(METRICS.transcript)),
});
}