loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

RadarComparison.tsx (8580B)


      1 import { useState, useMemo } from "react";
      2 import {
      3   RadarChart,
      4   Radar,
      5   PolarGrid,
      6   PolarAngleAxis,
      7   PolarRadiusAxis,
      8   ResponsiveContainer,
      9   Tooltip,
     10 } from "recharts";
     11 import type { Run } from "../lib/types";
     12 import { groupIntoCells, type Cell } from "../lib/analysis";
     13 
/** Props accepted by the RadarComparison component. */
interface RadarComparisonProps {
  /** Runs that get grouped into cells; each cell becomes one selectable configuration. */
  runs: Run[];
}

/**
 * Evaluation dimensions plotted on the radar, one axis each.
 * Every entry is also used as the key looked up on a run's `eval_results`.
 */
const DIMENSIONS = [
  "structural",
  "functional",
  "quality",
  "code_analysis",
  "gameplay_bot",
  "transcript_analysis",
] as const;

/** Union of the literal dimension keys listed in DIMENSIONS. */
type Dimension = (typeof DIMENSIONS)[number];

/** Human-readable axis label for each dimension key. */
const DIMENSION_LABELS: Record<Dimension, string> = {
  structural: "Structural",
  functional: "Functional",
  quality: "Quality",
  code_analysis: "Code Analysis",
  gameplay_bot: "Gameplay Bot",
  transcript_analysis: "Transcript",
};

// Series colours: COLOR_A is used for "Config A", COLOR_B for "Config B"
// (selector labels, radar polygons, tick percentages and the legend).
const COLOR_A = "hsl(193 44% 67%)";
const COLOR_B = "hsl(40 71% 73%)";
     40 
     41 function extractDimensionScore(run: Run, dim: Dimension): number | null {
     42   if (!run.eval_results) return null;
     43   const section = run.eval_results[dim as keyof typeof run.eval_results];
     44   if (section && typeof section === "object" && "score" in section) {
     45     const score = (section as { score: number }).score;
     46     return typeof score === "number" ? score : null;
     47   }
     48   return null;
     49 }
     50 
/** One selectable configuration shown in the Config A / Config B dropdowns. */
interface CellConfig {
  /** Identifier copied from the underlying cell; used as option value and React key. */
  cell_id: string;
  /** Display text built as "model / language / prompt_style / effort" from the cell's meta. */
  label: string;
  /** The grouped cell itself, whose runs are averaged per dimension. */
  cell: Cell;
}
     56 
     57 function buildCellConfigs(runs: Run[]): CellConfig[] {
     58   const cells = groupIntoCells(runs);
     59   return cells
     60     .map((cell) => {
     61       const m = cell.meta;
     62       const label = `${m.model} / ${m.language} / ${m.prompt_style} / ${m.effort}`;
     63       return { cell_id: cell.cell_id, label, cell };
     64     })
     65     .sort((a, b) => a.label.localeCompare(b.label));
     66 }
     67 
     68 function cellAverageScore(
     69   cell: Cell,
     70   dim: Dimension
     71 ): number | null {
     72   const scores = cell.runs
     73     .map((r) => extractDimensionScore(r, dim))
     74     .filter((s): s is number => s !== null);
     75   if (scores.length === 0) return null;
     76   return scores.reduce((a, b) => a + b, 0) / scores.length;
     77 }
     78 
/** One axis of the radar chart, carrying both configurations' values. */
interface RadarDatum {
  /** Display label of the dimension; also the axis dataKey value. */
  dimension: string;
  /** Average score of Config A for this dimension; 0 when no score is available. */
  scoreA: number;
  /** Average score of Config B for this dimension; 0 when no score is available. */
  scoreB: number;
  /** Config A's score as a whole-number percentage string, or "n/a" when missing. */
  labelA: string;
  /** Config B's score as a whole-number percentage string, or "n/a" when missing. */
  labelB: string;
}
     86 
     87 function CustomTick({
     88   payload,
     89   x,
     90   y,
     91   data,
     92 }: {
     93   payload: { value: string };
     94   x: number;
     95   y: number;
     96   data: RadarDatum[];
     97 }) {
     98   const datum = data.find((d) => d.dimension === payload.value);
     99   if (!datum) return null;
    100 
    101   return (
    102     <g transform={`translate(${x},${y})`}>
    103       <text
    104         textAnchor="middle"
    105         dy={-8}
    106         style={{
    107           fill: "var(--text)",
    108           fontSize: "11px",
    109           fontFamily: "'JetBrains Mono', monospace",
    110           fontWeight: 500,
    111         }}
    112       >
    113         {payload.value}
    114       </text>
    115       <text
    116         textAnchor="middle"
    117         dy={6}
    118         style={{
    119           fontSize: "10px",
    120           fontFamily: "'JetBrains Mono', monospace",
    121         }}
    122       >
    123         <tspan fill={COLOR_A}>{datum.labelA}</tspan>
    124         <tspan fill="var(--text-muted)"> / </tspan>
    125         <tspan fill={COLOR_B}>{datum.labelB}</tspan>
    126       </text>
    127     </g>
    128   );
    129 }
    130 
    131 export default function RadarComparison({ runs }: RadarComparisonProps) {
    132   const cellConfigs = useMemo(() => buildCellConfigs(runs), [runs]);
    133 
    134   const [selectedA, setSelectedA] = useState<string>(
    135     cellConfigs[0]?.cell_id ?? ""
    136   );
    137   const [selectedB, setSelectedB] = useState<string>(
    138     cellConfigs[1]?.cell_id ?? cellConfigs[0]?.cell_id ?? ""
    139   );
    140 
    141   const configA = cellConfigs.find((c) => c.cell_id === selectedA);
    142   const configB = cellConfigs.find((c) => c.cell_id === selectedB);
    143 
    144   const data: RadarDatum[] = useMemo(() => {
    145     return DIMENSIONS.map((dim) => {
    146       const scoreA = configA ? cellAverageScore(configA.cell, dim) : null;
    147       const scoreB = configB ? cellAverageScore(configB.cell, dim) : null;
    148       return {
    149         dimension: DIMENSION_LABELS[dim],
    150         scoreA: scoreA ?? 0,
    151         scoreB: scoreB ?? 0,
    152         labelA: scoreA !== null ? (scoreA * 100).toFixed(0) + "%" : "n/a",
    153         labelB: scoreB !== null ? (scoreB * 100).toFixed(0) + "%" : "n/a",
    154       };
    155     });
    156   }, [configA, configB]);
    157 
    158   if (cellConfigs.length === 0) {
    159     return (
    160       <div
    161         className="card"
    162         style={{
    163           textAlign: "center",
    164           padding: "40px",
    165           color: "var(--text-muted)",
    166           borderRadius: 0,
    167         }}
    168       >
    169         No configurations available for comparison.
    170       </div>
    171     );
    172   }
    173 
    174   return (
    175     <div
    176       style={{
    177         background: "var(--surface-1)",
    178         border: "1px solid var(--border)",
    179         borderRadius: 0,
    180         padding: "20px",
    181       }}
    182     >
    183       <h3 style={{ marginBottom: "16px" }}>Quality Radar Comparison</h3>
    184 
    185       <div
    186         style={{
    187           display: "flex",
    188           gap: "24px",
    189           marginBottom: "20px",
    190           flexWrap: "wrap",
    191         }}
    192       >
    193         <div className="filter-group">
    194           <label style={{ color: COLOR_A, fontWeight: 600 }}>Config A</label>
    195           <select
    196             value={selectedA}
    197             onChange={(e) => setSelectedA(e.target.value)}
    198           >
    199             {cellConfigs.map((c) => (
    200               <option key={c.cell_id} value={c.cell_id}>
    201                 {c.label} ({c.cell.n} runs)
    202               </option>
    203             ))}
    204           </select>
    205         </div>
    206 
    207         <div className="filter-group">
    208           <label style={{ color: COLOR_B, fontWeight: 600 }}>Config B</label>
    209           <select
    210             value={selectedB}
    211             onChange={(e) => setSelectedB(e.target.value)}
    212           >
    213             {cellConfigs.map((c) => (
    214               <option key={c.cell_id} value={c.cell_id}>
    215                 {c.label} ({c.cell.n} runs)
    216               </option>
    217             ))}
    218           </select>
    219         </div>
    220       </div>
    221 
    222       <ResponsiveContainer width="100%" height={420}>
    223         <RadarChart cx="50%" cy="50%" outerRadius="70%" data={data}>
    224           <PolarGrid
    225             stroke="var(--border)"
    226             strokeDasharray="3 3"
    227           />
    228           <PolarAngleAxis
    229             dataKey="dimension"
    230             tick={(props: Record<string, unknown>) => (
    231               <CustomTick
    232                 payload={props.payload as { value: string }}
    233                 x={props.x as number}
    234                 y={props.y as number}
    235                 data={data}
    236               />
    237             )}
    238           />
    239           <PolarRadiusAxis
    240             angle={90}
    241             domain={[0, 1]}
    242             tickCount={6}
    243             tick={{
    244               fill: "var(--text-muted)",
    245               fontSize: 10,
    246               fontFamily: "'JetBrains Mono', monospace",
    247             }}
    248             tickFormatter={(v: number) => (v * 100).toFixed(0) + "%"}
    249             stroke="var(--border)"
    250           />
    251           <Radar
    252             name="Config A"
    253             dataKey="scoreA"
    254             stroke={COLOR_A}
    255             fill={COLOR_A}
    256             fillOpacity={0.3}
    257             strokeWidth={2}
    258           />
    259           <Radar
    260             name="Config B"
    261             dataKey="scoreB"
    262             stroke={COLOR_B}
    263             fill={COLOR_B}
    264             fillOpacity={0.3}
    265             strokeWidth={2}
    266           />
    267           <Tooltip
    268             contentStyle={{
    269               background: "var(--surface-1)",
    270               border: "1px solid var(--border)",
    271               borderRadius: "2px",
    272               fontFamily: "'JetBrains Mono', monospace",
    273               fontSize: "11px",
    274             }}
    275             formatter={(value: unknown, name: unknown) => [
    276               ((Number(value) || 0) * 100).toFixed(1) + "%",
    277               String(name),
    278             ]}
    279           />
    280         </RadarChart>
    281       </ResponsiveContainer>
    282 
    283       <div
    284         style={{
    285           display: "flex",
    286           justifyContent: "center",
    287           gap: "24px",
    288           marginTop: "12px",
    289           fontSize: "11px",
    290           fontFamily: "var(--font-mono)",
    291         }}
    292       >
    293         <span>
    294           <span
    295             style={{
    296               display: "inline-block",
    297               width: "12px",
    298               height: "12px",
    299               background: COLOR_A,
    300               opacity: 0.7,
    301               marginRight: "6px",
    302               verticalAlign: "middle",
    303             }}
    304           />
    305           <span style={{ color: COLOR_A }}>Config A</span>
    306         </span>
    307         <span>
    308           <span
    309             style={{
    310               display: "inline-block",
    311               width: "12px",
    312               height: "12px",
    313               background: COLOR_B,
    314               opacity: 0.7,
    315               marginRight: "6px",
    316               verticalAlign: "middle",
    317             }}
    318           />
    319           <span style={{ color: COLOR_B }}>Config B</span>
    320         </span>
    321       </div>
    322     </div>
    323   );
    324 }

Impressum · Datenschutz