BumpChart.tsx (17657B)
import { useState, useMemo } from "react";
import {
  LineChart,
  Line,
  XAxis,
  YAxis,
  CartesianGrid,
  Tooltip,
  ResponsiveContainer,
  ReferenceDot,
} from "recharts";
import type { Run } from "../lib/types";
import { AXIS_NAMES, type AxisName } from "../lib/types";
import { groupIntoCells, type Cell } from "../lib/analysis";
import { getModelColor } from "../lib/colors";

interface BumpChartProps {
  runs: Run[];
}

/** Human-readable label for every axis, shown in the condition dropdown and messages. */
const AXIS_LABELS: Record<AxisName, string> = {
  model: "Model",
  effort: "Effort",
  prompt_style: "Prompt Style",
  language: "Language",
  human_language: "Human Language",
  tool_read: "Read Tool",
  tool_write: "Write Tool",
  tool_edit: "Edit Tool",
  tool_glob: "Glob Tool",
  tool_grep: "Grep Tool",
  linter: "Linter",
  playwright: "Playwright",
  context_file: "Context File",
  web_search: "Web Search",
  max_budget: "Budget",
  tests_provided: "Tests Provided",
  strategy: "Strategy",
  design_guidance: "Design Guidance",
  architecture: "Architecture",
  error_checking: "Error Checking",
  context_noise: "Context Noise",
  renderer: "Renderer",
  provider: "Provider",
};

// All axes except "model" since we rank by model
const CONDITION_AXES = AXIS_NAMES.filter((a) => a !== "model");

/** Axis preferred as the default selection whenever it is usable. */
const DEFAULT_AXIS: AxisName = "prompt_style";

/** One model's standing at one value of the selected condition axis. */
interface RankedPoint {
  conditionValue: string;
  /** 1-based position after sorting models by `avgScore`, highest first. */
  rank: number;
  /** Mean of the cell averages for this model at this condition value. */
  avgScore: number;
  model: string;
  /** Number of cells that went into `avgScore`. */
  n: number;
}

/** Marker for a pair of models whose relative rank order flipped. */
interface CrossingPoint {
  /** Condition value at which the flipped order was observed. */
  conditionValue: string;
  /** Index of `conditionValue` in the sorted condition-value list. */
  x: number;
  /** Mean of the two models' ranks at `conditionValue`. */
  rank: number;
  models: [string, string];
}

/** Cells produced by `groupIntoCells` whose average score is strictly positive. */
function getScoredCells(runs: Run[]) {
  return groupIntoCells(runs).filter((c) => c.score.avg > 0);
}

/** True when `value` is a non-blank string that converts to a finite number. */
function isNumericString(value: string): boolean {
  return value.trim() !== "" && Number.isFinite(Number(value));
}

/**
 * Ordering for condition values on the x-axis.
 *
 * Numeric strings are ordered by numeric value and placed before non-numeric
 * strings; everything else keeps the code-unit ordering of the default
 * `Array.prototype.sort`. Without this, "10" sorts before "2", which changes
 * which conditions are adjacent and therefore which rank swaps are reported.
 * NOTE(review): presumably relevant for axes such as `max_budget` — confirm
 * which axes carry numeric values.
 */
function compareConditionValues(a: string, b: string): number {
  const aNumeric = isNumericString(a);
  const bNumeric = isNumericString(b);
  if (aNumeric && bNumeric) {
    const diff = Number(a) - Number(b);
    if (diff !== 0) return diff;
    // Equal numeric value ("1" vs "1.0"): fall through to a stable string order.
  } else if (aNumeric !== bNumeric) {
    return aNumeric ? -1 : 1;
  }
  if (a === b) return 0;
  return a < b ? -1 : 1;
}

/** Default axis for a set of usable axes: `DEFAULT_AXIS` if present, else the first one. */
function pickDefaultAxis(validAxes: readonly AxisName[]): AxisName {
  return validAxes.includes(DEFAULT_AXIS)
    ? DEFAULT_AXIS
    : validAxes[0] ?? DEFAULT_AXIS;
}

/**
 * Ranks models against each other at every value of `axis`.
 *
 * Only cells with a positive average score take part. For each condition value
 * the per-model score is the mean of that model's cell averages; models are
 * then ranked by that score, highest first (rank 1 = best). Models with equal
 * scores still receive distinct consecutive ranks, in sort order.
 *
 * @param runs - Runs to group into cells and rank.
 * @param axis - Axis whose values form the x-axis of the chart.
 * @returns `ranked`: per-model list of points, one per condition value where
 *   the model has scored cells; `crossings`: pairs of models whose relative
 *   order flipped compared with their last known ranks; `conditionValues`:
 *   the sorted condition values that were ranked.
 */
function computeRankings(
  runs: Run[],
  axis: AxisName
): {
  ranked: Record<string, RankedPoint[]>;
  crossings: CrossingPoint[];
  conditionValues: string[];
} {
  // Group runs into cells, then work with cell averages
  const scoredCells = getScoredCells(runs);

  // Unique condition values for the selected axis, in x-axis order
  const conditionValues = Array.from(
    new Set(scoredCells.map((c) => String(c.meta[axis])))
  ).sort(compareConditionValues);

  // Unique models
  const models = Array.from(
    new Set(scoredCells.map((c) => c.meta.model))
  ).sort();

  const ranked: Record<string, RankedPoint[]> = {};
  for (const model of models) {
    ranked[model] = [];
  }

  // Last known rank per model. Merged (not replaced) after each condition, so
  // a model missing from a condition keeps the rank it had when last seen.
  const prevRanks: Record<string, number> = {};

  const crossings: CrossingPoint[] = [];

  for (let ci = 0; ci < conditionValues.length; ci++) {
    const cv = conditionValues[ci];
    const cellsForCondition = scoredCells.filter(
      (c) => String(c.meta[axis]) === cv
    );

    // Average of cell averages per model for this condition
    const modelScores: Array<{
      model: string;
      avgScore: number;
      n: number;
    }> = [];
    for (const model of models) {
      const modelCells = cellsForCondition.filter(
        (c) => c.meta.model === model
      );

      if (modelCells.length > 0) {
        const avg =
          modelCells.reduce((s, c) => s + c.score.avg, 0) / modelCells.length;
        modelScores.push({ model, avgScore: avg, n: modelCells.length });
      }
    }

    // Sort by avgScore descending (higher score = rank 1)
    modelScores.sort((a, b) => b.avgScore - a.avgScore);

    // Assign ranks
    const currentRanks: Record<string, number> = {};
    modelScores.forEach((ms, i) => {
      const rank = i + 1;
      currentRanks[ms.model] = rank;
      ranked[ms.model].push({
        conditionValue: cv,
        rank,
        avgScore: ms.avgScore,
        model: ms.model,
        n: ms.n,
      });
    });

    // Detect crossings: any two models whose relative rank order swapped
    if (ci > 0) {
      for (let i = 0; i < models.length; i++) {
        for (let j = i + 1; j < models.length; j++) {
          const m1 = models[i];
          const m2 = models[j];
          const prev1 = prevRanks[m1];
          const prev2 = prevRanks[m2];
          const curr1 = currentRanks[m1];
          const curr2 = currentRanks[m2];

          // Both models need a previous and a current rank to be comparable.
          if (
            prev1 === undefined ||
            prev2 === undefined ||
            curr1 === undefined ||
            curr2 === undefined
          ) {
            continue;
          }

          const swapped =
            (prev1 < prev2 && curr1 > curr2) ||
            (prev1 > prev2 && curr1 < curr2);
          if (swapped) {
            crossings.push({
              conditionValue: cv,
              x: ci,
              // Approximate the marker position as the mean of the two ranks
              rank: (curr1 + curr2) / 2,
              models: [m1, m2],
            });
          }
        }
      }
    }

    Object.assign(prevRanks, currentRanks);
  }

  return { ranked, crossings, conditionValues };
}

// eslint-disable-next-line @typescript-eslint/no-explicit-any
type DotProps = { cx?: number; cy?: number; payload?: any; stroke?: string };

/**
 * Builds the dot renderer for one model's line: a filled circle plus the
 * model's average score (as a whole percentage) next to it.
 *
 * @param model - Model whose points are looked up.
 * @param color - Fill colour of the circle.
 * @param lookup - model -> conditionValue -> RankedPoint.
 * @returns A render function that yields `null` when the position or the
 *   point for the dot's condition value is missing.
 */
function makeRankDot(
  model: string,
  color: string,
  lookup: Record<string, Record<string, RankedPoint>>
) {
  return function RankDot({ cx, cy, payload }: DotProps) {
    if (cx === undefined || cy === undefined || !payload) return null;
    const point = lookup[model]?.[payload.conditionValue];
    if (!point) return null;
    return (
      <g>
        <circle
          cx={cx}
          cy={cy}
          r={5}
          fill={color}
          stroke="var(--surface-1)"
          strokeWidth={2}
        />
        <text
          x={cx + 10}
          y={cy - 8}
          fill="var(--text)"
          fontSize={10}
          fontFamily="'JetBrains Mono', monospace"
          textAnchor="start"
        >
          {(point.avgScore * 100).toFixed(0)}%
        </text>
      </g>
    );
  };
}

/**
 * Tooltip body: the hovered condition value followed by one row per model
 * (rank, name, average score and cell count), ordered by rank.
 */
function CustomTooltipContent({
  active,
  payload,
  lookup,
}: {
  active?: boolean;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  payload?: Array<{ dataKey?: string; payload?: any; stroke: string }>;
  label?: string;
  lookup: Record<string, Record<string, RankedPoint>>;
}) {
  if (!active || !payload || payload.length === 0) return null;

  const conditionValue = payload[0]?.payload?.conditionValue;
  if (!conditionValue) return null;

  // Resolve the RankedPoint behind each series; skip series without one.
  const rows: Array<{ point: RankedPoint; stroke: string }> = [];
  for (const entry of payload) {
    if (!entry.dataKey) continue;
    const point = lookup[entry.dataKey]?.[conditionValue];
    if (point) {
      rows.push({ point, stroke: entry.stroke });
    }
  }
  rows.sort((a, b) => a.point.rank - b.point.rank);

  return (
    <div
      style={{
        background: "var(--surface-1)",
        border: "1px solid var(--border)",
        padding: "8px 12px",
        fontFamily: "'JetBrains Mono', monospace",
        fontSize: "11px",
      }}
    >
      <div
        style={{
          color: "var(--text)",
          fontWeight: 600,
          marginBottom: "6px",
        }}
      >
        {conditionValue}
      </div>
      {rows.map(({ point, stroke }) => (
        <div
          key={point.model}
          style={{
            display: "flex",
            alignItems: "center",
            gap: "8px",
            marginBottom: "2px",
          }}
        >
          <span
            style={{
              display: "inline-block",
              width: 8,
              height: 8,
              background: stroke,
              flexShrink: 0,
            }}
          />
          <span style={{ color: "var(--text-muted)", width: "16px" }}>
            #{point.rank}
          </span>
          <span style={{ color: "var(--text)" }}>{point.model}</span>
          <span style={{ color: "var(--text-muted)", marginLeft: "auto" }}>
            {(point.avgScore * 100).toFixed(1)}% ({point.n} cells)
          </span>
        </div>
      ))}
    </div>
  );
}

/**
 * Bump chart of model rank (y, rank 1 at the top) across the values of a
 * user-selected condition axis (x), with dashed markers where two models
 * swapped relative order.
 *
 * Renders a placeholder instead of the chart when there are no scored cells,
 * when no axis has enough data, or when the active axis has fewer than two
 * models or condition values.
 */
export default function BumpChart({ runs }: BumpChartProps) {
  // Grouping is shared by the axis pre-check and the empty-state check below.
  const scoredCells = useMemo(() => getScoredCells(runs), [runs]);

  // An axis is usable when it has 2+ condition values among scored cells and
  // at least one of those values has 2+ models with scores.
  const validAxes = useMemo(() => {
    return CONDITION_AXES.filter((axis) => {
      const modelsByCondition = new Map<string, Set<string>>();
      for (const cell of scoredCells) {
        const cv = String(cell.meta[axis]);
        let modelsForValue = modelsByCondition.get(cv);
        if (!modelsForValue) {
          modelsForValue = new Set<string>();
          modelsByCondition.set(cv, modelsForValue);
        }
        modelsForValue.add(cell.meta.model);
      }
      if (modelsByCondition.size < 2) return false;
      return Array.from(modelsByCondition.values()).some(
        (modelsForValue) => modelsForValue.size >= 2
      );
    });
  }, [scoredCells]);

  const [selectedAxis, setSelectedAxis] = useState<AxisName>(() =>
    pickDefaultAxis(validAxes)
  );

  // The initial state is computed only once, so after `runs` changes the
  // stored selection may no longer be offered in the dropdown. Fall back to
  // the default for the current data instead of charting a stale axis.
  const activeAxis: AxisName = validAxes.includes(selectedAxis)
    ? selectedAxis
    : pickDefaultAxis(validAxes);

  const { ranked, crossings, conditionValues, models } = useMemo(() => {
    const { ranked, crossings, conditionValues } = computeRankings(
      runs,
      activeAxis
    );
    const models = Object.keys(ranked).filter((m) => ranked[m].length > 0);
    return { ranked, crossings, conditionValues, models };
  }, [runs, activeAxis]);

  // Build a lookup: model -> conditionValue -> RankedPoint
  const pointLookup = useMemo(() => {
    const lookup: Record<string, Record<string, RankedPoint>> = {};
    for (const model of models) {
      lookup[model] = {};
      for (const point of ranked[model]) {
        lookup[model][point.conditionValue] = point;
      }
    }
    return lookup;
  }, [models, ranked]);

  // Build recharts data: one entry per condition value, keyed by model name
  const chartData = useMemo(() => {
    return conditionValues.map((cv) => {
      const entry: Record<string, unknown> = { conditionValue: cv };
      for (const model of models) {
        const point = pointLookup[model]?.[cv];
        if (point) {
          entry[model] = point.rank;
        }
      }
      return entry;
    });
  }, [conditionValues, models, pointLookup]);

  const maxRank = models.length;

  if (scoredCells.length === 0) {
    return (
      <div
        className="card"
        style={{
          textAlign: "center",
          padding: "40px",
          color: "var(--text-muted)",
        }}
      >
        No scored cells available for ranking.
      </div>
    );
  }

  if (validAxes.length === 0) {
    return (
      <div className="card">
        <h3 style={{ margin: 0 }}>Model Rankings by Condition</h3>
        <div
          style={{
            textAlign: "center",
            padding: "40px",
            color: "var(--text-muted)",
            fontSize: "0.8rem",
          }}
        >
          Not enough data to compare models. Rankings need at least 2 condition
          values where 2 or more models have scored cells.
        </div>
      </div>
    );
  }

  return (
    <div className="card">
      <div
        style={{
          display: "flex",
          alignItems: "center",
          justifyContent: "space-between",
          marginBottom: "16px",
          flexWrap: "wrap",
          gap: "12px",
        }}
      >
        <div>
          <h3 style={{ margin: 0 }}>Model Rankings by Condition</h3>
          <p
            style={{
              color: "var(--text-muted)",
              fontSize: "0.75rem",
              margin: "4px 0 0",
            }}
          >
            Rank 1 = best average cell score. Crossings indicate rank swaps.
          </p>
        </div>
        <div style={{ display: "flex", alignItems: "center", gap: "8px" }}>
          <label
            style={{
              fontSize: "0.75rem",
              color: "var(--text-muted)",
            }}
          >
            Condition:
          </label>
          <select
            value={activeAxis}
            onChange={(e) => setSelectedAxis(e.target.value as AxisName)}
            style={{
              background: "var(--surface-2)",
              color: "var(--text)",
              border: "1px solid var(--border)",
              padding: "4px 8px",
              fontFamily: "'JetBrains Mono', monospace",
              fontSize: "0.75rem",
              borderRadius: "0",
              cursor: "pointer",
            }}
          >
            {validAxes.map((axis) => (
              <option key={axis} value={axis}>
                {AXIS_LABELS[axis]}
              </option>
            ))}
          </select>
        </div>
      </div>

      {conditionValues.length < 2 || models.length < 2 ? (
        <div
          style={{
            textAlign: "center",
            padding: "40px",
            color: "var(--text-muted)",
            fontSize: "0.8rem",
          }}
        >
          {models.length < 2
            ? `Need at least 2 models with scored runs for "${AXIS_LABELS[activeAxis]}" to show rankings. Currently only: ${models.join(", ") || "none"}`
            : `Need at least 2 values for "${AXIS_LABELS[activeAxis]}" to show rankings. Currently only: ${conditionValues.join(", ") || "none"}`}
        </div>
      ) : (
        <>
          <ResponsiveContainer width="100%" height={300}>
            <LineChart
              data={chartData}
              margin={{ top: 20, right: 60, bottom: 10, left: 10 }}
            >
              <CartesianGrid
                strokeDasharray="3 3"
                stroke="var(--border)"
                vertical={false}
              />
              <XAxis
                dataKey="conditionValue"
                stroke="var(--text-muted)"
                fontSize={11}
                fontFamily="'JetBrains Mono', monospace"
                tickLine={false}
                axisLine={{ stroke: "var(--border)" }}
              />
              <YAxis
                domain={[0.5, maxRank + 0.5]}
                ticks={Array.from({ length: maxRank }, (_, i) => i + 1)}
                reversed
                stroke="var(--text-muted)"
                fontSize={11}
                fontFamily="'JetBrains Mono', monospace"
                tickLine={false}
                axisLine={{ stroke: "var(--border)" }}
                label={{
                  value: "Rank",
                  angle: -90,
                  position: "insideLeft",
                  fill: "var(--text-muted)",
                  fontSize: 11,
                  fontFamily: "'JetBrains Mono', monospace",
                }}
                tickFormatter={(v: number) => `#${v}`}
              />
              <Tooltip
                content={<CustomTooltipContent lookup={pointLookup} />}
                cursor={{ stroke: "var(--border)", strokeDasharray: "3 3" }}
              />
              {models.map((model) => {
                const color = getModelColor(model);
                return (
                  <Line
                    key={model}
                    type="linear"
                    dataKey={model}
                    stroke={color}
                    strokeWidth={2.5}
                    dot={makeRankDot(model, color, pointLookup)}
                    activeDot={false}
                    name={model}
                    connectNulls
                  />
                );
              })}
              {crossings.map((crossing, i) => (
                <ReferenceDot
                  key={`crossing-${i}`}
                  x={crossing.conditionValue}
                  y={crossing.rank}
                  r={10}
                  fill="none"
                  stroke="var(--yellow)"
                  strokeWidth={1.5}
                  strokeDasharray="3 2"
                />
              ))}
            </LineChart>
          </ResponsiveContainer>

          {/* Legend */}
          <div
            style={{
              display: "flex",
              alignItems: "center",
              justifyContent: "center",
              gap: "20px",
              marginTop: "12px",
              flexWrap: "wrap",
            }}
          >
            {models.map((model) => (
              <div
                key={model}
                style={{
                  display: "flex",
                  alignItems: "center",
                  gap: "6px",
                  fontSize: "0.75rem",
                  fontFamily: "'JetBrains Mono', monospace",
                }}
              >
                <span
                  style={{
                    display: "inline-block",
                    width: 12,
                    height: 3,
                    background: getModelColor(model),
                  }}
                />
                <span style={{ color: "var(--text)" }}>{model}</span>
              </div>
            ))}
            {crossings.length > 0 && (
              <div
                style={{
                  display: "flex",
                  alignItems: "center",
                  gap: "6px",
                  fontSize: "0.75rem",
                  fontFamily: "'JetBrains Mono', monospace",
                }}
              >
                <span
                  style={{
                    display: "inline-block",
                    width: 12,
                    height: 12,
                    borderRadius: "50%",
                    border: "1.5px dashed var(--yellow)",
                  }}
                />
                <span style={{ color: "var(--text-muted)" }}>rank swap</span>
              </div>
            )}
          </div>
        </>
      )}
    </div>
  );
}