TornadoChart.tsx (13712B)
import type { Run } from "../lib/types";
import type { AxisEffect } from "../lib/analysis";
import { groupIntoCells, confidenceInterval } from "../lib/analysis";

/** Props accepted by {@link TornadoChart}. */
interface TornadoChartProps {
  /** Per-axis effects to plot; rows are rendered in the order given. */
  effects: AxisEffect[];
  /** Metric name: shown in the heading and used to select a metric extractor. */
  metric: string;
  /** Optional run count for the "(n=… runs across … cells)" caption. */
  totalRuns?: number;
  /** Optional cell count for the caption; the caption needs both totals. */
  totalCells?: number;
  /** Raw runs; when supplied, confidence intervals are computed and drawn. */
  runs?: Run[];
}

/** Display labels for known axis keys; unknown keys fall back to the raw key. */
const AXIS_LABELS: Record<string, string> = {
  model: "Model",
  effort: "Effort",
  prompt_style: "Prompt Style",
  language: "Language",
  human_language: "Human Language",
  tool_read: "Read Tool",
  tool_write: "Write Tool",
  tool_edit: "Edit Tool",
  tool_glob: "Glob Tool",
  tool_grep: "Grep Tool",
  linter: "Linter",
  playwright: "Playwright",
  context_file: "Context File",
  web_search: "Web Search",
  max_budget: "Budget",
  tests_provided: "Tests Provided",
  strategy: "Strategy",
  design_guidance: "Design Guidance",
  architecture: "Architecture",
  error_checking: "Error Checking",
  context_noise: "Context Noise",
  renderer: "Renderer",
  provider: "Provider",
};

// Metric extractors matching analysis.ts
// Each extractor returns the metric value for one run, or null when absent.
const METRIC_EXTRACTORS: Record<string, (r: Run) => number | null> = {
  score: (r) => r.eval_results?.score ?? null,
  cost: (r) => r.claude_output?.total_cost_usd ?? null,
  turns: (r) => r.claude_output?.num_turns ?? null,
  wall_time: (r) => r.meta.wall_time_seconds ?? null,
  gameplay: (r) => (r.eval_results as Record<string, any>)?.gameplay_bot?.score ?? null,
  code_quality: (r) => (r.eval_results as Record<string, any>)?.code_analysis?.score ?? null,
  structural: (r) => r.eval_results?.structural?.score ?? null,
  quality: (r) => r.eval_results?.quality?.score ?? null,
  transcript: (r) => (r.eval_results as Record<string, any>)?.transcript_analysis?.score ?? null,
  sonarqube: (r) => (r.eval_results as Record<string, any>)?.sonarqube?.score ?? null,
};

/** Keys of `meta` that are never treated as experiment axes. */
const SKIP_KEYS = new Set([
  "task", "cell_id", "run_id", "run_number", "runs_per_cell",
  "max_budget_usd", "timeout_seconds", "base_tools", "started_at",
  "completed_at", "wall_time_seconds", "exit_code", "short_id",
  "short_cell_id", "claude_version", "sub_agents", "actual_model",
]);

/** Confidence interval of one axis value's effect, relative to the grand mean. */
interface EffectCI {
  /** Lower CI bound minus the grand mean. */
  ciLower: number;
  /** Upper CI bound minus the grand mean. */
  ciUpper: number;
  /** True when the interval contains zero (ciLower <= 0 <= ciUpper). */
  crossesZero: boolean;
}

/**
 * Computes, for every axis and every value of that axis, a confidence
 * interval of the per-cell metric averages shifted by the grand mean.
 *
 * @param runs - Runs to analyse; grouped into cells via `groupIntoCells`.
 * @param metric - Key into `METRIC_EXTRACTORS`.
 * @returns A map `axis -> axis value -> EffectCI`. Empty when the metric is
 *   unknown, `runs` is empty, or no cell has a value for the metric.
 */
function computeEffectCIs(
  runs: Run[],
  metric: string
): Record<string, Record<string, EffectCI>> {
  const extract = METRIC_EXTRACTORS[metric];
  if (!extract || runs.length === 0) return {};

  const cells = groupIntoCells(runs);

  // Get per-cell averages; cells without any value for the metric are dropped.
  const cellData: Array<{ meta: Run["meta"]; avg: number }> = [];
  for (const cell of cells) {
    const vals: number[] = [];
    for (const run of cell.runs) {
      const v = extract(run);
      if (v !== null) vals.push(v);
    }
    if (vals.length === 0) continue;
    cellData.push({ meta: cell.meta, avg: vals.reduce((a, b) => a + b, 0) / vals.length });
  }

  if (cellData.length === 0) return {};

  const grandMean = cellData.reduce((s, c) => s + c.avg, 0) / cellData.length;

  // NOTE(review): axes are taken from the first cell's meta only; an axis that
  // appears solely on later cells is not analysed. Confirm this matches analysis.ts.
  const axisKeys = Object.keys(cellData[0].meta).filter((k) => !SKIP_KEYS.has(k));
  const result: Record<string, Record<string, EffectCI>> = {};

  for (const axis of axisKeys) {
    // A Map (not a plain object) so that data-derived keys such as
    // "constructor" cannot collide with inherited Object.prototype members.
    const groups = new Map<string, number[]>();
    for (const { meta, avg } of cellData) {
      const key = String((meta as Record<string, unknown>)[axis] ?? "unknown");
      const bucket = groups.get(key);
      if (bucket) {
        bucket.push(avg);
      } else {
        groups.set(key, [avg]);
      }
    }

    const axisResult: Record<string, EffectCI> = {};
    for (const [val, avgs] of groups) {
      if (avgs.length < 2) {
        // A single cell gives no spread to estimate; record a zero-width
        // interval flagged as crossing zero.
        axisResult[val] = { ciLower: 0, ciUpper: 0, crossesZero: true };
        continue;
      }
      // Compute CI of the effect (mean - grandMean)
      const ci = confidenceInterval(avgs);
      const effectLower = ci.lower - grandMean;
      const effectUpper = ci.upper - grandMean;
      axisResult[val] = {
        ciLower: effectLower,
        ciUpper: effectUpper,
        crossesZero: effectLower <= 0 && effectUpper >= 0,
      };
    }
    result[axis] = axisResult;
  }

  return result;
}

/**
 * Tornado-style chart: one row group per axis, one bar per axis value.
 *
 * Bars are drawn as magnitudes growing from the left edge; the sign of the
 * effect is conveyed by colour (green for >= 0, red for < 0). A translucent
 * band shows |effect| + variance, and, when `runs` is supplied, a whisker
 * shows the confidence interval of the effect.
 *
 * Renders a placeholder card when `effects` is empty.
 */
export default function TornadoChart({ effects, metric, totalRuns, totalCells, runs }: TornadoChartProps) {
  if (effects.length === 0) {
    return (
      <div
        className="card"
        style={{
          textAlign: "center",
          padding: "40px",
          color: "var(--text-muted)",
        }}
      >
        Not enough data to compute effects. Run more experiments with varying
        configurations.
      </div>
    );
  }

  // Compute CIs if runs are provided
  const effectCIs = runs ? computeEffectCIs(runs, metric) : {};

  // Scale must account for variance bands and CI whiskers extending beyond effect bars
  const ciExtents = Object.values(effectCIs).flatMap((axisCIs) =>
    Object.values(axisCIs).map((ci) => Math.max(Math.abs(ci.ciLower), Math.abs(ci.ciUpper)))
  );
  const maxExtent = Math.max(
    ...effects.flatMap((e) =>
      e.values.map((v) => Math.abs(v.effect) + v.variance)
    ),
    ...ciExtents
  );
  // The widest element maps to 200px; fall back to 1 when there is nothing to scale.
  const scale = maxExtent > 0 ? 200 / maxExtent : 1;

  return (
    <div className="card">
      <h3 style={{ marginBottom: "4px" }}>Variable Impact on {metric}</h3>
      {totalRuns != null && totalCells != null && (
        <div style={{ fontSize: "10px", fontFamily: "'JetBrains Mono', monospace", color: "var(--text-muted, hsl(213 14% 65%))", marginBottom: "4px" }}>
          (n={totalRuns} runs across {totalCells} cells)
        </div>
      )}
      <p
        style={{
          color: "var(--text-muted)",
          fontSize: "0.75rem",
          marginBottom: "16px",
        }}
      >
        Sorted by effect size. Solid bars show effect (deviation from grand
        mean). Shaded bands show within-cell variance.
      </p>

      {effects.map((effect) => (
        <div
          key={effect.axis}
          style={{
            display: "flex",
            alignItems: "center",
            marginBottom: "12px",
            gap: "12px",
          }}
        >
          {/* Label */}
          <div
            style={{
              width: "120px",
              textAlign: "right",
              fontSize: "0.8rem",
              flexShrink: 0,
            }}
          >
            {AXIS_LABELS[effect.axis] || effect.axis}
          </div>

          {/* Bars */}
          <div
            style={{
              flex: 1,
              display: "flex",
              flexDirection: "column",
              gap: "2px",
            }}
          >
            {effect.values.map((entry) => {
              const ci = effectCIs[effect.axis]?.[entry.value];
              // Without a CI the effect is treated as not distinguishable from zero.
              const crossesZero = ci?.crossesZero ?? true;
              const effectWidth = Math.abs(entry.effect) * scale;
              const varianceBandWidth =
                (Math.abs(entry.effect) + entry.variance) * scale;
              const isPositive = entry.effect >= 0;
              const isLowN = entry.n < 3;
              // CI whisker positions (in px from left edge of bar area)
              const ciLowerPx = ci ? Math.abs(ci.ciLower) * scale : 0;
              const ciUpperPx = ci ? Math.abs(ci.ciUpper) * scale : 0;
              // For the whisker, we show the full CI extent
              const ciMaxPx = ci ? Math.max(ciLowerPx, ciUpperPx) : 0;
              // Bars are magnitudes measured from the left edge, so an interval
              // that straddles zero has to start at 0. Using the smaller |bound|
              // there would draw a whisker that appears to exclude zero.
              const ciMinPx = ci && !crossesZero ? Math.min(ciLowerPx, ciUpperPx) : 0;
              // Dim bars where CI crosses zero (effect not significant)
              const notSignificant = ci && crossesZero && !isLowN;
              const barContainerWidth = Math.max(varianceBandWidth, effectWidth, ciMaxPx, 2);
              return (
                <div
                  key={entry.value}
                  style={{
                    display: "flex",
                    alignItems: "center",
                    gap: "8px",
                    opacity: isLowN ? 0.4 : notSignificant ? 0.5 : 1,
                  }}
                >
                  <div
                    style={{
                      width: "50px",
                      textAlign: "right",
                      fontSize: "0.7rem",
                      fontFamily: "var(--font-mono)",
                      color: "var(--text-muted)",
                      flexShrink: 0,
                    }}
                  >
                    {entry.value}
                  </div>
                  <div
                    style={{
                      position: "relative",
                      height: "16px",
                      width: `${barContainerWidth}px`,
                    }}
                  >
                    {/* Variance band (behind, wider, semi-transparent) */}
                    {entry.variance > 0 && (
                      <div
                        style={{
                          position: "absolute",
                          top: "1px",
                          left: 0,
                          height: "14px",
                          width: `${Math.max(varianceBandWidth, 2)}px`,
                          background: isPositive
                            ? "var(--green)"
                            : "var(--red)",
                          opacity: 0.15,
                          borderRadius: "2px",
                        }}
                      />
                    )}
                    {/* Effect bar (foreground, solid) */}
                    <div
                      style={{
                        position: "absolute",
                        top: 0,
                        left: 0,
                        height: "16px",
                        width: `${Math.max(effectWidth, 2)}px`,
                        background: isPositive
                          ? "var(--green)"
                          : "var(--red)",
                        borderRadius: "2px",
                        opacity: 0.8,
                        ...(isLowN ? { borderStyle: "dashed", borderWidth: "1px", borderColor: isPositive ? "var(--green)" : "var(--red)" } : {}),
                      }}
                    />
                    {/* CI whisker */}
                    {ci && !isLowN && ciMaxPx > 0 && (
                      <>
                        {/* Whisker line */}
                        <div
                          style={{
                            position: "absolute",
                            top: "7px",
                            left: `${ciMinPx}px`,
                            width: `${Math.max(ciMaxPx - ciMinPx, 1)}px`,
                            height: "2px",
                            background: "var(--text-muted)",
                          }}
                        />
                        {/* Left cap */}
                        <div
                          style={{
                            position: "absolute",
                            top: "4px",
                            left: `${ciMinPx}px`,
                            width: "1px",
                            height: "8px",
                            background: "var(--text-muted)",
                          }}
                        />
                        {/* Right cap */}
                        <div
                          style={{
                            position: "absolute",
                            top: "4px",
                            left: `${ciMaxPx}px`,
                            width: "1px",
                            height: "8px",
                            background: "var(--text-muted)",
                          }}
                        />
                      </>
                    )}
                  </div>
                  <div
                    style={{
                      fontSize: "0.7rem",
                      fontFamily: "var(--font-mono)",
                      color: isPositive ? "var(--green)" : "var(--red)",
                      whiteSpace: "nowrap",
                    }}
                  >
                    {entry.effect >= 0 ? "+" : ""}
                    {(entry.effect * 100).toFixed(1)}%
                    {entry.variance > 0 && (
                      <span
                        style={{
                          color: "var(--text-muted)",
                          marginLeft: "4px",
                        }}
                      >
                        ±{(entry.variance * 100).toFixed(1)}%
                      </span>
                    )}
                    {ci && !isLowN && (
                      <span
                        style={{
                          color: crossesZero ? "var(--yellow)" : "var(--text-muted)",
                          marginLeft: "4px",
                          fontSize: "0.6rem",
                        }}
                      >
                        CI [{(ci.ciLower * 100).toFixed(1)}, {(ci.ciUpper * 100).toFixed(1)}]
                        {crossesZero ? " n.s." : ""}
                      </span>
                    )}
                  </div>
                  <div
                    style={{
                      fontSize: "0.65rem",
                      fontFamily: "var(--font-mono)",
                      color: isLowN ? "var(--yellow)" : "var(--text-muted)",
                      whiteSpace: "nowrap",
                      fontWeight: isLowN ? 600 : 400,
                    }}
                  >
                    n={entry.n} cell{entry.n !== 1 ? "s" : ""}
                  </div>
                </div>
              );
            })}
          </div>

          {/* Spread */}
          <div
            style={{
              width: "60px",
              textAlign: "right",
              fontSize: "0.75rem",
              fontFamily: "var(--font-mono)",
              color: "var(--accent)",
              flexShrink: 0,
            }}
          >
            {(effect.spread * 100).toFixed(1)}%
          </div>
        </div>
      ))}
    </div>
  );
}