CorrelationMatrix.tsx (9500B)
import type { Run } from "../lib/types";
import { groupIntoCells, type Cell } from "../lib/analysis";

interface CorrelationMatrixProps {
  runs: Run[];
}

/** Configuration variables (matrix rows). `key` is looked up on `cell.meta`. */
const CONFIG_AXES = [
  { key: "model", label: "Model" },
  { key: "effort", label: "Effort" },
  { key: "prompt_style", label: "Prompt Style" },
  { key: "language", label: "Language" },
  { key: "tool_read", label: "Read Tool" },
  { key: "tool_write", label: "Write Tool" },
  { key: "tool_edit", label: "Edit Tool" },
  { key: "tool_glob", label: "Glob Tool" },
  { key: "tool_grep", label: "Grep Tool" },
  { key: "linter", label: "Linter" },
  { key: "playwright", label: "Playwright" },
  { key: "context_file", label: "Context File" },
  { key: "web_search", label: "Web Search" },
  { key: "max_budget", label: "Budget" },
  { key: "tests_provided", label: "Tests Provided" },
  { key: "strategy", label: "Strategy" },
  { key: "design_guidance", label: "Design Guidance" },
  { key: "architecture", label: "Architecture" },
  { key: "error_checking", label: "Error Checking" },
  { key: "context_noise", label: "Context Noise" },
  { key: "renderer", label: "Renderer" },
  { key: "provider", label: "Provider" },
] as const;

type MetricExtractor = (run: Run) => number | null;

/** One outcome column: how to read the value from a run and how to print a spread. */
interface OutcomeMetric {
  key: string;
  label: string;
  lowerIsBetter: boolean;
  extract: MetricExtractor;
  /** Renders a spread of this metric for display in a table cell. */
  format: (spread: number) => string;
}

// Score spreads are printed as percentages (spread * 100).
const formatScore = (spread: number): string => `${(spread * 100).toFixed(1)}%`;
const formatCost = (spread: number): string => `$${spread.toFixed(2)}`;
const formatTime = (spread: number): string => `${Math.round(spread)}s`;
const formatCount = (spread: number): string => spread.toFixed(1);

/**
 * Reads `eval_results[section].score` without relying on `any`.
 * Returns null when the section is absent or its score is not a number.
 */
function readSectionScore(run: Run, section: string): number | null {
  const results: unknown = run.eval_results;
  if (typeof results !== "object" || results === null) return null;
  const entry = (results as Record<string, unknown>)[section];
  if (typeof entry !== "object" || entry === null) return null;
  const score = (entry as Record<string, unknown>).score;
  return typeof score === "number" ? score : null;
}

const OUTCOME_METRICS: OutcomeMetric[] = [
  { key: "overall", label: "Overall", lowerIsBetter: false, extract: (r) => r.eval_results?.score ?? null, format: formatScore },
  { key: "gameplay", label: "Gameplay", lowerIsBetter: false, extract: (r) => readSectionScore(r, "gameplay_bot"), format: formatScore },
  { key: "code", label: "Code", lowerIsBetter: false, extract: (r) => readSectionScore(r, "code_analysis"), format: formatScore },
  { key: "structural", label: "Structural", lowerIsBetter: false, extract: (r) => r.eval_results?.structural?.score ?? null, format: formatScore },
  { key: "quality", label: "Quality", lowerIsBetter: false, extract: (r) => r.eval_results?.quality?.score ?? null, format: formatScore },
  { key: "transcript", label: "Transcript", lowerIsBetter: false, extract: (r) => readSectionScore(r, "transcript_analysis"), format: formatScore },
  { key: "cost", label: "Cost \u2193", lowerIsBetter: true, extract: (r) => r.claude_output?.total_cost_usd ?? null, format: formatCost },
  { key: "turns", label: "Turns \u2193", lowerIsBetter: true, extract: (r) => r.claude_output?.num_turns ?? null, format: formatCount },
  { key: "time", label: "Time \u2193", lowerIsBetter: true, extract: (r) => r.meta.wall_time_seconds ?? null, format: formatTime },
];

/** Arithmetic mean; callers guarantee a non-empty array. */
function mean(values: readonly number[]): number {
  return values.reduce((sum, v) => sum + v, 0) / values.length;
}

/**
 * Effect size of one configuration axis on one metric.
 *
 * Each cell's runs are averaged first, the cell averages are grouped by the
 * cell's value for `axisKey`, and the result is the difference between the
 * highest and lowest group mean.
 *
 * @param cells - Cells whose `runs` supply the metric values.
 * @param axisKey - Property of `cell.meta` to group by; a missing value is grouped as "unknown".
 * @param extract - Reads the metric from a run, or null when unavailable.
 * @returns The spread, or null when fewer than two axis values have data.
 */
function computeSpread(cells: Cell[], axisKey: string, extract: MetricExtractor): number | null {
  // A Map (not a plain object) so axis values such as "constructor" or
  // "__proto__" cannot collide with Object.prototype members.
  const groups = new Map<string, number[]>();

  for (const cell of cells) {
    const values: number[] = [];
    for (const run of cell.runs) {
      const v = extract(run);
      // Skip missing values and NaN/Infinity so one bad run cannot poison the average.
      if (typeof v === "number" && Number.isFinite(v)) values.push(v);
    }
    if (values.length === 0) continue;

    const groupKey = String((cell.meta as Record<string, unknown>)[axisKey] ?? "unknown");
    const bucket = groups.get(groupKey);
    if (bucket) {
      bucket.push(mean(values));
    } else {
      groups.set(groupKey, [mean(values)]);
    }
  }

  if (groups.size < 2) return null;

  const groupMeans = Array.from(groups.values(), mean);
  return Math.max(...groupMeans) - Math.min(...groupMeans);
}

// Style shared by every metric cell in the table body.
const METRIC_CELL_STYLE = {
  padding: "5px 8px",
  textAlign: "center",
  fontSize: "11px",
  fontFamily: "var(--font-mono)",
  borderBottom: "1px solid var(--border)",
} as const;

/**
 * Heat-map table of how strongly each configuration variable affects each
 * outcome metric.
 *
 * Rows are configuration axes and columns are outcome metrics. Because the
 * columns use different units (score fractions, dollars, turns, seconds),
 * every spread is normalised against the largest spread in its own column
 * before it is used for colour intensity or row ordering; otherwise the
 * column with the largest raw numbers would dominate both.
 */
export default function CorrelationMatrix({ runs }: CorrelationMatrixProps) {
  if (runs.length === 0) {
    return (
      <div
        className="card"
        style={{
          textAlign: "center",
          padding: "40px",
          color: "var(--text-muted)",
        }}
      >
        No data available for correlation analysis.
      </div>
    );
  }

  // Group runs into cells once, then compute spreads from cell averages.
  const cells = groupIntoCells(runs);

  // Raw spreads: one row per config axis, one entry per metric.
  const rawRows = CONFIG_AXES.map((axis) => ({
    key: axis.key,
    label: axis.label,
    spreads: OUTCOME_METRICS.map((metric) => computeSpread(cells, axis.key, metric.extract)),
  }));

  // Largest spread per metric column, used to put all columns on a 0..1 scale.
  const columnMax = OUTCOME_METRICS.map((_, col) =>
    rawRows.reduce((max, row) => {
      const spread = row.spreads[col] ?? null;
      return spread !== null && spread > max ? spread : max;
    }, 0)
  );

  const rows = rawRows.map((row) => {
    const entries = OUTCOME_METRICS.map((metric, col) => {
      const spread = row.spreads[col] ?? null;
      const max = columnMax[col] ?? 0;
      // Relative effect within this column; 0 when the column has no variation.
      const intensity = spread !== null && max > 0 ? Math.min(spread / max, 1) : 0;
      return { metric, spread, intensity };
    });
    const impact = entries.reduce((best, e) => Math.max(best, e.intensity), 0);
    return { key: row.key, label: row.label, entries, impact };
  });

  // Most impactful variable first; ties keep CONFIG_AXES order (sort is stable).
  rows.sort((a, b) => b.impact - a.impact);

  return (
    <div className="card" style={{ padding: "20px" }}>
      <h3 style={{ marginBottom: "4px" }}>Variable Impact Matrix</h3>
      <p
        style={{
          color: "var(--text-muted)",
          fontSize: "0.75rem",
          marginBottom: "16px",
        }}
      >
        Effect size (spread) of each configuration variable on each outcome.
        Sorted by maximum impact. Stronger color = larger effect.
      </p>

      <div style={{ overflowX: "auto" }}>
        <table
          style={{
            borderCollapse: "collapse",
            width: "auto",
            minWidth: "100%",
          }}
        >
          <thead>
            <tr>
              <th
                style={{
                  padding: "6px 12px",
                  fontSize: "11px",
                  textAlign: "right",
                  background: "var(--surface-2)",
                  borderBottom: "1px solid var(--border)",
                  borderRight: "1px solid var(--border)",
                  position: "sticky",
                  left: 0,
                  zIndex: 1,
                }}
              >
                Variable
              </th>
              {OUTCOME_METRICS.map((metric) => (
                <th
                  key={metric.key}
                  style={{
                    padding: "6px 8px",
                    fontSize: "11px",
                    textAlign: "center",
                    background: "var(--surface-2)",
                    borderBottom: "1px solid var(--border)",
                    fontFamily: "var(--font-mono)",
                    fontWeight: 500,
                    color: "var(--text-muted)",
                    textTransform: "uppercase",
                    letterSpacing: "0.5px",
                    whiteSpace: "nowrap",
                  }}
                >
                  {metric.label}
                </th>
              ))}
            </tr>
          </thead>
          <tbody>
            {rows.map((row) => (
              <tr key={row.key} style={{ background: "transparent" }}>
                <td
                  style={{
                    padding: "5px 12px",
                    fontSize: "11px",
                    fontFamily: "var(--font-mono)",
                    fontWeight: 500,
                    textAlign: "right",
                    whiteSpace: "nowrap",
                    borderBottom: "1px solid var(--border)",
                    borderRight: "1px solid var(--border)",
                    background: "var(--surface-1)",
                    position: "sticky",
                    left: 0,
                    zIndex: 1,
                  }}
                >
                  {row.label}
                </td>
                {row.entries.map(({ metric, spread, intensity }) => {
                  // No spread: fewer than two axis values had data for this metric.
                  if (spread === null) {
                    return (
                      <td
                        key={metric.key}
                        style={{ ...METRIC_CELL_STYLE, color: "var(--text-muted)" }}
                      >
                        --
                      </td>
                    );
                  }

                  // Map relative effect 0..1 onto alpha 0.05..0.75.
                  const opacity = intensity * 0.7 + 0.05;

                  return (
                    <td
                      key={metric.key}
                      style={{
                        ...METRIC_CELL_STYLE,
                        fontWeight: 600,
                        color: "var(--text)",
                        background: `rgba(136, 192, 208, ${opacity})`,
                      }}
                    >
                      {metric.format(spread)}
                    </td>
                  );
                })}
              </tr>
            ))}
          </tbody>
        </table>
      </div>
    </div>
  );
}