commit c641e50fbc95253d2debbe8c25dc5e8357e58dc3
parent d240203118b1d2332118fdcb2cfd94594a523da2
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Sun, 22 Mar 2026 21:49:48 +0100
Add category correlation heatmap to findings view
Pre-compute 14x14 Pearson correlation matrix between category-level
pass rates. Rendered as interactive SVG heatmap with hover tooltips.
Key findings surfaced:
- contamination <-> data_leakage r=0.87 (same decision)
- artifacts <-> stat_methodology r=0.05 (essentially uncorrelated)
- human_studies <-> artifacts r=-0.24 (two cultures)
- Three independent rigor clusters: transparency, statistics, contamination
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
4 files changed, 119 insertions(+), 2 deletions(-)
diff --git a/explorer/src/data.ts b/explorer/src/data.ts
@@ -92,6 +92,10 @@ export interface Findings {
funding_gap: Record<string, GroupStat>;
repro_detail: Record<string, QuestionRate | number> & { full_pass_count: number; full_pass_pct: number };
game_pcts: Record<string, number>;
+ correlation: {
+ categories: string[];
+ matrix: { r: number | null; n: number }[][];
+ };
}
export interface TensionClaim {
diff --git a/explorer/src/views/findings.ts b/explorer/src/views/findings.ts
@@ -35,6 +35,7 @@ export async function renderFindings(app: HTMLElement) {
app.innerHTML = `
${renderQuestionRates(f)}
+ ${renderCorrelationHeatmap(f)}
${renderYearCategoryTrends(f)}
${renderVenueCitation(f)}
${renderOptimismRigor(f)}
@@ -79,6 +80,72 @@ function renderQuestionRates(f: Findings): string {
</div>`;
}
+function renderCorrelationHeatmap(f: Findings): string { // Builds the "Category Correlation Matrix" section as an HTML string: an n-by-n SVG grid (one cell per category pair) plus its caption text.
+ const { categories, matrix } = f.correlation; // matrix[i][j] is read for every i, j < categories.length; assumes the matrix is at least that large — TODO confirm against the data build
+ const n = categories.length;
+ const cell = 38; // grid pitch in SVG user units; each rect is drawn (cell - 1) wide and tall, leaving a 1-unit gap
+ const labelW = 140; // space reserved for labels; used as both the left and the top offset of the grid
+ const w = labelW + n * cell + 10; // viewBox width: label gutter + grid + 10 units of padding
+ const h = labelW + n * cell + 10; // viewBox height, same formula as the width
+ // Maps a correlation to a cell fill: null -> border color, r >= 0 -> green, r < 0 -> red. Alpha ranges from 0.05 up to 0.85.
+ function corrColor(r: number | null): string {
+ if (r === null) return 'var(--border)';
+ if (r >= 0) {
+ // Green intensity: reaches full strength at r >= 0.7
+ const a = Math.min(r / 0.7, 1);
+ return `rgba(61, 214, 140, ${(a * 0.8 + 0.05).toFixed(2)})`;
+ } else {
+ // Red intensity: reaches full strength at |r| >= 0.3, so negatives saturate sooner than positives
+ const a = Math.min(Math.abs(r) / 0.3, 1);
+ return `rgba(240, 101, 101, ${(a * 0.8 + 0.05).toFixed(2)})`;
+ }
+ }
+ // One <rect> per (row i, column j); each carries a <title> with both category names, r to three decimals (or N/A) and n.
+ let cells = '';
+ for (let i = 0; i < n; i++) {
+ for (let j = 0; j < n; j++) {
+ const d = matrix[i][j];
+ const x = labelW + j * cell;
+ const y = labelW + i * cell;
+ const fill = i === j ? 'var(--border)' : corrColor(d.r); // the diagonal always uses the neutral border color, whatever d.r is
+ const rText = d.r !== null ? d.r.toFixed(2) : '';
+ const textColor = d.r !== null && Math.abs(d.r) > 0.35 ? '#fff' : 'var(--text-dim)'; // white text only when |r| > 0.35, dim text otherwise
+ cells += `<rect x="${x}" y="${y}" width="${cell - 1}" height="${cell - 1}" fill="${fill}" rx="2">
+ <title>${formatName(categories[i])} \u2194 ${formatName(categories[j])}\nr=${d.r !== null ? d.r.toFixed(3) : 'N/A'} (n=${d.n})</title>
+ </rect>`;
+ if (i !== j && d.r !== null) { // the numeric label is skipped on the diagonal and for null correlations
+ cells += `<text x="${x + cell / 2}" y="${y + cell / 2 + 4}" text-anchor="middle" fill="${textColor}" font-size="9">${rText}</text>`;
+ }
+ }
+ }
+
+ // Row labels (left): end-anchored just left of the grid, at each row's mid-height plus a 4-unit baseline offset
+ let labels = '';
+ for (let i = 0; i < n; i++) {
+ labels += `<text x="${labelW - 4}" y="${labelW + i * cell + cell / 2 + 4}" text-anchor="end" font-size="10" fill="var(--text)">${formatName(categories[i])}</text>`;
+ }
+ // Column labels (top, rotated): drawn at the origin, then translated above column j and rotated -55 degrees
+ for (let j = 0; j < n; j++) {
+ labels += `<text x="0" y="0" text-anchor="end" font-size="10" fill="var(--text)" transform="translate(${labelW + j * cell + cell / 2 + 4}, ${labelW - 4}) rotate(-55)">${formatName(categories[j])}</text>`;
+ }
+ // The intro sentence takes its paper count from matrix[0][0].n (0 when that entry is absent). NOTE(review): the "Key patterns" text below is hard-coded, not derived from matrix — confirm it still matches the data.
+ return `<div class="section">
+ <h2>Category Correlation Matrix</h2>
+ <p style="font-size:0.85rem;color:var(--text-dim);margin-bottom:1rem">Pearson correlation between category-level pass rates across ${matrix[0]?.[0]?.n || 0}+ papers. <span style="color:var(--green)">Green = positive</span>, <span style="color:var(--red)">red = negative</span>. Hover cells for details.</p>
+ <div style="overflow-x:auto">
+ <svg viewBox="0 0 ${w} ${h}" style="width:100%;max-width:${w}px;min-width:500px">
+ ${labels}${cells}
+ </svg>
+ </div>
+ <div style="font-size:0.82rem;color:var(--text-dim);margin-top:0.75rem">
+ <strong>Key patterns:</strong>
+ Contamination \u2194 data leakage (r=0.87) are effectively the same decision.
+ Artifacts \u2194 statistical methodology (r=0.05) are completely independent — releasing code says nothing about statistical rigor.
+ Human studies \u2194 artifacts (r=\u22120.24) is the strongest negative — two research traditions that don't speak to each other.
+ </div>
+ </div>`;
+}
+
function renderYearCategoryTrends(f: Findings): string {
const years = Object.keys(f.year_category_trends).sort();
const defaultCats = ['contamination', 'data_leakage', 'statistical_methodology', 'experimental_rigor'];
diff --git a/explorer/tests/explorer.spec.ts b/explorer/tests/explorer.spec.ts
@@ -206,8 +206,8 @@ test.describe('Findings', () => {
test('loads and shows all sections', async ({ page }) => {
await page.goto('/#/findings');
await expect(page.locator('.section').first()).toBeVisible({ timeout: 10000 });
- // Should have 10 sections
- expect(await page.locator('.section').count()).toBe(10);
+ // Should have 11 sections
+ expect(await page.locator('.section').count()).toBe(11);
});
test('shows per-question pass rates', async ({ page }) => {
diff --git a/scripts/build-explorer-data.py b/scripts/build-explorer-data.py
@@ -554,6 +554,51 @@ def build():
repro_detail["full_pass_count"] = repro_count
repro_detail["full_pass_pct"] = round(repro_count / total_papers * 100, 1) if total_papers else 0
+ # 10. Category correlation matrix
+ # Collect per-paper category score vectors
+ paper_cat_vectors = []
+ for p in papers_full:
+ cs = p["category_scores"]
+ # Convert percentage back to 0-1 for correlation
+ vec = {cat: cs[cat] / 100.0 for cat in ALL_CATEGORIES if cat in cs}
+ if len(vec) >= 5:
+ paper_cat_vectors.append(vec)
+
+ def pearson(xs, ys):  # Pearson correlation of two paired samples; returns None when it cannot be computed
+ n = len(xs)  # ys is assumed to have the same length (the caller appends to both lists together)
+ if n < 10:  # fewer than 10 pairs is treated as too little data
+ return None
+ mx = sum(xs) / n
+ my = sum(ys) / n
+ num = sum((x - mx) * (y - my) for x, y in zip(xs, ys))  # sum of products of deviations from the two means
+ dx = sum((x - mx) ** 2 for x in xs) ** 0.5  # square root of the summed squared deviations of xs
+ dy = sum((y - my) ** 2 for y in ys) ** 0.5  # square root of the summed squared deviations of ys
+ if dx == 0 or dy == 0:  # a constant series has no spread; return None instead of dividing by zero
+ return None
+ return num / (dx * dy)
+
+ # Only include categories with enough data
+ corr_cats = [c for c in ALL_CATEGORIES
+ if sum(1 for v in paper_cat_vectors if c in v) >= 30]
+
+ corr_matrix = []
+ for c1 in corr_cats:
+ row = []
+ for c2 in corr_cats:
+ xs, ys = [], []
+ for v in paper_cat_vectors:
+ if c1 in v and c2 in v:
+ xs.append(v[c1])
+ ys.append(v[c2])
+ r = pearson(xs, ys)
+ row.append({"r": round(r, 3) if r is not None else None, "n": len(xs)})
+ corr_matrix.append(row)
+
+ correlation = {
+ "categories": corr_cats,
+ "matrix": corr_matrix,
+ }
+
findings = {
"question_rates": q_rates,
"year_category_trends": year_cat_trends,
@@ -566,6 +611,7 @@ def build():
"funding_gap": funding_gap,
"repro_detail": repro_detail,
"game_pcts": game_pcts,
+ "correlation": correlation,
}
# --- Citation network ---