Add category correlation heatmap to findings view - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

commit c641e50fbc95253d2debbe8c25dc5e8357e58dc3
parent d240203118b1d2332118fdcb2cfd94594a523da2
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Sun, 22 Mar 2026 21:49:48 +0100

Add category correlation heatmap to findings view

Pre-compute a 14x14 Pearson correlation matrix between category-level
pass rates and render it as an interactive SVG heatmap with hover tooltips.

Key findings surfaced:
- contamination <-> data_leakage r=0.87 (same decision)
- artifacts <-> stat_methodology r=0.05 (essentially uncorrelated)
- human_studies <-> artifacts r=-0.24 (two cultures)
- Three independent rigor clusters: transparency, statistics, contamination

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M explorer/src/data.ts  | 4 ++++
M explorer/src/views/findings.ts  | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M explorer/tests/explorer.spec.ts  | 4 ++--
M scripts/build-explorer-data.py  | 46 ++++++++++++++++++++++++++++++++++++++++++++++

4 files changed, 119 insertions(+), 2 deletions(-)
diff --git a/explorer/src/data.ts b/explorer/src/data.ts
@@ -92,6 +92,10 @@ export interface Findings {
   funding_gap: Record<string, GroupStat>;
   repro_detail: Record<string, QuestionRate | number> & { full_pass_count: number; full_pass_pct: number };
   game_pcts: Record<string, number>;
+  correlation: {
+    categories: string[];
+    matrix: { r: number | null; n: number }[][];
+  };
 }
 
 export interface TensionClaim {
diff --git a/explorer/src/views/findings.ts b/explorer/src/views/findings.ts
@@ -35,6 +35,7 @@ export async function renderFindings(app: HTMLElement) {
 
   app.innerHTML = `
     ${renderQuestionRates(f)}
+    ${renderCorrelationHeatmap(f)}
     ${renderYearCategoryTrends(f)}
     ${renderVenueCitation(f)}
     ${renderOptimismRigor(f)}
@@ -79,6 +80,72 @@ function renderQuestionRates(f: Findings): string {
   </div>`;
 }
 
+function renderCorrelationHeatmap(f: Findings): string { // Builds the "Category Correlation Matrix" section (inline SVG heatmap plus captions) and returns it as an HTML string.
+  const { categories, matrix } = f.correlation;
+  const n = categories.length; // the grid drawn below is n x n, indexed by position in `categories`
+  const cell = 38; // cell pitch in SVG user units; each rect is drawn 1 unit smaller to leave a gap
+  const labelW = 140; // space reserved on the left and on top for the row/column labels
+  const w = labelW + n * cell + 10; // viewBox width: labels + grid + 10 units of padding
+  const h = labelW + n * cell + 10; // viewBox height, same formula as the width
+
+  function corrColor(r: number | null): string { // maps a coefficient to a CSS fill; null uses the border color
+    if (r === null) return 'var(--border)';
+    if (r >= 0) {
+      // Green intensity: alpha grows linearly with r and saturates once r >= 0.7 (alpha range 0.05..0.85)
+      const a = Math.min(r / 0.7, 1);
+      return `rgba(61, 214, 140, ${(a * 0.8 + 0.05).toFixed(2)})`;
+    } else {
+      // Red intensity: alpha saturates once |r| >= 0.3, i.e. a tighter scale than the positive branch
+      const a = Math.min(Math.abs(r) / 0.3, 1);
+      return `rgba(240, 101, 101, ${(a * 0.8 + 0.05).toFixed(2)})`;
+    }
+  }
+
+  let cells = ''; // accumulates <rect>/<text> markup; NOTE(review): formatName() output is interpolated below without escaping — confirm it is HTML-safe
+  for (let i = 0; i < n; i++) {
+    for (let j = 0; j < n; j++) {
+      const d = matrix[i][j]; // NOTE(review): assumes matrix has at least n rows and n columns; there is no bounds guard here
+      const x = labelW + j * cell; // column index drives the x position
+      const y = labelW + i * cell; // row index drives the y position
+      const fill = i === j ? 'var(--border)' : corrColor(d.r); // diagonal cells always get the neutral border color
+      const rText = d.r !== null ? d.r.toFixed(2) : ''; // two-decimal label; empty when the coefficient is null
+      const textColor = d.r !== null && Math.abs(d.r) > 0.35 ? '#fff' : 'var(--text-dim)'; // white label text once |r| exceeds 0.35, dim text otherwise
+      cells += `<rect x="${x}" y="${y}" width="${cell - 1}" height="${cell - 1}" fill="${fill}" rx="2">
+        <title>${formatName(categories[i])} \u2194 ${formatName(categories[j])}\nr=${d.r !== null ? d.r.toFixed(3) : 'N/A'} (n=${d.n})</title>
+      </rect>`; // the nested <title> carries both category names, r to three decimals (or N/A) and n
+      if (i !== j && d.r !== null) { // the numeric label is omitted on the diagonal and for null coefficients
+        cells += `<text x="${x + cell / 2}" y="${y + cell / 2 + 4}" text-anchor="middle" fill="${textColor}" font-size="9">${rText}</text>`;
+      }
+    }
+  }
+
+  // Row labels (left), right-aligned against the grid via text-anchor="end"
+  let labels = '';
+  for (let i = 0; i < n; i++) {
+    labels += `<text x="${labelW - 4}" y="${labelW + i * cell + cell / 2 + 4}" text-anchor="end" font-size="10" fill="var(--text)">${formatName(categories[i])}</text>`;
+  }
+  // Column labels (top, rotated -55 degrees about the point each label is translated to)
+  for (let j = 0; j < n; j++) {
+    labels += `<text x="0" y="0" text-anchor="end" font-size="10" fill="var(--text)" transform="translate(${labelW + j * cell + cell / 2 + 4}, ${labelW - 4}) rotate(-55)">${formatName(categories[j])}</text>`;
+  }
+  // The intro text reports matrix[0][0].n (falling back to 0) as the paper count; the "Key patterns" figures are hard-coded text, not read from the matrix.
+  return `<div class="section">
+    <h2>Category Correlation Matrix</h2>
+    <p style="font-size:0.85rem;color:var(--text-dim);margin-bottom:1rem">Pearson correlation between category-level pass rates across ${matrix[0]?.[0]?.n || 0}+ papers. <span style="color:var(--green)">Green = positive</span>, <span style="color:var(--red)">red = negative</span>. Hover cells for details.</p>
+    <div style="overflow-x:auto">
+      <svg viewBox="0 0 ${w} ${h}" style="width:100%;max-width:${w}px;min-width:500px">
+        ${labels}${cells}
+      </svg>
+    </div>
+    <div style="font-size:0.82rem;color:var(--text-dim);margin-top:0.75rem">
+      <strong>Key patterns:</strong>
+      Contamination \u2194 data leakage (r=0.87) are effectively the same decision.
+      Artifacts \u2194 statistical methodology (r=0.05) are completely independent — releasing code says nothing about statistical rigor.
+      Human studies \u2194 artifacts (r=\u22120.24) is the strongest negative — two research traditions that don't speak to each other.
+    </div>
+  </div>`;
+}
+
 function renderYearCategoryTrends(f: Findings): string {
   const years = Object.keys(f.year_category_trends).sort();
   const defaultCats = ['contamination', 'data_leakage', 'statistical_methodology', 'experimental_rigor'];
diff --git a/explorer/tests/explorer.spec.ts b/explorer/tests/explorer.spec.ts
@@ -206,8 +206,8 @@ test.describe('Findings', () => {
   test('loads and shows all sections', async ({ page }) => {
     await page.goto('/#/findings');
     await expect(page.locator('.section').first()).toBeVisible({ timeout: 10000 });
-    // Should have 10 sections
-    expect(await page.locator('.section').count()).toBe(10);
+    // Should have 11 sections
+    expect(await page.locator('.section').count()).toBe(11);
   });
 
   test('shows per-question pass rates', async ({ page }) => {
diff --git a/scripts/build-explorer-data.py b/scripts/build-explorer-data.py
@@ -554,6 +554,51 @@ def build():
     repro_detail["full_pass_count"] = repro_count
     repro_detail["full_pass_pct"] = round(repro_count / total_papers * 100, 1) if total_papers else 0
 
+    # 10. Category correlation matrix
+    # Collect per-paper category score vectors
+    paper_cat_vectors = []
+    for p in papers_full:
+        cs = p["category_scores"]
+        # Convert percentage back to 0-1 for correlation
+        vec = {cat: cs[cat] / 100.0 for cat in ALL_CATEGORIES if cat in cs}
+        if len(vec) >= 5:
+            paper_cat_vectors.append(vec)
+
+    def pearson(xs, ys):  # Pearson correlation coefficient of the paired values in xs and ys; returns None when it is not reported
+        n = len(xs)  # sample size is taken from xs; zip() below pairs the values position by position
+        if n < 10:  # fewer than 10 observations: no coefficient is reported
+            return None
+        mx = sum(xs) / n  # mean of xs
+        my = sum(ys) / n  # mean of ys (divides by len(xs); presumably callers always pass equal-length lists — verify)
+        num = sum((x - mx) * (y - my) for x, y in zip(xs, ys))  # sum of products of deviations from the means
+        dx = sum((x - mx) ** 2 for x in xs) ** 0.5  # square root of the summed squared deviations of xs
+        dy = sum((y - my) ** 2 for y in ys) ** 0.5  # same for ys
+        if dx == 0 or dy == 0:  # a series with zero spread would make the denominator zero
+            return None
+        return num / (dx * dy)
+
+    # Only include categories with enough data
+    corr_cats = [c for c in ALL_CATEGORIES
+                 if sum(1 for v in paper_cat_vectors if c in v) >= 30]
+
+    corr_matrix = []
+    for c1 in corr_cats:
+        row = []
+        for c2 in corr_cats:
+            xs, ys = [], []
+            for v in paper_cat_vectors:
+                if c1 in v and c2 in v:
+                    xs.append(v[c1])
+                    ys.append(v[c2])
+            r = pearson(xs, ys)
+            row.append({"r": round(r, 3) if r is not None else None, "n": len(xs)})
+        corr_matrix.append(row)
+
+    correlation = {
+        "categories": corr_cats,
+        "matrix": corr_matrix,
+    }
+
     findings = {
         "question_rates": q_rates,
         "year_category_trends": year_cat_trends,
@@ -566,6 +611,7 @@ def build():
         "funding_gap": funding_gap,
         "repro_detail": repro_detail,
         "game_pcts": game_pcts,
+        "correlation": correlation,
     }
 
     # --- Citation network ---

	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs

M	explorer/src/data.ts	\|	4	++++
M	explorer/src/views/findings.ts	\|	67	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	explorer/tests/explorer.spec.ts	\|	4	++--
M	scripts/build-explorer-data.py	\|	46	++++++++++++++++++++++++++++++++++++++++++++++