ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

commit c641e50fbc95253d2debbe8c25dc5e8357e58dc3
parent d240203118b1d2332118fdcb2cfd94594a523da2
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Sun, 22 Mar 2026 21:49:48 +0100

Add category correlation heatmap to findings view

Pre-compute 14x14 Pearson correlation matrix between category-level
pass rates. Rendered as interactive SVG heatmap with hover tooltips.

Key findings surfaced:
- contamination <-> data_leakage r=0.87 (same decision)
- artifacts <-> stat_methodology r=0.05 (completely independent)
- human_studies <-> artifacts r=-0.24 (two cultures)
- Three independent rigor clusters: transparency, statistics, contamination

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M explorer/src/data.ts | 4 ++++
M explorer/src/views/findings.ts | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M explorer/tests/explorer.spec.ts | 4 ++--
M scripts/build-explorer-data.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 119 insertions(+), 2 deletions(-)

diff --git a/explorer/src/data.ts b/explorer/src/data.ts @@ -92,6 +92,10 @@ export interface Findings { funding_gap: Record<string, GroupStat>; repro_detail: Record<string, QuestionRate | number> & { full_pass_count: number; full_pass_pct: number }; game_pcts: Record<string, number>; + correlation: { + categories: string[]; + matrix: { r: number | null; n: number }[][]; + }; } export interface TensionClaim { diff --git a/explorer/src/views/findings.ts b/explorer/src/views/findings.ts @@ -35,6 +35,7 @@ export async function renderFindings(app: HTMLElement) { app.innerHTML = ` ${renderQuestionRates(f)} + ${renderCorrelationHeatmap(f)} ${renderYearCategoryTrends(f)} ${renderVenueCitation(f)} ${renderOptimismRigor(f)} @@ -79,6 +80,72 @@ function renderQuestionRates(f: Findings): string { </div>`; } +function renderCorrelationHeatmap(f: Findings): string { + const { categories, matrix } = f.correlation; + const n = categories.length; + const cell = 38; + const labelW = 140; + const w = labelW + n * cell + 10; + const h = labelW + n * cell + 10; + + function corrColor(r: number | null): string { + if (r === null) return 'var(--border)'; + if (r >= 0) { + // Green intensity + const a = Math.min(r / 0.7, 1); + return `rgba(61, 214, 140, ${(a * 0.8 + 0.05).toFixed(2)})`; + } else { + // Red intensity + const a = Math.min(Math.abs(r) / 0.3, 1); + return `rgba(240, 101, 101, ${(a * 0.8 + 0.05).toFixed(2)})`; + } + } + + let cells = ''; + for (let i = 0; i < n; i++) { + for (let j = 0; j < n; j++) { + const d = matrix[i][j]; + const x = labelW + j * cell; + const y = labelW + i * cell; + const fill = i === j ? 'var(--border)' : corrColor(d.r); + const rText = d.r !== null ? d.r.toFixed(2) : ''; + const textColor = d.r !== null && Math.abs(d.r) > 0.35 ? '#fff' : 'var(--text-dim)'; + cells += `<rect x="${x}" y="${y}" width="${cell - 1}" height="${cell - 1}" fill="${fill}" rx="2"> + <title>${formatName(categories[i])} \u2194 ${formatName(categories[j])}\nr=${d.r !== null ? 
d.r.toFixed(3) : 'N/A'} (n=${d.n})</title> + </rect>`; + if (i !== j && d.r !== null) { + cells += `<text x="${x + cell / 2}" y="${y + cell / 2 + 4}" text-anchor="middle" fill="${textColor}" font-size="9">${rText}</text>`; + } + } + } + + // Row labels (left) + let labels = ''; + for (let i = 0; i < n; i++) { + labels += `<text x="${labelW - 4}" y="${labelW + i * cell + cell / 2 + 4}" text-anchor="end" font-size="10" fill="var(--text)">${formatName(categories[i])}</text>`; + } + // Column labels (top, rotated) + for (let j = 0; j < n; j++) { + labels += `<text x="0" y="0" text-anchor="end" font-size="10" fill="var(--text)" transform="translate(${labelW + j * cell + cell / 2 + 4}, ${labelW - 4}) rotate(-55)">${formatName(categories[j])}</text>`; + } + + return `<div class="section"> + <h2>Category Correlation Matrix</h2> + <p style="font-size:0.85rem;color:var(--text-dim);margin-bottom:1rem">Pearson correlation between category-level pass rates across ${matrix[0]?.[0]?.n || 0}+ papers. <span style="color:var(--green)">Green = positive</span>, <span style="color:var(--red)">red = negative</span>. Hover cells for details.</p> + <div style="overflow-x:auto"> + <svg viewBox="0 0 ${w} ${h}" style="width:100%;max-width:${w}px;min-width:500px"> + ${labels}${cells} + </svg> + </div> + <div style="font-size:0.82rem;color:var(--text-dim);margin-top:0.75rem"> + <strong>Key patterns:</strong> + Contamination \u2194 data leakage (r=0.87) are effectively the same decision. + Artifacts \u2194 statistical methodology (r=0.05) are completely independent — releasing code says nothing about statistical rigor. + Human studies \u2194 artifacts (r=\u22120.24) is the strongest negative — two research traditions that don't speak to each other. 
+ </div> + </div>`; +} + function renderYearCategoryTrends(f: Findings): string { const years = Object.keys(f.year_category_trends).sort(); const defaultCats = ['contamination', 'data_leakage', 'statistical_methodology', 'experimental_rigor']; diff --git a/explorer/tests/explorer.spec.ts b/explorer/tests/explorer.spec.ts @@ -206,8 +206,8 @@ test.describe('Findings', () => { test('loads and shows all sections', async ({ page }) => { await page.goto('/#/findings'); await expect(page.locator('.section').first()).toBeVisible({ timeout: 10000 }); - // Should have 10 sections - expect(await page.locator('.section').count()).toBe(10); + // Should have 11 sections + expect(await page.locator('.section').count()).toBe(11); }); test('shows per-question pass rates', async ({ page }) => { diff --git a/scripts/build-explorer-data.py b/scripts/build-explorer-data.py @@ -554,6 +554,51 @@ def build(): repro_detail["full_pass_count"] = repro_count repro_detail["full_pass_pct"] = round(repro_count / total_papers * 100, 1) if total_papers else 0 + # 10. 
Category correlation matrix + # Collect per-paper category score vectors + paper_cat_vectors = [] + for p in papers_full: + cs = p["category_scores"] + # Convert percentage back to 0-1 for correlation + vec = {cat: cs[cat] / 100.0 for cat in ALL_CATEGORIES if cat in cs} + if len(vec) >= 5: + paper_cat_vectors.append(vec) + + def pearson(xs, ys): + n = len(xs) + if n < 10: + return None + mx = sum(xs) / n + my = sum(ys) / n + num = sum((x - mx) * (y - my) for x, y in zip(xs, ys)) + dx = sum((x - mx) ** 2 for x in xs) ** 0.5 + dy = sum((y - my) ** 2 for y in ys) ** 0.5 + if dx == 0 or dy == 0: + return None + return num / (dx * dy) + + # Only include categories with enough data + corr_cats = [c for c in ALL_CATEGORIES + if sum(1 for v in paper_cat_vectors if c in v) >= 30] + + corr_matrix = [] + for c1 in corr_cats: + row = [] + for c2 in corr_cats: + xs, ys = [], [] + for v in paper_cat_vectors: + if c1 in v and c2 in v: + xs.append(v[c1]) + ys.append(v[c2]) + r = pearson(xs, ys) + row.append({"r": round(r, 3) if r is not None else None, "n": len(xs)}) + corr_matrix.append(row) + + correlation = { + "categories": corr_cats, + "matrix": corr_matrix, + } + findings = { "question_rates": q_rates, "year_category_trends": year_cat_trends, @@ -566,6 +611,7 @@ def build(): "funding_gap": funding_gap, "repro_detail": repro_detail, "game_pcts": game_pcts, + "correlation": correlation, } # --- Citation network ---

Impressum · Datenschutz