ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

commit 0bf67124d60b5a1d8c6d27deb7def340c1f0c0f0
parent c641e50fbc95253d2debbe8c25dc5e8357e58dc3
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Sun, 22 Mar 2026 21:54:26 +0100

Add PCA scatter plot — paper methodology map

Project 708 papers from 9 category scores to 2D via PCA (52.8%
variance explained). Papers colored by archetype, hover for details,
click to navigate.

PC1 = overall rigor, with more rigorous papers at the low (left) end (limitations, data_integrity, claims dominate)
PC2 = practical detail vs reflection (cost, setup vs limitations)

Archetypes separate clearly: Complete clusters left (rigorous),
Minimal right (weak), Theater and Mixed overlap in the middle.
Hand-rolled PCA in build script (power iteration, no numpy needed).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M explorer/src/data.ts | 8 ++++++++
M explorer/src/style.css | 9 +++++++++
M explorer/src/views/findings.ts | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M explorer/tests/explorer.spec.ts | 4 ++--
M scripts/build-explorer-data.py | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 276 insertions(+), 2 deletions(-)

diff --git a/explorer/src/data.ts b/explorer/src/data.ts @@ -96,6 +96,14 @@ export interface Findings { categories: string[]; matrix: { r: number | null; n: number }[][]; }; + pca: { + points: { id: string; x: number; y: number; archetype: string; score: number }[]; + categories: string[]; + pc1_loadings: number[]; + pc2_loadings: number[]; + pc1_variance_pct: number; + pc2_variance_pct: number; + }; } export interface TensionClaim { diff --git a/explorer/src/style.css b/explorer/src/style.css @@ -508,3 +508,12 @@ td.score { } .toggle-btn:hover { border-color: var(--text-dim); } .toggle-btn.active { background: rgba(108, 140, 255, 0.08); } + +/* PCA scatter canvas */ +#pca-canvas { + width: 100%; + max-width: 800px; + border-radius: 8px; + border: 1px solid var(--border); + cursor: default; +} diff --git a/explorer/src/views/findings.ts b/explorer/src/views/findings.ts @@ -1,4 +1,5 @@ import { loadFindings, type Findings, type QuestionRate } from '../data'; +import { navigate } from '../router'; import { renderBarChart } from '../components/bar-chart'; import { renderMultiLineChart } from '../components/multi-line-chart'; @@ -36,6 +37,7 @@ export async function renderFindings(app: HTMLElement) { app.innerHTML = ` ${renderQuestionRates(f)} ${renderCorrelationHeatmap(f)} + ${renderPcaScatter(f)} ${renderYearCategoryTrends(f)} ${renderVenueCitation(f)} ${renderOptimismRigor(f)} @@ -49,6 +51,8 @@ export async function renderFindings(app: HTMLElement) { // Attach toggle listeners for year-category chart attachCategoryToggles(f); + // Attach PCA scatter interactivity + attachPcaScatter(f); } function renderQuestionRates(f: Findings): string { @@ -146,6 +150,166 @@ function renderCorrelationHeatmap(f: Findings): string { </div>`; } +const ARCH_COLORS: Record<string, string> = { + Complete: '#3dd68c', + Builder: '#6c8cff', + Theater: '#f0c050', + Mixed: '#8b8fa3', + Minimal: '#f06565', +}; + +function renderPcaScatter(f: Findings): string { + const { pca } = f; + // Build 
loading descriptions + const pc1Top = pca.categories + .map((c, i) => ({ cat: c, v: pca.pc1_loadings[i] })) + .sort((a, b) => Math.abs(b.v) - Math.abs(a.v)) + .slice(0, 3) + .map(d => `${formatName(d.cat)} (${d.v > 0 ? '+' : ''}${d.v.toFixed(2)})`) + .join(', '); + const pc2Top = pca.categories + .map((c, i) => ({ cat: c, v: pca.pc2_loadings[i] })) + .sort((a, b) => Math.abs(b.v) - Math.abs(a.v)) + .slice(0, 3) + .map(d => `${formatName(d.cat)} (${d.v > 0 ? '+' : ''}${d.v.toFixed(2)})`) + .join(', '); + + const legend = Object.entries(ARCH_COLORS) + .map(([name, color]) => `<span class="chart-legend-item"><span class="chart-legend-swatch" style="background:${color};height:8px;width:8px;border-radius:50%"></span>${name}</span>`) + .join(''); + + return `<div class="section"> + <h2>Paper Methodology Map (PCA)</h2> + <p style="font-size:0.85rem;color:var(--text-dim);margin-bottom:0.5rem">${pca.points.length} papers projected from ${pca.categories.length} category scores to 2D. Colors = archetype. 
Hover for title, click to view.</p> + <p style="font-size:0.8rem;color:var(--text-dim);margin-bottom:0.25rem"><strong>X-axis</strong> (${pca.pc1_variance_pct}% variance): ${pc1Top}</p> + <p style="font-size:0.8rem;color:var(--text-dim);margin-bottom:0.75rem"><strong>Y-axis</strong> (${pca.pc2_variance_pct}% variance): ${pc2Top}</p> + <canvas id="pca-canvas" width="800" height="500"></canvas> + <div class="chart-legend" style="margin-top:0.5rem">${legend}</div> + <div class="network-tooltip" id="pca-tooltip" style="display:none"></div> + </div>`; +} + +function attachPcaScatter(f: Findings) { + const canvas = document.getElementById('pca-canvas') as HTMLCanvasElement | null; + const tooltip = document.getElementById('pca-tooltip') as HTMLElement | null; + if (!canvas || !tooltip) return; + + const ctx = canvas.getContext('2d')!; + const { points } = f.pca; + const w = canvas.width, h = canvas.height; + const pad = { l: 50, r: 20, t: 20, b: 40 }; + + // Compute bounds + const xs = points.map(p => p.x); + const ys = points.map(p => p.y); + const xMin = Math.min(...xs), xMax = Math.max(...xs); + const yMin = Math.min(...ys), yMax = Math.max(...ys); + const xRange = xMax - xMin || 1; + const yRange = yMax - yMin || 1; + // Add 5% padding + const xPad = xRange * 0.05, yPad = yRange * 0.05; + + function toCanvas(px: number, py: number): [number, number] { + const cx = pad.l + ((px - xMin + xPad) / (xRange + 2 * xPad)) * (w - pad.l - pad.r); + const cy = pad.t + (1 - (py - yMin + yPad) / (yRange + 2 * yPad)) * (h - pad.t - pad.b); + return [cx, cy]; + } + + function getStyle(prop: string): string { + return getComputedStyle(document.documentElement).getPropertyValue(prop).trim(); + } + + function draw() { + const bgColor = getStyle('--surface'); + const borderColor = getStyle('--border'); + const textColor = getStyle('--text-dim'); + + ctx.fillStyle = bgColor; + ctx.fillRect(0, 0, w, h); + + // Grid lines + ctx.strokeStyle = borderColor; + ctx.lineWidth = 0.5; + const 
[zeroX, zeroY] = toCanvas(0, 0); + ctx.setLineDash([4, 4]); + ctx.beginPath(); ctx.moveTo(zeroX, pad.t); ctx.lineTo(zeroX, h - pad.b); ctx.stroke(); + ctx.beginPath(); ctx.moveTo(pad.l, zeroY); ctx.lineTo(w - pad.r, zeroY); ctx.stroke(); + ctx.setLineDash([]); + + // Axis labels + ctx.fillStyle = textColor; + ctx.font = '11px sans-serif'; + ctx.textAlign = 'center'; + ctx.fillText('\u2190 Higher rigor', pad.l + 60, h - 8); + ctx.fillText('Lower rigor \u2192', w - pad.r - 60, h - 8); + ctx.save(); + ctx.translate(14, h / 2); + ctx.rotate(-Math.PI / 2); + ctx.fillText('Practical detail \u2191', 0, 0); + ctx.restore(); + + // Points + for (const p of points) { + const [cx, cy] = toCanvas(p.x, p.y); + const color = ARCH_COLORS[p.archetype] || '#888'; + ctx.beginPath(); + ctx.arc(cx, cy, 4, 0, Math.PI * 2); + ctx.fillStyle = color; + ctx.globalAlpha = 0.7; + ctx.fill(); + ctx.globalAlpha = 1; + ctx.strokeStyle = 'rgba(0,0,0,0.2)'; + ctx.lineWidth = 0.5; + ctx.stroke(); + } + } + + draw(); + + // Mouse interaction + function canvasCoords(e: MouseEvent): [number, number] { + const rect = canvas!.getBoundingClientRect(); + return [ + (e.clientX - rect.left) * (w / rect.width), + (e.clientY - rect.top) * (h / rect.height), + ]; + } + + canvas.addEventListener('mousemove', e => { + const [mx, my] = canvasCoords(e); + let closest: typeof points[0] | null = null; + let closestDist = 20; + for (const p of points) { + const [cx, cy] = toCanvas(p.x, p.y); + const d = Math.sqrt((cx - mx) ** 2 + (cy - my) ** 2); + if (d < closestDist) { closest = p; closestDist = d; } + } + if (closest) { + canvas!.style.cursor = 'pointer'; + tooltip!.style.display = 'block'; + tooltip!.style.left = e.clientX + 14 + 'px'; + tooltip!.style.top = e.clientY + 14 + 'px'; + tooltip!.innerHTML = `<strong>${closest.id}</strong><br>Score: ${closest.score}%<br>Type: ${closest.archetype}`; + } else { + canvas!.style.cursor = 'default'; + tooltip!.style.display = 'none'; + } + }); + + 
canvas.addEventListener('mouseleave', () => { tooltip!.style.display = 'none'; }); + + canvas.addEventListener('click', e => { + const [mx, my] = canvasCoords(e); + for (const p of points) { + const [cx, cy] = toCanvas(p.x, p.y); + if (Math.sqrt((cx - mx) ** 2 + (cy - my) ** 2) < 20) { + navigate(`/paper/${p.id}`); + return; + } + } + }); +} + function renderYearCategoryTrends(f: Findings): string { const years = Object.keys(f.year_category_trends).sort(); const defaultCats = ['contamination', 'data_leakage', 'statistical_methodology', 'experimental_rigor']; diff --git a/explorer/tests/explorer.spec.ts b/explorer/tests/explorer.spec.ts @@ -206,8 +206,8 @@ test.describe('Findings', () => { test('loads and shows all sections', async ({ page }) => { await page.goto('/#/findings'); await expect(page.locator('.section').first()).toBeVisible({ timeout: 10000 }); - // Should have 11 sections - expect(await page.locator('.section').count()).toBe(11); + // Should have 12 sections + expect(await page.locator('.section').count()).toBe(12); }); test('shows per-question pass rates', async ({ page }) => { diff --git a/scripts/build-explorer-data.py b/scripts/build-explorer-data.py @@ -599,6 +599,98 @@ def build(): "matrix": corr_matrix, } + # 11. 
PCA scatter — project papers to 2D from category scores + PCA_CATS = [ + "artifacts", "statistical_methodology", "evaluation_design", + "claims_and_evidence", "setup_transparency", "limitations_and_scope", + "data_integrity", "conflicts_of_interest", "cost_and_practicality", + ] + + # Collect vectors, impute missing with median + pca_raw = [] + for p in papers_full: + cs = p["category_scores"] + vec = {cat: cs[cat] / 100.0 for cat in PCA_CATS if cat in cs} + non_none = len(vec) + if non_none >= 6: + pca_raw.append({"id": p["id"], "scores": vec, "archetype": p["archetype"], "title": p["title"], "score": p["score"]}) + + pca_medians = {} + for cat in PCA_CATS: + vals = sorted(r["scores"].get(cat, None) for r in pca_raw if r["scores"].get(cat) is not None) + pca_medians[cat] = vals[len(vals) // 2] if vals else 0.5 + + pca_vecs = [] + for r in pca_raw: + vec = [r["scores"].get(cat, pca_medians[cat]) for cat in PCA_CATS] + pca_vecs.append(vec) + + pca_n = len(pca_vecs) + pca_d = len(PCA_CATS) + + # Center + pca_means = [sum(v[j] for v in pca_vecs) / pca_n for j in range(pca_d)] + pca_centered = [[v[j] - pca_means[j] for j in range(pca_d)] for v in pca_vecs] + + # Covariance + pca_cov = [[0.0] * pca_d for _ in range(pca_d)] + for i in range(pca_d): + for j in range(pca_d): + pca_cov[i][j] = sum(row[i] * row[j] for row in pca_centered) / (pca_n - 1) + + # Power iteration for top 2 eigenvectors + import random as _rng + _rng.seed(42) + + def _power_iter(mat, num_iter=300, deflate=None): + dd = len(mat) + v = [_rng.gauss(0, 1) for _ in range(dd)] + if deflate: + for dv in deflate: + dot = sum(v[i] * dv[i] for i in range(dd)) + v = [v[i] - dot * dv[i] for i in range(dd)] + norm = sum(x * x for x in v) ** 0.5 + v = [x / norm for x in v] + for _ in range(num_iter): + nv = [sum(mat[i][j] * v[j] for j in range(dd)) for i in range(dd)] + if deflate: + for dv in deflate: + dot = sum(nv[i] * dv[i] for i in range(dd)) + nv = [nv[i] - dot * dv[i] for i in range(dd)] + norm = sum(x * 
x for x in nv) ** 0.5 + if norm == 0: + break + v = [x / norm for x in nv] + ev = sum(sum(mat[i][j] * v[j] for j in range(dd)) * v[i] for i in range(dd)) + return v, ev + + pc1_vec, ev1 = _power_iter(pca_cov) + pc2_vec, ev2 = _power_iter(pca_cov, deflate=[pc1_vec]) + total_var = sum(pca_cov[i][i] for i in range(pca_d)) + + # Project papers + pca_points = [] + for i, r in enumerate(pca_raw): + row = pca_centered[i] + x = sum(row[j] * pc1_vec[j] for j in range(pca_d)) + y = sum(row[j] * pc2_vec[j] for j in range(pca_d)) + pca_points.append({ + "id": r["id"], + "x": round(x, 4), + "y": round(y, 4), + "archetype": r["archetype"], + "score": r["score"], + }) + + pca_result = { + "points": pca_points, + "categories": PCA_CATS, + "pc1_loadings": [round(v, 3) for v in pc1_vec], + "pc2_loadings": [round(v, 3) for v in pc2_vec], + "pc1_variance_pct": round(ev1 / total_var * 100, 1), + "pc2_variance_pct": round(ev2 / total_var * 100, 1), + } + findings = { "question_rates": q_rates, "year_category_trends": year_cat_trends, @@ -612,6 +704,7 @@ def build(): "repro_detail": repro_detail, "game_pcts": game_pcts, "correlation": correlation, + "pca": pca_result, } # --- Citation network ---

Impressum · Datenschutz