commit 0bf67124d60b5a1d8c6d27deb7def340c1f0c0f0
parent c641e50fbc95253d2debbe8c25dc5e8357e58dc3
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Sun, 22 Mar 2026 21:54:26 +0100
Add PCA scatter plot — paper methodology map
Project 708 papers from 9 category scores to 2D via PCA (52.8%
variance explained). Papers colored by archetype, hover for details,
click to navigate.
PC1 = overall rigor (limitations, data_integrity, claims dominate)
PC2 = practical detail vs reflection (cost, setup vs limitations)
Archetypes separate clearly: Complete clusters left (rigorous),
Minimal right (weak), Theater and Mixed overlap in the middle.
Hand-rolled PCA in build script (power iteration, no numpy needed).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
5 files changed, 276 insertions(+), 2 deletions(-)
diff --git a/explorer/src/data.ts b/explorer/src/data.ts
@@ -96,6 +96,14 @@ export interface Findings {
categories: string[];
matrix: { r: number | null; n: number }[][];
};
+ pca: {
+ points: { id: string; x: number; y: number; archetype: string; score: number }[];
+ categories: string[];
+ pc1_loadings: number[];
+ pc2_loadings: number[];
+ pc1_variance_pct: number;
+ pc2_variance_pct: number;
+ };
}
export interface TensionClaim {
diff --git a/explorer/src/style.css b/explorer/src/style.css
@@ -508,3 +508,12 @@ td.score {
}
.toggle-btn:hover { border-color: var(--text-dim); }
.toggle-btn.active { background: rgba(108, 140, 255, 0.08); }
+
+ /* PCA scatter canvas */
+ /* Displayed size is fluid (full container width, capped at 800px); the
+    canvas element's own width/height attributes set the drawing resolution.
+    The cursor starts as the default arrow; the findings view script switches
+    it to a pointer while the mouse is near a plotted point. */
+ #pca-canvas {
+ width: 100%;
+ max-width: 800px;
+ border-radius: 8px;
+ border: 1px solid var(--border);
+ cursor: default;
+ }
diff --git a/explorer/src/views/findings.ts b/explorer/src/views/findings.ts
@@ -1,4 +1,5 @@
import { loadFindings, type Findings, type QuestionRate } from '../data';
+import { navigate } from '../router';
import { renderBarChart } from '../components/bar-chart';
import { renderMultiLineChart } from '../components/multi-line-chart';
@@ -36,6 +37,7 @@ export async function renderFindings(app: HTMLElement) {
app.innerHTML = `
${renderQuestionRates(f)}
${renderCorrelationHeatmap(f)}
+ ${renderPcaScatter(f)}
${renderYearCategoryTrends(f)}
${renderVenueCitation(f)}
${renderOptimismRigor(f)}
@@ -49,6 +51,8 @@ export async function renderFindings(app: HTMLElement) {
// Attach toggle listeners for year-category chart
attachCategoryToggles(f);
+ // Attach PCA scatter interactivity
+ attachPcaScatter(f);
}
function renderQuestionRates(f: Findings): string {
@@ -146,6 +150,166 @@ function renderCorrelationHeatmap(f: Findings): string {
</div>`;
}
+ /**
+ * Fill color per paper archetype, used for both the PCA scatter points and
+ * the legend below the plot. The legend lists entries in this key order.
+ * Archetypes without an entry here are drawn with a neutral grey fallback
+ * by the scatter renderer.
+ */
+ const ARCH_COLORS: Record<string, string> = {
+ Complete: '#3dd68c',
+ Builder: '#6c8cff',
+ Theater: '#f0c050',
+ Mixed: '#8b8fa3',
+ Minimal: '#f06565',
+ };
+
+ /**
+  * Builds the static HTML for the "Paper Methodology Map (PCA)" section:
+  * intro text, a one-line summary of each axis, the `#pca-canvas` element,
+  * the archetype legend and the (initially hidden) `#pca-tooltip`.
+  *
+  * Only markup is produced here; drawing and mouse handling are attached
+  * separately once the markup is in the DOM.
+  *
+  * @param f Loaded findings; only `f.pca` is read.
+  * @returns HTML string for one `.section` block.
+  */
+ function renderPcaScatter(f: Findings): string {
+   const { pca } = f;
+
+   // Summarize a principal component by its three largest-magnitude loadings,
+   // e.g. "Limitations (+0.52), Data Integrity (+0.44), Claims (-0.31)".
+   // A missing loading (categories longer than the loadings array) is treated
+   // as 0 rather than throwing on `undefined.toFixed`.
+   const describeTopLoadings = (loadings: number[]): string =>
+     pca.categories
+       .map((cat, i) => ({ cat, v: loadings[i] ?? 0 }))
+       .sort((a, b) => Math.abs(b.v) - Math.abs(a.v))
+       .slice(0, 3)
+       .map(d => `${formatName(d.cat)} (${d.v > 0 ? '+' : ''}${d.v.toFixed(2)})`)
+       .join(', ');
+
+   const pc1Top = describeTopLoadings(pca.pc1_loadings);
+   const pc2Top = describeTopLoadings(pca.pc2_loadings);
+
+   // One round swatch per known archetype, in ARCH_COLORS key order.
+   const legend = Object.entries(ARCH_COLORS)
+     .map(([name, color]) => `<span class="chart-legend-item"><span class="chart-legend-swatch" style="background:${color};height:8px;width:8px;border-radius:50%"></span>${name}</span>`)
+     .join('');
+
+   // The tooltip shows id, score and archetype (points carry no title), so the
+   // hint says "details" rather than "title".
+   return `<div class="section">
+     <h2>Paper Methodology Map (PCA)</h2>
+     <p style="font-size:0.85rem;color:var(--text-dim);margin-bottom:0.5rem">${pca.points.length} papers projected from ${pca.categories.length} category scores to 2D. Colors = archetype. Hover for details, click to view.</p>
+     <p style="font-size:0.8rem;color:var(--text-dim);margin-bottom:0.25rem"><strong>X-axis</strong> (${pca.pc1_variance_pct}% variance): ${pc1Top}</p>
+     <p style="font-size:0.8rem;color:var(--text-dim);margin-bottom:0.75rem"><strong>Y-axis</strong> (${pca.pc2_variance_pct}% variance): ${pc2Top}</p>
+     <canvas id="pca-canvas" width="800" height="500"></canvas>
+     <div class="chart-legend" style="margin-top:0.5rem">${legend}</div>
+     <div class="network-tooltip" id="pca-tooltip" style="display:none"></div>
+   </div>`;
+ }
+
+ /**
+  * Draws the PCA scatter plot onto `#pca-canvas` and wires up its mouse
+  * interaction: hovering near a point shows `#pca-tooltip` with the paper's
+  * id, score and archetype; clicking navigates to that paper's page.
+  *
+  * Hover and click share one nearest-point lookup, so the paper named in the
+  * tooltip is always the paper a click opens.
+  *
+  * Does nothing if the canvas or tooltip element is missing, or if a 2D
+  * drawing context cannot be obtained.
+  *
+  * @param f Loaded findings; only `f.pca.points` is read.
+  */
+ function attachPcaScatter(f: Findings) {
+   const canvasEl = document.getElementById('pca-canvas');
+   const tooltipEl = document.getElementById('pca-tooltip');
+   if (!(canvasEl instanceof HTMLCanvasElement) || !tooltipEl) return;
+   const context = canvasEl.getContext('2d');
+   if (!context) return; // getContext returns null when no 2D context is available
+
+   // Re-bind as non-nullable consts so the nested functions below need no `!`.
+   const canvas: HTMLCanvasElement = canvasEl;
+   const tooltip: HTMLElement = tooltipEl;
+   const ctx: CanvasRenderingContext2D = context;
+
+   type PcaPoint = Findings['pca']['points'][number];
+
+   // Maximum distance (in canvas pixels) at which a point reacts to the mouse.
+   const HIT_RADIUS = 20;
+
+   const { points } = f.pca;
+   const w = canvas.width, h = canvas.height;
+   const pad = { l: 50, r: 20, t: 20, b: 40 };
+
+   // Compute data bounds; an empty point list falls back to a 0..0 box so the
+   // coordinate mapping stays finite.
+   let xMin = 0, xMax = 0, yMin = 0, yMax = 0;
+   if (points.length > 0) {
+     xMin = yMin = Infinity;
+     xMax = yMax = -Infinity;
+     for (const p of points) {
+       xMin = Math.min(xMin, p.x);
+       xMax = Math.max(xMax, p.x);
+       yMin = Math.min(yMin, p.y);
+       yMax = Math.max(yMax, p.y);
+     }
+   }
+   // `|| 1` avoids dividing by zero when all points share a coordinate.
+   const xRange = xMax - xMin || 1;
+   const yRange = yMax - yMin || 1;
+   // Add 5% padding on each side so extreme points are not drawn on the edge.
+   const xPad = xRange * 0.05, yPad = yRange * 0.05;
+
+   /** Maps data coordinates to canvas pixels; the y axis is flipped so larger values are higher. */
+   function toCanvas(px: number, py: number): [number, number] {
+     const cx = pad.l + ((px - xMin + xPad) / (xRange + 2 * xPad)) * (w - pad.l - pad.r);
+     const cy = pad.t + (1 - (py - yMin + yPad) / (yRange + 2 * yPad)) * (h - pad.t - pad.b);
+     return [cx, cy];
+   }
+
+   /** Reads a CSS custom property from the document root so the canvas follows the page theme. */
+   function getStyle(prop: string): string {
+     return getComputedStyle(document.documentElement).getPropertyValue(prop).trim();
+   }
+
+   /** Paints background, dashed zero axes, axis captions and all points. */
+   function draw() {
+     const bgColor = getStyle('--surface');
+     const borderColor = getStyle('--border');
+     const textColor = getStyle('--text-dim');
+
+     ctx.fillStyle = bgColor;
+     ctx.fillRect(0, 0, w, h);
+
+     // Dashed lines through the data origin (0, 0)
+     ctx.strokeStyle = borderColor;
+     ctx.lineWidth = 0.5;
+     const [zeroX, zeroY] = toCanvas(0, 0);
+     ctx.setLineDash([4, 4]);
+     ctx.beginPath(); ctx.moveTo(zeroX, pad.t); ctx.lineTo(zeroX, h - pad.b); ctx.stroke();
+     ctx.beginPath(); ctx.moveTo(pad.l, zeroY); ctx.lineTo(w - pad.r, zeroY); ctx.stroke();
+     ctx.setLineDash([]);
+
+     // Axis captions
+     ctx.fillStyle = textColor;
+     ctx.font = '11px sans-serif';
+     ctx.textAlign = 'center';
+     ctx.fillText('\u2190 Higher rigor', pad.l + 60, h - 8);
+     ctx.fillText('Lower rigor \u2192', w - pad.r - 60, h - 8);
+     ctx.save();
+     ctx.translate(14, h / 2);
+     ctx.rotate(-Math.PI / 2); // rotate so the y caption reads bottom-to-top
+     ctx.fillText('Practical detail \u2191', 0, 0);
+     ctx.restore();
+
+     // Points: semi-transparent fill so overlapping papers stay visible
+     for (const p of points) {
+       const [cx, cy] = toCanvas(p.x, p.y);
+       const color = ARCH_COLORS[p.archetype] || '#888';
+       ctx.beginPath();
+       ctx.arc(cx, cy, 4, 0, Math.PI * 2);
+       ctx.fillStyle = color;
+       ctx.globalAlpha = 0.7;
+       ctx.fill();
+       ctx.globalAlpha = 1;
+       ctx.strokeStyle = 'rgba(0,0,0,0.2)';
+       ctx.lineWidth = 0.5;
+       ctx.stroke();
+     }
+   }
+
+   draw();
+
+   /** Converts a mouse event to canvas pixel coordinates, undoing any CSS scaling of the canvas. */
+   function canvasCoords(e: MouseEvent): [number, number] {
+     const rect = canvas.getBoundingClientRect();
+     return [
+       (e.clientX - rect.left) * (w / rect.width),
+       (e.clientY - rect.top) * (h / rect.height),
+     ];
+   }
+
+   /** Returns the point nearest to (mx, my) that lies strictly within HIT_RADIUS, or null. */
+   function nearestPoint(mx: number, my: number): PcaPoint | null {
+     let best: PcaPoint | null = null;
+     let bestDist = HIT_RADIUS;
+     for (const p of points) {
+       const [cx, cy] = toCanvas(p.x, p.y);
+       const d = Math.hypot(cx - mx, cy - my);
+       if (d < bestDist) { best = p; bestDist = d; }
+     }
+     return best;
+   }
+
+   canvas.addEventListener('mousemove', e => {
+     const [mx, my] = canvasCoords(e);
+     const hit = nearestPoint(mx, my);
+     if (hit) {
+       canvas.style.cursor = 'pointer';
+       tooltip.style.display = 'block';
+       tooltip.style.left = e.clientX + 14 + 'px';
+       tooltip.style.top = e.clientY + 14 + 'px';
+       // Build the tooltip from text nodes so data strings are never parsed as HTML.
+       const idLabel = document.createElement('strong');
+       idLabel.textContent = hit.id;
+       tooltip.textContent = '';
+       tooltip.append(
+         idLabel,
+         document.createElement('br'),
+         `Score: ${hit.score}%`,
+         document.createElement('br'),
+         `Type: ${hit.archetype}`,
+       );
+     } else {
+       canvas.style.cursor = 'default';
+       tooltip.style.display = 'none';
+     }
+   });
+
+   canvas.addEventListener('mouseleave', () => { tooltip.style.display = 'none'; });
+
+   canvas.addEventListener('click', e => {
+     const [mx, my] = canvasCoords(e);
+     // Same lookup as hover: open the paper the tooltip is showing, not merely
+     // the first paper in data order that happens to be within range.
+     const hit = nearestPoint(mx, my);
+     if (hit) navigate(`/paper/${hit.id}`);
+   });
+ }
+
function renderYearCategoryTrends(f: Findings): string {
const years = Object.keys(f.year_category_trends).sort();
const defaultCats = ['contamination', 'data_leakage', 'statistical_methodology', 'experimental_rigor'];
diff --git a/explorer/tests/explorer.spec.ts b/explorer/tests/explorer.spec.ts
@@ -206,8 +206,8 @@ test.describe('Findings', () => {
test('loads and shows all sections', async ({ page }) => {
await page.goto('/#/findings');
await expect(page.locator('.section').first()).toBeVisible({ timeout: 10000 });
- // Should have 11 sections
- expect(await page.locator('.section').count()).toBe(11);
+ // Should have 12 sections
+ expect(await page.locator('.section').count()).toBe(12);
});
test('shows per-question pass rates', async ({ page }) => {
diff --git a/scripts/build-explorer-data.py b/scripts/build-explorer-data.py
@@ -599,6 +599,98 @@ def build():
"matrix": corr_matrix,
}
+ # 11. PCA scatter — project papers to 2D from category scores
+ PCA_CATS = [
+ "artifacts", "statistical_methodology", "evaluation_design",
+ "claims_and_evidence", "setup_transparency", "limitations_and_scope",
+ "data_integrity", "conflicts_of_interest", "cost_and_practicality",
+ ]
+
+ # Collect vectors, impute missing with median
+ pca_raw = []
+ for p in papers_full:
+ cs = p["category_scores"]
+ vec = {cat: cs[cat] / 100.0 for cat in PCA_CATS if cat in cs}
+ non_none = len(vec)
+ if non_none >= 6:
+ pca_raw.append({"id": p["id"], "scores": vec, "archetype": p["archetype"], "title": p["title"], "score": p["score"]})
+
+ pca_medians = {}
+ for cat in PCA_CATS:
+ vals = sorted(r["scores"].get(cat, None) for r in pca_raw if r["scores"].get(cat) is not None)
+ pca_medians[cat] = vals[len(vals) // 2] if vals else 0.5
+
+ pca_vecs = []
+ for r in pca_raw:
+ vec = [r["scores"].get(cat, pca_medians[cat]) for cat in PCA_CATS]
+ pca_vecs.append(vec)
+
+ pca_n = len(pca_vecs)
+ pca_d = len(PCA_CATS)
+
+ # Center
+ pca_means = [sum(v[j] for v in pca_vecs) / pca_n for j in range(pca_d)]
+ pca_centered = [[v[j] - pca_means[j] for j in range(pca_d)] for v in pca_vecs]
+
+ # Covariance
+ pca_cov = [[0.0] * pca_d for _ in range(pca_d)]
+ for i in range(pca_d):
+ for j in range(pca_d):
+ pca_cov[i][j] = sum(row[i] * row[j] for row in pca_centered) / (pca_n - 1)
+
+ # Power iteration for top 2 eigenvectors
+ import random as _rng
+ _rng.seed(42)
+
+ # Power iteration: estimate one eigenvector of the square matrix `mat` and
+ # its eigenvalue.
+ #
+ # mat      -- square matrix as a list of row lists (here: the covariance matrix).
+ # num_iter -- fixed number of multiply/normalize rounds; there is no
+ #             convergence test, the loop only stops early if the vector
+ #             collapses to zero.
+ # deflate  -- optional list of already-found eigenvectors. Their components
+ #             are subtracted from the start vector and again after every
+ #             multiplication, so the result stays orthogonal to them.
+ #             Assumes each deflation vector is unit length -- TODO confirm
+ #             for any new caller.
+ #
+ # Returns (v, ev): the unit-length vector and the quadratic form v^T mat v
+ # (the Rayleigh quotient, since v is normalized).
+ #
+ # NOTE(review): the sign of the returned vector is not normalized; it depends
+ # on the random start vector drawn from `_rng`. The frontend hard-codes axis
+ # direction captions ("Higher rigor" on the left), so a sign flip after a data
+ # change would silently mirror the plot relative to its labels.
+ def _power_iter(mat, num_iter=300, deflate=None):
+ dd = len(mat)
+ # Random Gaussian start vector
+ v = [_rng.gauss(0, 1) for _ in range(dd)]
+ if deflate:
+ for dv in deflate:
+ # Remove the component of v along dv
+ dot = sum(v[i] * dv[i] for i in range(dd))
+ v = [v[i] - dot * dv[i] for i in range(dd)]
+ # Normalize to unit length (no guard here against a zero norm)
+ norm = sum(x * x for x in v) ** 0.5
+ v = [x / norm for x in v]
+ for _ in range(num_iter):
+ # nv = mat @ v
+ nv = [sum(mat[i][j] * v[j] for j in range(dd)) for i in range(dd)]
+ if deflate:
+ for dv in deflate:
+ # Re-orthogonalize every round so rounding error cannot drift back toward dv
+ dot = sum(nv[i] * dv[i] for i in range(dd))
+ nv = [nv[i] - dot * dv[i] for i in range(dd)]
+ norm = sum(x * x for x in nv) ** 0.5
+ if norm == 0:
+ # Vector collapsed to zero; keep the previous v
+ break
+ v = [x / norm for x in nv]
+ # Eigenvalue estimate: v^T mat v
+ ev = sum(sum(mat[i][j] * v[j] for j in range(dd)) * v[i] for i in range(dd))
+ return v, ev
+
+ pc1_vec, ev1 = _power_iter(pca_cov)
+ pc2_vec, ev2 = _power_iter(pca_cov, deflate=[pc1_vec])
+ total_var = sum(pca_cov[i][i] for i in range(pca_d))
+
+ # Project papers
+ pca_points = []
+ for i, r in enumerate(pca_raw):
+ row = pca_centered[i]
+ x = sum(row[j] * pc1_vec[j] for j in range(pca_d))
+ y = sum(row[j] * pc2_vec[j] for j in range(pca_d))
+ pca_points.append({
+ "id": r["id"],
+ "x": round(x, 4),
+ "y": round(y, 4),
+ "archetype": r["archetype"],
+ "score": r["score"],
+ })
+
+ pca_result = {
+ "points": pca_points,
+ "categories": PCA_CATS,
+ "pc1_loadings": [round(v, 3) for v in pc1_vec],
+ "pc2_loadings": [round(v, 3) for v in pc2_vec],
+ "pc1_variance_pct": round(ev1 / total_var * 100, 1),
+ "pc2_variance_pct": round(ev2 / total_var * 100, 1),
+ }
+
findings = {
"question_rates": q_rates,
"year_category_trends": year_cat_trends,
@@ -612,6 +704,7 @@ def build():
"repro_detail": repro_detail,
"game_pcts": game_pcts,
"correlation": correlation,
+ "pca": pca_result,
}
# --- Citation network ---