Add findings view with 10 analysis sections and code URL extraction - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

commit d240203118b1d2332118fdcb2cfd94594a523da2
parent 1818e336e2cc2445cd1006f83c3fa66c7eec7259
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Sun, 22 Mar 2026 21:33:58 +0100

Add findings view with 10 analysis sections and code URL extraction

New #/findings view surfaces deep analysis that was only in docs:
- Per-question pass rates (67 questions, worst: self_comparison_bias 0.8%)
- Year trends by category with toggleable lines (contamination 29%→7%)
- Venue & citation scoring (500+ cites score below average)
- Optimism-rigor inversion (positive claims from weaker papers)
- Quality homophily (high-quality papers cite high-quality 3x more)
- Sampling effect (median drops as long tail scanned)
- Benchmark monoculture (58% pure benchmark-eval)
- Funding gap (13pp between disclosed/undisclosed)
- Reproducibility drill-down (4.2% fully reproducible)
- All 6 named games (added Cherry-picked Comparisons, All Show No Substance)

Also: extracted 282 code URLs from scan justification text, shown as
"Code" link on paper detail pages.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M explorer/index.html  | 1 +
A explorer/src/components/multi-line-chart.ts  | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
M explorer/src/data.ts  | 48 ++++++++++++++++++++++++++++++++++++------------
M explorer/src/main.ts  | 2 ++
M explorer/src/style.css  | 20 ++++++++++++++++++++
A explorer/src/views/findings.ts  | 304 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M explorer/src/views/paper-detail.ts  | 3 +++
M explorer/tests/explorer.spec.ts  | 46 ++++++++++++++++++++++++++++++++++++++++++++++
M scripts/build-explorer-data.py  | 234 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----

9 files changed, 680 insertions(+), 26 deletions(-)
diff --git a/explorer/index.html b/explorer/index.html
@@ -14,6 +14,7 @@
       <a href="#/papers">Papers</a>
       <a href="#/network">Network</a>
       <a href="#/tensions">Tensions</a>
+      <a href="#/findings">Findings</a>
     </nav>
     <button id="theme-toggle" aria-label="Toggle theme">☀</button>
   </header>
diff --git a/explorer/src/components/multi-line-chart.ts b/explorer/src/components/multi-line-chart.ts
@@ -0,0 +1,48 @@
+export interface LineData { // One series drawn by renderMultiLineChart.
+  label: string; // text shown next to the swatch in the legend
+  color: string; // CSS colour used for the line stroke, the point fill and the legend swatch
+  points: { x: number; y: number }[]; // x is fed to the x scale (an index into xLabels at the call sites shown); y is plotted on a fixed 0–100 axis
+}
+
+export function renderMultiLineChart( // Builds an SVG line chart plus an HTML legend and returns both as one markup string.
+  lines: LineData[], // series to draw; each one also gets a legend entry
+  xLabels: string[], // x-axis tick labels; their count fixes the horizontal scale
+  opts: { width?: number; height?: number; yLabel?: string } = {} // yLabel is accepted but never read in this function
+): string {
+  const w = opts.width || 600; // a falsy width (undefined or 0) falls back to 600
+  const h = opts.height || 220; // a falsy height falls back to 220
+  const pad = { l: 50, r: 20, t: 15, b: 30 };
+  const chartW = w - pad.l - pad.r;
+  const chartH = h - pad.t - pad.b;
+
+  const xScale = (i: number) => pad.l + (i / Math.max(xLabels.length - 1, 1)) * chartW; // Math.max(…, 1) avoids dividing by zero with a single label
+  const yScale = (v: number) => pad.t + chartH - (v / 100) * chartH; // y values are mapped onto a fixed 0–100 axis
+
+  // Grid + labels
+  let svg = '';
+  for (let v = 0; v <= 100; v += 25) {
+    svg += `<text x="${pad.l - 8}" y="${yScale(v) + 4}" text-anchor="end">${v}%</text>`;
+    svg += `<line class="grid-line" x1="${pad.l}" x2="${w - pad.r}" y1="${yScale(v)}" y2="${yScale(v)}" stroke-dasharray="3"/>`;
+  }
+  for (let i = 0; i < xLabels.length; i++) {
+    svg += `<text x="${xScale(i)}" y="${h - 5}" text-anchor="middle">${xLabels[i]}</text>`;
+  }
+
+  // Lines
+  for (const line of lines) {
+    if (line.points.length < 2) continue; // series with fewer than two points are not drawn (they still appear in the legend below)
+    const pts = line.points.map(p => `${xScale(p.x)},${yScale(p.y)}`).join(' ');
+    svg += `<polyline points="${pts}" fill="none" stroke="${line.color}" stroke-width="2"/>`;
+    for (const p of line.points) {
+      svg += `<circle cx="${xScale(p.x)}" cy="${yScale(p.y)}" r="3" fill="${line.color}"/>`;
+    }
+  }
+
+  // Legend
+  const legend = lines.map(l =>
+    `<span class="chart-legend-item"><span class="chart-legend-swatch" style="background:${l.color}"></span>${l.label}</span>`
+  ).join('');
+
+  return `<svg viewBox="0 0 ${w} ${h}" style="width:100%;max-width:${w}px">${svg}</svg>
+    <div class="chart-legend">${legend}</div>`;
+}
diff --git a/explorer/src/data.ts b/explorer/src/data.ts
@@ -16,7 +16,6 @@ export interface RedFlag {
   detail: string;
 }
 
-// Slim entry for paper table (papers-index.json)
 export interface PaperIndex {
   id: string;
   title: string;
@@ -28,18 +27,9 @@ export interface PaperIndex {
   games: string[];
   arxiv_id: string;
   doi: string;
+  code_url: string | null;
 }
 
-export interface Pipeline {
-  registry_total: number;
-  v2_scanned: number;
-  v1_needs_rescan: number;
-  has_text_no_scan: number;
-  no_text: number;
-  excluded: number;
-}
-
-// Full detail for per-paper pages (papers/{slug}.json)
 export interface PaperDetail extends PaperIndex {
   category_scores: Record<string, number>;
   claims: Claim[];
@@ -56,6 +46,15 @@ export interface HistBin {
   count: number;
 }
 
+export interface Pipeline { // Shape of Dashboard.pipeline; presumably paper counts per pipeline stage — confirm against the build script.
+  registry_total: number;
+  v2_scanned: number;
+  v1_needs_rescan: number;
+  has_text_no_scan: number;
+  no_text: number;
+  excluded: number;
+}
+
 export interface Dashboard {
   n: number;
   median: number;
@@ -70,6 +69,31 @@ export interface Dashboard {
   pipeline: Pipeline;
 }
 
+export interface QuestionRate { // Pass rate for a single checklist question.
+  rate: number; // rendered as a percentage and used directly as a bar width in %
+  n: number; // displayed as the sample size "(n=…)"
+}
+
+export interface GroupStat { // Score summary for a group of papers (used for venues, citation bands and funding groups).
+  n: number; // shown as "(n=…)" next to the bar
+  mean: number; // rendered as a percentage and used as the bar width
+  median: number;
+}
+
+export interface Findings { // Payload of /data/findings.json, consumed by the findings view.
+  question_rates: Record<string, QuestionRate>; // keyed "category.question" (the view splits each key on ".")
+  year_category_trends: Record<string, Record<string, number>>; // year → category → value plotted on a 0–100 axis
+  venue_stats: Record<string, GroupStat>; // keyed by venue name
+  citation_band_stats: Record<string, GroupStat>; // the view reads the bands "0", "1-50", "51-500" and "500+"
+  optimism_rigor: Record<string, { positive_n: number; positive_mean: number; nuanced_n: number; nuanced_mean: number; gap: number }>; // keyed by tension id
+  homophily: { threshold: number; baseline_pct: number; high_cite_high_pct: number; high_cite_total: number };
+  sampling_effect: { checkpoints: { n: number; median: number }[] }; // drawn in array order, each labelled "n=…"
+  benchmark_monoculture: Record<string, { benchmark_only: number; total: number; pct: number }>; // keyed by year
+  funding_gap: Record<string, GroupStat>; // the view reads the keys "disclosed" and "not_disclosed"
+  repro_detail: Record<string, QuestionRate | number> & { full_pass_count: number; full_pass_pct: number }; // per-question entries plus two summary numbers
+  game_pcts: Record<string, number>; // game name → percentage
+}
+
 export interface TensionClaim {
   paper_id: string;
   claim: string;
@@ -97,7 +121,6 @@ export interface CitationNetwork {
   edges: [string, string][];
 }
 
-// Lazy-loaded per-view data
 const cache: Record<string, unknown> = {};
 
 async function fetchJson<T>(path: string): Promise<T> {
@@ -109,6 +132,7 @@ async function fetchJson<T>(path: string): Promise<T> {
 }
 
 export const loadDashboard = () => fetchJson<Dashboard>('/data/dashboard.json');
+export const loadFindings = () => fetchJson<Findings>('/data/findings.json'); // loader for the data file behind the #/findings view
 export const loadPapersIndex = () => fetchJson<PaperIndex[]>('/data/papers-index.json');
 export const loadPaperDetail = (slug: string) => fetchJson<PaperDetail>(`/data/papers/${slug}.json`);
 export const loadNetwork = () => fetchJson<CitationNetwork>('/data/network.json');
diff --git a/explorer/src/main.ts b/explorer/src/main.ts
@@ -4,6 +4,7 @@ import { renderPapers } from './views/papers';
 import { renderPaperDetail } from './views/paper-detail';
 import { renderNetwork } from './views/network';
 import { renderTensions } from './views/tensions';
+import { renderFindings } from './views/findings';
 import { initTheme } from './theme';
 
 function init() {
@@ -15,6 +16,7 @@ function init() {
   route('/paper/:slug', ({ slug }) => renderPaperDetail(app, slug));
   route('/network', () => renderNetwork(app));
   route('/tensions', () => renderTensions(app));
+  route('/findings', () => renderFindings(app));
 
   startRouter();
 }
diff --git a/explorer/src/style.css b/explorer/src/style.css
@@ -488,3 +488,23 @@ td.score {
 
 /* Year trend chart */
 .trend-chart { margin-top: 0.5rem; }
+
+/* Multi-line chart legend */
+.chart-legend { display: flex; gap: 1rem; flex-wrap: wrap; margin-top: 0.5rem; }
+.chart-legend-item { font-size: 0.75rem; color: var(--text-dim); display: flex; align-items: center; gap: 4px; }
+.chart-legend-swatch { width: 14px; height: 3px; border-radius: 1px; display: inline-block; }
+
+/* Toggle buttons for category lines */
+.toggle-group { display: flex; gap: 0.5rem; flex-wrap: wrap; margin-bottom: 0.75rem; }
+.toggle-btn {
+  font-size: 0.72rem;
+  padding: 0.2rem 0.55rem;
+  border: 1px solid var(--border);
+  border-radius: 3px;
+  background: none;
+  color: var(--text-dim);
+  cursor: pointer;
+  transition: all 0.15s;
+}
+.toggle-btn:hover { border-color: var(--text-dim); }
+.toggle-btn.active { background: rgba(108, 140, 255, 0.08); }
diff --git a/explorer/src/views/findings.ts b/explorer/src/views/findings.ts
@@ -0,0 +1,304 @@
+import { loadFindings, type Findings, type QuestionRate } from '../data';
+import { renderBarChart } from '../components/bar-chart';
+import { renderMultiLineChart } from '../components/multi-line-chart';
+
+function formatName(name: string): string { // Turns a snake_case key into a Title Case label, e.g. "data_leakage" → "Data Leakage".
+  return name.split('_').join(' ').replace(/\b\w/g, (ch) => ch.toUpperCase());
+}
+
+const CAT_COLORS: Record<string, string> = { // Line/toggle colour per checklist category; the key order is also the toggle-button order.
+  contamination: '#f06565',
+  data_leakage: '#e08050',
+  statistical_methodology: '#6c8cff',
+  experimental_rigor: '#f0c050',
+  artifacts: '#3dd68c',
+  evaluation_design: '#a080f0',
+  claims_and_evidence: '#50c0c0',
+  survey_methodology: '#ff80b0',
+  setup_transparency: '#90b060',
+  limitations_and_scope: '#c0a060',
+  cost_and_practicality: '#8090a0',
+  human_studies: '#b070b0',
+  data_integrity: '#70a0d0',
+  conflicts_of_interest: '#d07070',
+};
+
+const TENSION_NAMES: Record<string, string> = { // Display names for optimism_rigor keys; keys missing here are shown as-is.
+  productivity: 'Productivity',
+  benchmarks: 'Benchmarks',
+  agents: 'Agents',
+};
+
+export async function renderFindings(app: HTMLElement) { // Renders the #/findings view into app: a spinner first, then all ten sections once the data has loaded.
+  app.innerHTML = '<div class="spinner"></div>'; // placeholder while the findings data is being fetched
+  const f = await loadFindings();
+
+  app.innerHTML = `
+    ${renderQuestionRates(f)}
+    ${renderYearCategoryTrends(f)}
+    ${renderVenueCitation(f)}
+    ${renderOptimismRigor(f)}
+    ${renderHomophily(f)}
+    ${renderSamplingEffect(f)}
+    ${renderBenchmarkMonoculture(f)}
+    ${renderFundingGap(f)}
+    ${renderReproDetail(f)}
+    ${renderGames(f)}
+  `;
+
+  // Attach toggle listeners for year-category chart
+  attachCategoryToggles(f); // must run after innerHTML is set: it looks up #cat-toggles and #cat-chart in the document
+}
+
+function renderQuestionRates(f: Findings): string { // "Per-Question Pass Rates" section: bars for the 20 lowest and the 10 highest pass rates.
+  const sorted = Object.entries(f.question_rates)
+    .sort((a, b) => a[1].rate - b[1].rate); // ascending by rate, so the worst questions come first
+  const worst20 = sorted.slice(0, 20);
+  const best10 = sorted.slice(-10).reverse(); // last ten entries, reversed so the highest rate is listed first
+
+  return `<div class="section">
+    <h2>Per-Question Pass Rates</h2>
+    <p style="font-size:0.85rem;color:var(--text-dim);margin-bottom:1rem">${sorted.length} questions across 14 categories. Sorted by pass rate, worst first.</p>
+    <h3 style="font-size:0.85rem;color:var(--red);margin-bottom:0.5rem">Worst 20</h3>
+    ${worst20.map(([key, d]) => {
+      const [cat, q] = key.split('.'); // keys have the form "category.question"
+      const color = d.rate < 10 ? 'var(--red)' : d.rate < 30 ? 'var(--yellow)' : 'var(--accent)'; // red below 10%, yellow below 30%, accent otherwise
+      return `<div class="hbar">
+        <div class="hbar-label"><span>${formatName(q)} <span style="color:var(--text-dim);font-size:0.7rem">${formatName(cat)}</span></span><span>${d.rate}% <span style="color:var(--text-dim)">(n=${d.n})</span></span></div>
+        <div class="hbar-track"><div class="hbar-fill" style="width:${d.rate}%;background:${color}"></div></div>
+      </div>`;
+    }).join('')}
+    <h3 style="font-size:0.85rem;color:var(--green);margin:1rem 0 0.5rem">Best 10</h3>
+    ${best10.map(([key, d]) => {
+      const [cat, q] = key.split('.'); // same "category.question" split as above
+      return `<div class="hbar">
+        <div class="hbar-label"><span>${formatName(q)} <span style="color:var(--text-dim);font-size:0.7rem">${formatName(cat)}</span></span><span>${d.rate}% <span style="color:var(--text-dim)">(n=${d.n})</span></span></div>
+        <div class="hbar-track"><div class="hbar-fill" style="width:${d.rate}%;background:var(--green)"></div></div>
+      </div>`;
+    }).join('')}
+  </div>`;
+}
+
+// Category lines that are switched on before the user toggles anything.
+const DEFAULT_TREND_CATS = ['contamination', 'data_leakage', 'statistical_methodology', 'experimental_rigor'];
+
+/**
+ * Builds one chart series per category from `f.year_category_trends`.
+ * A year contributes a point only when it actually has a value for that category,
+ * so a genuine 0% pass rate is plotted instead of being dropped as "no data".
+ */
+function buildTrendLines(f: Findings, cats: string[], years: string[]) {
+  return cats.map(cat => ({
+    label: formatName(cat),
+    color: CAT_COLORS[cat],
+    points: years
+      .map((y, i) => ({ x: i, y: f.year_category_trends[y]?.[cat] }))
+      .filter((p): p is { x: number; y: number } => typeof p.y === 'number'),
+  }));
+}
+
+/** Renders the "Year Trends by Category" section: one toggle button per category plus the initial chart. */
+function renderYearCategoryTrends(f: Findings): string {
+  const years = Object.keys(f.year_category_trends).sort();
+
+  const toggles = Object.keys(CAT_COLORS).map(cat => {
+    const active = DEFAULT_TREND_CATS.includes(cat) ? ' active' : '';
+    return `<button class="toggle-btn${active}" data-cat="${cat}" style="border-color:${active ? CAT_COLORS[cat] : ''};color:${active ? CAT_COLORS[cat] : ''}">${formatName(cat)}</button>`;
+  }).join('');
+
+  return `<div class="section">
+    <h2>Year Trends by Category</h2>
+    <div class="toggle-group" id="cat-toggles">${toggles}</div>
+    <div id="cat-chart">${renderMultiLineChart(buildTrendLines(f, DEFAULT_TREND_CATS, years), years, { width: 700 })}</div>
+  </div>`;
+}
+
+/** Wires the toggle buttons: a click flips a category on or off and redraws the chart. No-op if the section is not in the DOM. */
+function attachCategoryToggles(f: Findings) {
+  const container = document.getElementById('cat-toggles');
+  const chartEl = document.getElementById('cat-chart');
+  if (!container || !chartEl) return;
+  const years = Object.keys(f.year_category_trends).sort();
+
+  container.addEventListener('click', e => {
+    const btn = (e.target as HTMLElement).closest<HTMLElement>('.toggle-btn'); // delegated: one listener serves every button
+    if (!btn) return;
+    const cat = btn.dataset.cat ?? '';
+    const color = btn.classList.toggle('active') ? (CAT_COLORS[cat] ?? '') : ''; // toggle() returns true when the class was added
+    btn.style.borderColor = color;
+    btn.style.color = color;
+
+    // Re-render chart with the categories whose buttons are currently active
+    const activeCats = Array.from(container.querySelectorAll<HTMLElement>('.toggle-btn.active'))
+      .map(b => b.dataset.cat).filter((c): c is string => c !== undefined);
+    chartEl.innerHTML = renderMultiLineChart(buildTrendLines(f, activeCats, years), years, { width: 700 });
+  });
+}
+
+function renderVenueCitation(f: Findings): string { // "Venue & Citation Scoring" section: mean score per venue (left) and per citation band (right).
+  const venues = Object.entries(f.venue_stats).sort((a, b) => b[1].mean - a[1].mean); // highest mean first
+  const bands = ['0', '1-50', '51-500', '500+']; // fixed display order; bands absent from the data are skipped below
+
+  return `<div class="section">
+    <h2>Venue & Citation Scoring</h2>
+    <div class="detail-grid">
+      <div>
+        <h3 style="font-size:0.85rem;color:var(--text-dim);margin-bottom:0.5rem">Score by Venue (3+ papers)</h3>
+        ${venues.map(([v, d]) => {
+          const color = d.mean < 40 ? 'var(--red)' : d.mean < 55 ? 'var(--yellow)' : 'var(--green)'; // red below 40, yellow below 55, green otherwise
+          return `<div class="hbar">
+            <div class="hbar-label"><span>${v}</span><span>${d.mean}% <span style="color:var(--text-dim)">(n=${d.n})</span></span></div>
+            <div class="hbar-track"><div class="hbar-fill" style="width:${d.mean}%;background:${color}"></div></div>
+          </div>`;
+        }).join('')}
+      </div>
+      <div>
+        <h3 style="font-size:0.85rem;color:var(--text-dim);margin-bottom:0.5rem">Score by Citation Count</h3>
+        ${bands.map(band => {
+          const d = f.citation_band_stats[band];
+          if (!d) return ''; // no papers in this band: render nothing for it
+          const color = d.mean < 40 ? 'var(--red)' : d.mean < 55 ? 'var(--yellow)' : 'var(--green)'; // same thresholds as the venue bars
+          return `<div class="hbar">
+            <div class="hbar-label"><span>${band} citations</span><span>${d.mean}% <span style="color:var(--text-dim)">(n=${d.n})</span></span></div>
+            <div class="hbar-track"><div class="hbar-fill" style="width:${d.mean}%;background:${color}"></div></div>
+          </div>`;
+        }).join('')}
+        <p style="font-size:0.8rem;color:var(--text-dim);margin-top:0.75rem">Most-cited papers (500+) score <strong style="color:var(--red)">below average</strong>. Citations measure influence, not rigor.</p>
+      </div>
+    </div>
+  </div>`;
+}
+
+function renderOptimismRigor(f: Findings): string { // "Optimism-Rigor Inversion" section: one row per tension with both means and the gap. "+" is prefixed only to non-negative gaps, so a negative gap reads "-2.1pp" rather than "+-2.1pp".
+  return `<div class="section">
+    <h2>Optimism-Rigor Inversion</h2>
+    <p style="font-size:0.85rem;color:var(--text-dim);margin-bottom:1rem">Across all three claim tensions, papers making positive/optimistic claims have <strong>lower</strong> methodology scores than papers with nuanced findings.</p>
+    ${Object.entries(f.optimism_rigor).map(([key, d]) => `
+      <div class="game-row">
+        <span class="game-name">${TENSION_NAMES[key] || key}</span>
+        <span style="font-family:var(--font);font-size:0.85rem">
+          <span style="color:var(--yellow)">Positive ${d.positive_mean}%</span>
+          <span style="color:var(--text-dim)"> vs </span>
+          <span style="color:var(--green)">Nuanced ${d.nuanced_mean}%</span>
+          <span style="color:var(--accent)"> (${d.gap >= 0 ? '+' : ''}${d.gap}pp)</span>
+        </span>
+      </div>
+    `).join('')}
+  </div>`;
+}
+
+function renderHomophily(f: Findings): string { // "Quality Homophily" section: baseline vs observed share of citations going to high-scoring papers, plus their ratio.
+  const h = f.homophily;
+  const ratio = h.high_cite_total > 0 && h.baseline_pct > 0 ? (h.high_cite_high_pct / h.baseline_pct).toFixed(1) : '?'; // '?' when there are no citations or the baseline is 0 (the division would give Infinity/NaN)
+  return `<div class="section">
+    <h2>Quality Homophily</h2>
+    <p style="font-size:0.85rem;color:var(--text-dim);margin-bottom:1rem">Do high-quality papers cite other high-quality papers more than expected? (threshold: ${h.threshold}%+ score)</p>
+    <div class="hbar">
+      <div class="hbar-label"><span>Expected (baseline)</span><span>${h.baseline_pct}%</span></div>
+      <div class="hbar-track"><div class="hbar-fill" style="width:${h.baseline_pct}%;background:var(--text-dim)"></div></div>
+    </div>
+    <div class="hbar">
+      <div class="hbar-label"><span>Observed (high cites high)</span><span>${h.high_cite_high_pct}%</span></div>
+      <div class="hbar-track"><div class="hbar-fill" style="width:${Math.min(h.high_cite_high_pct, 100)}%;background:var(--green)"></div></div>
+    </div>
+    <p style="font-size:0.85rem;margin-top:0.5rem"><strong>${ratio}x</strong> more likely to cite high-quality work <span style="color:var(--text-dim)">(n=${h.high_cite_total} citations)</span></p>
+  </div>`;
+}
+
+function renderSamplingEffect(f: Findings): string { // "Sampling Effect" section: small SVG line chart of the median score at each checkpoint.
+  const pts = f.sampling_effect.checkpoints;
+  const w = 400, h = 150;
+  const pad = { l: 50, r: 20, t: 15, b: 30 };
+  const chartW = w - pad.l - pad.r;
+  const chartH = h - pad.t - pad.b;
+
+  const xScale = (i: number) => pad.l + (i / Math.max(pts.length - 1, 1)) * chartW; // Math.max(…, 1): a single checkpoint would otherwise give 0/0 = NaN coordinates
+  const yMin = Math.floor(Math.min(40, ...pts.map(p => p.median)) / 5) * 5; // axis is 40–60% by default and widens in steps of 5
+  const yMax = Math.ceil(Math.max(60, ...pts.map(p => p.median)) / 5) * 5; // so medians outside that band are not drawn off-chart
+  const yScale = (v: number) => pad.t + chartH - ((v - yMin) / (yMax - yMin)) * chartH;
+  let svg = '';
+  for (let v = yMin; v <= yMax; v += 5) {
+    svg += `<text x="${pad.l - 8}" y="${yScale(v) + 4}" text-anchor="end">${v}%</text>`;
+    svg += `<line class="grid-line" x1="${pad.l}" x2="${w - pad.r}" y1="${yScale(v)}" y2="${yScale(v)}" stroke-dasharray="3"/>`;
+  }
+
+  const polyline = pts.map((p, i) => `${xScale(i)},${yScale(p.median)}`).join(' ');
+  svg += `<polyline points="${polyline}" fill="none" stroke="var(--yellow)" stroke-width="2"/>`;
+  pts.forEach((p, i) => {
+    svg += `<circle cx="${xScale(i)}" cy="${yScale(p.median)}" r="4" fill="var(--yellow)"/>`;
+    svg += `<text x="${xScale(i)}" y="${yScale(p.median) - 10}" text-anchor="middle" fill="var(--yellow)" font-size="11">${p.median}%</text>`;
+    svg += `<text x="${xScale(i)}" y="${h - 5}" text-anchor="middle">n=${p.n}</text>`;
+  });
+
+  return `<div class="section">
+    <h2>Sampling Effect</h2>
+    <p style="font-size:0.85rem;color:var(--text-dim);margin-bottom:0.5rem">Median score drops as the long tail is scanned. Visibility correlates with quality.</p>
+    <svg viewBox="0 0 ${w} ${h}" style="width:100%;max-width:${w}px">${svg}</svg>
+  </div>`;
+}
+
+function renderBenchmarkMonoculture(f: Findings): string { // "Benchmark Monoculture" section: one line of the benchmark-only share per year, plus a sentence for the latest year.
+  const years = Object.keys(f.benchmark_monoculture).sort(); // plain string sort of the year keys
+  const pts = years.map((y, i) => ({ x: i, y: f.benchmark_monoculture[y].pct })); // x is the index into years, y the percentage
+  const lines = [{
+    label: 'Benchmark-only papers',
+    color: '#f0c050',
+    points: pts,
+  }];
+
+  return `<div class="section">
+    <h2>Benchmark Monoculture</h2>
+    <p style="font-size:0.85rem;color:var(--text-dim);margin-bottom:0.5rem">Share of papers using only benchmark evaluation, no other methodology.</p>
+    ${renderMultiLineChart(lines, years)}
+    ${years.length > 0 ? `<p style="font-size:0.85rem;margin-top:0.5rem">${f.benchmark_monoculture[years[years.length - 1]]?.pct ?? 0}% of ${years[years.length - 1]} papers are pure benchmark-eval.</p>` : ''}
+  </div>`;
+}
+
+function renderFundingGap(f: Findings): string { // "Funding Disclosure Gap" section: mean score of papers that disclose funding vs those that do not.
+  const disc = f.funding_gap.disclosed;
+  const nodisc = f.funding_gap.not_disclosed;
+  if (!disc || !nodisc) return ''; // the whole section is omitted unless both groups are present
+  const gap = (disc.mean - nodisc.mean).toFixed(1); // disclosed minus not-disclosed, one decimal place
+
+  return `<div class="section">
+    <h2>Funding Disclosure Gap</h2>
+    <div class="hbar">
+      <div class="hbar-label"><span>Funding disclosed</span><span>${disc.mean}% <span style="color:var(--text-dim)">(n=${disc.n})</span></span></div>
+      <div class="hbar-track"><div class="hbar-fill" style="width:${disc.mean}%;background:var(--green)"></div></div>
+    </div>
+    <div class="hbar">
+      <div class="hbar-label"><span>Not disclosed</span><span>${nodisc.mean}% <span style="color:var(--text-dim)">(n=${nodisc.n})</span></span></div>
+      <div class="hbar-track"><div class="hbar-fill" style="width:${nodisc.mean}%;background:var(--red)"></div></div>
+    </div>
+    <p style="font-size:0.85rem;margin-top:0.5rem"><strong>${gap}pp gap</strong> — papers that disclose funding score substantially higher.</p>
+  </div>`;
+}
+
+function renderReproDetail(f: Findings): string { // "Reproducibility Drill-Down" section: one bar per reproducibility question plus the all-four-pass summary.
+  const qs = ['code_released', 'data_released', 'environment_specified', 'reproduction_instructions']; // keys read from repro_detail, in display order
+  return `<div class="section">
+    <h2>Reproducibility Drill-Down</h2>
+    ${qs.map(q => {
+      const d = f.repro_detail[q] as QuestionRate | undefined;
+      if (!d || typeof d === 'number') return ''; // skip missing keys and guard against the numeric summary fields
+      const color = d.rate < 15 ? 'var(--red)' : d.rate < 50 ? 'var(--yellow)' : 'var(--green)'; // red below 15%, yellow below 50%, green otherwise
+      return `<div class="hbar">
+        <div class="hbar-label"><span>${formatName(q)}</span><span>${d.rate}% <span style="color:var(--text-dim)">(n=${d.n})</span></span></div>
+        <div class="hbar-track"><div class="hbar-fill" style="width:${d.rate}%;background:${color}"></div></div>
+      </div>`;
+    }).join('')}
+    <p style="font-size:1rem;margin-top:1rem;font-weight:600"><span style="color:var(--red);font-family:var(--font);font-size:1.3rem">${f.repro_detail.full_pass_pct}%</span> of papers are fully reproducible <span style="color:var(--text-dim)">(${f.repro_detail.full_pass_count} papers pass all 4 criteria)</span></p>
+  </div>`;
+}
+
+function renderGames(f: Findings): string { // "Named Games" section: one row per game with its percentage.
+  const sorted = Object.entries(f.game_pcts).sort((a, b) => b[1] - a[1]); // highest percentage first
+  return `<div class="section">
+    <h2>Named Games</h2>
+    <p style="font-size:0.85rem;color:var(--text-dim);margin-bottom:0.75rem">Recurring methodological patterns detected across the corpus.</p>
+    ${sorted.map(([name, pct]) =>
+      `<div class="game-row"><span class="game-name">${name}</span><span class="game-pct">${pct}%</span></div>`
+    ).join('')}
+  </div>`;
+}
diff --git a/explorer/src/views/paper-detail.ts b/explorer/src/views/paper-detail.ts
@@ -50,6 +50,9 @@ export async function renderPaperDetail(app: HTMLElement, slug: string) {
   if (paper.source_url && !paper.source_url.includes('arxiv.org')) {
     links.push(`<a href="${paper.source_url}" target="_blank" rel="noopener">Source</a>`);
   }
+  if (paper.code_url) {
+    links.push(`<a href="${paper.code_url}" target="_blank" rel="noopener">Code</a>`);
+  }
 
   // Load network for citations (lazy, non-blocking)
   let incomingHtml = '';
diff --git a/explorer/tests/explorer.spec.ts b/explorer/tests/explorer.spec.ts
@@ -202,6 +202,52 @@ test.describe('Navigation', () => {
   });
 });
 
+test.describe('Findings', () => { // End-to-end checks for #/findings; exact counts use auto-retrying toHaveCount instead of one-shot count() reads.
+  test('loads and shows all sections', async ({ page }) => {
+    await page.goto('/#/findings');
+    await expect(page.locator('.section').first()).toBeVisible({ timeout: 10000 });
+    // Should have 10 sections
+    await expect(page.locator('.section')).toHaveCount(10);
+  });
+
+  test('shows per-question pass rates', async ({ page }) => {
+    await page.goto('/#/findings');
+    await expect(page.locator('.section').first()).toBeVisible({ timeout: 10000 });
+    await expect(page.locator('h2', { hasText: 'Per-Question Pass Rates' })).toBeVisible();
+    // Should have horizontal bars (a lower bound, so a plain count() read is used here)
+    expect(await page.locator('.section').first().locator('.hbar').count()).toBeGreaterThan(10);
+  });
+
+  test('shows year category trends with toggles', async ({ page }) => {
+    await page.goto('/#/findings');
+    await expect(page.locator('#cat-toggles')).toBeVisible({ timeout: 10000 });
+    const activeToggles = page.locator('.toggle-btn.active');
+    await expect(activeToggles).toHaveCount(4);
+    // Click a toggle to deactivate
+    await activeToggles.first().click();
+    await expect(activeToggles).toHaveCount(3);
+  });
+
+  test('shows funding gap', async ({ page }) => {
+    await page.goto('/#/findings');
+    await expect(page.locator('h2', { hasText: 'Funding Disclosure Gap' })).toBeVisible({ timeout: 10000 });
+    await expect(page.locator('text=pp gap')).toBeVisible();
+  });
+
+  test('shows reproducibility drill-down', async ({ page }) => {
+    await page.goto('/#/findings');
+    await expect(page.locator('h2', { hasText: 'Reproducibility Drill-Down' })).toBeVisible({ timeout: 10000 });
+    await expect(page.locator('text=fully reproducible')).toBeVisible();
+  });
+
+  test('shows 6 named games', async ({ page }) => {
+    await page.goto('/#/findings');
+    const gamesSection = page.locator('.section', { has: page.locator('h2', { hasText: 'Named Games' }) });
+    await expect(gamesSection).toBeVisible({ timeout: 10000 });
+    await expect(gamesSection.locator('.game-row')).toHaveCount(6);
+  });
+});
+
 test.describe('Theme', () => {
   test('toggle switches theme', async ({ page }) => {
     await page.goto('/');
diff --git a/scripts/build-explorer-data.py b/scripts/build-explorer-data.py
@@ -6,8 +6,9 @@ Reads v2 scan.json files, metadata.json, citation-graph.json, and registry.jsonl
 Outputs view-specific JSON files for fast loading, plus a full explorer.json for power users.
 
 Output files:
-  explorer/public/data/dashboard.json    — aggregation stats only (~0.5KB)
-  explorer/public/data/papers-index.json — table data without checklists (~200KB)
+  explorer/public/data/dashboard.json    — aggregation stats only
+  explorer/public/data/findings.json     — deep analysis findings
+  explorer/public/data/papers-index.json — table data without checklists
   explorer/public/data/papers/{slug}.json — full detail per paper
   explorer/public/data/network.json      — citation network
   explorer/public/data/tensions.json     — claim tensions
@@ -18,6 +19,7 @@ Usage:
 """
 
 import json
+import re
 from collections import Counter, defaultdict
 from pathlib import Path
 
@@ -38,6 +40,10 @@ CONDITIONAL_CATEGORIES = [
 ]
 ALL_CATEGORIES = BASE_CATEGORIES + CONDITIONAL_CATEGORIES
 
+CODE_URL_RE = re.compile(  # Matches http(s) URLs hosted on GitHub, GitLab, Zenodo, Bitbucket or Hugging Face.
+    r'https?://(?:github\.com|gitlab\.com|zenodo\.org|bitbucket\.org|huggingface\.co)[^\s,)\"\'<>]+'  # the URL ends at whitespace, a comma, ")", a quote or an angle bracket
+)
+
 
 def classify_archetype(cat_scores):
     ed = cat_scores.get("evaluation_design", 0)
@@ -125,9 +131,28 @@ def detect_games(checklist, score, cat_scores):
     bc = checklist.get("contamination", {}).get("benchmark_contamination_addressed", {})
     if bc.get("applies") and not bc.get("answer"):
         games.append("Contamination Dodge")
+    # Cherry-picked Comparisons
+    bc2 = checklist.get("evaluation_design", {}).get("baselines_contemporary", {})
+    if bc2.get("applies") and not bc2.get("answer"):
+        games.append("Cherry-picked Comparisons")
+    # All Show No Substance
+    ed = cat_scores.get("evaluation_design", 0)
+    sm = cat_scores.get("statistical_methodology", 0)
+    ar = cat_scores.get("artifacts", 0)
+    if ed >= 0.8 and sm < 0.2 and ar < 0.2:
+        games.append("All Show No Substance")
     return games
 
 
+def extract_code_url(checklist):
+    cr = checklist.get("artifacts", {}).get("code_released", {})
+    if cr.get("applies") and cr.get("answer"):
+        urls = CODE_URL_RE.findall(cr.get("justification", ""))
+        if urls:
+            return urls[0].rstrip(".,;:")
+    return None
+
+
 def load_registry():
     entries = {}
     with open(REGISTRY_PATH) as f:
@@ -161,14 +186,25 @@ def write_json(path, data):
         json.dump(data, f, ensure_ascii=False, separators=(",", ":"))
 
 
+def safe_mean(scores):
+    return round(sum(scores) / len(scores), 1) if scores else 0
+
+
+def safe_median(scores):
+    if not scores:
+        return 0
+    s = sorted(scores)
+    return round(s[len(s) // 2], 1)
+
+
 def build():
     registry = load_registry()
     citation_data = load_citation_graph()
 
     # Accumulators
-    papers_full = []      # full paper objects (for explorer.json)
-    papers_index = []     # slim objects (for papers-index.json)
-    paper_details = {}    # slug -> detail object (for papers/{slug}.json)
+    papers_full = []
+    papers_index = []
+    paper_details = {}
     all_scores = []
     cat_pass_counts = defaultdict(lambda: {"passed": 0, "applicable": 0})
     year_scores = defaultdict(list)
@@ -177,6 +213,15 @@ def build():
     game_counts = Counter()
     total_papers = 0
 
+    # Findings accumulators
+    question_pass_counts = defaultdict(lambda: {"passed": 0, "applicable": 0})
+    year_cat_scores = defaultdict(lambda: defaultdict(lambda: {"passed": 0, "applicable": 0}))
+    venue_scores = defaultdict(list)
+    citation_band_scores = defaultdict(list)
+    benchmark_only_by_year = defaultdict(lambda: {"benchmark_only": 0, "total": 0})
+    funding_groups = {"disclosed": [], "not_disclosed": []}
+    score_map = {}  # paper_id -> score_pct (built incrementally for homophily)
+
     tensions = {
         "productivity": {"positive": [], "nuanced": []},
         "benchmarks": {"positive": [], "nuanced": []},
@@ -211,6 +256,7 @@ def build():
         total_papers += 1
         score_pct = round(overall * 100, 1)
         all_scores.append(score_pct)
+        score_map[paper_id] = score_pct
 
         year = paper_meta.get("year") or reg_entry.get("year")
         venue = metadata.get("venue") or paper_meta.get("venue") or reg_entry.get("venue", "")
@@ -223,6 +269,9 @@ def build():
         doi = paper_meta.get("doi") or reg_entry.get("doi", "")
         source_url = reg_entry.get("source_url", "")
 
+        # Code URL extraction
+        code_url = extract_code_url(checklist)
+
         year_scores[year].append(score_pct)
         for t in tags:
             tag_counts[t] += 1
@@ -230,7 +279,7 @@ def build():
         for g in games:
             game_counts[g] += 1
 
-        # Category aggregations
+        # Category + question aggregations
         for cat in ALL_CATEGORIES:
             cat_data = checklist.get(cat, {})
             if not isinstance(cat_data, dict):
@@ -240,8 +289,47 @@ def build():
                     continue
                 if q_data["applies"]:
                     cat_pass_counts[cat]["applicable"] += 1
+                    question_pass_counts[f"{cat}.{q_name}"]["applicable"] += 1
                     if q_data.get("answer", False):
                         cat_pass_counts[cat]["passed"] += 1
+                        question_pass_counts[f"{cat}.{q_name}"]["passed"] += 1
+                    # Year × category
+                    if year:
+                        year_cat_scores[year][cat]["applicable"] += 1
+                        if q_data.get("answer", False):
+                            year_cat_scores[year][cat]["passed"] += 1
+
+        # Venue scoring (skip arXiv — it's a preprint server, not a venue)
+        venue_clean = venue.strip()
+        if venue_clean and venue_clean.lower() not in ("arxiv", "arxiv.org", ""):
+            venue_scores[venue_clean].append(score_pct)
+
+        # Citation band scoring
+        cit = metadata.get("citation_count")
+        if cit is not None:
+            if cit == 0:
+                band = "0"
+            elif cit <= 50:
+                band = "1-50"
+            elif cit <= 500:
+                band = "51-500"
+            else:
+                band = "500+"
+            citation_band_scores[band].append(score_pct)
+
+        # Benchmark monoculture
+        if year:
+            benchmark_only_by_year[year]["total"] += 1
+            if tags == ["benchmark-eval"]:
+                benchmark_only_by_year[year]["benchmark_only"] += 1
+
+        # Funding gap
+        fd = checklist.get("conflicts_of_interest", {}).get("funding_disclosed", {})
+        if fd.get("applies"):
+            if fd.get("answer"):
+                funding_groups["disclosed"].append(score_pct)
+            else:
+                funding_groups["not_disclosed"].append(score_pct)
 
         claims = scan.get("claims", [])
         red_flags = scan.get("red_flags", [])
@@ -270,7 +358,7 @@ def build():
 
         cat_scores_pct = {k: round(v * 100, 1) for k, v in cat_scores.items()}
 
-        # Slim index entry (for table)
+        # Slim index entry
         index_entry = {
             "id": paper_id,
             "title": paper_meta.get("title", reg_entry.get("title", paper_id)),
@@ -282,10 +370,11 @@ def build():
             "games": games,
             "arxiv_id": arxiv_id,
             "doi": doi,
+            "code_url": code_url,
         }
         papers_index.append(index_entry)
 
-        # Full detail (for per-paper file)
+        # Full detail
         detail = {
             **index_entry,
             "category_scores": cat_scores_pct,
@@ -297,11 +386,9 @@ def build():
             "source_url": source_url,
         }
         paper_details[paper_id] = detail
-
-        # Full entry for explorer.json
         papers_full.append(detail)
 
-    # --- Aggregations ---
+    # --- Dashboard aggregations ---
     all_scores.sort()
     n = len(all_scores)
     median = all_scores[n // 2] if n else 0
@@ -368,9 +455,121 @@ def build():
         "pipeline": pipeline,
     }
 
+    # --- Findings aggregations ---
+
+    # 1. Per-question pass rates
+    q_rates = {}
+    for key, d in question_pass_counts.items():
+        if d["applicable"] > 0:
+            q_rates[key] = {"rate": round(d["passed"] / d["applicable"] * 100, 1), "n": d["applicable"]}
+
+    # 2. Year trends by category
+    year_cat_trends = {}
+    for y in sorted(year_cat_scores.keys()):
+        year_cat_trends[str(y)] = {}
+        for cat in ALL_CATEGORIES:
+            d = year_cat_scores[y][cat]
+            if d["applicable"] > 0:
+                year_cat_trends[str(y)][cat] = round(d["passed"] / d["applicable"] * 100, 1)
+
+    # 3. Venue & citation scoring
+    venue_stats = {}
+    for v, scores in venue_scores.items():
+        if len(scores) >= 3:
+            venue_stats[v] = {"n": len(scores), "mean": safe_mean(scores), "median": safe_median(scores)}
+
+    cit_band_stats = {}
+    for band in ["0", "1-50", "51-500", "500+"]:
+        scores = citation_band_scores.get(band, [])
+        if scores:
+            cit_band_stats[band] = {"n": len(scores), "mean": safe_mean(scores), "median": safe_median(scores)}
+
+    # 4. Optimism-rigor inversion
+    optimism_rigor = {}
+    for key, sides in tensions.items():
+        pos = [c["score"] for c in sides["positive"]]
+        nua = [c["score"] for c in sides["nuanced"]]
+        optimism_rigor[key] = {
+            "positive_n": len(pos), "positive_mean": safe_mean(pos),
+            "nuanced_n": len(nua), "nuanced_mean": safe_mean(nua),
+            "gap": round(safe_mean(nua) - safe_mean(pos), 1),
+        }
+
+    # 5. Quality homophily
+    threshold = 60
+    high_quality_ids = {pid for pid, sc in score_map.items() if sc >= threshold}
+    baseline_pct = round(len(high_quality_ids) / total_papers * 100, 1) if total_papers else 0
+
+    cited_high = 0
+    cited_total = 0
+    for edge in citation_data.get("edges", []):
+        s, t = edge["source"], edge["target"]
+        if s in high_quality_ids and t in score_map:
+            cited_total += 1
+            if score_map[t] >= threshold:
+                cited_high += 1
+
+    homophily = {
+        "threshold": threshold,
+        "baseline_pct": baseline_pct,
+        "high_cite_high_pct": round(cited_high / cited_total * 100, 1) if cited_total else 0,
+        "high_cite_total": cited_total,
+    }
+
+    # 6. Sampling effect (historical checkpoints + current)
+    sampling_effect = {
+        "checkpoints": [
+            {"n": 135, "median": 53.3},
+            {"n": 271, "median": 50.6},
+            {"n": 467, "median": 50.0},
+            {"n": total_papers, "median": round(median, 1)},
+        ]
+    }
+
+    # 7. Benchmark monoculture
+    bench_mono = {}
+    for y in sorted(benchmark_only_by_year.keys()):
+        d = benchmark_only_by_year[y]
+        if d["total"] > 0:
+            bench_mono[str(y)] = {
+                "benchmark_only": d["benchmark_only"],
+                "total": d["total"],
+                "pct": round(d["benchmark_only"] / d["total"] * 100, 1),
+            }
+
+    # 8. Funding gap
+    funding_gap = {}
+    for group, scores in funding_groups.items():
+        if scores:
+            funding_gap[group] = {"n": len(scores), "mean": safe_mean(scores), "median": safe_median(scores)}
+
+    # 9. Reproducibility drill-down
+    artifacts_qs = ["code_released", "data_released", "environment_specified", "reproduction_instructions"]
+    repro_detail = {}
+    for q in artifacts_qs:
+        key = f"artifacts.{q}"
+        d = question_pass_counts.get(key, {"passed": 0, "applicable": 0})
+        if d["applicable"] > 0:
+            repro_detail[q] = {"rate": round(d["passed"] / d["applicable"] * 100, 1), "n": d["applicable"]}
+    repro_detail["full_pass_count"] = repro_count
+    repro_detail["full_pass_pct"] = round(repro_count / total_papers * 100, 1) if total_papers else 0
+
+    findings = {
+        "question_rates": q_rates,
+        "year_category_trends": year_cat_trends,
+        "venue_stats": venue_stats,
+        "citation_band_stats": cit_band_stats,
+        "optimism_rigor": optimism_rigor,
+        "homophily": homophily,
+        "sampling_effect": sampling_effect,
+        "benchmark_monoculture": bench_mono,
+        "funding_gap": funding_gap,
+        "repro_detail": repro_detail,
+        "game_pcts": game_pcts,
+    }
+
     # --- Citation network ---
     v2_ids = {p["id"] for p in papers_full}
-    score_map = {p["id"]: p["score"] for p in papers_full}
     year_map = {p["id"]: p["year"] for p in papers_full}
 
     all_graph_ids = {n["id"] for n in citation_data.get("nodes", [])}
@@ -401,7 +600,7 @@ def build():
     papers_detail_dir = OUTPUT_DIR / "papers"
     papers_detail_dir.mkdir(parents=True, exist_ok=True)
 
-    # Add unscanned registry entries to papers-index (score=null, no detail page)
+    # Add unscanned registry entries to papers-index
     scanned_ids = {p["id"] for p in papers_index}
     for entry in registry.values():
         if entry["id"] in scanned_ids:
@@ -419,9 +618,11 @@ def build():
             "games": [],
             "arxiv_id": entry.get("arxiv_id", ""),
             "doi": entry.get("doi", ""),
+            "code_url": None,
         })
 
     write_json(OUTPUT_DIR / "dashboard.json", dashboard)
+    write_json(OUTPUT_DIR / "findings.json", findings)
     write_json(OUTPUT_DIR / "papers-index.json", papers_index)
     write_json(OUTPUT_DIR / "network.json", network)
     write_json(OUTPUT_DIR / "tensions.json", tensions)
@@ -431,9 +632,10 @@ def build():
 
     # Full monolith
     explorer = {
-        "generated": "2026-03-18",
+        "generated": "2026-03-21",
         "papers": papers_full,
         "agg": dashboard,
+        "findings": findings,
         "tensions": tensions,
         "citation_network": network,
     }
@@ -441,15 +643,19 @@ def build():
 
     # Report
     dash_size = (OUTPUT_DIR / "dashboard.json").stat().st_size
+    find_size = (OUTPUT_DIR / "findings.json").stat().st_size
     idx_size = (OUTPUT_DIR / "papers-index.json").stat().st_size
     net_size = (OUTPUT_DIR / "network.json").stat().st_size
     tens_size = (OUTPUT_DIR / "tensions.json").stat().st_size
     full_size = (OUTPUT_DIR / "explorer.json").stat().st_size
+    code_url_count = sum(1 for p in papers_full if p.get("code_url"))
     print(f"Papers: {total_papers}")
     print(f"Median score: {median:.1f}%")
+    print(f"Code URLs extracted: {code_url_count}")
     print(f"Network: {len(net_nodes)} nodes, {len(net_edges)} edges")
     print(f"Files:")
     print(f"  dashboard.json:    {dash_size:>8,} bytes")
+    print(f"  findings.json:     {find_size:>8,} bytes")
     print(f"  papers-index.json: {idx_size:>8,} bytes")
     print(f"  papers/*.json:     {len(paper_details):>5} files")
     print(f"  network.json:      {net_size:>8,} bytes")

	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs

M	explorer/index.html	\|	1	+
A	explorer/src/components/multi-line-chart.ts	\|	48	++++++++++++++++++++++++++++++++++++++++++++++++
M	explorer/src/data.ts	\|	48	++++++++++++++++++++++++++++++++++++------------
M	explorer/src/main.ts	\|	2	++
M	explorer/src/style.css	\|	20	++++++++++++++++++++
A	explorer/src/views/findings.ts	\|	304	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	explorer/src/views/paper-detail.ts	\|	3	+++
M	explorer/tests/explorer.spec.ts	\|	46	++++++++++++++++++++++++++++++++++++++++++++++
M	scripts/build-explorer-data.py	\|	234	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----