ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

commit fab02ffcee6cc4b837e996171068e5654295171d
parent f40a5cabd9d1f25608787a26f5ebfc43cd07177e
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Wed, 18 Mar 2026 14:11:09 +0100

Split data pipeline, add light/dark mode, fix network and detail views

- Split explorer.json into per-view files: dashboard.json (1.6KB),
  papers-index.json (150KB), papers/{slug}.json, network.json, tensions.json.
  Dashboard now loads instantly instead of waiting for 9MB.
- Add light/dark mode toggle with localStorage persistence and
  prefers-color-scheme detection.
- Fix network: higher edge contrast (theme-aware), larger hit radius for
  hover/click, pointer cursor, "click to view" hint, drag vs click
  distinction, node outlines.
- Add arXiv/DOI/source links on paper detail pages.
- Add CSS spinner on all view loads.
- Gitignore all generated data files (explorer/public/data/).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M .gitignore | 2 +-
M explorer/index.html | 3 ++-
M explorer/src/data.ts | 43 +++++++++++++++++++++++++------------------
M explorer/src/main.ts | 18 ++++++++----------
M explorer/src/style.css | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
A explorer/src/theme.ts | 24 ++++++++++++++++++++++++
M explorer/src/views/dashboard.ts | 24 +++++++++++-------------
M explorer/src/views/network.ts | 61 +++++++++++++++++++++++++++++++++++++++----------------------
M explorer/src/views/paper-detail.ts | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
M explorer/src/views/papers.ts | 25 +++++++++++++------------
M explorer/src/views/tensions.ts | 20 ++++++++++----------
M explorer/tests/explorer.spec.ts | 61 +++++++++++++++++++++++++++++++++----------------------------
M scripts/build-explorer-data.py | 214 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
13 files changed, 415 insertions(+), 257 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -31,7 +31,7 @@ __pycache__/ # Explorer (generated/build artifacts) explorer/node_modules/ explorer/dist/ -explorer/public/data/explorer.json +explorer/public/data/ explorer/test-results/ # Playwright diff --git a/explorer/index.html b/explorer/index.html @@ -1,5 +1,5 @@ <!DOCTYPE html> -<html lang="en"> +<html lang="en" data-theme="dark"> <head> <meta charset="UTF-8" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" /> @@ -15,6 +15,7 @@ <a href="#/network">Network</a> <a href="#/tensions">Tensions</a> </nav> + <button id="theme-toggle" aria-label="Toggle theme">☀</button> </header> <main id="app"></main> <script type="module" src="/src/main.ts"></script> diff --git a/explorer/src/data.ts b/explorer/src/data.ts @@ -16,21 +16,29 @@ export interface RedFlag { detail: string; } -export interface Paper { +// Slim entry for paper table (papers-index.json) +export interface PaperIndex { id: string; title: string; year: number; venue: string; tags: string[]; score: number; - category_scores: Record<string, number>; archetype: string; games: string[]; + arxiv_id: string; + doi: string; +} + +// Full detail for per-paper pages (papers/{slug}.json) +export interface PaperDetail extends PaperIndex { + category_scores: Record<string, number>; claims: Claim[]; red_flags: RedFlag[]; checklist: ChecklistItem[]; key_findings: string; active_modules: string[]; + source_url: string; } export interface HistBin { @@ -39,14 +47,13 @@ export interface HistBin { count: number; } -export interface Agg { +export interface Dashboard { n: number; median: number; mean: number; full_reproducibility_pct: number; histogram: HistBin[]; category_rates: Record<string, number>; - question_rates: Record<string, { rate: number; n: number }>; year_trends: Record<string, { n: number; mean: number; median: number }>; game_pcts: Record<string, number>; archetype_counts: Record<string, number>; @@ -80,19 +87,19 @@ export interface 
CitationNetwork { edges: [string, string][]; } -export interface ExplorerData { - generated: string; - papers: Paper[]; - agg: Agg; - tensions: Tensions; - citation_network: CitationNetwork; -} - -let cached: ExplorerData | null = null; +// Lazy-loaded per-view data +const cache: Record<string, unknown> = {}; -export async function loadData(): Promise<ExplorerData> { - if (cached) return cached; - const resp = await fetch('/data/explorer.json'); - cached = await resp.json(); - return cached!; +async function fetchJson<T>(path: string): Promise<T> { + if (cache[path]) return cache[path] as T; + const resp = await fetch(path); + const data = await resp.json(); + cache[path] = data; + return data as T; } + +export const loadDashboard = () => fetchJson<Dashboard>('/data/dashboard.json'); +export const loadPapersIndex = () => fetchJson<PaperIndex[]>('/data/papers-index.json'); +export const loadPaperDetail = (slug: string) => fetchJson<PaperDetail>(`/data/papers/${slug}.json`); +export const loadNetwork = () => fetchJson<CitationNetwork>('/data/network.json'); +export const loadTensions = () => fetchJson<Tensions>('/data/tensions.json'); diff --git a/explorer/src/main.ts b/explorer/src/main.ts @@ -1,22 +1,20 @@ -import { loadData } from './data'; import { route, startRouter } from './router'; import { renderDashboard } from './views/dashboard'; import { renderPapers } from './views/papers'; import { renderPaperDetail } from './views/paper-detail'; import { renderNetwork } from './views/network'; import { renderTensions } from './views/tensions'; +import { initTheme } from './theme'; -async function init() { +function init() { const app = document.getElementById('app')!; - app.innerHTML = '<div class="loading">Loading data...</div>'; + initTheme(); - const data = await loadData(); - - route('/', () => renderDashboard(app, data)); - route('/papers', () => renderPapers(app, data)); - route('/paper/:slug', ({ slug }) => renderPaperDetail(app, data, slug)); - 
route('/network', () => renderNetwork(app, data)); - route('/tensions', () => renderTensions(app, data)); + route('/', () => renderDashboard(app)); + route('/papers', () => renderPapers(app)); + route('/paper/:slug', ({ slug }) => renderPaperDetail(app, slug)); + route('/network', () => renderNetwork(app)); + route('/tensions', () => renderTensions(app)); startRouter(); } diff --git a/explorer/src/style.css b/explorer/src/style.css @@ -1,4 +1,5 @@ -:root { +/* Dark theme (default) */ +:root, [data-theme="dark"] { --bg: #0f1117; --surface: #1a1d27; --border: #2a2d3a; @@ -9,9 +10,31 @@ --red: #f06565; --yellow: #f0c050; --gray: #555; + --net-edge: rgba(140, 170, 255, 0.25); + --net-bg: #1a1d27; + --grid-line: #2a2d3a; + --hover-bg: rgba(108, 140, 255, 0.08); --font: 'Menlo', 'Consolas', 'Monaco', monospace; } +/* Light theme */ +[data-theme="light"] { + --bg: #f5f6f8; + --surface: #ffffff; + --border: #d8dbe3; + --text: #1a1d27; + --text-dim: #5c6070; + --accent: #3b5ccc; + --green: #1a8f55; + --red: #c93c3c; + --yellow: #b38a1a; + --gray: #999; + --net-edge: rgba(60, 90, 200, 0.3); + --net-bg: #ffffff; + --grid-line: #e0e2e8; + --hover-bg: rgba(59, 92, 204, 0.06); +} + * { margin: 0; padding: 0; box-sizing: border-box; } body { @@ -36,7 +59,7 @@ header { header h1 { font-size: 1.1rem; font-weight: 600; } header h1 a { color: var(--text); text-decoration: none; } -nav { display: flex; gap: 1.5rem; } +nav { display: flex; gap: 1.5rem; flex: 1; } nav a { color: var(--text-dim); text-decoration: none; @@ -50,12 +73,42 @@ nav a:hover, nav a.active { border-bottom-color: var(--accent); } +#theme-toggle { + background: none; + border: 1px solid var(--border); + color: var(--text); + font-size: 1.1rem; + cursor: pointer; + border-radius: 4px; + padding: 0.2rem 0.5rem; + transition: border-color 0.15s; +} +#theme-toggle:hover { border-color: var(--accent); } + main { max-width: 1400px; margin: 0 auto; padding: 2rem; } +/* Spinner */ +.spinner { + display: flex; + 
justify-content: center; + align-items: center; + padding: 4rem; +} +.spinner::after { + content: ''; + width: 32px; + height: 32px; + border: 3px solid var(--border); + border-top-color: var(--accent); + border-radius: 50%; + animation: spin 0.6s linear infinite; +} +@keyframes spin { to { transform: rotate(360deg); } } + /* Cards */ .cards { display: grid; @@ -96,6 +149,7 @@ main { svg text { fill: var(--text-dim); font-size: 11px; } svg .bar { transition: opacity 0.15s; } svg .bar:hover { opacity: 0.8; } +svg .grid-line { stroke: var(--grid-line); } /* Horizontal bar chart */ .hbar { margin-bottom: 0.5rem; } @@ -148,14 +202,14 @@ thead th { user-select: none; } thead th:hover { color: var(--text); } -thead th.sorted::after { content: ' ▾'; } -thead th.sorted.asc::after { content: ' ▴'; } +thead th.sorted::after { content: ' \u25BE'; } +thead th.sorted.asc::after { content: ' \u25B4'; } tbody tr { border-bottom: 1px solid var(--border); cursor: pointer; transition: background 0.1s; } -tbody tr:hover { background: rgba(108, 140, 255, 0.08); } +tbody tr:hover { background: var(--hover-bg); } td { padding: 0.5rem 0.75rem; } td.score { font-family: var(--font); @@ -256,7 +310,7 @@ td.score { /* Paper detail */ .paper-header { margin-bottom: 1.5rem; } .paper-header h2 { font-size: 1.3rem; font-weight: 600; margin-bottom: 0.5rem; color: var(--text); text-transform: none; letter-spacing: normal; } -.paper-meta { font-size: 0.85rem; color: var(--text-dim); display: flex; gap: 1rem; flex-wrap: wrap; } +.paper-meta { font-size: 0.85rem; color: var(--text-dim); display: flex; gap: 1rem; flex-wrap: wrap; align-items: center; } .paper-meta .tag { background: rgba(108, 140, 255, 0.15); color: var(--accent); @@ -264,6 +318,23 @@ td.score { border-radius: 3px; font-size: 0.75rem; } +.paper-links { + display: flex; + gap: 0.75rem; + margin-top: 0.5rem; +} +.paper-links a { + color: var(--accent); + text-decoration: none; + font-size: 0.8rem; + padding: 0.15rem 0.5rem; + border: 
1px solid var(--accent); + border-radius: 3px; + transition: background 0.15s; +} +.paper-links a:hover { + background: rgba(108, 140, 255, 0.12); +} .back-link { display: inline-block; color: var(--accent); @@ -286,7 +357,7 @@ td.score { #network-canvas { width: 100%; border-radius: 8px; - background: var(--surface); + background: var(--net-bg); border: 1px solid var(--border); cursor: grab; } @@ -301,6 +372,7 @@ td.score { pointer-events: none; z-index: 200; max-width: 300px; + box-shadow: 0 2px 8px rgba(0,0,0,0.3); } /* Tensions */ @@ -355,10 +427,3 @@ td.score { /* Year trend chart */ .trend-chart { margin-top: 0.5rem; } - -/* Loading */ -.loading { - text-align: center; - padding: 4rem; - color: var(--text-dim); -} diff --git a/explorer/src/theme.ts b/explorer/src/theme.ts @@ -0,0 +1,24 @@ +const STORAGE_KEY = 'theme'; + +export function initTheme() { + const saved = localStorage.getItem(STORAGE_KEY); + const prefersDark = window.matchMedia('(prefers-color-scheme: dark)').matches; + const theme = saved || (prefersDark ? 'dark' : 'light'); + applyTheme(theme); + + const btn = document.getElementById('theme-toggle'); + if (btn) { + btn.addEventListener('click', () => { + const current = document.documentElement.getAttribute('data-theme') || 'dark'; + const next = current === 'dark' ? 'light' : 'dark'; + applyTheme(next); + localStorage.setItem(STORAGE_KEY, next); + }); + } +} + +function applyTheme(theme: string) { + document.documentElement.setAttribute('data-theme', theme); + const btn = document.getElementById('theme-toggle'); + if (btn) btn.textContent = theme === 'dark' ? 
'☀' : '☾'; +} diff --git a/explorer/src/views/dashboard.ts b/explorer/src/views/dashboard.ts @@ -1,11 +1,11 @@ -import type { ExplorerData } from '../data'; +import { loadDashboard, type Dashboard } from '../data'; import { renderHistogram } from '../components/histogram'; import { renderBarChart } from '../components/bar-chart'; -export function renderDashboard(app: HTMLElement, data: ExplorerData) { - const { agg } = data; +export async function renderDashboard(app: HTMLElement) { + app.innerHTML = '<div class="spinner"></div>'; + const agg = await loadDashboard(); - // Top game percentage const topGame = Object.entries(agg.game_pcts).sort((a, b) => b[1] - a[1])[0]; app.innerHTML = ` @@ -67,25 +67,23 @@ function renderYearTrends(trends: Record<string, { n: number; mean: number; medi `<text x="${xScale(y)}" y="${h - 5}" text-anchor="middle">${y}</text>` ).join(''); - // Y-axis labels for (let v = 0; v <= 100; v += 25) { labels += `<text x="${pad.l - 8}" y="${yScale(v) + 4}" text-anchor="end">${v}%</text>`; - labels += `<line x1="${pad.l}" x2="${w - pad.r}" y1="${yScale(v)}" y2="${yScale(v)}" stroke="#2a2d3a" stroke-dasharray="3"/>`; + labels += `<line class="grid-line" x1="${pad.l}" x2="${w - pad.r}" y1="${yScale(v)}" y2="${yScale(v)}" stroke-dasharray="3"/>`; } - // Data labels const dataLabels = years.map(y => { const t = trends[y]; - return `<text x="${xScale(y)}" y="${yScale(t.median) - 8}" text-anchor="middle" fill="#f0c050" font-size="10">${t.median}% (n=${t.n})</text>`; + return `<text x="${xScale(y)}" y="${yScale(t.median) - 8}" text-anchor="middle" fill="var(--yellow)" font-size="10">${t.median}% (n=${t.n})</text>`; }).join(''); return `<svg viewBox="0 0 ${w} ${h}" class="trend-chart" style="width:100%;max-width:${w}px"> ${labels} - <polyline points="${medianPoints}" fill="none" stroke="#f0c050" stroke-width="2"/> - <polyline points="${meanPoints}" fill="none" stroke="#6c8cff" stroke-width="2" stroke-dasharray="4"/> - ${years.map(y => `<circle 
cx="${xScale(y)}" cy="${yScale(trends[y].median)}" r="3" fill="#f0c050"/>`).join('')} + <polyline points="${medianPoints}" fill="none" stroke="var(--yellow)" stroke-width="2"/> + <polyline points="${meanPoints}" fill="none" stroke="var(--accent)" stroke-width="2" stroke-dasharray="4"/> + ${years.map(y => `<circle cx="${xScale(y)}" cy="${yScale(trends[y].median)}" r="3" fill="var(--yellow)"/>`).join('')} ${dataLabels} - <text x="${w - pad.r}" y="${pad.t + 12}" text-anchor="end" fill="#f0c050" font-size="10">median</text> - <text x="${w - pad.r}" y="${pad.t + 24}" text-anchor="end" fill="#6c8cff" font-size="10">mean</text> + <text x="${w - pad.r}" y="${pad.t + 12}" text-anchor="end" fill="var(--yellow)" font-size="10">median</text> + <text x="${w - pad.r}" y="${pad.t + 24}" text-anchor="end" fill="var(--accent)" font-size="10">mean</text> </svg>`; } diff --git a/explorer/src/views/network.ts b/explorer/src/views/network.ts @@ -1,4 +1,4 @@ -import type { ExplorerData, NetNode } from '../data'; +import { loadNetwork, type NetNode } from '../data'; import { navigate } from '../router'; interface SimNode extends NetNode { @@ -8,8 +8,9 @@ interface SimNode extends NetNode { vy: number; } -export function renderNetwork(app: HTMLElement, data: ExplorerData) { - const { nodes, edges } = data.citation_network; +export async function renderNetwork(app: HTMLElement) { + app.innerHTML = '<div class="spinner"></div>'; + const { nodes, edges } = await loadNetwork(); app.innerHTML = ` <div class="filters" style="margin-bottom:1rem"> @@ -25,7 +26,6 @@ export function renderNetwork(app: HTMLElement, data: ExplorerData) { const ctx = canvas.getContext('2d')!; const tooltip = document.getElementById('net-tooltip')!; - // Build adjacency for degree filtering const adjCount = new Map<string, number>(); for (const [s, t] of edges) { adjCount.set(s, (adjCount.get(s) || 0) + 1); @@ -58,7 +58,6 @@ export function renderNetwork(app: HTMLElement, data: ExplorerData) { const countEl = 
document.getElementById('net-count'); if (countEl) countEl.textContent = `${fNodes.length} nodes, ${fEdges.length} edges`; - // Init positions randomly simNodes = fNodes.map(n => ({ ...n, x: (Math.random() - 0.5) * 800 + 600, @@ -73,7 +72,6 @@ export function renderNetwork(app: HTMLElement, data: ExplorerData) { if (si !== undefined && ti !== undefined) simEdges.push([si, ti]); } - // Simple force simulation let alpha = 1; cancelAnimationFrame(animId); @@ -81,7 +79,6 @@ export function renderNetwork(app: HTMLElement, data: ExplorerData) { alpha *= 0.995; if (alpha < 0.001) { draw(); return; } - // Repulsion (Barnes-Hut would be better but this works for <600 nodes) for (let i = 0; i < simNodes.length; i++) { let fx = 0, fy = 0; for (let j = 0; j < simNodes.length; j++) { @@ -93,7 +90,6 @@ export function renderNetwork(app: HTMLElement, data: ExplorerData) { fx += dx * f; fy += dy * f; } - // Center gravity fx += (600 - simNodes[i].x) * 0.01; fy += (350 - simNodes[i].y) * 0.01; @@ -101,7 +97,6 @@ export function renderNetwork(app: HTMLElement, data: ExplorerData) { simNodes[i].vy = (simNodes[i].vy + fy * alpha) * 0.6; } - // Link force for (const [si, ti] of simEdges) { const dx = simNodes[ti].x - simNodes[si].x; const dy = simNodes[ti].y - simNodes[si].y; @@ -128,23 +123,34 @@ export function renderNetwork(app: HTMLElement, data: ExplorerData) { } function scoreToColor(score: number | null): string { - if (score === null) return '#555'; + if (score === null) return '#888'; if (score < 30) return '#f06565'; if (score < 50) return '#f0c050'; if (score < 70) return '#6c8cff'; return '#3dd68c'; } + // Read theme-aware edge color from CSS variable + function getEdgeColor(): string { + return getComputedStyle(document.documentElement).getPropertyValue('--net-edge').trim(); + } + function draw() { const w = canvas.width, h = canvas.height; ctx.clearRect(0, 0, w, h); + + // Fill background with theme color + const bgColor = 
getComputedStyle(document.documentElement).getPropertyValue('--net-bg').trim(); + ctx.fillStyle = bgColor; + ctx.fillRect(0, 0, w, h); + ctx.save(); ctx.translate(transform.x, transform.y); ctx.scale(transform.k, transform.k); - // Edges - ctx.strokeStyle = 'rgba(108,140,255,0.1)'; - ctx.lineWidth = 0.5; + // Edges — visible on both themes + ctx.strokeStyle = getEdgeColor(); + ctx.lineWidth = 0.8; for (const [si, ti] of simEdges) { ctx.beginPath(); ctx.moveTo(simNodes[si].x, simNodes[si].y); @@ -154,11 +160,15 @@ export function renderNetwork(app: HTMLElement, data: ExplorerData) { // Nodes for (const n of simNodes) { - const r = Math.max(2, Math.min(8, 2 + n.in_degree * 0.5)); + const r = Math.max(3, Math.min(10, 3 + n.in_degree * 0.6)); ctx.beginPath(); ctx.arc(n.x, n.y, r, 0, Math.PI * 2); ctx.fillStyle = scoreToColor(n.score); ctx.fill(); + // Subtle outline for visibility + ctx.strokeStyle = 'rgba(0,0,0,0.3)'; + ctx.lineWidth = 0.5; + ctx.stroke(); } ctx.restore(); @@ -166,40 +176,46 @@ export function renderNetwork(app: HTMLElement, data: ExplorerData) { // Pan & zoom let dragging = false; + let dragMoved = false; let lastX = 0, lastY = 0; canvas.addEventListener('mousedown', e => { dragging = true; + dragMoved = false; lastX = e.clientX; lastY = e.clientY; }); canvas.addEventListener('mousemove', e => { if (dragging) { - transform.x += e.clientX - lastX; - transform.y += e.clientY - lastY; + const dx = e.clientX - lastX, dy = e.clientY - lastY; + if (Math.abs(dx) > 2 || Math.abs(dy) > 2) dragMoved = true; + transform.x += dx; + transform.y += dy; lastX = e.clientX; lastY = e.clientY; draw(); } - // Tooltip + // Tooltip — larger hit radius const rect = canvas.getBoundingClientRect(); const mx = (e.clientX - rect.left - transform.x) / transform.k; const my = (e.clientY - rect.top - transform.y) / transform.k; let closest: SimNode | null = null; - let closestDist = 15; + let closestDist = 25; for (const n of simNodes) { const d = Math.sqrt((n.x - mx) ** 2 + 
(n.y - my) ** 2); if (d < closestDist) { closest = n; closestDist = d; } } if (closest) { + canvas.style.cursor = closest.has_scan ? 'pointer' : 'default'; tooltip.style.display = 'block'; - tooltip.style.left = e.clientX + 12 + 'px'; - tooltip.style.top = e.clientY + 12 + 'px'; + tooltip.style.left = e.clientX + 14 + 'px'; + tooltip.style.top = e.clientY + 14 + 'px'; tooltip.innerHTML = `<strong>${closest.title}</strong><br> ${closest.score != null ? `Score: ${closest.score}%` : 'Not scanned'}<br> - Connections: ${closest.in_degree}`; + Connections: ${closest.in_degree}${closest.has_scan ? '<br><em>Click to view</em>' : ''}`; } else { + canvas.style.cursor = dragging ? 'grabbing' : 'grab'; tooltip.style.display = 'none'; } }); @@ -219,12 +235,13 @@ export function renderNetwork(app: HTMLElement, data: ExplorerData) { }, { passive: false }); canvas.addEventListener('click', e => { + if (dragMoved) return; // don't navigate after a drag const rect = canvas.getBoundingClientRect(); const mx = (e.clientX - rect.left - transform.x) / transform.k; const my = (e.clientY - rect.top - transform.y) / transform.k; for (const n of simNodes) { const d = Math.sqrt((n.x - mx) ** 2 + (n.y - my) ** 2); - if (d < 12 && n.has_scan) { navigate(`/paper/${n.id}`); return; } + if (d < 20 && n.has_scan) { navigate(`/paper/${n.id}`); return; } } }); diff --git a/explorer/src/views/paper-detail.ts b/explorer/src/views/paper-detail.ts @@ -1,10 +1,10 @@ -import type { ExplorerData, Paper } from '../data'; +import { loadPaperDetail, loadNetwork } from '../data'; function scoreColor(s: number): string { - if (s < 30) return '#f06565'; - if (s < 50) return '#f0c050'; - if (s < 70) return '#6c8cff'; - return '#3dd68c'; + if (s < 30) return 'var(--red)'; + if (s < 50) return 'var(--yellow)'; + if (s < 70) return 'var(--accent)'; + return 'var(--green)'; } function formatCatName(name: string): string { @@ -15,9 +15,18 @@ function formatQName(name: string): string { return name.replace(/_/g, ' 
').replace(/\b\w/g, c => c.toUpperCase()); } -export function renderPaperDetail(app: HTMLElement, data: ExplorerData, slug: string) { - const paper = data.papers.find(p => p.id === slug); - if (!paper) { +export async function renderPaperDetail(app: HTMLElement, slug: string) { + app.innerHTML = '<div class="spinner"></div>'; + + let paper; + try { + paper = await loadPaperDetail(slug); + } catch { + app.innerHTML = `<p>Paper not found: ${slug}</p><a class="back-link" href="#/papers">Back to papers</a>`; + return; + } + + if (!paper || !paper.id) { app.innerHTML = `<p>Paper not found: ${slug}</p><a class="back-link" href="#/papers">Back to papers</a>`; return; } @@ -29,15 +38,46 @@ export function renderPaperDetail(app: HTMLElement, data: ExplorerData, slug: st categories.get(item.category)!.push(item); } - // Find citations (incoming/outgoing) - const net = data.citation_network; - const incoming = net.edges.filter(e => e[1] === slug).map(e => e[0]); - const outgoing = net.edges.filter(e => e[0] === slug).map(e => e[1]); + // External links + const links: string[] = []; + if (paper.arxiv_id) { + links.push(`<a href="https://arxiv.org/abs/${paper.arxiv_id}" target="_blank" rel="noopener">arXiv</a>`); + links.push(`<a href="https://arxiv.org/pdf/${paper.arxiv_id}" target="_blank" rel="noopener">PDF</a>`); + } + if (paper.doi) { + links.push(`<a href="https://doi.org/${paper.doi}" target="_blank" rel="noopener">DOI</a>`); + } + if (paper.source_url && !paper.source_url.includes('arxiv.org')) { + links.push(`<a href="${paper.source_url}" target="_blank" rel="noopener">Source</a>`); + } + + // Load network for citations (lazy, non-blocking) + let incomingHtml = ''; + let outgoingHtml = ''; + try { + const net = await loadNetwork(); + const incoming = net.edges.filter(e => e[1] === slug).map(e => e[0]); + const outgoing = net.edges.filter(e => e[0] === slug).map(e => e[1]); + const nodeMap = new Map(net.nodes.map(n => [n.id, n])); - const nodeMap = new 
Map(net.nodes.map(n => [n.id, n])); + if (incoming.length) { + incomingHtml = `<h3 style="font-size:0.85rem;color:var(--text-dim);margin:0.5rem 0">Cited by (${incoming.length})</h3> + ${incoming.map(id => { + const n = nodeMap.get(id); + return `<div style="font-size:0.82rem;padding:0.2rem 0"><a href="#/paper/${id}" style="color:var(--accent);text-decoration:none">${n?.title || id}</a>${n?.score != null ? ` <span style="color:${scoreColor(n.score)};font-family:var(--font);font-size:0.75rem">${n.score}%</span>` : ''}</div>`; + }).join('')}`; + } + if (outgoing.length) { + outgoingHtml = `<h3 style="font-size:0.85rem;color:var(--text-dim);margin:0.5rem 0">Cites (${outgoing.length})</h3> + ${outgoing.map(id => { + const n = nodeMap.get(id); + return `<div style="font-size:0.82rem;padding:0.2rem 0"><a href="#/paper/${id}" style="color:var(--accent);text-decoration:none">${n?.title || id}</a>${n?.score != null ? ` <span style="color:${scoreColor(n.score)};font-family:var(--font);font-size:0.75rem">${n.score}%</span>` : ''}</div>`; + }).join('')}`; + } + } catch { /* network data optional */ } app.innerHTML = ` - <a class="back-link" href="#/papers">← Back to papers</a> + <a class="back-link" href="#/papers">\u2190 Back to papers</a> <div class="paper-header"> <h2>${paper.title}</h2> <div class="paper-meta"> @@ -47,6 +87,7 @@ export function renderPaperDetail(app: HTMLElement, data: ExplorerData, slug: st <span class="archetype ${paper.archetype}">${paper.archetype}</span> ${paper.tags.map(t => `<span class="tag">${t}</span>`).join('')} </div> + ${links.length ? `<div class="paper-links">${links.join('')}</div>` : ''} </div> ${paper.key_findings ? `<div class="section"><h2>Key Findings</h2><p style="font-size:0.9rem">${paper.key_findings}</p></div>` : ''} @@ -60,7 +101,7 @@ export function renderPaperDetail(app: HTMLElement, data: ExplorerData, slug: st <div class="checklist-category"> <h3>${formatCatName(cat)} ${paper.category_scores[cat] != null ? 
`<span style="color:${scoreColor(paper.category_scores[cat])};font-size:0.8rem">${paper.category_scores[cat]}%</span>` : ''}</h3> ${items.map(item => { - const icon = !item.applies ? '—' : item.answer ? '✓' : '✗'; + const icon = !item.applies ? '\u2014' : item.answer ? '\u2713' : '\u2717'; const cls = !item.applies ? 'na' : item.answer ? 'pass' : 'fail'; return `<div class="checklist-item"> <span class="checklist-icon ${cls}">${icon}</span> @@ -91,18 +132,9 @@ export function renderPaperDetail(app: HTMLElement, data: ExplorerData, slug: st </div>`).join('')} </div>` : ''} - ${incoming.length || outgoing.length ? `<div class="section"> + ${incomingHtml || outgoingHtml ? `<div class="section"> <h2>Internal Citations</h2> - ${incoming.length ? `<h3 style="font-size:0.85rem;color:var(--text-dim);margin:0.5rem 0">Cited by (${incoming.length})</h3> - ${incoming.map(id => { - const n = nodeMap.get(id); - return `<div style="font-size:0.82rem;padding:0.2rem 0"><a href="#/paper/${id}" style="color:var(--accent);text-decoration:none">${n?.title || id}</a>${n?.score != null ? ` <span style="color:${scoreColor(n.score)};font-family:var(--font);font-size:0.75rem">${n.score}%</span>` : ''}</div>`; - }).join('')}` : ''} - ${outgoing.length ? `<h3 style="font-size:0.85rem;color:var(--text-dim);margin:0.5rem 0">Cites (${outgoing.length})</h3> - ${outgoing.map(id => { - const n = nodeMap.get(id); - return `<div style="font-size:0.82rem;padding:0.2rem 0"><a href="#/paper/${id}" style="color:var(--accent);text-decoration:none">${n?.title || id}</a>${n?.score != null ? 
` <span style="color:${scoreColor(n.score)};font-family:var(--font);font-size:0.75rem">${n.score}%</span>` : ''}</div>`; - }).join('')}` : ''} + ${incomingHtml}${outgoingHtml} </div>` : ''} <div class="section"> diff --git a/explorer/src/views/papers.ts b/explorer/src/views/papers.ts @@ -1,23 +1,24 @@ -import type { ExplorerData, Paper } from '../data'; +import { loadDashboard, loadPapersIndex, type PaperIndex } from '../data'; import { navigate } from '../router'; import { createFilters, updateFilterCount, type FilterState } from '../components/search-filter'; import { renderSortableTable, type Column } from '../components/table'; function scoreColor(s: number): string { - if (s < 30) return '#f06565'; - if (s < 50) return '#f0c050'; - if (s < 70) return '#6c8cff'; - return '#3dd68c'; + if (s < 30) return 'var(--red)'; + if (s < 50) return 'var(--yellow)'; + if (s < 70) return 'var(--accent)'; + return 'var(--green)'; } -export function renderPapers(app: HTMLElement, data: ExplorerData) { +export async function renderPapers(app: HTMLElement) { + app.innerHTML = '<div class="spinner"></div>'; + const [papers, dashboard] = await Promise.all([loadPapersIndex(), loadDashboard()]); + app.innerHTML = ''; - const papers = data.papers; const archetypes = [...new Set(papers.map(p => p.archetype))].sort(); - const tags = Object.keys(data.agg.tag_counts).sort(); + const tags = Object.keys(dashboard.tag_counts).sort(); let filtered = [...papers]; - let tableEl: HTMLElement | null = null; const filtersEl = createFilters(archetypes, tags, (state: FilterState) => { filtered = papers.filter(p => { @@ -38,7 +39,7 @@ export function renderPapers(app: HTMLElement, data: ExplorerData) { const tableContainer = document.createElement('div'); app.appendChild(tableContainer); - const columns: Column<Paper>[] = [ + const columns: Column<PaperIndex>[] = [ { key: 'title', label: 'Title', render: p => p.title.length > 70 ? p.title.slice(0, 67) + '...' 
: p.title, sortValue: p => p.title }, { key: 'year', label: 'Year', render: p => String(p.year || ''), sortValue: p => p.year || 0 }, { key: 'score', label: 'Score', render: p => `<span style="color:${scoreColor(p.score)}">${p.score}%</span>`, sortValue: p => p.score }, @@ -48,8 +49,8 @@ export function renderPapers(app: HTMLElement, data: ExplorerData) { function renderTable() { tableContainer.innerHTML = ''; - tableEl = renderSortableTable(filtered, columns, p => navigate(`/paper/${p.id}`)); - tableContainer.appendChild(tableEl); + const el = renderSortableTable(filtered, columns, p => navigate(`/paper/${p.id}`)); + tableContainer.appendChild(el); } renderTable(); diff --git a/explorer/src/views/tensions.ts b/explorer/src/views/tensions.ts @@ -1,4 +1,4 @@ -import type { ExplorerData, TensionClaim } from '../data'; +import { loadTensions, type TensionClaim } from '../data'; const TENSION_META: Record<string, { title: string; positive: string; nuanced: string }> = { productivity: { @@ -24,14 +24,15 @@ function meanScore(claims: TensionClaim[]): number { } function scoreColor(s: number): string { - if (s < 30) return '#f06565'; - if (s < 50) return '#f0c050'; - if (s < 70) return '#6c8cff'; - return '#3dd68c'; + if (s < 30) return 'var(--red)'; + if (s < 50) return 'var(--yellow)'; + if (s < 70) return 'var(--accent)'; + return 'var(--green)'; } -export function renderTensions(app: HTMLElement, data: ExplorerData) { - const { tensions } = data; +export async function renderTensions(app: HTMLElement) { + app.innerHTML = '<div class="spinner"></div>'; + const tensions = await loadTensions(); app.innerHTML = Object.entries(tensions).map(([key, sides]) => { const meta = TENSION_META[key]; @@ -40,7 +41,7 @@ export function renderTensions(app: HTMLElement, data: ExplorerData) { return `<div class="tension-group section"> <h2>${meta.title}</h2> - <div class="tension-stat">Positive claims: ${sides.positive.length} (mean score ${posMean}%) · Nuanced claims: 
${sides.nuanced.length} (mean score ${nuaMean}%)</div> + <div class="tension-stat">Positive claims: ${sides.positive.length} (mean score ${posMean}%) \u00b7 Nuanced claims: ${sides.nuanced.length} (mean score ${nuaMean}%)</div> <div class="tension-columns"> <div class="tension-col"> <h4>${meta.positive}</h4> @@ -57,14 +58,13 @@ export function renderTensions(app: HTMLElement, data: ExplorerData) { function renderClaims(claims: TensionClaim[]): string { if (!claims.length) return '<p style="color:var(--text-dim);font-size:0.82rem">No claims in this category.</p>'; - // Show first 20, sorted by score const sorted = [...claims].sort((a, b) => b.score - a.score).slice(0, 20); return sorted.map(c => ` <div class="tension-claim"> <span class="support-badge ${c.supported}">${c.supported}</span> <span style="color:${scoreColor(c.score)};font-family:var(--font);font-size:0.75rem">${c.score}%</span> ${c.claim.length > 120 ? c.claim.slice(0, 117) + '...' : c.claim} - <a href="#/paper/${c.paper_id}">→</a> + <a href="#/paper/${c.paper_id}">\u2192</a> </div> `).join(''); } diff --git a/explorer/tests/explorer.spec.ts b/explorer/tests/explorer.spec.ts @@ -3,31 +3,30 @@ import { test, expect } from '@playwright/test'; test.describe('Dashboard', () => { test('loads and shows headline stats', async ({ page }) => { await page.goto('/'); - // Wait for data to load await expect(page.locator('.card .value').first()).toBeVisible({ timeout: 10000 }); - - // Should show 467 papers const cards = page.locator('.card'); await expect(cards).toHaveCount(4); await expect(cards.nth(0).locator('.value')).toHaveText('467'); - // Median score await expect(cards.nth(1).locator('.value')).toHaveText('50%'); }); + test('shows spinner then content', async ({ page }) => { + await page.goto('/'); + // Spinner should appear briefly (may be too fast to catch, but structure exists) + await expect(page.locator('.card .value').first()).toBeVisible({ timeout: 10000 }); + }); + test('shows histogram', async ({ 
page }) => { await page.goto('/'); await expect(page.locator('.card .value').first()).toBeVisible({ timeout: 10000 }); - // Histogram SVG should have bars const bars = page.locator('svg .bar'); expect(await bars.count()).toBeGreaterThan(5); - // At least some bars should be visible (bins with count > 0) await expect(page.locator('svg .bar[height]:not([height="0"])')).not.toHaveCount(0); }); test('shows category pass rates', async ({ page }) => { await page.goto('/'); await expect(page.locator('.card .value').first()).toBeVisible({ timeout: 10000 }); - // Should have horizontal bars const hbars = page.locator('.hbar'); expect(await hbars.count()).toBeGreaterThan(5); }); @@ -50,9 +49,7 @@ test.describe('Papers Browser', () => { test('shows paper table', async ({ page }) => { await page.goto('/#/papers'); await expect(page.locator('table tbody tr').first()).toBeVisible({ timeout: 10000 }); - // Should show all 467 papers expect(await page.locator('table tbody tr').count()).toBe(467); - // Filter count shows await expect(page.locator('#f-count')).toHaveText('467 / 467'); }); @@ -60,9 +57,7 @@ test.describe('Papers Browser', () => { await page.goto('/#/papers'); await expect(page.locator('table tbody tr').first()).toBeVisible({ timeout: 10000 }); await page.fill('#f-search', 'metr'); - // Should have fewer rows - const rows = page.locator('table tbody tr'); - const count = await rows.count(); + const count = await page.locator('table tbody tr').count(); expect(count).toBeGreaterThan(0); expect(count).toBeLessThan(467); }); @@ -80,27 +75,26 @@ test.describe('Papers Browser', () => { await page.goto('/#/papers'); await expect(page.locator('table tbody tr').first()).toBeVisible({ timeout: 10000 }); await page.locator('table tbody tr').first().click(); - await expect(page.locator('.paper-header h2')).toBeVisible({ timeout: 5000 }); + await expect(page.locator('.paper-header h2')).toBeVisible({ timeout: 10000 }); }); test('sort by score works', async ({ page }) => { await 
page.goto('/#/papers'); await expect(page.locator('table tbody tr').first()).toBeVisible({ timeout: 10000 }); - // Click Score header to sort ascending await page.locator('thead th', { hasText: 'Score' }).click(); - // First row should have a low score const firstScore = await page.locator('table tbody tr').first().locator('td.score').textContent(); expect(parseFloat(firstScore!)).toBeLessThan(30); }); }); test.describe('Paper Detail', () => { - test('shows METR paper detail', async ({ page }) => { + test('shows METR paper detail with links', async ({ page }) => { await page.goto('/#/paper/metr-rct-2025'); await expect(page.locator('.paper-header h2')).toBeVisible({ timeout: 10000 }); await expect(page.locator('.paper-header h2')).toContainText('Measuring the Impact'); - // Score should be visible await expect(page.locator('.paper-meta')).toContainText('69.8%'); + // Should have arXiv link + await expect(page.locator('.paper-links a', { hasText: 'arXiv' })).toBeVisible(); }); test('shows checklist items', async ({ page }) => { @@ -126,7 +120,7 @@ test.describe('Paper Detail', () => { await page.goto('/#/paper/metr-rct-2025'); await expect(page.locator('.back-link')).toBeVisible({ timeout: 10000 }); await page.locator('.back-link').click(); - await expect(page.locator('table tbody tr').first()).toBeVisible({ timeout: 5000 }); + await expect(page.locator('table tbody tr').first()).toBeVisible({ timeout: 10000 }); }); test('shows 404 for unknown paper', async ({ page }) => { @@ -153,10 +147,8 @@ test.describe('Citation Network', () => { await expect(page.locator('#net-count')).toBeVisible({ timeout: 10000 }); const before = await page.locator('#net-count').textContent(); await page.fill('#net-min-conn', '5'); - // Wait for re-render await page.waitForTimeout(500); const after = await page.locator('#net-count').textContent(); - // Should have fewer nodes const beforeNodes = parseInt(before!.match(/(\d+) nodes/)![1]); const afterNodes = parseInt(after!.match(/(\d+) 
nodes/)![1]); expect(afterNodes).toBeLessThan(beforeNodes); @@ -182,21 +174,17 @@ test.describe('Navigation', () => { await page.goto('/'); await expect(page.locator('.card .value').first()).toBeVisible({ timeout: 10000 }); - // Click Papers await page.locator('nav a', { hasText: 'Papers' }).click(); - await expect(page.locator('table')).toBeVisible({ timeout: 5000 }); + await expect(page.locator('table')).toBeVisible({ timeout: 10000 }); - // Click Network await page.locator('nav a', { hasText: 'Network' }).click(); - await expect(page.locator('#network-canvas')).toBeVisible({ timeout: 5000 }); + await expect(page.locator('#network-canvas')).toBeVisible({ timeout: 10000 }); - // Click Tensions await page.locator('nav a', { hasText: 'Tensions' }).click(); - await expect(page.locator('.tension-group').first()).toBeVisible({ timeout: 5000 }); + await expect(page.locator('.tension-group').first()).toBeVisible({ timeout: 10000 }); - // Click Dashboard await page.locator('nav a', { hasText: 'Dashboard' }).click(); - await expect(page.locator('.card .value').first()).toBeVisible({ timeout: 5000 }); + await expect(page.locator('.card .value').first()).toBeVisible({ timeout: 10000 }); }); test('active nav state updates', async ({ page }) => { @@ -205,3 +193,20 @@ test.describe('Navigation', () => { await expect(page.locator('nav a.active')).toHaveText('Papers'); }); }); + +test.describe('Theme', () => { + test('toggle switches theme', async ({ page }) => { + await page.goto('/'); + await expect(page.locator('.card .value').first()).toBeVisible({ timeout: 10000 }); + const themeBefore = await page.locator('html').getAttribute('data-theme'); + expect(themeBefore).toMatch(/^(dark|light)$/); + // Click toggle — should switch + await page.click('#theme-toggle'); + const themeAfter = await page.locator('html').getAttribute('data-theme'); + expect(themeAfter).not.toBe(themeBefore); + // Toggle back + await page.click('#theme-toggle'); + const themeBack = await 
page.locator('html').getAttribute('data-theme'); + expect(themeBack).toBe(themeBefore); + }); +}); diff --git a/scripts/build-explorer-data.py b/scripts/build-explorer-data.py @@ -1,16 +1,23 @@ #!/usr/bin/env python3 """ -Build the explorer.json data file for the static data explorer. +Build data files for the static data explorer. Reads v2 scan.json files, metadata.json, citation-graph.json, and registry.jsonl. -Outputs a single JSON file with per-paper data, aggregations, tensions, and citation network. +Outputs view-specific JSON files for fast loading, plus a full explorer.json for power users. + +Output files: + explorer/public/data/dashboard.json — aggregation stats only (~0.5KB) + explorer/public/data/papers-index.json — table data without checklists (~200KB) + explorer/public/data/papers/{slug}.json — full detail per paper + explorer/public/data/network.json — citation network + explorer/public/data/tensions.json — claim tensions + explorer/public/data/explorer.json — full monolith for queries Usage: python3 scripts/build-explorer-data.py """ import json -import math from collections import Counter, defaultdict from pathlib import Path @@ -20,7 +27,6 @@ PAPERS_DIR = ROOT / "papers" ANALYSIS_DIR = ROOT / "analysis" OUTPUT_DIR = ROOT / "explorer" / "public" / "data" -# Categories and their questions (base checklist) BASE_CATEGORIES = [ "artifacts", "statistical_methodology", "evaluation_design", "claims_and_evidence", "setup_transparency", "limitations_and_scope", @@ -32,7 +38,7 @@ CONDITIONAL_CATEGORIES = [ ] ALL_CATEGORIES = BASE_CATEGORIES + CONDITIONAL_CATEGORIES -# Archetype classification thresholds + def classify_archetype(cat_scores): ed = cat_scores.get("evaluation_design", 0) sm = cat_scores.get("statistical_methodology", 0) @@ -49,7 +55,6 @@ def classify_archetype(cat_scores): def compute_category_score(category_data): - """Compute pass rate for a category: count(answer=true where applies=true) / count(applies=true).""" applicable = 0 passed = 0 
for q_name, q_data in category_data.items(): @@ -65,7 +70,6 @@ def compute_category_score(category_data): def compute_overall_score(checklist): - """Overall score: count(answer=true where applies=true) / count(applies=true) across all categories.""" applicable = 0 passed = 0 for cat_name, cat_data in checklist.items(): @@ -84,7 +88,6 @@ def compute_overall_score(checklist): def flatten_checklist(checklist): - """Flatten checklist into list of {category, question, applies, answer, justification}.""" flat = [] for cat_name in ALL_CATEGORIES: cat_data = checklist.get(cat_name, {}) @@ -104,27 +107,21 @@ def flatten_checklist(checklist): def detect_games(checklist, score, cat_scores): - """Detect which 'named games' a paper exhibits.""" games = [] - sm = cat_scores.get("statistical_methodology", 0) - # Big Numbers No Error Bars: claims results but no CIs/variance ci = checklist.get("statistical_methodology", {}).get("confidence_intervals_or_error_bars", {}) var = checklist.get("statistical_methodology", {}).get("variance_reported", {}) if ci.get("applies") and not ci.get("answer") and var.get("applies") and not var.get("answer"): games.append("Big Numbers No Error Bars") - # Overclaiming: abstract claims not supported or generalization unbounded ac = checklist.get("claims_and_evidence", {}).get("abstract_claims_supported", {}) gb = checklist.get("claims_and_evidence", {}).get("generalization_bounded", {}) if (ac.get("applies") and not ac.get("answer")) or (gb.get("applies") and not gb.get("answer")): games.append("Overclaiming") - # Open Source Theater: claims code release but no environment/instructions cr = checklist.get("artifacts", {}).get("code_released", {}) env = checklist.get("artifacts", {}).get("environment_specified", {}) ri = checklist.get("artifacts", {}).get("reproduction_instructions", {}) if cr.get("applies") and cr.get("answer"): if (env.get("applies") and not env.get("answer")) or (ri.get("applies") and not ri.get("answer")): games.append("Open Source 
Theater") - # Contamination Dodge: benchmark eval without addressing contamination bc = checklist.get("contamination", {}).get("benchmark_contamination_addressed", {}) if bc.get("applies") and not bc.get("answer"): games.append("Contamination Dodge") @@ -158,37 +155,39 @@ def load_metadata(paper_id): return json.load(f) +def write_json(path, data): + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + json.dump(data, f, ensure_ascii=False, separators=(",", ":")) + + def build(): registry = load_registry() citation_data = load_citation_graph() - papers = [] + # Accumulators + papers_full = [] # full paper objects (for explorer.json) + papers_index = [] # slim objects (for papers-index.json) + paper_details = {} # slug -> detail object (for papers/{slug}.json) all_scores = [] cat_pass_counts = defaultdict(lambda: {"passed": 0, "applicable": 0}) - question_pass_counts = defaultdict(lambda: {"passed": 0, "applicable": 0}) year_scores = defaultdict(list) tag_counts = Counter() archetype_counts = Counter() game_counts = Counter() total_papers = 0 - # Claim tension buckets tensions = { "productivity": {"positive": [], "nuanced": []}, "benchmarks": {"positive": [], "nuanced": []}, "agents": {"positive": [], "nuanced": []}, } - # Productivity keywords - prod_positive = {"faster", "speedup", "productivity gain", "improves productivity", "increases output"} - prod_nuanced = {"slower", "no significant", "depends on", "mixed", "skill-dependent", "worse code"} - for scan_path in sorted(PAPERS_DIR.glob("*/scan.json")): paper_id = scan_path.parent.name with open(scan_path) as f: scan = json.load(f) - # Only v2 scans if scan.get("scan_version", 1) < 2: continue @@ -197,7 +196,6 @@ def build(): reg_entry = registry.get(paper_id, {}) metadata = load_metadata(paper_id) - # Compute scores overall = compute_overall_score(checklist) if overall is None: continue @@ -210,7 +208,6 @@ def build(): if cs is not None: cat_scores[cat] = cs - # Update aggregations 
total_papers += 1 score_pct = round(overall * 100, 1) all_scores.append(score_pct) @@ -221,6 +218,11 @@ def build(): archetype = classify_archetype(cat_scores) games = detect_games(checklist, overall, cat_scores) + # External links + arxiv_id = paper_meta.get("arxiv_id") or reg_entry.get("arxiv_id", "") + doi = paper_meta.get("doi") or reg_entry.get("doi", "") + source_url = reg_entry.get("source_url", "") + year_scores[year].append(score_pct) for t in tags: tag_counts[t] += 1 @@ -228,7 +230,7 @@ def build(): for g in games: game_counts[g] += 1 - # Category and question aggregations + # Category aggregations for cat in ALL_CATEGORIES: cat_data = checklist.get(cat, {}) if not isinstance(cat_data, dict): @@ -238,92 +240,85 @@ def build(): continue if q_data["applies"]: cat_pass_counts[cat]["applicable"] += 1 - question_pass_counts[f"{cat}.{q_name}"]["applicable"] += 1 if q_data.get("answer", False): cat_pass_counts[cat]["passed"] += 1 - question_pass_counts[f"{cat}.{q_name}"]["passed"] += 1 - # Claims claims = scan.get("claims", []) red_flags = scan.get("red_flags", []) - # Tension classification (simple keyword matching on claims) - key_findings = scan.get("key_findings", "").lower() + # Tension classification for claim in claims: ct = claim.get("claim", "").lower() - # Productivity tension if any(k in ct for k in ["productivity", "developer speed", "completion time", "speedup", "faster"]): bucket = "positive" if any(k in ct for k in ["faster", "speedup", "improves", "increases", "gain"]) else "nuanced" tensions["productivity"][bucket].append({ - "paper_id": paper_id, - "claim": claim["claim"], - "supported": claim.get("supported", ""), - "score": score_pct, + "paper_id": paper_id, "claim": claim["claim"], + "supported": claim.get("supported", ""), "score": score_pct, }) - # Benchmark tension if any(k in ct for k in ["benchmark", "evaluation", "leaderboard", "swe-bench"]): bucket = "positive" if any(k in ct for k in ["state-of-the-art", "outperforms", "achieves", 
"best"]) else "nuanced" tensions["benchmarks"][bucket].append({ - "paper_id": paper_id, - "claim": claim["claim"], - "supported": claim.get("supported", ""), - "score": score_pct, + "paper_id": paper_id, "claim": claim["claim"], + "supported": claim.get("supported", ""), "score": score_pct, }) - # Agent tension if any(k in ct for k in ["agent", "autonomous", "multi-agent"]): bucket = "positive" if any(k in ct for k in ["solves", "achieves", "succeeds", "capable", "outperforms"]) else "nuanced" tensions["agents"][bucket].append({ - "paper_id": paper_id, - "claim": claim["claim"], - "supported": claim.get("supported", ""), - "score": score_pct, + "paper_id": paper_id, "claim": claim["claim"], + "supported": claim.get("supported", ""), "score": score_pct, }) - # Build paper entry - papers.append({ + cat_scores_pct = {k: round(v * 100, 1) for k, v in cat_scores.items()} + + # Slim index entry (for table) + index_entry = { "id": paper_id, "title": paper_meta.get("title", reg_entry.get("title", paper_id)), "year": year, "venue": venue, "tags": tags, "score": score_pct, - "category_scores": {k: round(v * 100, 1) for k, v in cat_scores.items()}, "archetype": archetype, "games": games, + "arxiv_id": arxiv_id, + "doi": doi, + } + papers_index.append(index_entry) + + # Full detail (for per-paper file) + detail = { + **index_entry, + "category_scores": cat_scores_pct, "claims": [{"claim": c["claim"], "supported": c.get("supported", "")} for c in claims], "red_flags": [{"flag": r["flag"], "detail": r["detail"]} for r in red_flags], "checklist": flatten_checklist(checklist), "key_findings": scan.get("key_findings", ""), "active_modules": scan.get("active_modules", []), - }) + "source_url": source_url, + } + paper_details[paper_id] = detail + + # Full entry for explorer.json + papers_full.append(detail) - # Build aggregations + # --- Aggregations --- all_scores.sort() n = len(all_scores) median = all_scores[n // 2] if n else 0 mean = sum(all_scores) / n if n else 0 - # Histogram 
bins (0-100, 5% bins) hist_bins = [] for lo in range(0, 100, 5): hi = lo + 5 count = sum(1 for s in all_scores if lo <= s < hi) hist_bins.append({"lo": lo, "hi": hi, "count": count}) - # Category pass rates cat_rates = {} for cat in ALL_CATEGORIES: d = cat_pass_counts[cat] if d["applicable"] > 0: cat_rates[cat] = round(d["passed"] / d["applicable"] * 100, 1) - # Question pass rates - q_rates = {} - for qk, d in question_pass_counts.items(): - if d["applicable"] > 0: - q_rates[qk] = {"rate": round(d["passed"] / d["applicable"] * 100, 1), "n": d["applicable"]} - - # Year trends year_trends = {} for y in sorted(year_scores.keys()): scores = year_scores[y] @@ -333,19 +328,27 @@ def build(): "median": round(sorted(scores)[len(scores) // 2], 1), } - # Game percentages game_pcts = {g: round(c / total_papers * 100, 1) for g, c in game_counts.items()} + repro_count = sum(1 for p in papers_full if p["category_scores"].get("artifacts", 0) == 100) + + dashboard = { + "n": total_papers, + "median": round(median, 1), + "mean": round(mean, 1), + "full_reproducibility_pct": round(repro_count / total_papers * 100, 1) if total_papers else 0, + "histogram": hist_bins, + "category_rates": cat_rates, + "year_trends": year_trends, + "game_pcts": game_pcts, + "archetype_counts": dict(archetype_counts), + "tag_counts": dict(tag_counts), + } - # Reproducibility rate - repro_count = sum(1 for p in papers - if p["category_scores"].get("artifacts", 0) == 100) - - # Citation network - include all graph nodes, mark v2-scanned ones with scores - v2_ids = {p["id"] for p in papers} - score_map = {p["id"]: p["score"] for p in papers} - year_map = {p["id"]: p["year"] for p in papers} + # --- Citation network --- + v2_ids = {p["id"] for p in papers_full} + score_map = {p["id"]: p["score"] for p in papers_full} + year_map = {p["id"]: p["year"] for p in papers_full} - # Build edges first to compute in-degree all_graph_ids = {n["id"] for n in citation_data.get("nodes", [])} net_edges = [] in_degree = 
Counter() @@ -361,46 +364,53 @@ def build(): net_nodes.append({ "id": nid, "title": node.get("title", nid), - "score": score_map.get(nid), # None if not v2 scanned + "score": score_map.get(nid), "year": year_map.get(nid, registry.get(nid, {}).get("year")), "in_degree": in_degree.get(nid, 0), "has_scan": nid in v2_ids, }) - # Build output - output = { - "generated": "2026-03-17", - "papers": papers, - "agg": { - "n": total_papers, - "median": round(median, 1), - "mean": round(mean, 1), - "full_reproducibility_pct": round(repro_count / total_papers * 100, 1) if total_papers else 0, - "histogram": hist_bins, - "category_rates": cat_rates, - "question_rates": q_rates, - "year_trends": year_trends, - "game_pcts": game_pcts, - "archetype_counts": dict(archetype_counts), - "tag_counts": dict(tag_counts), - }, - "tensions": tensions, - "citation_network": { - "nodes": net_nodes, - "edges": net_edges, - }, - } + network = {"nodes": net_nodes, "edges": net_edges} + # --- Write files --- OUTPUT_DIR.mkdir(parents=True, exist_ok=True) - out_path = OUTPUT_DIR / "explorer.json" - with open(out_path, "w") as f: - json.dump(output, f, ensure_ascii=False, separators=(",", ":")) - - print(f"Wrote {out_path}") - print(f" Papers: {total_papers}") - print(f" Median score: {median:.1f}%") - print(f" Network: {len(net_nodes)} nodes, {len(net_edges)} edges") - print(f" File size: {out_path.stat().st_size / 1024:.0f} KB") + papers_detail_dir = OUTPUT_DIR / "papers" + papers_detail_dir.mkdir(parents=True, exist_ok=True) + + write_json(OUTPUT_DIR / "dashboard.json", dashboard) + write_json(OUTPUT_DIR / "papers-index.json", papers_index) + write_json(OUTPUT_DIR / "network.json", network) + write_json(OUTPUT_DIR / "tensions.json", tensions) + + for slug, detail in paper_details.items(): + write_json(papers_detail_dir / f"{slug}.json", detail) + + # Full monolith + explorer = { + "generated": "2026-03-18", + "papers": papers_full, + "agg": dashboard, + "tensions": tensions, + "citation_network": 
network, + } + write_json(OUTPUT_DIR / "explorer.json", explorer) + + # Report + dash_size = (OUTPUT_DIR / "dashboard.json").stat().st_size + idx_size = (OUTPUT_DIR / "papers-index.json").stat().st_size + net_size = (OUTPUT_DIR / "network.json").stat().st_size + tens_size = (OUTPUT_DIR / "tensions.json").stat().st_size + full_size = (OUTPUT_DIR / "explorer.json").stat().st_size + print(f"Papers: {total_papers}") + print(f"Median score: {median:.1f}%") + print(f"Network: {len(net_nodes)} nodes, {len(net_edges)} edges") + print(f"Files:") + print(f" dashboard.json: {dash_size:>8,} bytes") + print(f" papers-index.json: {idx_size:>8,} bytes") + print(f" papers/*.json: {len(paper_details):>5} files") + print(f" network.json: {net_size:>8,} bytes") + print(f" tensions.json: {tens_size:>8,} bytes") + print(f" explorer.json: {full_size:>8,} bytes") if __name__ == "__main__":

Impressum · Datenschutz