ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

commit 6d3758b3b52628e5f9c0bb8cb38aae235f766dde
parent 0bf67124d60b5a1d8c6d27deb7def340c1f0c0f0
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Mon, 23 Mar 2026 08:46:46 +0100

Add 4 new games (10 total), DNA profile strips in paper table

New games detecting orthogonal methodology failures:
- Trust Us (40.5%): no raw data AND no code — unverifiable
- The Black Box (12.3%): no prompts AND no hyperparameters — unreplicable
- Moving Goalpost (26.9%): causal claims without causal design
- Limitation Theater (4.2%): has limitations section, all boilerplate

DNA strips: colored inline heatmap per paper row showing 11 base
category scores at a glance (red→yellow→blue→green). Replaces
venue column — methodology profile is more useful.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M explorer/src/data.ts | 1 +
M explorer/src/style.css | 13 +++++++++++++
M explorer/src/views/papers.ts | 27 +++++++++++++++++++++++++--
M explorer/tests/explorer.spec.ts | 2 +-
M scripts/build-explorer-data.py | 24 ++++++++++++++++++++++++
5 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/explorer/src/data.ts b/explorer/src/data.ts
@@ -28,6 +28,7 @@ export interface PaperIndex {
   arxiv_id: string;
   doi: string;
   code_url: string | null;
+  dna: (number | null)[] | null;
 }
 
 export interface PaperDetail extends PaperIndex {
diff --git a/explorer/src/style.css b/explorer/src/style.css
@@ -509,6 +509,19 @@ td.score {
 .toggle-btn:hover { border-color: var(--text-dim); }
 .toggle-btn.active { background: rgba(108, 140, 255, 0.08); }
 
+/* DNA strip (paper profile in table) */
+.dna-strip {
+  display: inline-flex;
+  gap: 1px;
+  vertical-align: middle;
+}
+.dna-cell {
+  display: inline-block;
+  width: 6px;
+  height: 14px;
+  border-radius: 1px;
+}
+
 /* PCA scatter canvas */
 #pca-canvas {
   width: 100%;
diff --git a/explorer/src/views/papers.ts b/explorer/src/views/papers.ts
@@ -3,6 +3,13 @@ import { navigate } from '../router';
 import { createFilters, updateFilterCount, type FilterState } from '../components/search-filter';
 import { renderSortableTable, type Column } from '../components/table';
 
+const DNA_CATS = [
+  'artifacts', 'stat_method', 'eval_design',
+  'claims', 'setup', 'limitations',
+  'data_integrity', 'conflicts', 'contamination',
+  'human', 'cost',
+];
+
 function scoreColor(s: number): string {
   if (s < 30) return 'var(--red)';
   if (s < 50) return 'var(--yellow)';
@@ -10,6 +17,22 @@ function scoreColor(s: number): string {
   return 'var(--green)';
 }
 
+function dnaCellColor(v: number | null): string {
+  if (v === null) return 'var(--border)';
+  if (v < 25) return '#d03030';
+  if (v < 50) return '#c08020';
+  if (v < 75) return '#4070cc';
+  return '#20a060';
+}
+
+function renderDna(dna: (number | null)[] | null): string {
+  if (!dna) return '';
+  const cells = dna.map((v, i) =>
+    `<span class="dna-cell" style="background:${dnaCellColor(v)}" title="${DNA_CATS[i]}: ${v !== null ? v + '%' : 'N/A'}"></span>`
+  ).join('');
+  return `<span class="dna-strip">${cells}</span>`;
+}
+
 export async function renderPapers(app: HTMLElement) {
   app.innerHTML = '<div class="spinner"></div>';
   const [papers, dashboard] = await Promise.all([loadPapersIndex(), loadDashboard()]);
@@ -41,10 +64,10 @@ export async function renderPapers(app: HTMLElement) {
   app.appendChild(tableContainer);
 
   const columns: Column<PaperIndex>[] = [
-    { key: 'title', label: 'Title', render: p => p.title.length > 70 ? p.title.slice(0, 67) + '...' : p.title, sortValue: p => p.title },
+    { key: 'title', label: 'Title', render: p => p.title.length > 60 ? p.title.slice(0, 57) + '...' : p.title, sortValue: p => p.title },
     { key: 'year', label: 'Year', render: p => String(p.year || ''), sortValue: p => p.year || 0 },
     { key: 'score', label: 'Score', render: p => p.score != null ? `<span style="color:${scoreColor(p.score)}">${p.score}%</span>` : '<span style="color:var(--gray)">--</span>', sortValue: p => p.score ?? -1 },
-    { key: 'venue', label: 'Venue', render: p => (p.venue || '').length > 20 ? (p.venue || '').slice(0, 17) + '...' : (p.venue || ''), sortValue: p => p.venue || '' },
+    { key: 'dna', label: 'Profile', render: p => renderDna(p.dna), sortValue: p => p.score ?? -1 },
     { key: 'archetype', label: 'Type', render: p => p.archetype ? `<span class="archetype ${p.archetype}">${p.archetype}</span>` : '<span style="color:var(--gray)">--</span>', sortValue: p => p.archetype || '' },
   ];
 
diff --git a/explorer/tests/explorer.spec.ts b/explorer/tests/explorer.spec.ts
@@ -244,7 +244,7 @@ test.describe('Findings', () => {
     await page.goto('/#/findings');
     const gamesSection = page.locator('.section', { has: page.locator('h2', { hasText: 'Named Games' }) });
     await expect(gamesSection).toBeVisible({ timeout: 10000 });
-    expect(await gamesSection.locator('.game-row').count()).toBe(6);
+    expect(await gamesSection.locator('.game-row').count()).toBe(10);
   });
 });
 
diff --git a/scripts/build-explorer-data.py b/scripts/build-explorer-data.py
@@ -141,6 +141,25 @@ def detect_games(checklist, score, cat_scores):
     ar = cat_scores.get("artifacts", 0)
     if ed >= 0.8 and sm < 0.2 and ar < 0.2:
         games.append("All Show No Substance")
+    # Trust Us: no raw data AND no code — completely unverifiable
+    rd = checklist.get("data_integrity", {}).get("raw_data_available", {})
+    if rd.get("applies") and not rd.get("answer") and cr.get("applies") and not cr.get("answer"):
+        games.append("Trust Us")
+    # The Black Box: no prompts AND no hyperparameters — can't replicate
+    pr = checklist.get("setup_transparency", {}).get("prompts_provided", {})
+    hp = checklist.get("setup_transparency", {}).get("hyperparameters_reported", {})
+    if pr.get("applies") and not pr.get("answer") and hp.get("applies") and not hp.get("answer"):
+        games.append("The Black Box")
+    # Moving Goalpost: causal claims without causal design
+    cc = checklist.get("claims_and_evidence", {}).get("causal_claims_justified", {})
+    if cc.get("applies") and not cc.get("answer"):
+        games.append("Moving Goalpost")
+    # Limitation Theater: has section but says nothing specific
+    ls = checklist.get("limitations_and_scope", {}).get("limitations_section_present", {})
+    tv = checklist.get("limitations_and_scope", {}).get("threats_to_validity_specific", {})
+    sb = checklist.get("limitations_and_scope", {}).get("scope_boundaries_stated", {})
+    if ls.get("applies") and ls.get("answer") and tv.get("applies") and not tv.get("answer") and sb.get("applies") and not sb.get("answer"):
+        games.append("Limitation Theater")
     return games
 
 
@@ -358,6 +377,9 @@ def build():
 
         cat_scores_pct = {k: round(v * 100, 1) for k, v in cat_scores.items()}
 
+        # DNA strip: compact array of base category scores (0-100, null if N/A)
+        dna = [cat_scores_pct.get(cat) for cat in BASE_CATEGORIES]
+
         # Slim index entry
         index_entry = {
             "id": paper_id,
@@ -371,6 +393,7 @@ def build():
             "arxiv_id": arxiv_id,
             "doi": doi,
             "code_url": code_url,
+            "dna": dna,
         }
         papers_index.append(index_entry)
 
@@ -758,6 +781,7 @@ def build():
             "arxiv_id": entry.get("arxiv_id", ""),
             "doi": entry.get("doi", ""),
             "code_url": None,
+            "dna": None,
         })
 
     write_json(OUTPUT_DIR / "dashboard.json", dashboard)

Impressum · Datenschutz