tensions.ts - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

tensions.ts (10664B)
      1 import { loadTensions, type TensionClaim } from '../data';
      2 
      3 const TENSION_META: Record<string, { title: string; desc: string; positive: string; nuanced: string }> = {
      4   productivity: {
      5     title: 'Productivity Paradox',
      6     desc: 'Does AI actually make developers faster? Studies claiming large speedups tend to use weaker methodology than those finding mixed or negative results. The only RCT with experienced developers found a 19% slowdown.',
      7     positive: 'AI increases productivity',
      8     nuanced: 'Effects are mixed or negative',
      9   },
     10   benchmarks: {
     11     title: 'Benchmark Validity Crisis',
     12     desc: 'Papers simultaneously build on benchmarks and distrust them. SOTA claims proliferate, but fewer than half of benchmark papers discuss whether their benchmark actually measures the claimed capability.',
     13     positive: 'Benchmark success = capability',
     14     nuanced: 'Benchmarks are flawed or gamed',
     15   },
     16   agents: {
     17     title: 'Agent Capability Gap',
     18     desc: 'Success claims outnumber limitation findings, but the limitations come from more rigorous papers. Agents succeed in sandboxes; failures are found in deployment. The gap between demo and production is the real story.',
     19     positive: 'Agents succeed at tasks',
     20     nuanced: 'Agents fail in deployment',
     21   },
     22   security: {
     23     title: 'Security Arms Race',
     24     desc: 'Defense papers claim their mitigations work; attack papers show they can be bypassed. Attacks outnumber defenses and the cycle repeats with each new technique. Neither side engages seriously with the other.',
     25     positive: 'Defenses work',
     26     nuanced: 'Attacks succeed',
     27   },
     28   code_quality: {
     29     title: 'Code Quality Paradox',
     30     desc: 'LLMs simultaneously repair bugs and introduce new ones. The same tools that fix code also generate insecure configurations, hallucinate APIs, and increase cognitive complexity. Which effect dominates depends on the task.',
     31     positive: 'LLMs improve code',
     32     nuanced: 'LLMs introduce defects',
     33   },
     34   scaling: {
     35     title: 'Scaling Debate',
     36     desc: 'Smaller efficient models claim to match larger ones at a fraction of the cost. But scaling skeptics find diminishing returns, prohibitive inference costs, and capability gaps that distillation cannot close. The field is split nearly evenly.',
     37     positive: 'Scaling is efficient',
     38     nuanced: 'Scaling hits limits',
     39   },
     40 };
     41 
     42 function meanScore(claims: TensionClaim[]): number {
     43   if (!claims.length) return 0;
     44   return Math.round(claims.reduce((s, c) => s + c.score, 0) / claims.length * 10) / 10;
     45 }
     46 
     47 function scoreColor(s: number): string {
     48   if (s < 30) return 'var(--red)';
     49   if (s < 50) return 'var(--yellow)';
     50   if (s < 70) return 'var(--accent)';
     51   return 'var(--green)';
     52 }
     53 
     54 function scoreHex(s: number): string {
     55   if (s < 35) return '#d04040';
     56   if (s < 45) return '#c08030';
     57   if (s < 55) return '#a0a040';
     58   return '#40a060';
     59 }
     60 
     61 export async function renderTensions(app: HTMLElement) {
     62   app.innerHTML = '<div class="spinner"></div>';
     63   const tensions = await loadTensions();
     64 
     65   app.innerHTML = Object.entries(tensions).map(([key, sides]) => {
     66     const meta = TENSION_META[key];
     67     const posMean = meanScore(sides.positive);
     68     const nuaMean = meanScore(sides.nuanced);
     69 
     70     return `<div class="tension-group section">
     71       <h2>${meta.title}</h2>
     72       <p style="font-size:0.85rem;color:var(--text-dim);margin-bottom:0.75rem">${meta.desc}</p>
     73       <div class="tension-stat">Positive claims: ${sides.positive.length} (mean score ${posMean}%) \u00b7 Nuanced claims: ${sides.nuanced.length} (mean score ${nuaMean}%)</div>
     74 
     75       ${renderButterfly(sides.positive, sides.nuanced, meta)}
     76 
     77       <div class="tension-columns">
     78         <div class="tension-col">
     79           <h4>${meta.positive}</h4>
     80           ${renderClaims(sides.positive)}
     81         </div>
     82         <div class="tension-col">
     83           <h4>${meta.nuanced}</h4>
     84           ${renderClaims(sides.nuanced)}
     85         </div>
     86       </div>
     87     </div>`;
     88   }).join('');
     89 }
     90 
     91 function renderButterfly(positive: TensionClaim[], nuanced: TensionClaim[], meta: { positive: string; nuanced: string }): string {
     92   const posByYear = new Map<number, TensionClaim[]>();
     93   const nuaByYear = new Map<number, TensionClaim[]>();
     94 
     95   for (const c of positive) {
     96     if (!c.year || c.year < 2022) continue;
     97     if (!posByYear.has(c.year)) posByYear.set(c.year, []);
     98     posByYear.get(c.year)!.push(c);
     99   }
    100   for (const c of nuanced) {
    101     if (!c.year || c.year < 2022) continue;
    102     if (!nuaByYear.has(c.year)) nuaByYear.set(c.year, []);
    103     nuaByYear.get(c.year)!.push(c);
    104   }
    105 
    106   const sortedYears = [...new Set([...posByYear.keys(), ...nuaByYear.keys()])].sort();
    107   if (sortedYears.length < 2) return '';
    108 
    109   let maxCount = 1;
    110   for (const y of sortedYears) {
    111     maxCount = Math.max(maxCount, (posByYear.get(y) || []).length, (nuaByYear.get(y) || []).length);
    112   }
    113 
    114   // Layout: bars with border thickness encoding methodology score
    115   const w = 600, h = 280;
    116   const pad = { l: 30, r: 15, t: 30, b: 30 };
    117   const chartW = w - pad.l - pad.r;
    118   const chartH = h - pad.t - pad.b;
    119   const midY = pad.t + chartH / 2;
    120   const barMaxH = chartH / 2 - 15;
    121   const colW = chartW / sortedYears.length;
    122 
    123   // Map score to bar width: 20% → 30% of column, 70% → 85% of column
    124   function barWidth(score: number): number {
    125     const t = Math.max(0, Math.min(1, (score - 20) / 50));
    126     return colW * (0.25 + t * 0.5);
    127   }
    128 
    129   let svg = '';
    130 
    131   // Zero line
    132   svg += `<line x1="${pad.l}" y1="${midY}" x2="${w - pad.r}" y2="${midY}" stroke="var(--border)" stroke-width="1"/>`;
    133 
    134   // Count scale ticks
    135   for (const count of [Math.round(maxCount / 2), maxCount]) {
    136     const tickH = (count / maxCount) * barMaxH;
    137     svg += `<text x="${pad.l - 4}" y="${midY - tickH + 3}" text-anchor="end" font-size="9" fill="var(--text-dim)">${count}</text>`;
    138     svg += `<text x="${pad.l - 4}" y="${midY + tickH + 3}" text-anchor="end" font-size="9" fill="var(--text-dim)">${count}</text>`;
    139   }
    140 
    141   // Side labels
    142   svg += `<text x="${w - pad.r}" y="${pad.t + 8}" text-anchor="end" font-size="10" fill="var(--text-dim)">\u2191 ${meta.positive}</text>`;
    143   svg += `<text x="${w - pad.r}" y="${h - pad.b - 2}" text-anchor="end" font-size="10" fill="var(--text-dim)">\u2193 ${meta.nuanced}</text>`;
    144 
    145   for (let i = 0; i < sortedYears.length; i++) {
    146     const y = sortedYears[i];
    147     const cx = pad.l + i * colW + colW / 2;
    148     const posC = posByYear.get(y) || [];
    149     const nuaC = nuaByYear.get(y) || [];
    150     const posCount = posC.length;
    151     const nuaCount = nuaC.length;
    152     const posMean = posC.length ? posC.reduce((s, c) => s + c.score, 0) / posC.length : 0;
    153     const nuaMean = nuaC.length ? nuaC.reduce((s, c) => s + c.score, 0) / nuaC.length : 0;
    154 
    155     // Year label
    156     svg += `<text x="${cx}" y="${h - 8}" text-anchor="middle" font-size="11" fill="var(--text)">${y}</text>`;
    157 
    158     // Positive bar — width = score, height = count
    159     if (posCount > 0) {
    160       const barH = (posCount / maxCount) * barMaxH;
    161       const bw = barWidth(posMean);
    162       svg += `<rect x="${cx - bw / 2}" y="${midY - barH - 1}" width="${bw}" height="${barH}" rx="2" fill="var(--accent)" opacity="0.7">
    163         <title>${y} ${meta.positive}: ${posCount} claims, mean score ${Math.round(posMean)}%</title>
    164       </rect>`;
    165       svg += `<text x="${cx}" y="${midY - barH - 5}" text-anchor="middle" font-size="9" fill="var(--text)">${posCount} \u00b7 ${Math.round(posMean)}%</text>`;
    166     }
    167 
    168     // Nuanced bar — width = score, height = count
    169     if (nuaCount > 0) {
    170       const barH = (nuaCount / maxCount) * barMaxH;
    171       const bw = barWidth(nuaMean);
    172       svg += `<rect x="${cx - bw / 2}" y="${midY + 1}" width="${bw}" height="${barH}" rx="2" fill="var(--text-dim)" opacity="0.5">
    173         <title>${y} ${meta.nuanced}: ${nuaCount} claims, mean score ${Math.round(nuaMean)}%</title>
    174       </rect>`;
    175       svg += `<text x="${cx}" y="${midY + barH + 13}" text-anchor="middle" font-size="9" fill="var(--text)">${nuaCount} \u00b7 ${Math.round(nuaMean)}%</text>`;
    176     }
    177   }
    178 
    179   // Quality-weighted balance line: for each year, compute
    180   // (positive_count * positive_mean - nuanced_count * nuanced_mean) / total_count
    181   // Positive values = optimism dominates, negative = skepticism dominates
    182   // Scale: maps to y position between top and bottom of chart
    183   const balancePoints: string[] = [];
    184   for (let i = 0; i < sortedYears.length; i++) {
    185     const y = sortedYears[i];
    186     const cx = pad.l + i * colW + colW / 2;
    187     const posC = posByYear.get(y) || [];
    188     const nuaC = nuaByYear.get(y) || [];
    189     if (posC.length + nuaC.length === 0) continue;
    190 
    191     const posMean = posC.length ? posC.reduce((s, c) => s + c.score, 0) / posC.length : 0;
    192     const nuaMean = nuaC.length ? nuaC.reduce((s, c) => s + c.score, 0) / nuaC.length : 0;
    193     // Weighted balance: positive pushes up, nuanced pushes down, weighted by mean score
    194     const posWeight = posC.length * posMean;
    195     const nuaWeight = nuaC.length * nuaMean;
    196     const totalWeight = posWeight + nuaWeight;
    197     // Balance from -1 (all nuanced) to +1 (all positive)
    198     const balance = totalWeight > 0 ? (posWeight - nuaWeight) / totalWeight : 0;
    199     // Map to y: +1 → top, -1 → bottom, 0 → midY
    200     const lineY = midY - balance * barMaxH * 0.8;
    201     balancePoints.push(`${cx},${lineY}`);
    202   }
    203 
    204   if (balancePoints.length >= 2) {
    205     svg += `<polyline points="${balancePoints.join(' ')}" fill="none" stroke="var(--text)" stroke-width="2" stroke-dasharray="6,3" opacity="0.6"/>`;
    206     for (const pt of balancePoints) {
    207       const [px, py] = pt.split(',');
    208       svg += `<circle cx="${px}" cy="${py}" r="3" fill="var(--text)" opacity="0.6"/>`;
    209     }
    210   }
    211 
    212   return `<div style="margin:1rem 0">
    213     <p style="font-size:0.78rem;color:var(--text-dim);margin-bottom:0.5rem"><strong>Height</strong> = claim count. <strong>Width</strong> = methodology score (wider = more rigorous). A tall narrow bar = many claims from weak papers. Dashed line = quality-weighted balance.</p>
    214     <svg viewBox="0 0 ${w} ${h}" style="width:100%;max-width:${w}px">${svg}</svg>
    215   </div>`;
    216 }
    217 
    218 function renderClaims(claims: TensionClaim[]): string {
    219   if (!claims.length) return '<p style="color:var(--text-dim);font-size:0.82rem">No claims in this category.</p>';
    220   const sorted = [...claims].sort((a, b) => b.score - a.score).slice(0, 20);
    221   return sorted.map(c => `
    222     <div class="tension-claim">
    223       <span class="support-badge ${c.supported}">${c.supported}</span>
    224       <span style="color:${scoreColor(c.score)};font-family:var(--font);font-size:0.75rem">${c.score}%</span>
    225       ${c.claim.length > 120 ? c.claim.slice(0, 117) + '...' : c.claim}
    226       <a href="#/paper/${c.paper_id}">\u2192</a>
    227     </div>
    228   `).join('');
    229 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs