scan-v4.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v4.json (19105B)
      1 {
      2   "scan_version": 4,
      3   "paper_type": "position",
      4   "paper": {
      5     "title": "On the Future of Software Reuse in the Era of AI Native Software Engineering",
      6     "authors": [
      7       "A. Taivalsaari",
      8       "T. Mikkonen",
      9       "Cesare Pautasso"
     10     ],
     11     "year": 2025,
     12     "venue": "arXiv.org",
     13     "arxiv_id": "2508.19834",
     14     "doi": "10.48550/arXiv.2508.19834"
     15   },
     16   "checklist": {
     17     "claims_and_evidence": {
     18       "abstract_claims_supported": {
     19         "applies": true,
     20         "answer": true,
     21         "justification": "The abstract claims to 'discuss the implications of AI-assisted generative software reuse, bring forth relevant questions, and define a research agenda.' The paper delivers on all three in Sections 4-5.",
     22         "source": "opus"
     23       },
     24       "causal_claims_justified": {
     25         "applies": false,
     26         "answer": false,
     27         "justification": "The paper makes no causal claims; it discusses implications and poses questions rather than asserting causal relationships.",
     28         "source": "opus"
     29       },
     30       "generalization_bounded": {
     31         "applies": true,
     32         "answer": false,
     33         "justification": "The paper makes broad claims about AI-native software engineering as a paradigm shift based primarily on anecdotal experience ('based on our cumulative experience of over hundred years') without bounding these generalizations to specific contexts or technologies.",
     34         "source": "opus"
     35       },
     36       "alternative_explanations_discussed": {
     37         "applies": true,
     38         "answer": false,
     39         "justification": "The paper presents one interpretation (generative reuse as cargo cult development) without seriously considering alternatives. For instance, it does not discuss whether AI-generated code might develop its own quality norms distinct from human conventions, or whether the cargo cult framing overstates the problem.",
     40         "source": "opus"
     41       },
     42       "proxy_outcome_distinction": {
     43         "applies": false,
     44         "answer": false,
     45         "justification": "Theoretical paper with no measurements of its own.",
     46         "source": "opus"
     47       }
     48     },
     49     "limitations_and_scope": {
     50       "limitations_section_present": {
     51         "applies": true,
     52         "answer": false,
     53         "justification": "No dedicated limitations section. The paper has Discussion (Section 6) and Conclusions (Section 7) but neither contains a substantive discussion of limitations of the paper's own analysis or arguments.",
     54         "source": "opus"
     55       },
     56       "threats_to_validity_specific": {
     57         "applies": true,
     58         "answer": false,
     59         "justification": "No threats to validity discussed. The authors do not acknowledge, for instance, that their perspective may be biased by their specific experience or that the cargo cult framing may not generalize.",
     60         "source": "opus"
     61       },
     62       "scope_boundaries_stated": {
     63         "applies": true,
     64         "answer": false,
     65         "justification": "The paper does not explicitly state what its analysis does NOT cover. It makes sweeping statements about AI-native software engineering without bounding the scope to specific types of development, specific tools, or specific domains.",
     66         "source": "opus"
     67       }
     68     },
     69     "conflicts_of_interest": {
     70       "funding_disclosed": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "No funding or acknowledgments section present in the paper.",
     74         "source": "opus"
     75       },
     76       "affiliations_disclosed": {
     77         "applies": true,
     78         "answer": true,
     79         "justification": "Author affiliations are clearly listed: Nokia Technologies (Taivalsaari), University of Jyväskylä (Mikkonen), and USI (Pautasso).",
     80         "source": "opus"
     81       },
     82       "funder_independent_of_outcome": {
     83         "applies": true,
     84         "answer": false,
     85         "justification": "Taivalsaari is affiliated with Nokia Technologies, which has a stake in software development practices. No funding disclosure is provided to assess independence.",
     86         "source": "opus"
     87       },
     88       "financial_interests_declared": {
     89         "applies": true,
     90         "answer": false,
     91         "justification": "No competing interests statement present. Taivalsaari's Nokia affiliation represents a potential interest that is not explicitly addressed.",
     92         "source": "opus"
     93       }
     94     },
     95     "scope_and_framing": {
     96       "key_terms_defined": {
     97         "applies": true,
     98         "answer": true,
     99         "justification": "Core terms are defined with reasonable precision: 'AI Native Software Engineering' is defined in Section 3, 'generative reuse' is defined and distinguished from opportunistic reuse, and 'cargo cult development' is explained via Feynman and Lippert citations.",
    100         "source": "haiku"
    101       },
    102       "intended_contribution_clear": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "The paper clearly states its contribution: to 'discuss the implications of AI-assisted generative software reuse, bring forth relevant questions, and define a research agenda for tackling the central issues associated with this emerging approach.'",
    106         "source": "haiku"
    107       },
    108       "engagement_with_prior_work": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "The paper engages substantively with prior work — discussing Biggerstaff's generative reuse analysis, comparing Peng et al. and Becker et al. productivity studies, and situating current AI tools within the sixty-year historical trajectory of software reuse research.",
    112         "source": "haiku"
    113       }
    114     }
    115   },
    116   "type_checklist": {
    117     "position": {
    118       "argument_quality": {
    119         "argument_internally_consistent": {
    120           "applies": true,
    121           "answer": true,
    122           "justification": "The cargo cult framing is applied consistently throughout from introduction to research agenda. The acknowledgment of contradictory productivity evidence is handled without contradicting the paper's core thesis.",
    123           "source": "haiku"
    124         },
    125         "counterarguments_addressed": {
    126           "applies": true,
    127           "answer": false,
    128           "justification": "The paper mentions studies showing productivity gains but does not engage with the strongest counterargument to the cargo cult analogy — that unlike genuine cargo cults, AI tools demonstrably produce functional outputs with measurable value. The analogy's limits are not examined.",
    129           "source": "haiku"
    130         },
    131         "analogies_appropriate": {
    132           "applies": true,
    133           "answer": false,
    134           "justification": "The cargo cult analogy is central but imprecise: cargo cults involve rituals with no causal mechanism, while AI code generation produces functional outputs the paper concedes 'can be nearly perfect.' This tension is not reconciled.",
    135           "source": "haiku"
    136         },
    137         "prescriptions_proportional": {
    138           "applies": true,
    139           "answer": true,
    140           "justification": "The paper's prescriptions are primarily research questions for a research agenda rather than strong policy recommendations, which is proportional to the evidence presented. The authors explicitly frame their '80/20 rule' as anecdotal.",
    141           "source": "haiku"
    142         },
    143         "evidence_for_claims_cited": {
    144           "applies": true,
    145           "answer": false,
    146           "justification": "Most factual claims are cited, but the hallucination rate claim ('1-3% to nearly 80%') is asserted with no specific citations. The '80/20 rule' observation is explicitly acknowledged by the authors as lacking empirical grounding.",
    147           "source": "haiku"
    148         },
    149         "alternatives_discussed": {
    150           "applies": true,
    151           "answer": false,
    152           "justification": "The paper presents one dominant framing (AI reuse as cargo cult) without discussing alternative frameworks such as viewing AI as a genuine new abstraction layer, a democratizing force for development, or a productivity amplifier that improves with experience.",
    153           "source": "haiku"
    154         },
    155         "historical_context_accurate": {
    156           "applies": true,
    157           "answer": true,
    158           "justification": "The historical account of software reuse — from the NATO 1968 conference through 1980s reuse research, commercial component frameworks in the 1990s, to NPM and PyPI — appears accurate and is well-referenced.",
    159           "source": "haiku"
    160         }
    161       },
    162       "clarity_and_scope": {
    163         "key_terms_defined_precisely": {
    164           "applies": true,
    165           "answer": true,
    166           "justification": "Core terms are defined precisely in context: 'AI Native Software Engineering' receives a scoped definition in Section 3, 'generative reuse' is distinguished from opportunistic reuse, and 'cargo cult' is sourced to Feynman and Lippert.",
    167           "source": "haiku"
    168         },
    169         "engages_with_existing_literature": {
    170           "applies": true,
    171           "answer": true,
    172           "justification": "The paper engages with existing literature substantively — discussing Biggerstaff's 1998 generative reuse analysis including his 'KLOC subsystem' finding, comparing multiple productivity RCTs, and situating the current moment in sixty years of reuse research.",
    173           "source": "haiku"
    174         },
    175         "intended_audience_clear": {
    176           "applies": true,
    177           "answer": false,
    178           "justification": "The intended audience is never explicitly stated. The research agenda framing implies academic researchers, but the tone and breadth could address practitioners or policymakers, and no explicit statement about intended readership is made.",
    179           "source": "haiku"
    180         },
    181         "assumptions_stated": {
    182           "applies": true,
    183           "answer": false,
    184           "justification": "The paper's central assumption — that developers using AI-generated code do not understand it, making it analogous to cargo cult — is presented as self-evident rather than stated as an explicit assumption requiring justification.",
    185           "source": "haiku"
    186         },
    187         "scope_of_applicability_discussed": {
    188           "applies": true,
    189           "answer": false,
    190           "justification": "The paper does not discuss where its cargo cult framing does NOT apply — for example, whether concerns differ for safety-critical vs. prototyping contexts, or for expert vs. novice developers using AI tools.",
    191           "source": "haiku"
    192         }
    193       }
    194     }
    195   },
    196   "claims": [
    197     {
    198       "claim": "AI native software development is a new form of cargo cult development in which developers use code generated by opaque AI systems without understanding it.",
    199       "evidence": "Analogical argument drawing on Feynman's cargo cult science and prior work on opportunistic reuse; no empirical measurement of developer understanding of AI-generated code.",
    200       "supported": "weak"
    201     },
    202     {
    203       "claim": "Junior developers benefit more from AI tools than experienced developers.",
    204       "evidence": "Cited across multiple studies: Cui et al. (2025) Copilot RCTs, Peng et al. (2023) GitHub Copilot study, Alanazi et al. (2025) meta-analysis of AI tools in programming courses.",
    205       "supported": "moderate"
    206     },
    207     {
    208       "claim": "Experienced open-source developers take 19% longer to complete tasks when using AI tools, despite expecting a 24% speedup.",
    209       "evidence": "Becker et al. (2025) RCT — a specific result from a controlled study with experienced open-source developers.",
    210       "supported": "strong"
    211     },
    212     {
    213       "claim": "AI tools increased weekly completed tasks by 26% across nearly 5,000 developers in three large companies.",
    214       "evidence": "Cui et al. (2025) — three large-scale randomized controlled trials at Microsoft, Accenture, and a third company over 2-8 months.",
    215       "supported": "strong"
    216     },
    217     {
    218       "claim": "AI prompt engineering follows an '80/20 rule': 80% of requirements achieved easily, remaining 20% consumes approximately 80% of development time.",
    219       "evidence": "Authors' own anecdotal experience: 'we have not yet performed any truly scientific empirical studies on actual percentages, this observation seems to hold true for those development activities that we have carried out so far.'",
    220       "supported": "unsupported"
    221     },
    222     {
    223       "claim": "LLM hallucination rates range from 1-3% to nearly 80% depending on use case and domain.",
    224       "evidence": "'According to studies' — no specific studies are cited for this range, making it unverifiable.",
    225       "supported": "weak"
    226     }
    227   ],
    228   "methodology_tags": [
    229     "theoretical",
    230     "qualitative"
    231   ],
    232   "key_findings": "This position paper argues that AI-native software development constitutes a new form of generative software reuse analogous to cargo cult development, where developers trust and reuse code generated by opaque AI systems without adequate understanding of its inner workings. The paper surveys contradictory productivity evidence — some RCTs showing gains ranging from 26% more completed tasks per week to a 58% speedup on a single task, one showing a 19% slowdown for experienced developers — concluding that AI tools lower barriers but cannot replace deep technical expertise. The paper proposes a research agenda of approximately 20 open questions about the limits of prompt engineering, systematic practices for generative reuse, copyright implications, and security. The authors honestly acknowledge their key '80/20 rule' observation about prompt engineering effectiveness is anecdotal and not empirically grounded.",
    233   "red_flags": [
    234     {
    235       "flag": "Anecdotal 80/20 rule presented as a finding",
    236       "detail": "The paper presents an '80/20 rule' for prompt engineering effectiveness as a notable empirical observation while acknowledging in the same paragraph that 'we have not yet performed any truly scientific empirical studies on actual percentages.' Presenting this as a finding rather than speculation is misleading."
    237     },
    238     {
    239       "flag": "Uncited hallucination rate range",
    240       "detail": "The claim that hallucination rates range from '1-3% to nearly 80%' is attributed to 'studies' without any specific citations, making the range unverifiable and unfalsifiable."
    241     },
    242     {
    243       "flag": "Cargo cult analogy applied without examining its limits",
    244       "detail": "The central cargo cult analogy is pushed throughout without acknowledging where it breaks down. Unlike ritual cargo cult behavior, AI tools demonstrably produce functional outputs — the paper concedes code 'can be nearly perfect' without reconciling this with the analogy."
    245     },
    246     {
    247       "flag": "Appeal to authority as evidence",
    248       "detail": "The paper invokes 'over a hundred years of cumulative expertise' multiple times as grounds for credibility. This is an appeal to authority, not a methodological justification for the claims made."
    249     },
    250     {
    251       "flag": "No limitations section; no discussion of author bias",
    252       "detail": "A position paper proposing a research agenda for a major paradigm shift offers no limitations section, no discussion of where the analysis might be wrong, and no acknowledgment of the Nokia-affiliated author's potential industry conflicts."
    253     }
    254   ],
    255   "cited_papers": [
    256     {
    257       "title": "Measuring the Impact of Early-2025 AI on Experienced Open-Source Developer Productivity",
    258       "relevance": "Key RCT finding that experienced developers are 19% slower with AI tools — the most counterintuitive productivity result discussed in the paper."
    259     },
    260     {
    261       "title": "The Effects of Generative AI on High-Skilled Work: Evidence from Three Field Experiments with Software Developers",
    262       "relevance": "Three large-scale RCTs across ~5,000 developers at Microsoft, Accenture, and a third company showing 26% task completion gains with Copilot."
    263     },
    264     {
    265       "title": "The Impact of AI on Developer Productivity: Evidence from GitHub Copilot",
    266       "relevance": "Early lab study reporting 58% speedup on an HTTP server task; cited as showing large productivity gains in controlled settings."
    267     },
    268     {
    269       "title": "On the Dangers of Stochastic Parrots: Can Language Models Be Too Big?",
    270       "relevance": "Central reference for the 'stochastic parrot' and hallucination framing used throughout the paper to characterize LLM limitations."
    271     },
    272     {
    273       "title": "Generative AI for Code Generation: Software Reuse Implications",
    274       "relevance": "Direct prior work on the software reuse implications of generative AI code generation, framing the paper's contribution."
    275     },
    276     {
    277       "title": "Vibe Coding vs. Agentic Coding: Fundamentals and Practical Implications of Agentic AI",
    278       "relevance": "Cited for 'vibe coding' and 'agentic coding' terminology defining the emerging development paradigm the paper analyzes."
    279     },
    280     {
    281       "title": "The Prompt Report: A Systematic Survey of Prompt Engineering Techniques",
    282       "relevance": "Systematic survey of prompt engineering, directly relevant to the paper's discussion of prompting as the new development interface."
    283     },
    284     {
    285       "title": "A Perspective of Generative Reuse",
    286       "relevance": "Biggerstaff's 1998 analysis of generative reuse, including the KLOC subsystem finding cited to argue AI-generated monolithic code lacks maintainability structure."
    287     }
    288   ],
    289   "engagement_factors": {
    290     "practical_relevance": {
    291       "score": 2,
    292       "justification": "The benefit/challenge taxonomy and research agenda are useful frameworks for practitioners evaluating AI tool adoption, though no actionable prescriptions are offered."
    293     },
    294     "surprise_contrarian": {
    295       "score": 2,
    296       "justification": "The 19% productivity slowdown for experienced developers is a counterintuitive result highlighted prominently; the cargo cult framing of AI-assisted development challenges mainstream enthusiasm."
    297     },
    298     "fear_safety": {
    299       "score": 1,
    300       "justification": "Security concerns (slopsquatting, prompt injection, jailbreaking) are discussed but are peripheral to the main argument rather than the primary driver."
    301     },
    302     "drama_conflict": {
    303       "score": 1,
    304       "justification": "Mild tension between AI optimism and cargo cult skepticism, but the paper is measured and academic rather than polemical."
    305     },
    306     "demo_ability": {
    307       "score": 0,
    308       "justification": "No demo, tool, or artifact to try — purely a position and survey paper."
    309     },
    310     "brand_recognition": {
    311       "score": 1,
    312       "justification": "Nokia affiliation adds minor industry recognition, but the authors are not widely known outside the software reuse research community."
    313     }
    314   },
    315   "hn_data": {
    316     "threads": [
    317       {
    318         "hn_id": "43905792",
    319         "title": "Unveiling the Hidden: Movie Genre and User Bias in Spoiler Detection",
    320         "points": 3,
    321         "comments": 0,
    322         "url": "https://news.ycombinator.com/item?id=43905792",
    323         "created_at": "2025-05-06T14:49:36Z"
    324       }
    325     ],
    326     "top_points": 3,
    327     "total_points": 3,
    328     "total_comments": 0
    329   }
    330 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs