ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan-v5.json (18604B)


      1 {
      2   "scan_version": 5,
      3   "paper_type": "position",
      4   "paper": {
      5     "title": "On the Future of Software Reuse in the Era of AI Native Software Engineering",
      6     "authors": [
      7       "A. Taivalsaari",
      8       "T. Mikkonen",
      9       "Cesare Pautasso"
     10     ],
     11     "year": 2025,
     12     "venue": "arXiv.org",
     13     "arxiv_id": "2508.19834",
     14     "doi": "10.48550/arXiv.2508.19834"
     15   },
     16   "checklist": {
     17     "claims_and_evidence": {
     18       "abstract_claims_supported": {
     19         "applies": true,
     20         "answer": true,
     21         "justification": "The abstract's claims about AI taking center stage in software creation and the cargo cult parallel are supported by the paper's content, including cited empirical studies and historical analysis; they are framed as emerging trends rather than proven facts.",
     22         "source": "haiku"
     23       },
     24       "causal_claims_justified": {
     25         "applies": true,
     26         "answer": false,
     27         "justification": "The paper cites causal claims from external studies (26% productivity increase, 19% slowdown) without evaluating their internal validity or resolving contradictions between them; the paper presents opposing findings but does not adjudicate the causal evidence.",
     28         "source": "haiku"
     29       },
     30       "generalization_bounded": {
     31         "applies": true,
     32         "answer": false,
     33         "justification": "The paper makes sweeping unbounded generalizations such as 'this might very well turn out to be the most dramatic and impactful paradigm shift ever' and 'rapidly being replaced' without bounding these to specific developer populations, task types, or domains.",
     34         "source": "haiku"
     35       },
     36       "alternative_explanations_discussed": {
     37         "applies": true,
     38         "answer": true,
     39         "justification": "The paper presents contradictory productivity evidence (gains and losses) and acknowledges that AI effectiveness varies by developer experience and context, giving meaningful consideration to alternative outcomes.",
     40         "source": "haiku"
     41       },
     42       "proxy_outcome_distinction": {
     43         "applies": true,
     44         "answer": true,
     45         "justification": "The paper distinguishes measured proxies (task completion time, commit frequency) from actual productivity, and the Becker et al. finding that developers perceived speedup while experiencing slowdown explicitly illustrates this distinction.",
     46         "source": "haiku"
     47       }
     48     },
     49     "limitations_and_scope": {
     50       "limitations_section_present": {
     51         "applies": true,
     52         "answer": false,
     53         "justification": "There is no dedicated limitations or threats-to-validity section; the paper moves from history to implications to a research agenda without any formal self-critique.",
     54         "source": "haiku"
     55       },
     56       "threats_to_validity_specific": {
     57         "applies": true,
     58         "answer": false,
     59         "justification": "No threats to validity are discussed; although the authors concede that their 80/20 observation rests on informal experience rather than empirical study, they do not treat this as a threat to validity, nor do they address potential biases in their selective literature review.",
     60         "source": "haiku"
     61       },
     62       "scope_boundaries_stated": {
     63         "applies": true,
     64         "answer": false,
     65         "justification": "The paper does not explicitly delineate what the argument does not cover; it focuses on software reuse but does not state which types of software, developers, or contexts are outside its scope.",
     66         "source": "haiku"
     67       }
     68     },
     69     "conflicts_of_interest": {
     70       "funding_disclosed": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "No funding source is disclosed anywhere in the paper.",
     74         "source": "haiku"
     75       },
     76       "affiliations_disclosed": {
     77         "applies": true,
     78         "answer": true,
     79         "justification": "Author affiliations are clearly disclosed in the header: Nokia Technologies, University of Jyväskylä, and Università della Svizzera italiana.",
     80         "source": "haiku"
     81       },
     82       "funder_independent_of_outcome": {
     83         "applies": false,
     84         "answer": false,
     85         "justification": "No funder is disclosed; this criterion is not applicable.",
     86         "source": "haiku"
     87       },
     88       "financial_interests_declared": {
     89         "applies": true,
     90         "answer": false,
     91         "justification": "No competing interests or financial interests statement is present; one author is affiliated with Nokia Technologies, a major software industry stakeholder, without any disclosure of potential conflicts.",
     92         "source": "haiku"
     93       }
     94     },
     95     "scope_and_framing": {
     96       "key_terms_defined": {
     97         "applies": true,
     98         "answer": true,
     99         "justification": "Section 3 explicitly defines 'AI Native Software Engineering' across the full development lifecycle; 'generative reuse' and 'opportunistic reuse' are also conceptually defined with references to prior work.",
    100         "source": "haiku"
    101       },
    102       "intended_contribution_clear": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "The abstract explicitly states the paper discusses implications of generative software reuse, 'brings forth relevant questions, and defines a research agenda' — the contribution is clearly stated.",
    106         "source": "haiku"
    107       },
    108       "engagement_with_prior_work": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "The paper engages substantively with 55 references spanning 60 years of software reuse history, building its argument directly on prior frameworks and distinguishing AI-native development from earlier opportunistic reuse paradigms.",
    112         "source": "haiku"
    113       }
    114     }
    115   },
    116   "type_checklist": {
    117     "position": {
    118       "argument_quality": {
    119         "argument_internally_consistent": {
    120           "applies": true,
    121           "answer": true,
    122           "justification": "The argument is internally consistent: AI generates code developers don't understand (cargo cult), this poses quality and safety challenges, therefore research is needed. No contradictions undermine the main thesis.",
    123           "source": "haiku"
    124         },
    125         "counterarguments_addressed": {
    126           "applies": true,
    127           "answer": false,
    128           "justification": "The paper presents contradictory productivity evidence but does not engage with the strongest counterargument — that AI-assisted development is a legitimate engineering discipline rather than cargo cult, or that not deeply understanding all generated code is a rational division of labor.",
    129           "source": "haiku"
    130         },
    131         "analogies_appropriate": {
    132           "applies": true,
    133           "answer": true,
    134           "justification": "The cargo cult analogy is substantively explored with reference to Feynman's original definition and prior software engineering literature on cargo cult programming; the parallel to AI-generated code is apt and grounded rather than a false equivalence.",
    135           "source": "haiku"
    136         },
    137         "prescriptions_proportional": {
    138           "applies": true,
    139           "answer": true,
    140           "justification": "The paper's prescriptions are a research agenda of open questions rather than policy mandates, which is proportional to its exploratory and argumentative nature.",
    141           "source": "haiku"
    142         },
    143         "evidence_for_claims_cited": {
    144           "applies": true,
    145           "answer": true,
    146           "justification": "Most factual claims cite references, including specific productivity studies, historical milestones, and hallucination research; the exceptions are the hallucination rate range (attributed only to 'studies') and the data center energy figures (attributed only to 'public sources'), plus the informal 80/20 observation, which the authors acknowledge is not empirically validated.",
    147           "source": "haiku"
    148         },
    149         "alternatives_discussed": {
    150           "applies": true,
    151           "answer": false,
    152           "justification": "The paper does not seriously discuss alternative analytical frameworks for understanding AI-assisted development; the cargo cult framing is treated as self-evidently correct without comparing it to alternative interpretations.",
    153           "source": "haiku"
    154         },
    155         "historical_context_accurate": {
    156           "applies": true,
    157           "answer": true,
    158           "justification": "Historical claims appear accurate and well-cited: the NATO 1968 conference, McIlroy's paper, Java's introduction in May 1995, and the growth of package repositories are consistent with the public record.",
    159           "source": "haiku"
    160         }
    161       },
    162       "clarity_and_scope": {
    163         "key_terms_defined_precisely": {
    164           "applies": true,
    165           "answer": true,
    166           "justification": "Section 3 provides an explicit definition of 'AI Native Software Engineering' covering design, coding, debugging, testing, deployment, and orchestration; 'generative reuse' and 'opportunistic reuse' are also defined.",
    167           "source": "haiku"
    168         },
    169         "engages_with_existing_literature": {
    170           "applies": true,
    171           "answer": true,
    172           "justification": "The paper substantively engages with literature from 1968 to 2025, building its argument on prior software reuse research and contrasting AI-native development with established frameworks from Biggerstaff, Krueger, and others.",
    173           "source": "haiku"
    174         },
    175         "intended_audience_clear": {
    176           "applies": true,
    177           "answer": false,
    178           "justification": "The paper does not explicitly state its intended audience; it appears aimed at software engineering researchers but could also target practitioners, and this distinction is never made.",
    179           "source": "haiku"
    180         },
    181         "assumptions_stated": {
    182           "applies": true,
    183           "answer": false,
    184           "justification": "Core assumptions — that understanding generated code is necessary, that cargo cult development is harmful, that systematic reuse is preferable to ad hoc reuse — are not stated as assumptions but treated as self-evident premises.",
    185           "source": "haiku"
    186         },
    187         "scope_of_applicability_discussed": {
    188           "applies": true,
    189           "answer": false,
    190           "justification": "The paper does not discuss where its arguments do not apply — for example, whether throwaway scripts, certain domains, or specific developer contexts are exempt from the cargo cult concerns raised.",
    191           "source": "haiku"
    192         }
    193       }
    194     }
    195   },
    196   "claims": [
    197     {
    198       "claim": "Developers using AI tools complete coding tasks 26% faster in large-scale field experiments",
    199       "evidence": "Cited from Cui et al. (2025), a study of ~5,000 developers across Microsoft, Accenture, and a third company over 2-8 months",
    200       "supported": "moderate"
    201     },
    202     {
    203       "claim": "Lab experiments show a 58% decrease in time to complete coding tasks with GitHub Copilot",
    204       "evidence": "Cited from Peng et al. (2023), a controlled laboratory study using an HTTP server implementation task",
    205       "supported": "moderate"
    206     },
    207     {
    208       "claim": "Experienced open-source developers take 19% longer to complete issues when using AI tools",
    209       "evidence": "Cited from Becker et al. (2025), a randomized controlled trial; notable that developers still believed AI had sped them up",
    210       "supported": "moderate"
    211     },
    212     {
    213       "claim": "Hallucination rates with current AI development tools range from 1-3% to nearly 80% depending on use case",
    214       "evidence": "Cited vaguely as 'according to studies' with no specific citation for the hallucination rate range",
    215       "supported": "weak"
    216     },
    217     {
    218       "claim": "AI prompt engineering follows an 80/20 rule: can meet ~80% of requirements easily but the remaining 20% consumes ~80% of development time",
    219       "evidence": "Explicitly acknowledged by the authors as based on informal experience: 'we have not yet performed any truly scientific empirical studies on actual percentages'",
    220       "supported": "weak"
    221     },
    222     {
    223       "claim": "AI-assisted generative reuse represents a new form of cargo cult development in which developers trust code they do not understand",
    224       "evidence": "Argued by analogy to classic cargo cult programming literature (Feynman, Lippert); no empirical measurement of actual developer comprehension rates",
    225       "supported": "weak"
    226     }
    227   ],
    228   "methodology_tags": [
    229     "theoretical",
    230     "qualitative"
    231   ],
    232   "key_findings": "This position paper argues that AI-assisted code generation constitutes a new form of 'cargo cult development' — a paradigm shift from opportunistic software reuse to AI-native generative reuse in which developers increasingly rely on code they do not understand. The paper surveys 60 years of software reuse history, reviews contradictory empirical evidence on productivity (ranging from 58% speedups in lab settings to 19% slowdowns in real-world RCTs for experienced developers), and identifies challenges including hallucination, copyright ambiguity, energy consumption, and developer skill atrophy. The paper's main contribution is a research agenda of 20+ open questions about the limits of prompt engineering and the feasibility of systematic practices for generative reuse.",
    233   "red_flags": [
    234     {
    235       "flag": "Uncited hallucination rate range",
    236       "detail": "The claim that hallucination rates range from '1-3% to nearly 80%' is presented without any specific citation, making it unverifiable."
    237     },
    238     {
    239       "flag": "Informal 80/20 observation presented as insight",
    240       "detail": "The central 80/20 rule claim is explicitly acknowledged by the authors as based on informal experience rather than empirical study, yet it is presented as a key finding in the research agenda section."
    241     },
    242     {
    243       "flag": "No limitations section",
    244       "detail": "The paper has no dedicated limitations or threats-to-validity section for its own analysis, argument selection, or literature coverage."
    245     },
    246     {
    247       "flag": "Nokia affiliation without conflict disclosure",
    248       "detail": "One author is from Nokia Technologies, a major software industry stakeholder, with no competing interests or financial interests statement anywhere in the paper."
    249     },
    250     {
    251       "flag": "Energy statistics without specific citations",
    252       "detail": "Data center electricity consumption figures (200 TWh, 53-76 TWh for AI) are attributed only to 'public sources' without specific references."
    253     },
    254     {
    255       "flag": "Cargo cult thesis argued by analogy only",
    256       "detail": "The central thesis — that AI-assisted development is cargo cult development — is supported by analogy rather than empirical measurement of developer comprehension, code quality outcomes, or failure rates attributable to lack of understanding."
    257     }
    258   ],
    259   "cited_papers": [
    260     {
    261       "title": "Measuring the Impact of Early-2025 AI on Experienced Open-Source Developer Productivity",
    262       "relevance": "RCT showing 19% productivity slowdown for experienced developers using AI tools — key contradictory evidence for AI productivity claims"
    263     },
    264     {
    265       "title": "The Effects of Generative AI on High-Skilled Work: Evidence from Three Field Experiments with Software Developers",
    266       "relevance": "Large-scale field experiment showing 26% productivity gain with Copilot; central evidence in the productivity debate"
    267     },
    268     {
    269       "title": "The Impact of AI on Developer Productivity: Evidence from GitHub Copilot",
    270       "relevance": "Lab study showing 58% speedup with Copilot; frequently cited benchmark for AI coding assistant productivity"
    271     },
    272     {
    273       "title": "On the Dangers of Stochastic Parrots: Can Language Models Be Too Big?",
    274       "relevance": "Foundational critique of LLMs as stochastic parrots; the paper directly adopts this framing for its hallucination discussion"
    275     },
    276     {
    277       "title": "Generative AI for Code Generation: Software Reuse Implications",
    278       "relevance": "Directly addresses the intersection of generative AI and software reuse — the paper's core topic"
    279     },
    280     {
    281       "title": "The Influence of Artificial Intelligence Tools on Learning Outcomes in Computer Programming: A Systematic Review and Meta-Analysis",
    282       "relevance": "Meta-analysis cited as evidence that junior developers benefit more from AI assistance than experienced developers"
    283     },
    284     {
    285       "title": "Software Reuse in the Era of Opportunistic Design",
    286       "relevance": "Authors' prior work establishing the opportunistic reuse framework that this paper extends to the AI era"
    287     },
    288     {
    289       "title": "Vibe Coding vs. Agentic Coding: Fundamentals and Practical Implications of Agentic AI",
    290       "relevance": "Defines 'vibe coding' and 'agentic coding' concepts discussed in the paper's treatment of prompt engineering"
    291     }
    292   ],
    293   "engagement_factors": {
    294     "practical_relevance": {
    295       "score": 2,
    296       "justification": "The research agenda and balanced review of AI tool limitations and benefits are directly actionable for practitioners adopting AI-assisted development."
    297     },
    298     "surprise_contrarian": {
    299       "score": 2,
    300       "justification": "The Becker et al. finding that AI makes experienced developers 19% slower (while they believe it's helping) and the cargo cult framing both challenge uncritical AI adoption narratives."
    301     },
    302     "fear_safety": {
    303       "score": 1,
    304       "justification": "Raises concerns about skill atrophy, hallucination in production code, slopsquatting supply chain attacks, and agentic AI security, but these are secondary to the main reuse argument."
    305     },
    306     "drama_conflict": {
    307       "score": 1,
    308       "justification": "The cargo cult analogy is mildly provocative and the productivity contradiction (developers think AI helps but it slows them down) is compelling, but the paper's tone is measured and academic."
    309     },
    310     "demo_ability": {
    311       "score": 0,
    312       "justification": "This is a theoretical position paper with no demo, tool, dataset, or code artifact to try."
    313     },
    314     "brand_recognition": {
    315       "score": 1,
    316       "justification": "Nokia Technologies affiliation adds minor industry recognition; no major AI lab or widely-known product involvement."
    317     }
    318   },
    319   "hn_data": {
    320     "threads": [
    321       {
    322         "hn_id": "43905792",
    323         "title": "Unveiling the Hidden: Movie Genre and User Bias in Spoiler Detection",
    324         "points": 3,
    325         "comments": 0,
    326         "url": "https://news.ycombinator.com/item?id=43905792",
    327         "created_at": "2025-05-06T14:49:36Z"
    328       }
    329     ],
    330     "top_points": 3,
    331     "total_points": 3,
    332     "total_comments": 0
    333   }
    334 }

Impressum · Datenschutz