scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (28517B)
      1 {
      2   "paper": {
      3     "title": "AI-Assisted Programming Decreases the Productivity of Experienced Developers by Increasing the Technical Debt and Maintenance Burden",
      4     "authors": [
      5       "Feiyang (Amber) Xu",
      6       "Poonacha K. Medappa",
      7       "Murat M. Tunc",
      8       "Martijn Vroegindeweij",
      9       "Jan C. Fransoo"
     10     ],
     11     "year": 2025,
     12     "venue": "arXiv",
     13     "arxiv_id": "2510.10165",
     14     "doi": null
     15   },
     16   "checklist": {
     17     "artifacts": {
     18       "code_released": {
     19         "applies": true,
     20         "answer": false,
     21         "justification": "No repository URL, code archive, or supplementary materials link is provided anywhere in the paper. The data was collected via GitHub API but no analysis code is shared."
     22       },
     23       "data_released": {
     24         "applies": true,
     25         "answer": false,
     26         "justification": "No dataset download link or data archive is provided. The authors describe collecting data via GitHub's API for Microsoft-owned repositories but do not release the dataset."
     27       },
     28       "environment_specified": {
     29         "applies": true,
     30         "answer": false,
     31         "justification": "No environment specifications, library versions, requirements files, or software dependencies are mentioned. The econometric software used for the DiD analysis is not specified."
     32       },
     33       "reproduction_instructions": {
     34         "applies": true,
     35         "answer": false,
     36         "justification": "No step-by-step reproduction instructions, README, or scripts are provided. While the methodology is described in detail (Section 3), there are no instructions for replicating the data collection or analysis pipeline."
     37       }
     38     },
     39     "statistical_methodology": {
     40       "confidence_intervals_or_error_bars": {
     41         "applies": true,
     42         "answer": true,
     43         "justification": "95% confidence intervals are reported in the lead-lag analysis (Table 10) and subgroup analyses (Tables 11-14). Figure 5 shows 95% confidence intervals on the subgroup coefficient plots. Standard errors are reported in parentheses in all regression tables."
     44       },
     45       "significance_tests": {
     46         "applies": true,
     47         "answer": true,
     48         "justification": "Statistical significance is tested throughout using t-statistics and p-values. Regression tables (Tables 5, 6, 8) report significance levels (*p<0.1; **p<0.05; ***p<0.01). The lead-lag analysis (Table 10) reports full t-statistics and p-values."
     49       },
     50       "effect_sizes_reported": {
     51         "applies": true,
     52         "answer": true,
     53         "justification": "Effect sizes are reported as percentage changes derived from log-transformed coefficients. For example, '17.7% increase in lines of code added (exp(0.163)-1)', '2.4% more code rework (exp(0.024)-1)', '19% decrease in commits' for core contributors, and '43.5% increase in commit activity' for peripheral contributors (Sections 4.1, 4.2, 6.2)."
     54       },
     55       "sample_size_justified": {
     56         "applies": true,
     57         "answer": false,
     58         "justification": "No power analysis or explicit justification for the sample size is provided. The sample includes 2,755 repositories and 1,699 contributors, which are described but not justified in terms of statistical adequacy. No discussion of whether these sample sizes are sufficient for the subgroup analyses."
     59       },
     60       "variance_reported": {
     61         "applies": true,
     62         "answer": true,
     63         "justification": "Standard deviations are reported in descriptive statistics tables (Tables 3, 4). Standard errors are reported for all regression coefficients. The study uses clustered robust standard errors at the project/individual level to account for heteroskedasticity."
     64       }
     65     },
     66     "evaluation_design": {
     67       "baselines_included": {
     68         "applies": true,
     69         "answer": true,
     70         "justification": "The DiD design inherently includes a baseline comparison: the control group consists of repositories/contributors using non-Copilot-endorsed languages (R, C, C#, C++, Java, PHP, Scala). Pre-treatment periods serve as the temporal baseline."
     71       },
     72       "baselines_contemporary": {
     73         "applies": true,
     74         "answer": true,
     75         "justification": "The control group uses contemporary non-Copilot-endorsed languages from the same time period and platform (Microsoft-owned GitHub repositories), ensuring contemporaneous comparison. The study also references and compares with concurrent work by Yeverechyahu et al. (2024)."
     76       },
     77       "ablation_study": {
     78         "applies": false,
     79         "answer": false,
     80         "justification": "This is an observational econometric study using a natural experiment design, not a system with components that could be ablated."
     81       },
     82       "multiple_metrics": {
     83         "applies": true,
     84         "answer": true,
     85         "justification": "Multiple outcome metrics are used: lines of code added, commits, PRs (development activity); PR rework (technical debt); PR reviews and PR reviewed repositories (maintenance effort). These are measured at both project and individual levels."
     86       },
     87       "human_evaluation": {
     88         "applies": false,
     89         "answer": false,
     90         "justification": "Human evaluation of system outputs is not relevant here. This is an econometric analysis of developer behavior data, not a system whose outputs require human judgment."
     91       },
     92       "held_out_test_set": {
     93         "applies": false,
     94         "answer": false,
     95         "justification": "This is not a predictive modeling study. The DiD framework analyzes the full dataset without train/test splits."
     96       },
     97       "per_category_breakdown": {
     98         "applies": true,
     99         "answer": true,
    100         "justification": "Extensive per-category breakdowns are provided. Figure 5 and Tables 11-14 show results by contributor subgroup (0-25%, 25-50%, 50-75%, 75-100% based on pretreatment commits). The core vs. peripheral contributor analysis (Table 6) provides further breakdown."
    101       },
    102       "failure_cases_discussed": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "The paper discusses where effects are not significant: peripheral contributors show no significant change in PR reviews or PR reviewed repos, and the 50-75% subgroup shows no significant change in commits or PRs. The paper acknowledges that Copilot usage cannot be identified at the individual level, which is a limitation of the analysis."
    106       },
    107       "negative_results_reported": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "The paper reports that core contributors experienced a 19% decrease in commit activity post-Copilot. Several subgroup coefficients are not statistically significant (e.g., 50-75% subgroup for commits, p=0.532). The core result itself is arguably a negative finding about AI-assisted programming."
    111       }
    112     },
    113     "claims_and_evidence": {
    114       "abstract_claims_supported": {
    115         "applies": true,
    116         "answer": true,
    117         "justification": "The abstract claims are supported by results: productivity increases (Table 5), primarily driven by peripheral developers (Tables 11-14, Figure 5), code requires more rework (Table 5, PR rework coefficient), core developers review 6.5% more code (Table 6, PR Review coefficient 0.06; exp(0.06)-1 ≈ 6.2% with the rounded coefficient, so the 6.5% figure presumably reflects the unrounded estimate), and show a 19% drop in productivity (Table 6, Core Contributor x Copilot coefficient for commits). All claims are traceable to specific regression results."
    118       },
    119       "causal_claims_justified": {
    120         "applies": true,
    121         "answer": true,
    122         "justification": "The paper uses a Difference-in-Differences (DiD) design exploiting the natural experiment of Copilot's staggered language endorsement (Section 3.1). The parallel trends assumption is tested with an event study (Section 3.4.2, Figure 4, Table 10). Robustness checks include CEM matching (Section 5.1) and Oster sensitivity analysis for omitted variable bias (Section 5.2, delta=7.82). This is an appropriate causal identification strategy."
    123       },
    124       "generalization_bounded": {
    125         "applies": true,
    126         "answer": false,
    127         "justification": "The title and abstract make broad claims about 'AI-Assisted Programming' decreasing productivity of 'Experienced Developers', but the study only examines Microsoft-owned OSS repositories during the Copilot technical preview period (June 2021 - July 2022). The paper does not adequately bound its claims to this specific setting. The discussion extends to 'knowledge-intensive industries' generally (Section 7.2) without sufficient hedging."
    128       },
    129       "alternative_explanations_discussed": {
    130         "applies": true,
    131         "answer": true,
    132         "justification": "The paper addresses alternative explanations through multiple robustness checks: CEM matching to address selection bias (Section 5.1), Oster sensitivity analysis for omitted variable bias (Section 5.2, delta=7.82), and parallel trends testing (Section 3.4.2). The DiD design controls for time-invariant repository characteristics and common temporal shocks. However, the paper acknowledges it cannot observe individual Copilot usage."
    133       }
    134     },
    135     "setup_transparency": {
    136       "model_versions_specified": {
    137         "applies": true,
    138         "answer": false,
    139         "justification": "The paper mentions GitHub Copilot was 'developed on OpenAI's GPT-3 model' but does not specify the exact model version, API version, or snapshot date. No specific Copilot version is identified beyond 'technical preview.'"
    140       },
    141       "prompts_provided": {
    142         "applies": false,
    143         "answer": false,
    144         "justification": "This is an observational study analyzing the effects of Copilot adoption on developer behavior. The authors did not use prompting as part of their methodology; they analyzed naturally occurring development activity."
    145       },
    146       "hyperparameters_reported": {
    147         "applies": false,
    148         "answer": false,
    149         "justification": "This is an econometric observational study, not an ML system. While the paper could report econometric model specifications in more detail, the 'hyperparameters' concept applies to ML/LLM settings. The regression specifications and fixed effects are described in Section 3.4."
    150       },
    151       "scaffolding_described": {
    152         "applies": false,
    153         "answer": false,
    154         "justification": "No agentic scaffolding is used. This is an econometric analysis of developer behavior, not an AI system."
    155       },
    156       "data_preprocessing_documented": {
    157         "applies": true,
    158         "answer": true,
    159         "justification": "The data preprocessing pipeline is documented: data collected via GitHub API (Section 3.2), treatment/control groups defined by programming language endorsement, 2,755 repositories total (1,660 treatment, 1,095 control), individual-level filtering from 37,334 contributors to 5,308 to 1,699 (footnote 14 explains the filtering criteria), data aggregated to monthly panels (Section 3.1), and log transformations applied to dependent variables (Table 2 note)."
    160       }
    161     },
    162     "limitations_and_scope": {
    163       "limitations_section_present": {
    164         "applies": true,
    165         "answer": false,
    166         "justification": "There is no dedicated limitations or threats-to-validity section in the paper. The discussion section (Section 7) focuses on contributions and future research but does not include a separate subsection addressing limitations."
    167       },
    168       "threats_to_validity_specific": {
    169         "applies": true,
    170         "answer": false,
    171         "justification": "No specific threats to validity are discussed in a structured manner. While the paper addresses some methodological concerns implicitly through robustness checks (matching, Oster analysis), it does not explicitly enumerate threats such as the inability to observe individual Copilot usage, potential confounds from concurrent events, or limitations of the programming-language-based treatment assignment."
    172       },
    173       "scope_boundaries_stated": {
    174         "applies": true,
    175         "answer": false,
    176         "justification": "The paper does not explicitly state what the results do NOT show. The study is limited to Microsoft-owned OSS repositories during the technical preview period, but the paper does not clearly bound its claims to this setting. The title and discussion generalize to 'AI-Assisted Programming' broadly without stating scope boundaries."
    177       }
    178     },
    179     "data_integrity": {
    180       "raw_data_available": {
    181         "applies": true,
    182         "answer": false,
    183         "justification": "Raw data is not made available. No data download link, supplementary data files, or database access is provided. The data was collected via GitHub's API but is not released for independent verification."
    184       },
    185       "data_collection_described": {
    186         "applies": true,
    187         "answer": true,
    188         "justification": "Data collection is described in Section 3.2: data collected via GitHub's API service, focusing on Microsoft-owned OSS repositories. The observation period is July 2020 to July 2022 (12 months before and after Copilot technical preview). Treatment and control groups are defined by programming language endorsement. Repository-level and individual-level datasets are described with sample sizes."
    189       },
    190       "recruitment_methods_described": {
    191         "applies": false,
    192         "answer": false,
    193         "justification": "No human participants were recruited. This is a mining study of publicly available GitHub data. The data source is a standard platform (GitHub) with selection criteria described."
    194       },
    195       "data_pipeline_documented": {
    196         "applies": true,
    197         "answer": true,
    198         "justification": "The data pipeline is documented: collection from GitHub API, treatment/control group assignment by language, filtering from 37,334 contributors to 5,308 (active in 3+ repos) to 1,699 (language filter) as explained in footnote 14. Monthly aggregation of measures is described. Log transformation with +1 offset for zeros is documented (Table 2 note). CEM matching sample reduction from 2,755 to 2,510 repositories is documented (Section 5.1)."
    199       }
    200     },
    201     "conflicts_of_interest": {
    202       "funding_disclosed": {
    203         "applies": true,
    204         "answer": false,
    205         "justification": "No funding source is disclosed anywhere in the paper. There is no acknowledgments section listing grants, corporate sponsors, or funding agencies."
    206       },
    207       "affiliations_disclosed": {
    208         "applies": true,
    209         "answer": true,
    210         "justification": "All authors are listed as affiliated with Tilburg University, the Netherlands, with institutional email addresses provided. Tilburg University is not the company whose product is being evaluated (Microsoft/GitHub Copilot)."
    211       },
    212       "funder_independent_of_outcome": {
    213         "applies": true,
    214         "answer": false,
    215         "justification": "No funding information is disclosed, so it is impossible to assess whether the funder is independent of the outcome. The absence of a funding disclosure makes this unanswerable in the affirmative."
    216       },
    217       "financial_interests_declared": {
    218         "applies": true,
    219         "answer": false,
    220         "justification": "No competing interests statement or financial interests declaration is present in the paper."
    221       }
    222     },
    223     "contamination": {
    224       "training_cutoff_stated": {
    225         "applies": false,
    226         "answer": false,
    227         "justification": "This study does not evaluate a pre-trained model's capability on any benchmark. It is an observational study examining the effects of Copilot adoption on developer behavior and code quality metrics."
    228       },
    229       "train_test_overlap_discussed": {
    230         "applies": false,
    231         "answer": false,
    232         "justification": "This study does not evaluate a pre-trained model on any benchmark. It analyzes naturally occurring developer behavior data."
    233       },
    234       "benchmark_contamination_addressed": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "This study does not evaluate a pre-trained model on any benchmark. It is a mining/observational study."
    238       }
    239     },
    240     "human_studies": {
    241       "pre_registered": {
    242         "applies": false,
    243         "answer": false,
    244         "justification": "No human participants were recruited. This is a mining study of publicly available GitHub repository data."
    245       },
    246       "irb_or_ethics_approval": {
    247         "applies": false,
    248         "answer": false,
    249         "justification": "No human participants. The study mines public GitHub data on repository-level and contributor-level activity."
    250       },
    251       "demographics_reported": {
    252         "applies": false,
    253         "answer": false,
    254         "justification": "No human participants were directly recruited. The study analyzes GitHub contributors' activity data, not demographics of recruited subjects."
    255       },
    256       "inclusion_exclusion_criteria": {
    257         "applies": false,
    258         "answer": false,
    259         "justification": "No human participants. The study uses inclusion/exclusion criteria for repositories and contributors (described in Section 3.2 and footnote 14), but these are data selection criteria, not human recruitment criteria."
    260       },
    261       "randomization_described": {
    262         "applies": false,
    263         "answer": false,
    264         "justification": "No human participants. The 'randomization' in this study comes from the natural experiment of Copilot's language endorsement, not from assignment of human subjects."
    265       },
    266       "blinding_described": {
    267         "applies": false,
    268         "answer": false,
    269         "justification": "No human participants or experimental conditions requiring blinding."
    270       },
    271       "attrition_reported": {
    272         "applies": false,
    273         "answer": false,
    274         "justification": "No human participants. Contributor filtering from 37,334 to 1,699 is described as data selection criteria."
    275       }
    276     },
    277     "cost_and_practicality": {
    278       "inference_cost_reported": {
    279         "applies": false,
    280         "answer": false,
    281         "justification": "This is an observational econometric study, not an AI system. There is no inference cost to report for the authors' methodology."
    282       },
    283       "compute_budget_stated": {
    284         "applies": false,
    285         "answer": false,
    286         "justification": "This is an econometric analysis, not a computationally intensive AI system. The compute requirements for running DiD regressions are trivial."
    287       }
    288     }
    289   },
    290   "claims": [
    291     {
    292       "claim": "Repositories using Copilot-endorsed languages experienced a 17.7% increase in lines of code added, 4.1% increase in commits, and 4.3% increase in PRs after Copilot's introduction.",
    293       "evidence": "Table 5: DiD coefficients of 0.163*** (code added), 0.04* (commits), and 0.042*** (PRs) with project and month fixed effects. N=66,120-66,168.",
    294       "supported": "strong"
    295     },
    296     {
    297       "claim": "PRs submitted to Copilot-endorsed repositories required 2.4% more rework, indicating increased technical debt.",
    298       "evidence": "Table 5: PR Rework coefficient of 0.024** (p<0.05) with PR controls, project FE, and month FE. N=66,168, Adj. R2=0.81.",
    299       "supported": "moderate"
    300     },
    301     {
    302       "claim": "Core contributors decreased their commit activity by 19% and increased PR reviews by 6.5% after Copilot's introduction.",
    303       "evidence": "Table 6 and Section 6.2: Core Contributor x Copilot interaction coefficient of -0.357*** for commits (exp(-0.357+0.142)-1 ≈ -19%) and 0.06* for PR Reviews. Figure 5 shows subgroup breakdowns. The PR review effect is significant only at the 10% level.",
    304       "supported": "moderate"
    305     },
    306     {
    307       "claim": "Peripheral contributors (bottom 25%) increased commit activity by 43.5% and submitted 17.7% more PRs.",
    308       "evidence": "Table 13: 0-25% subgroup coefficient of 0.3614*** (t=8.08, p<0.001) for commits. Table 14: 0-25% subgroup coefficient of 0.1630*** (t=5.15, p<0.001) for PRs. Both highly significant.",
    309       "supported": "strong"
    310     },
    311     {
    312       "claim": "The treatment effect on PR rework is robust to omitted variable bias, with an Oster delta of 7.82.",
    313       "evidence": "Table 9: Oster sensitivity analysis shows delta=7.82, meaning unobservables would need to be 7.82 times as influential as observables to explain away the result. Rmax set to 1.00.",
    314       "supported": "strong"
    315     },
    316     {
    317       "claim": "The parallel trends assumption holds for the DiD design.",
    318       "evidence": "Table 10 and Figure 4: All pre-treatment period coefficients (b12 through b2) are statistically insignificant (p-values ranging from 0.170 to 0.987), supporting the parallel trends assumption.",
    319       "supported": "strong"
    320     }
    321   ],
    322   "methodology_tags": [
    323     "observational"
    324   ],
    325   "key_findings": "This study uses a Difference-in-Differences design exploiting GitHub Copilot's staggered programming language endorsement to analyze its impact on Microsoft-owned OSS projects. While Copilot-endorsed repositories saw increased development productivity (17.7% more code added, 4.3% more PRs), they also experienced 2.4% more PR rework, indicating technical debt accumulation. The effects are heterogeneous: peripheral contributors (bottom 25%) increased commits by 43.5%, while core contributors (top 25%) decreased commits by 19% and increased PR reviews by 6.5%, suggesting a redistribution of effort from development to maintenance among experienced developers.",
    326   "red_flags": [
    327     {
    328       "flag": "Cannot observe individual Copilot usage",
    329       "detail": "The study cannot identify which individual contributors actually used Copilot. Treatment assignment is based on programming language of the repository, not verified tool usage. Contributors to Copilot-endorsed language repositories may not have used Copilot, and some control group members may have used other AI tools."
    330     },
    331     {
    332       "flag": "Overgeneralized title and claims",
    333       "detail": "The title claims 'AI-Assisted Programming Decreases the Productivity of Experienced Developers' but the study examines only Microsoft-owned OSS repositories during the Copilot technical preview period (June 2021 - July 2022) with a specific set of programming languages. The generalization to all AI-assisted programming and all experienced developers is not warranted."
    334     },
    335     {
    336       "flag": "No dedicated limitations section",
    337       "detail": "Despite strong econometric methodology, the paper lacks a dedicated limitations or threats-to-validity section. Key concerns such as the intent-to-treat nature of the analysis, potential confounds from other concurrent changes at Microsoft, and external validity are not systematically addressed."
    338     },
    339     {
    340       "flag": "Small effect size on key outcome",
    341       "detail": "The core technical debt claim rests on a 2.4% increase in PR rework, which is statistically significant at the 5% level but substantively small. The practical significance of this effect is not discussed. The paper focuses attention on the larger heterogeneous effects (19% decrease for core, 43.5% increase for peripheral) which are more striking but come from subgroup analyses."
    342     },
    343     {
    344       "flag": "No data or code released",
    345       "detail": "Despite using publicly available GitHub API data, neither the dataset nor the analysis code is released, making independent verification impossible."
    346     },
    347     {
    348       "flag": "Missing funding and conflict of interest disclosures",
    349       "detail": "The paper contains no funding disclosure, no competing interests statement, and no financial interests declaration. While the authors are at Tilburg University (not a direct conflict), the absence of any such disclosures is a transparency gap."
    350     }
    351   ],
    352   "cited_papers": [
    353     {
    354       "title": "The Impact of AI on Developer Productivity: Evidence from Github Copilot",
    355       "authors": ["Sida Peng", "Eirini Kalliamvakou", "Peter Cihon", "Mert Demirer"],
    356       "year": 2023,
    357       "arxiv_id": "2302.06590",
    358       "relevance": "Foundational RCT on Copilot productivity showing 55.8% faster task completion, directly motivates this study's investigation of secondary effects."
    359     },
    360     {
    361       "title": "The Impact of Large Language Models on Open-source Innovation: Evidence from GitHub Copilot",
    362       "authors": ["Doron Yeverechyahu", "Raveesh Mayya", "Gal Oestreicher-Singer"],
    363       "year": 2024,
    364       "arxiv_id": "2409.08379",
    365       "relevance": "Uses similar DiD design with Copilot language endorsement to study innovation effects; finds shift toward routine and incremental changes in OSS."
    366     },
    367     {
    368       "title": "The Impact of Generative AI on Collaborative Open-Source Software Development: Evidence from GitHub Copilot",
    369       "authors": ["Frank Song", "Ashish Agarwal", "Wen Wen"],
    370       "year": 2024,
    371       "arxiv_id": "2410.02091",
    372       "relevance": "Studies Copilot's impact on collaborative OSS development, finding increased contributions but also increased coordination time for code integration."
    373     },
    374     {
    375       "title": "GitHub Copilot AI Pair Programmer: Asset or Liability?",
    376       "authors": ["Arghavan Moradi Dakhel", "Vahid Majdinasab", "Amin Nikanjam", "Foutse Khomh", "Michel C. Desmarais", "Zhen Ming (Jack) Jiang"],
    377       "year": 2023,
    378       "relevance": "Evaluates quality of Copilot-generated code and cautions that novice programmers may place undue trust in AI-generated code."
    379     },
    380     {
    381       "title": "Generative AI and the Nature of Work",
    382       "authors": ["Manuel Hoffmann", "Sam Boysel", "Frank Nagle", "Sida Peng", "Kevin Xu"],
    383       "year": 2025,
    384       "relevance": "Shows GitHub Copilot access reallocates developers' effort toward core coding tasks and away from project management, complementary to this study's findings."
    385     },
    386     {
    387       "title": "Does AI-Assisted Coding Deliver? A Difference-in-Differences Study of Cursor's Impact on Software Projects",
    388       "authors": ["Han He", "Chris Miller", "Siddharth Agarwal", "Christian Kästner", "Bogdan Vasilescu"],
    389       "year": 2025,
    390       "arxiv_id": "2511.04427",
    391       "relevance": "Uses DiD design to study Cursor's impact on software projects, extending the causal analysis framework to another AI coding assistant."
    392     },
    393     {
    394       "title": "Generative AI at Work",
    395       "authors": ["Erik Brynjolfsson", "Danielle Li", "Lindsey R. Raymond"],
    396       "year": 2025,
    397       "relevance": "Foundational study on GenAI productivity effects in customer support, showing less-experienced workers benefit more -- a pattern this paper finds in OSS."
    398     },
    399     {
    400       "title": "Security Vulnerabilities in AI-Generated Code: A Large-Scale Analysis of Public GitHub Repositories",
    401       "authors": ["Marcel Schreiber", "Paul Tippe"],
    402       "year": 2025,
    403       "relevance": "Documents security vulnerabilities in AI-generated code and technical debt from latent security weaknesses, directly relevant to code quality concerns."
    404     },
    405     {
    406       "title": "Good Vibrations? A Qualitative Study of Co-Creation, Communication, Flow, and Trust in Vibe Coding",
    407       "authors": ["Vlada Pimenova", "Sarah Fakhoury", "Christian Bird", "Margaret-Anne Storey", "Madeline Endres"],
    408       "year": 2025,
    409       "arxiv_id": "2509.12491",
    410       "relevance": "Qualitative study of vibe coding practices documenting speed-quality tradeoffs and maintenance challenges that this study quantifies."
    411     },
    412     {
    413       "title": "When Combinations of Humans and AI are Useful: A Systematic Review and Meta-Analysis",
    414       "authors": ["Michael Vaccaro", "Abdullah Almaatouq", "Thomas Malone"],
    415       "year": 2024,
    416       "relevance": "Meta-analytic evidence on human-AI collaboration effectiveness, relevant to understanding AI's role in development teams."
    417     },
    418     {
    419       "title": "Technical Debt and Firm Performance",
    420       "authors": ["Rajiv D. Banker", "Yi Liang", "Narayan Ramasubbu"],
    421       "year": 2021,
    422       "doi": "10.1287/mnsc.2019.3542",
    423       "relevance": "Foundational study on economic consequences of technical debt that this paper extends to the AI-assisted development context."
    424     },
    425     {
    426       "title": "Vibe Coding in Practice: Motivations, Challenges, and a Future Outlook – a Grey Literature Review",
    427       "authors": ["Ahmed Fawzy", "Ahmad Tahir", "Kelly Blincoe"],
    428       "year": 2025,
    429       "arxiv_id": "2510.00328",
    430       "relevance": "Documents the speed-quality paradox in vibe coding, finding that rapid AI-assisted development creates maintenance demands."
    431     }
    432   ]
    433 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs