scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (29893B)
      1 {
      2   "paper": {
      3     "title": "Adoption of Generative Artificial Intelligence in the German Software Engineering Industry: An Empirical Study",
      4     "authors": [
      5       "Ludwig Felder",
      6       "Tobias Eisenreich",
      7       "Mahsa Fischer",
      8       "Stefan Wagner",
      9       "Chunyang Chen"
     10     ],
     11     "year": 2026,
     12     "venue": "ACM (preprint, 2026)",
     13     "arxiv_id": "2601.16700",
     14     "doi": "10.1145/nnnnnnn.nnnnnnn"
     15   },
     16   "checklist": {
     17     "artifacts": {
     18       "code_released": {
     19         "applies": true,
     20         "answer": false,
     21         "justification": "No GitHub link, Zenodo archive, or any repository URL is provided in the paper. The survey instrument is described but not released as a downloadable artifact."
     22       },
     23       "data_released": {
     24         "applies": true,
     25         "answer": false,
     26         "justification": "The survey data (109 responses collected via LimeSurvey) and interview transcripts are not released. The paper states data collection was done anonymously, but no public dataset link is provided."
     27       },
     28       "environment_specified": {
     29         "applies": true,
     30         "answer": false,
     31         "justification": "The study performed computational analyses (k-means clustering, chi-squared tests, Spearman correlations) but does not specify the statistical software, package versions, or environment used. A survey study with quantitative analysis can and should provide environment specifications for reproducibility."
     32       },
     33       "reproduction_instructions": {
     34         "applies": true,
     35         "answer": false,
     36         "justification": "No step-by-step instructions are provided for reproducing the study. The methodology is described at a high level (LimeSurvey, semi-structured interviews), but no replication package or detailed protocol document is linked."
     37       }
     38     },
     39     "statistical_methodology": {
     40       "confidence_intervals_or_error_bars": {
     41         "applies": true,
     42         "answer": false,
     43         "justification": "The paper reports means on 5-point scales (e.g., 'Mean=4.0', 'Mean=3.9') and percentages without any confidence intervals, error bars, or uncertainty estimates around those point estimates."
     44       },
     45       "significance_tests": {
     46         "applies": true,
     47         "answer": true,
     48         "justification": "Chi-squared tests are reported for the relationship between experience level and prompting strategy (chi2(2, N=109) = 10.84, p_adj = 0.022) and for k-means cluster demographics (chi2(5, N=109) = 13.91, p_adj = 0.016 and chi2(4, N=109) = 11.30, p_adj = 0.023). Spearman correlations are reported with p-values (e.g., rho=-0.33, p<0.001)."
     49       },
     50       "effect_sizes_reported": {
     51         "applies": true,
     52         "answer": true,
     53         "justification": "Correlation coefficients (rho values) are reported throughout Section 5, providing effect sizes for key relationships. Chi-squared statistics with N are also given. Percentage breakdowns with counts provide practical magnitude context."
     54       },
     55       "sample_size_justified": {
     56         "applies": true,
     57         "answer": false,
     58         "justification": "The sample size of n=109 survey respondents is not justified by power analysis or formal reasoning. The paper does not explain why 109 is sufficient for the subgroup analyses performed (e.g., experience strata with n=22 junior developers)."
     59       },
     60       "variance_reported": {
     61         "applies": true,
     62         "answer": false,
     63         "justification": "The paper reports means on 5-point scales (e.g., 'Mean=4.0') without standard deviations. Percentage distributions are shown in figures, but no variance or spread measure is reported for the mean scores."
     64       }
     65     },
     66     "evaluation_design": {
     67       "baselines_included": {
     68         "applies": true,
     69         "answer": true,
     70         "justification": "The paper explicitly compares results to the 2025 Stack Overflow Developer Survey (Section 4 'Global Context'), providing a meaningful external reference point for the German sample's patterns."
     71       },
     72       "baselines_contemporary": {
     73         "applies": true,
     74         "answer": true,
     75         "justification": "The Stack Overflow Developer Survey used for comparison is the 2025 edition, which is contemporary with the study's 2025 data collection period."
     76       },
     77       "ablation_study": {
     78         "applies": false,
     79         "answer": false,
     80         "justification": "This is a survey/interview study, not a system with components to ablate. Ablation is structurally inapplicable."
     81       },
     82       "multiple_metrics": {
     83         "applies": true,
     84         "answer": true,
     85         "justification": "The study uses multiple measurement approaches: adoption rates, usage frequency across tasks (7 tasks), perceived effectiveness ratings (10 prompting strategies), challenge severity ratings (14 challenges), and impact on 5 workflow dimensions."
     86       },
     87       "human_evaluation": {
     88         "applies": false,
     89         "answer": false,
     90         "justification": "This is a human-subjects survey study itself; a separate human evaluation of outputs is not applicable to this study type."
     91       },
     92       "held_out_test_set": {
     93         "applies": false,
     94         "answer": false,
     95         "justification": "This is a survey study, not an ML/benchmark evaluation study. The concept of a held-out test set does not apply."
     96       },
     97       "per_category_breakdown": {
     98         "applies": true,
     99         "answer": true,
    100         "justification": "The paper provides extensive breakdowns: usage by company size (Figure 10), prompting effectiveness by experience level (Figure 8), tool adoption rates by tool type, and challenge ratings per challenge item."
    101       },
    102       "failure_cases_discussed": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "The paper discusses failure modes including hallucinations, context wall limitations, and cases where prompting 'ends up taking longer than writing code.' Section 5.4 identifies specific failure patterns such as spatial context blindness and temporal context decay."
    106       },
    107       "negative_results_reported": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "The paper reports several negative findings: bug fixing efficiency improved for only 40% of respondents (39% reported no change), advanced prompting strategies such as role prompting showed poor effectiveness (Mean=2.9), and experienced developers showed greater skepticism toward AI tools."
    111       }
    112     },
    113     "claims_and_evidence": {
    114       "abstract_claims_supported": {
    115         "applies": true,
    116         "answer": true,
    117         "justification": "Abstract claims are supported by results: 'experience level moderates the perceived benefits' (supported by chi-squared tests in Section 5.1), 'organizational size affects tool selection and intensity' (supported by Figure 10), and 'limited awareness of the project context is the most significant barrier' (supported by Figure 6 showing it as highest-rated customization challenge)."
    118       },
    119       "causal_claims_justified": {
    120         "applies": true,
    121         "answer": false,
    122         "justification": "The paper makes causal-sounding claims such as 'professional experience seems to have an impact on developers' interaction with AI' and discusses how experience 'moderates' effectiveness. These causal interpretations are drawn from observational survey data without randomization or causal identification. The paper does acknowledge in Section 5.4 that 'causality cannot be inferred from these observed correlations,' but this disclaimer does not cover all causal language used throughout."
    123       },
    124       "generalization_bounded": {
    125         "applies": true,
    126         "answer": true,
    127         "justification": "The paper explicitly scopes findings to Germany and acknowledges the convenience sample limitation in Section 7 (Threats to Validity). The title, abstract, and conclusions consistently reference 'German software engineers' rather than making broad universal claims."
    128       },
    129       "alternative_explanations_discussed": {
    130         "applies": true,
    131         "answer": true,
    132         "justification": "Section 5.5 explicitly lists multiple alternative explanations for the Proficiency Cycle pattern, including lower baseline performance, task type differences, self-reinforcing proficiency cycles, and response bias. Section 5.4 explicitly states the correlation between workflow speed and distrust 'is consistent with two interpretations.'"
    133       }
    134     },
    135     "setup_transparency": {
    136       "model_versions_specified": {
    137         "applies": false,
    138         "answer": false,
    139         "justification": "This is a survey/interview study that does not itself use LLMs as a methodology tool. The LLMs evaluated are tools used by the surveyed practitioners, not experimental variables controlled by the researchers."
    140       },
    141       "prompts_provided": {
    142         "applies": false,
    143         "answer": false,
    144         "justification": "This is a survey/interview study. The study does not use LLM prompting as part of its methodology; it asks human participants about their prompting practices."
    145       },
    146       "hyperparameters_reported": {
    147         "applies": false,
    148         "answer": false,
    149         "justification": "This is a survey/interview study with no LLM hyperparameters to report. The k-means clustering uses k=2, which is stated."
    150       },
    151       "scaffolding_described": {
    152         "applies": false,
    153         "answer": false,
    154         "justification": "No agentic scaffolding is used in the study's methodology."
    155       },
    156       "data_preprocessing_documented": {
    157         "applies": true,
    158         "answer": true,
    159         "justification": "Section 2.2 describes the filtering: 210 initial responses, 101 excluded for being incomplete (most with no questions answered), leaving n=109 complete submissions. The exclusion criterion (incomplete responses) is stated. Interview transcription and the open coding process with inter-rater agreement are described in Section 2.1."
    160       }
    161     },
    162     "limitations_and_scope": {
    163       "limitations_section_present": {
    164         "applies": true,
    165         "answer": true,
    166         "justification": "Section 7 'Threats to Validity' is a dedicated section discussing internal validity, external validity, and construct validity concerns."
    167       },
    168       "threats_to_validity_specific": {
    169         "applies": true,
    170         "answer": true,
    171         "justification": "Section 7 identifies specific threats: fixed question order may cause carryover effects (internal validity), convenience sampling via LinkedIn may not represent the broader population (external validity), and self-reported frequency/effectiveness measures introduce recall bias (construct validity). These are specific to this study's design."
    172       },
    173       "scope_boundaries_stated": {
    174         "applies": true,
    175         "answer": true,
    176         "justification": "Section 7 explicitly states findings may not generalize beyond Germany and 2025 usage patterns ('The findings were sampled in 2025 and may not generalize to future states'). The paper also explicitly notes the cross-sectional design prevents causal inference, directing future work to longitudinal designs."
    177       }
    178     },
    179     "data_integrity": {
    180       "raw_data_available": {
    181         "applies": true,
    182         "answer": false,
    183         "justification": "Raw survey response data and interview transcripts are not publicly available. No data repository link is provided. Only summary statistics and figures are presented."
    184       },
    185       "data_collection_described": {
    186         "applies": true,
    187         "answer": true,
    188         "justification": "Section 2.2 describes the data collection: LimeSurvey platform, distribution via personal contacts and LinkedIn, collection period April 15 to August 20, 2025, voluntary participation with no incentives, anonymous collection. The survey structure with six blocks is described."
    189       },
    190       "recruitment_methods_described": {
    191         "applies": true,
    192         "answer": true,
    193         "justification": "Section 2.2 states participants were recruited through 'personal contacts and on social media platforms like LinkedIn,' explicitly noting this is convenience sampling. Section 7 acknowledges this as a threat to validity (potential selection bias)."
    194       },
    195       "data_pipeline_documented": {
    196         "applies": true,
    197         "answer": true,
    198         "justification": "The pipeline is documented: 210 raw responses → 101 excluded (incomplete) → 109 complete submissions analyzed. The open coding process for qualitative interviews (two researchers independently coded, then met to resolve discrepancies) is described in Section 2.1."
    199       }
    200     },
    201     "conflicts_of_interest": {
    202       "funding_disclosed": {
    203         "applies": true,
    204         "answer": false,
    205         "justification": "No funding disclosure or acknowledgments section is present in the paper. There is no mention of any grants, corporate sponsors, or funding agencies."
    206       },
    207       "affiliations_disclosed": {
    208         "applies": true,
    209         "answer": true,
    210         "justification": "Author affiliations are listed on the title page: Ludwig Felder, Tobias Eisenreich, Stefan Wagner, and Chunyang Chen at Technical University of Munich (Heilbronn campus); Mahsa Fischer at Heilbronn University of Applied Science. No commercial affiliations are indicated."
    211       },
    212       "funder_independent_of_outcome": {
    213         "applies": false,
    214         "answer": false,
    215         "justification": "No funding source is disclosed, so funder independence cannot be assessed. Appears to be unfunded academic research."
    216       },
    217       "financial_interests_declared": {
    218         "applies": true,
    219         "answer": false,
    220         "justification": "There is no competing interests statement, patent disclosure, or financial interests declaration in the paper."
    221       }
    222     },
    223     "contamination": {
    224       "training_cutoff_stated": {
    225         "applies": false,
    226         "answer": false,
    227         "justification": "This is a survey/interview study that does not evaluate a pre-trained model's capability on any benchmark. Contamination is not applicable."
    228       },
    229       "train_test_overlap_discussed": {
    230         "applies": false,
    231         "answer": false,
    232         "justification": "This is a survey/interview study. No benchmark evaluation is performed, so train/test overlap is not applicable."
    233       },
    234       "benchmark_contamination_addressed": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "This is a survey/interview study with no benchmark evaluation, so benchmark contamination is not applicable."
    238       }
    239     },
    240     "human_studies": {
    241       "pre_registered": {
    242         "applies": true,
    243         "answer": false,
    244         "justification": "No pre-registration link (OSF, AsPredicted, etc.) is mentioned in the paper. The study was not pre-registered."
    245       },
    246       "irb_or_ethics_approval": {
    247         "applies": true,
    248         "answer": false,
    249         "justification": "No IRB or ethics board approval is mentioned in the paper, despite collecting data from 18 interview participants and 109 survey respondents."
    250       },
    251       "demographics_reported": {
    252         "applies": true,
    253         "answer": true,
    254         "justification": "Section 2.2 reports detailed demographics: role distribution (62% developers, 13% team leads/managers, 8% architects/requirements engineers), experience (mean 12.1 years, median 10 years, 26% senior 15+ years, 54% mid-level, 20% junior), company size breakdown, education (51% Master's, 22% Bachelor's, 14% Doctorate), and geographic distribution (88% Germany)."
    255       },
    256       "inclusion_exclusion_criteria": {
    257         "applies": true,
    258         "answer": false,
    259         "justification": "No formal inclusion/exclusion criteria are stated for survey participation. The paper only states participants were software engineers working primarily in Germany, but no screening process or explicit eligibility criteria are described. The 101 excluded responses were excluded for being incomplete, not for failing inclusion criteria."
    260       },
    261       "randomization_described": {
    262         "applies": false,
    263         "answer": false,
    264         "justification": "This is a cross-sectional survey study, not an experimental study with condition assignment. Randomization to conditions is not applicable."
    265       },
    266       "blinding_described": {
    267         "applies": false,
    268         "answer": false,
    269         "justification": "This is a cross-sectional survey, not an experiment with treatment/control conditions. Blinding is not applicable."
    270       },
    271       "attrition_reported": {
    272         "applies": true,
    273         "answer": true,
    274         "justification": "Section 2.2 reports: 210 initial responses recorded, 101 excluded (most with no questions answered at all), leaving n=109 complete submissions. The attrition rate (48%) and primary reason (no questions answered) are stated."
    275       }
    276     },
    277     "cost_and_practicality": {
    278       "inference_cost_reported": {
    279         "applies": false,
    280         "answer": false,
    281         "justification": "This is a survey/interview study with no LLM inference performed as part of the methodology."
    282       },
    283       "compute_budget_stated": {
    284         "applies": false,
    285         "answer": false,
    286         "justification": "This is a survey/interview study requiring no significant computational budget to report."
    287       }
    288     }
    289   },
    290   "claims": [
    291     {
    292       "claim": "Professional experience moderates perceived effectiveness of AI tools: junior engineers (< 5 years) rate specific instructions as 78% effective vs. only 39% for seniors (>15 years), a statistically significant difference.",
    293       "evidence": "Section 5.1 reports chi2(2, N=109) = 10.84, p_adj = 0.022 from cross-tabulation of experience level by perceived prompting strategy effectiveness. Figure 8 shows the stratified percentages.",
    294       "supported": "strong"
    295     },
    296     {
    297       "claim": "The 'Context Wall' (inability to understand full project context) is the most significant barrier to effective GenAI use, with 51% of respondents rating it as very or extremely challenging.",
    298       "evidence": "Section 3.5 and Figure 6 show 'Understanding the entire project context' as the highest-rated integration challenge. Section 5.4 develops this into the 'Context Wall' pattern with Mean=3.3 for limited codebase context awareness.",
    299       "supported": "strong"
    300     },
    301     {
    302       "claim": "AI hallucinations are the single most significant challenge, rated very/extremely challenging by 51% of respondents with a mean severity of 3.4.",
    303       "evidence": "Section 3.4 and Figure 5 report hallucinations with Mean=3.4, the highest of 14 challenges, with 51% rating it very/extremely challenging.",
    304       "supported": "strong"
    305     },
    306     {
    307       "claim": "ChatGPT is adopted by approximately 90% of German software engineer respondents, far exceeding other tools.",
    308       "evidence": "Section 3.1 and Figure 2 report 89.9% (n=98) adoption for ChatGPT, with GitHub Copilot second at 55.0% (n=60).",
    309       "supported": "strong"
    310     },
    311     {
    312       "claim": "Power users (high-frequency AI users) tend to have lower levels of education and work in smaller companies, suggesting benefits are not evenly distributed.",
    313       "evidence": "Section 5.5 reports k-means clustering (k=2) revealed two clusters, with power users (C1, n=46) showing lower education (chi2(5, N=109)=13.91, p_adj=0.016) and smaller company affiliation (chi2(4, N=109)=11.30, p_adj=0.023).",
    314       "supported": "moderate"
    315     },
    316     {
    317       "claim": "76% of respondents report an increase in individual workflow speed due to GenAI tools.",
    318       "evidence": "Section 3.6 and Figure 7 report 76% perceive increased individual workflow speed, and 73% report faster learning of new topics.",
    319       "supported": "moderate"
    320     },
    321     {
    322       "claim": "Effective AI interaction depends more on clear communication (providing context, being specific) than on advanced prompting techniques like role prompting.",
    323       "evidence": "Section 5.2 shows Spearman correlations: 'Being specific' correlates with workflow speed (rho=0.33) and bug fixing (rho=0.39); 'Giving context' correlates with bug fixing (rho=0.39). Role prompting shows weaker associations. Figure 9 presents the full correlation matrix.",
    324       "supported": "moderate"
    325     }
    326   ],
    327   "methodology_tags": [
    328     "qualitative",
    329     "observational"
    330   ],
    331   "key_findings": "This mixed-methods study of 18 interviews and 109 survey responses from German software engineers finds that while AI tools (especially ChatGPT at 90% adoption) are widely used, effectiveness varies substantially by experience level: junior engineers perceive specific prompting instructions as highly effective (78%) while senior engineers are more skeptical (39%), a statistically significant difference. The primary barrier to effective use is the 'Context Wall' — AI systems' inability to understand full project context — identified as very/extremely challenging by 51% of respondents. Organizational size shapes tool selection and usage intensity, with smaller enterprises showing higher code generation frequency and bimodal self-hosted model adoption between medium and very large companies. The paper concludes that providing clear context and specific instructions outperforms advanced prompting techniques like role prompting, and that productivity benefits are disproportionately concentrated among heavy adopters working in smaller organizations.",
    332   "red_flags": [
    333     {
    334       "flag": "Convenience sampling via LinkedIn/personal contacts",
    335       "detail": "Participants recruited through personal contacts and LinkedIn represent a self-selected, highly educated cohort (51% Master's, 14% Doctorate) with a mean experience of 12.1 years. This senior-skewed, highly educated sample likely does not represent the broader software engineering population, limiting external validity of claims about 'German software engineers' generally."
    336     },
    337     {
    338       "flag": "No pre-registration",
    339       "detail": "The study is not pre-registered, meaning hypotheses and analysis plans could have been adjusted after seeing the data. Multiple statistical tests are run across many variables, and the only correction for multiple comparisons is the use of adjusted p-values for the chi-squared tests."
    340     },
    341     {
    342       "flag": "No confidence intervals reported for means",
    343       "detail": "The paper extensively reports means on 5-point scales (e.g., hallucinations Mean=3.4, context awareness Mean=3.3) without standard deviations or confidence intervals, making it impossible to assess the uncertainty or overlap between ratings."
    344     },
    345     {
    346       "flag": "No IRB or ethics approval mentioned",
    347       "detail": "The study collects interview data from 18 practitioners and survey data from 109 participants but does not mention ethics board approval or informed consent procedures beyond stating participation was 'strictly voluntary' and 'anonymous.'"
    348     },
    349     {
    350       "flag": "No funding disclosure",
    351       "detail": "The paper contains no acknowledgments section and no funding disclosure, which is unusual for an ACM publication. The source of financial support for the study is not stated anywhere."
    352     },
    353     {
    354       "flag": "High attrition rate",
    355       "detail": "101 of 210 responses (48%) were excluded. While the stated reason is incompleteness, this high dropout rate combined with convenience sampling raises questions about systematic differences between completers and non-completers."
    356     },
    357     {
    358       "flag": "Interview-to-survey sequential design with potential researcher bias",
    359       "detail": "Themes from qualitative interviews directly guided the survey questions. While methodologically intentional (sequential exploratory design), this risks confirmation bias where the survey instrument is structured to confirm interview findings rather than test them."
    360     }
    361   ],
    362   "cited_papers": [
    363     {
    364       "title": "Grounded Copilot: How Programmers Interact with Code-Generating Models",
    365       "authors": [
    366         "Shraddha Barke",
    367         "Michael B. James",
    368         "Nadia Polikarpova"
    369       ],
    370       "year": 2022,
    371       "arxiv_id": "2206.15000",
    372       "relevance": "Establishes bimodal model of developer-AI interaction (acceleration vs. exploration modes), directly relevant to understanding how developers use LLM coding assistants."
    373     },
    374     {
    375       "title": "Measuring the Impact of Early-2025 AI on Experienced Open-Source Developer Productivity",
    376       "authors": [
    377         "Joel Becker",
    378         "Nate Rush",
    379         "Elizabeth Barnes",
    380         "David Rein"
    381       ],
    382       "year": 2025,
    383       "arxiv_id": "2507.09089",
    384       "relevance": "Counterpoint study finding experienced open-source developers using AI actually increased completion time by 19%, directly relevant to the experience-moderates-AI-effectiveness question."
    385     },
    386     {
    387       "title": "The Impact of AI Tool on Engineering at ANZ Bank: An Empirical Study on GitHub Copilot within Corporate Environment",
    388       "authors": [
    389         "Sayan Chatterjee",
    390         "Ching Louis Liu",
    391         "Gareth Rowland",
    392         "Tim Hogarth"
    393       ],
    394       "year": 2024,
    395       "arxiv_id": "2402.05636",
    396       "relevance": "Empirical study of GitHub Copilot adoption in an enterprise setting, directly relevant to organizational adoption of AI coding assistants."
    397     },
    398     {
    399       "title": "The Effects of Generative AI on High-Skilled Work: Evidence from Three Field Experiments with Software Developers",
    400       "authors": [
    401         "Kevin Zheyuan Cui",
    402         "Mert Demirer",
    403         "Sonia Jaffe",
    404         "Leon Musolff",
    405         "Sida Peng",
    406         "Tobias Salz"
    407       ],
    408       "relevance": "Large-scale field experiment (4,867 developers at Microsoft, Accenture, Fortune 100) showing 26% task completion increase with AI tools, with experience moderating effects."
    409     },
    410     {
    411       "title": "An Industry Case Study on Adoption of AI-based Programming Assistants",
    412       "authors": [
    413         "Nicole Davila",
    414         "Igor Wiese",
    415         "Igor Steinmacher"
    416       ],
    417       "year": 2024,
    418       "doi": "10.1145/3639477.3643648",
    419       "relevance": "ICSE-SEIP 2024 case study on AI programming assistant adoption in industry, relevant to the survey's industrial adoption focus."
    420     },
    421     {
    422       "title": "Beyond Code Generation: An Observational Study of ChatGPT Usage in Software Engineering Practice",
    423       "authors": [
    424         "Ranim Khojah",
    425         "Mazen Mohamad",
    426         "Philipp Leitner",
    427         "Francisco Gomes de Oliveira Neto"
    428       ],
    429       "year": 2024,
    430       "doi": "10.1145/3660788",
    431       "relevance": "Observational study finding practitioners use ChatGPT more for guidance/learning (62%) than code generation, highly relevant to understanding developer-AI interaction patterns."
    432     },
    433     {
    434       "title": "A Large-Scale Survey on the Usability of AI Programming Assistants: Successes and Challenges",
    435       "authors": [
    436         "Jenny T. Liang",
    437         "Chenyang Yang",
    438         "Brad A. Myers"
    439       ],
    440       "year": 2024,
    441       "doi": "10.1145/3597503.3608128",
    442       "relevance": "Survey of 410 developers on AI assistant usability, directly relevant to the survey's scope on adoption patterns and challenges."
    443     },
    444     {
    445       "title": "The Impact of AI on Developer Productivity: Evidence from GitHub Copilot",
    446       "authors": [
    447         "Sida Peng",
    448         "Eirini Kalliamvakou",
    449         "Peter Cihon",
    450         "Mert Demirer"
    451       ],
    452       "year": 2023,
    453       "arxiv_id": "2302.06590",
    454       "relevance": "Randomized experiment with 95 developers showing 56% task completion speedup with Copilot, a key productivity baseline study for AI-assisted software development."
    455     },
    456     {
    457       "title": "Navigating the Complexity of Generative AI Adoption in Software Engineering",
    458       "authors": [
    459         "Daniel Russo"
    460       ],
    461       "year": 2024,
    462       "doi": "10.1145/3652154",
    463       "relevance": "Survey study on GenAI adoption patterns in software engineering, directly related to the study's focus on AI adoption dynamics."
    464     },
    465     {
    466       "title": "Between Policy and Practice: GenAI Adoption in Agile Software Development Teams",
    467       "authors": [
    468         "Michael Neumann",
    469         "Lasse Bischof",
    470         "Nic Elias Hinz"
    471       ],
    472       "year": 2026,
    473       "arxiv_id": "2601.07051",
    474       "relevance": "Qualitative research on regulatory pressures in German organizations affecting AI adoption, used to support the paper's contextual framing."
    475     },
    476     {
    477       "title": "AI Tool Use and Adoption in Software Development by Individuals and Organizations: A Grounded Theory Study",
    478       "authors": [
    479         "Ze Shi Li",
    480         "Nowshin Nawar Arony",
    481         "Ahmed Musa Awon",
    482         "Daniela Damian",
    483         "Bowen Xu"
    484       ],
    485       "year": 2024,
    486       "arxiv_id": "2406.17325",
    487       "relevance": "Grounded theory study on AI tool adoption factors in software development, directly relevant to the survey's scope."
    488     },
    489     {
    490       "title": "Examining the Use and Impact of an AI Code Assistant on Developer Productivity and Experience in the Enterprise",
    491       "authors": [
    492         "Justin D. Weisz",
    493         "Shraddha Vijay Kumar",
    494         "Michael Muller"
    495       ],
    496       "year": 2025,
    497       "doi": "10.1145/3706599.3706670",
    498       "relevance": "IBM enterprise study finding 43% of users felt less effective with AI coding tools, providing important counterpoint data on enterprise adoption barriers."
    499     },
    500     {
    501       "title": "Human-AI Collaboration in Software Development: A Mixed-Methods Study of Developers' Use of GitHub Copilot and ChatGPT",
    502       "authors": [
    503         "Viktoria Stray",
    504         "Astri Barbala",
    505         "Viggo Tellefsen Wivestad"
    506       ],
    507       "year": 2025,
    508       "doi": "10.1145/3696630.3730566",
    509       "relevance": "Mixed-methods study of Copilot and ChatGPT use with workflow compatibility and experience as key adoption factors, directly relevant to this paper's themes."
    510     }
    511   ]
    512 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs