scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (23866B)
      1 {
      2   "paper": {
      3     "title": "AI in Software Engineering: Perceived Roles and Their Impact on Adoption",
      4     "authors": ["Ilya Zakharov", "Ekaterina Koshchenko", "Agnia Sergeyuk"],
      5     "year": 2025,
      6     "venue": "FSE Companion '25",
      7     "arxiv_id": "2504.20329",
      8     "doi": "10.1145/3696630.3730563"
      9   },
     10   "checklist": {
     11     "artifacts": {
     12       "code_released": {
     13         "applies": true,
     14         "answer": false,
     15         "justification": "No source code or analysis scripts are released. The paper mentions a survey questionnaire on Zenodo (DOI: 10.5281/zenodo.14973853) but no analysis code repository is provided."
     16       },
     17       "data_released": {
     18         "applies": true,
     19         "answer": false,
     20         "justification": "The paper states 'All anonymized data analyzed in this study is available upon request' (Section 4), which counts as NO per schema rules. The survey instrument is available on Zenodo, but the underlying response data is not publicly released."
     21       },
     22       "environment_specified": {
     23         "applies": true,
     24         "answer": false,
     25         "justification": "No environment specification is provided. The paper mentions using Python packages 'factor-analyzer' and SciPy but does not provide version details for the environment, requirements.txt, or any reproduction environment setup."
     26       },
     27       "reproduction_instructions": {
     28         "applies": true,
     29         "answer": false,
     30         "justification": "No step-by-step reproduction instructions are provided. The analysis methods are described at a high level (factor analysis, Pearson correlations) but there are no scripts, commands, or detailed reproduction steps."
     31       }
     32     },
     33     "statistical_methodology": {
     34       "confidence_intervals_or_error_bars": {
     35         "applies": true,
     36         "answer": false,
     37         "justification": "No confidence intervals or error bars are reported. Table 1 reports correlation coefficients with significance levels (p-value stars) but no confidence intervals for the correlations."
     38       },
     39       "significance_tests": {
     40         "applies": true,
     41         "answer": true,
     42         "justification": "Statistical significance is reported for Pearson correlation coefficients in Table 1, with significance levels marked as *** p<0.001, ** p<0.01, * p<0.05."
     43       },
     44       "effect_sizes_reported": {
     45         "applies": true,
     46         "answer": true,
     47         "justification": "Pearson correlation coefficients (r values) are reported in Table 1 (e.g., r=0.59, r=0.41, r=0.34 for PU correlations), which serve as effect sizes. Factor loadings are also reported with specific values (e.g., 'problem solver' loading=0.599, 'advisor' loading=0.662)."
     48       },
     49       "sample_size_justified": {
     50         "applies": true,
     51         "answer": false,
     52         "justification": "No justification for the sample size of N=102 survey participants or N=38 interview participants is provided. No power analysis is discussed. For factor analysis with 14 role variables, N=102 is arguably on the lower end, but this is not addressed."
     53       },
     54       "variance_reported": {
     55         "applies": true,
     56         "answer": false,
     57         "justification": "No variance, standard deviations, or spread measures are reported for the TAM scale scores (PU, PEU) or role counts. Only correlation coefficients and factor loadings are presented."
     58       }
     59     },
     60     "evaluation_design": {
     61       "baselines_included": {
     62         "applies": true,
     63         "answer": false,
     64         "justification": "No baselines or comparisons to prior work measuring similar constructs are included. The paper does not compare its correlation findings against prior studies of role attribution or TAM."
     65       },
     66       "baselines_contemporary": {
     67         "applies": true,
     68         "answer": false,
     69         "justification": "No baselines are included at all, so contemporaneity cannot be assessed."
     70       },
     71       "ablation_study": {
     72         "applies": false,
     73         "answer": false,
     74         "justification": "This is a survey/interview study, not a system with components to ablate."
     75       },
     76       "multiple_metrics": {
     77         "applies": true,
     78         "answer": true,
     79         "justification": "Multiple outcome variables are used: Perceived Usefulness (PU), Perceived Ease of Use (PEU), and number of AI tools tried. Factor analysis also extracts two factors (Expert Roles, Support Roles)."
     80       },
     81       "human_evaluation": {
     82         "applies": false,
     83         "answer": false,
     84         "justification": "This paper does not produce system outputs that need human evaluation. It is a survey study about user perceptions, not an evaluation of an AI system's outputs."
     85       },
     86       "held_out_test_set": {
     87         "applies": false,
     88         "answer": false,
     89         "justification": "This is a survey study, not a machine learning or prediction task. There is no test set concept applicable here."
     90       },
     91       "per_category_breakdown": {
     92         "applies": true,
     93         "answer": true,
     94         "justification": "Per-role frequency counts are reported (e.g., assistant n=70, tool n=69), and per-factor breakdowns are provided through factor analysis (Expert Roles vs. Support Roles) with individual role loadings."
     95       },
     96       "failure_cases_discussed": {
     97         "applies": true,
     98         "answer": false,
     99         "justification": "No failure cases or limitations of the role attribution framework are discussed. The paper does not examine cases where the model breaks down or where role attribution does not predict acceptance."
    100       },
    101       "negative_results_reported": {
    102         "applies": true,
    103         "answer": true,
    104         "justification": "The paper reports that coding experience did not significantly correlate with any of the other variables (Table 1, e.g., r=-0.07, -0.20, 0.18, -0.05, -0.11, 0.18, all non-significant), which is a null/negative result."
    105       }
    106     },
    107     "claims_and_evidence": {
    108       "abstract_claims_supported": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "The abstract claims two Mental Models (tool vs. teammate), two factor groupings (Support Roles, Expert Roles), and positive correlation between role count and PU/PEU. All are supported by the qualitative analysis (Section 4.1), factor analysis (Section 4.2), and Table 1."
    112       },
    113       "causal_claims_justified": {
    114         "applies": true,
    115         "answer": false,
    116         "justification": "The abstract states that 'diverse conceptualizations enhance AI adoption,' which is a causal claim ('enhance'). The evidence is correlational (Pearson r from a cross-sectional survey). The study design does not support causal inference — it cannot distinguish whether assigning more roles causes higher acceptance or vice versa."
    117       },
    118       "generalization_bounded": {
    119         "applies": true,
    120         "answer": false,
    121         "justification": "The paper makes broad claims about 'developers' and 'AI4SE tools' generally, but the sample is drawn from a JetBrains user panel — people who consented to participate in JetBrains user studies. This is a convenience sample from a single company's ecosystem, but the paper does not bound its generalizations to this population."
    122       },
    123       "alternative_explanations_discussed": {
    124         "applies": true,
    125         "answer": false,
    126         "justification": "No alternative explanations for the observed correlations are discussed. For example, the correlation between number of roles and PU/PEU could be driven by a confound such as general AI enthusiasm or engagement level, but this possibility is not addressed."
    127       }
    128     },
    129     "setup_transparency": {
    130       "model_versions_specified": {
    131         "applies": false,
    132         "answer": false,
    133         "justification": "This paper does not use or evaluate any AI models. It is a survey study about developer perceptions of AI tools in general."
    134       },
    135       "prompts_provided": {
    136         "applies": false,
    137         "answer": false,
    138         "justification": "No LLM prompting is used in this study. It is a human survey and interview study."
    139       },
    140       "hyperparameters_reported": {
    141         "applies": true,
    142         "answer": true,
    143         "justification": "Factor analysis parameters are reported: varimax rotation, factor loading threshold of >0.4 (Section 3). The Python packages used (factor-analyzer, SciPy) are named."
    144       },
    145       "scaffolding_described": {
    146         "applies": false,
    147         "answer": false,
    148         "justification": "No agentic scaffolding is used. This is a survey/interview study."
    149       },
    150       "data_preprocessing_documented": {
    151         "applies": true,
    152         "answer": false,
    153         "justification": "Data preprocessing steps are not documented. The paper does not describe how survey responses were cleaned, whether any responses were excluded, or how the Revised TAM Questionnaire responses were processed beyond stating that 'each scale score was obtained by averaging the relevant items.'"
    154       }
    155     },
    156     "limitations_and_scope": {
    157       "limitations_section_present": {
    158         "applies": true,
    159         "answer": false,
    160         "justification": "There is no dedicated limitations or threats-to-validity section in the paper. The conclusion (Section 6) mentions future research directions but does not discuss limitations."
    161       },
    162       "threats_to_validity_specific": {
    163         "applies": true,
    164         "answer": false,
    165         "justification": "No threats to validity are discussed anywhere in the paper. Specific threats such as the JetBrains panel sampling bias, the small N for factor analysis, or the cross-sectional design are not mentioned."
    166       },
    167       "scope_boundaries_stated": {
    168         "applies": true,
    169         "answer": false,
    170         "justification": "No explicit scope boundaries are stated. The paper does not clarify what its results do NOT show (e.g., that correlations do not imply causation, that the sample may not represent developers outside the JetBrains ecosystem)."
    171       }
    172     },
    173     "data_integrity": {
    174       "raw_data_available": {
    175         "applies": true,
    176         "answer": false,
    177         "justification": "Raw data is not available. The paper states 'All anonymized data analyzed in this study is available upon request' (Section 4), which is not publicly accessible for independent verification."
    178       },
    179       "data_collection_described": {
    180         "applies": true,
    181         "answer": true,
    182         "justification": "Data collection is described: interviews were conducted in spring 2024 with 38 programmers (details in reference [15]), and an online survey was conducted in January 2025 with 102 participants recruited from a JetBrains curated panel (Section 3)."
    183       },
    184       "recruitment_methods_described": {
    185         "applies": true,
    186         "answer": true,
    187         "justification": "Recruitment methods are described: 'The link to the survey was sent to a curated list of people who had given their consent to participate in user studies conducted by JetBrains' (Section 3). Interview participants were from a prior study [15]."
    188       },
    189       "data_pipeline_documented": {
    190         "applies": true,
    191         "answer": false,
    192         "justification": "The data pipeline is not fully documented. It is unclear how many people received the survey link vs. the 102 who completed it (response rate unknown), whether any responses were excluded, or how the qualitative coding process worked for the interview re-analysis."
    193       }
    194     },
    195     "conflicts_of_interest": {
    196       "funding_disclosed": {
    197         "applies": true,
    198         "answer": false,
    199         "justification": "No funding source is disclosed. All three authors are affiliated with JetBrains Research, a company that makes developer tools, but no funding acknowledgment section is present."
    200       },
    201       "affiliations_disclosed": {
    202         "applies": true,
    203         "answer": true,
    204         "justification": "All authors list JetBrains Research as their affiliation, which is clearly stated in the paper header."
    205       },
    206       "funder_independent_of_outcome": {
    207         "applies": true,
    208         "answer": false,
    209         "justification": "JetBrains is a developer tools company that sells AI-powered development tools. The authors are JetBrains Research employees studying adoption of AI-powered development tools. The funder (JetBrains) has a direct commercial interest in the outcome — findings that role attribution increases AI tool adoption support the company's business case."
    210       },
    211       "financial_interests_declared": {
    212         "applies": true,
    213         "answer": false,
    214         "justification": "No competing interests or financial interests statement is present in the paper. The authors work for a company that sells the tools being studied, but this conflict is not explicitly acknowledged."
    215       }
    216     },
    217     "contamination": {
    218       "training_cutoff_stated": {
    219         "applies": false,
    220         "answer": false,
    221         "justification": "This paper does not evaluate any pre-trained model's capability on a benchmark. It is a survey/interview study about developer perceptions."
    222       },
    223       "train_test_overlap_discussed": {
    224         "applies": false,
    225         "answer": false,
    226         "justification": "Not applicable — no model evaluation on benchmarks is performed."
    227       },
    228       "benchmark_contamination_addressed": {
    229         "applies": false,
    230         "answer": false,
    231         "justification": "Not applicable — no benchmark evaluation is performed."
    232       }
    233     },
    234     "human_studies": {
    235       "pre_registered": {
    236         "applies": true,
    237         "answer": false,
    238         "justification": "No pre-registration is mentioned. No link to OSF, AsPredicted, or any pre-registration platform is provided."
    239       },
    240       "irb_or_ethics_approval": {
    241         "applies": true,
    242         "answer": false,
    243         "justification": "No IRB or ethics board approval is mentioned. The paper states the study 'was carried out according to JetBrains' ethical standards, adhering to the values and guidelines outlined in the ICC/ESOMAR International Code' (Section 3), but this is an industry marketing research code, not an institutional ethics board review."
    244       },
    245       "demographics_reported": {
    246         "applies": true,
    247         "answer": true,
    248         "justification": "Participant demographics are reported: coding experience breakdown (3 no experience, 9 <1 year, 12 1-2 years, 21 3-5 years, 19 6-10 years, 14 11-15 years, 24 16+ years), and job types for non-coders (management, digital design, online advertising, molecular biology) in Section 4.2."
    249       },
    250       "inclusion_exclusion_criteria": {
    251         "applies": true,
    252         "answer": false,
    253         "justification": "No inclusion or exclusion criteria are stated for the survey. Participants were drawn from 'a curated list of people who had given their consent to participate in user studies conducted by JetBrains' but no screening criteria or exclusion rules are described."
    254       },
    255       "randomization_described": {
    256         "applies": false,
    257         "answer": false,
    258         "justification": "This is a cross-sectional survey, not an experimental study with treatment conditions. Randomization is not applicable."
    259       },
    260       "blinding_described": {
    261         "applies": false,
    262         "answer": false,
    263         "justification": "This is a cross-sectional survey, not an experimental study. Blinding is not applicable."
    264       },
    265       "attrition_reported": {
    266         "applies": true,
    267         "answer": false,
    268         "justification": "Attrition is not reported. The paper states '102 participants fully completed the survey' but does not mention how many started and did not finish, or the total number of people invited."
    269       }
    270     },
    271     "cost_and_practicality": {
    272       "inference_cost_reported": {
    273         "applies": false,
    274         "answer": false,
    275         "justification": "This is a survey study, not a system or method with inference costs."
    276       },
    277       "compute_budget_stated": {
    278         "applies": false,
    279         "answer": false,
    280         "justification": "This is a survey study with minimal computational requirements. Compute budget is not applicable."
    281       }
    282     }
    283   },
    284   "claims": [
    285     {
    286       "claim": "Developers conceptualize AI-powered Development Tools along two primary dimensions: as inanimate tools or as human-like teammates.",
    287       "evidence": "Qualitative analysis of 38 interviews showed ~80% referred to AI as a machine/software/tool, while ~20% defined AI in human terms using words like colleague, companion, or assistant (Section 4.1).",
    288       "supported": "moderate"
    289     },
    290     {
    291       "claim": "AI roles can be grouped into two factors: Support Roles (assistant, reference guide, tool) and Expert Roles (problem solver, advisor, reviewer).",
    292       "evidence": "Factor analysis with varimax rotation on survey data from 102 participants, with factor loadings reported (e.g., assistant=0.657, problem solver=0.599, advisor=0.662, reviewer=0.691) in Section 4.2.",
    293       "supported": "moderate"
    294     },
    295     {
    296       "claim": "Assigning multiple roles to AI correlates positively with Perceived Usefulness and Perceived Ease of Use.",
    297       "evidence": "Table 1 shows correlations: total AI roles with PU r=0.59 (p<0.001), with PEU r=0.56 (p<0.001). Expert Roles with PU r=0.41 (p<0.001), PEU r=0.42 (p<0.001). Support Roles with PU r=0.34 (p<0.001), PEU r=0.27 (p<0.01).",
    298       "supported": "moderate"
    299     },
    300     {
    301       "claim": "Diverse conceptualizations enhance AI adoption.",
    302       "evidence": "Based on correlational evidence from Table 1. The causal language ('enhance') is not supported by the cross-sectional survey design, which cannot rule out reverse causation or confounding.",
    303       "supported": "weak"
    304     },
    305     {
    306       "claim": "Coding experience does not significantly correlate with AI role attribution or technology acceptance.",
    307       "evidence": "Table 1 shows non-significant correlations between coding experience and all other variables (r ranging from -0.20 to 0.18, none reaching p<0.05).",
    308       "supported": "moderate"
    309     }
    310   ],
    311   "methodology_tags": ["qualitative", "observational"],
    312   "key_findings": "Developers conceptualize AI-powered development tools along two dimensions: as inanimate tools (~80%) or human-like teammates (~20%). Factor analysis of survey data from 102 participants reveals two role groupings: Support Roles (assistant, reference guide, tool) and Expert Roles (advisor, problem solver, reviewer). Assigning more roles to AI correlates positively with Perceived Usefulness (r=0.59) and Perceived Ease of Use (r=0.56), while coding experience shows no significant relationship with role attribution or acceptance.",
    313   "red_flags": [
    314     {
    315       "flag": "Conflict of interest: JetBrains employees studying JetBrains tool users",
    316       "detail": "All authors are from JetBrains Research, a company that sells AI-powered development tools. The survey sample is drawn from JetBrains' own user panel. Findings that role attribution increases AI tool adoption directly serve the company's commercial interest. This conflict is not acknowledged."
    317     },
    318     {
    319       "flag": "Convenience sample presented as general",
    320       "detail": "The survey sample is drawn from 'a curated list of people who had given their consent to participate in user studies conducted by JetBrains.' This is a self-selected group of JetBrains users, likely more engaged with development tools than average developers, but the paper generalizes to 'developers' broadly."
    321     },
    322     {
    323       "flag": "Causal language from correlational data",
    324       "detail": "The abstract claims 'diverse conceptualizations enhance AI adoption' and the discussion states role attributions 'directly influence technology adoption decisions.' These are causal claims derived from cross-sectional correlational data, which cannot establish causality."
    325     },
    326     {
    327       "flag": "No limitations section",
    328       "detail": "A 5-page short paper at a top venue discusses no limitations, threats to validity, or scope boundaries. The cross-sectional design, small N for factor analysis, JetBrains panel bias, and correlational-vs-causal issues are all unaddressed."
    329     },
    330     {
    331       "flag": "Small sample for factor analysis",
    332       "detail": "N=102 for factor analysis with 14 role variables is on the lower end of recommended sample sizes. Rules of thumb suggest 5-10 participants per variable (70-140 needed), and some methodologists recommend N>200. The adequacy of the sample for factor analysis is not discussed."
    333     },
    334     {
    335       "flag": "Suspiciously high PU-PEU correlation",
    336       "detail": "PU and PEU correlate at r=0.94, which is unusually high for two purportedly distinct constructs. This raises concerns about discriminant validity of the TAM scales in this sample, but is not discussed."
    337     }
    338   ],
    339   "cited_papers": [
    340     {
    341       "title": "Beyond accuracy: The role of mental models in human-AI team performance",
    342       "authors": ["Gagan Bansal", "Besmira Nushi", "Ece Kamar", "Walter S Lasecki", "Daniel S Weld", "Eric Horvitz"],
    343       "year": 2019,
    344       "relevance": "Examines how mental models of AI affect human-AI team performance, directly relevant to understanding AI tool adoption."
    345     },
    346     {
    347       "title": "What Guides Our Choices? Modeling Developers' Trust and Behavioral Intentions Towards GenAI",
    348       "authors": ["Rudrajit Choudhuri", "Bianca Trinkenreich", "Rahul Pandita", "Eirini Kalliamvakou"],
    349       "year": 2024,
    350       "arxiv_id": "2409.04099",
    351       "relevance": "Models developer trust and behavioral intentions toward generative AI tools, directly relevant to AI adoption in software engineering."
    352     },
    353     {
    354       "title": "AI tool use and adoption in software development by individuals and organizations: a grounded theory study",
    355       "authors": ["Ze Shi Li", "Nowshin Nawar Arony", "Ahmed Musa Awon", "Daniela Damian", "Bowen Xu"],
    356       "year": 2024,
    357       "arxiv_id": "2406.17325",
    358       "relevance": "Grounded theory study of AI tool adoption in software development, complementary methodology to the surveyed paper."
    359     },
    360     {
    361       "title": "Navigating the complexity of generative AI adoption in software engineering",
    362       "authors": ["Daniel Russo"],
    363       "year": 2024,
    364       "relevance": "Studies generative AI adoption complexity in software engineering using TAM-related constructs."
    365     },
    366     {
    367       "title": "The Design Space of in-IDE Human-AI Experience",
    368       "authors": ["A. Sergeyuk", "E. Koshchenko", "I. Zakharov", "T. Bryksin", "M. Izadi"],
    369       "year": 2024,
    370       "arxiv_id": "2410.08676",
    371       "relevance": "Companion qualitative study (same group) on human-AI experience in IDEs; source of the interview data reanalyzed in this paper."
    372     },
    373     {
    374       "title": "Influencing human-AI interaction by priming beliefs about AI can increase perceived trustworthiness, empathy and effectiveness",
    375       "authors": ["Pat Pataranutaporn", "Ruby Liu", "Ed Finn", "Pattie Maes"],
    376       "year": 2023,
    377       "doi": "10.1038/s42256-023-00720-7",
    378       "relevance": "Demonstrates how priming beliefs about AI affects trust and interactions, relevant to mental models and AI adoption."
    379     },
    380     {
    381       "title": "Mutual theory of mind for human-AI communication",
    382       "authors": ["Qiaosi Wang", "Ashok K. Goel"],
    383       "year": 2022,
    384       "relevance": "Proposes framework for mutual theory of mind in human-AI interaction, theoretical foundation for studying AI role attribution."
    385     },
    386     {
    387       "title": "Mutual theory of mind in human-AI collaboration: An empirical study with LLM-driven AI agents in a real-time shared workspace task",
    388       "authors": ["Shao Zhang", "Xihuai Wang", "Wenhao Zhang"],
    389       "year": 2024,
    390       "arxiv_id": "2409.08811",
    391       "relevance": "Empirical study of mutual theory of mind with LLM agents, tests whether MToM frameworks improve human-AI collaboration."
    392     }
    393   ]
    394 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs