ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (21924B)


      1 {
      2   "paper": {
      3     "title": "Toward Agentic Software Engineering Beyond Code: Framing Vision, Values, and Vocabulary",
      4     "authors": ["Rashina Hoda"],
      5     "year": 2025,
      6     "venue": "AGENT Workshop at ICSE 2026",
      7     "arxiv_id": "2510.19692"
      8   },
      9   "checklist": {
     10     "artifacts": {
     11       "code_released": {
     12         "applies": true,
     13         "answer": false,
     14         "justification": "No code was produced or released. The paper is a position/vision paper with no implementation. However, the criterion applies because the author could have released analysis scripts or a structured dataset of the existing frameworks surveyed."
     15       },
     16       "data_released": {
     17         "applies": true,
     18         "answer": false,
     19         "justification": "No dataset is released. The paper references existing work (e.g., the AIDev dataset by Li et al.) but produces no new data and releases none."
     20       },
     21       "environment_specified": {
     22         "applies": false,
     23         "answer": false,
     24         "justification": "No software environment is used. This is a purely conceptual position paper with no experiments, code, or computational components."
     25       },
     26       "reproduction_instructions": {
     27         "applies": false,
     28         "answer": false,
     29         "justification": "There is nothing to reproduce. The paper proposes a vision, values, and vocabulary guidance with no experiments or computational artifacts."
     30       }
     31     },
     32     "statistical_methodology": {
     33       "confidence_intervals_or_error_bars": {
     34         "applies": false,
     35         "answer": false,
     36         "justification": "No empirical results are presented. This is a theoretical/position paper with no quantitative measurements."
     37       },
     38       "significance_tests": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "No comparative empirical claims are made that would require significance tests. The paper makes no quantitative comparisons."
     42       },
     43       "effect_sizes_reported": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No effect sizes are applicable. This is a position paper with no empirical results."
     47       },
     48       "sample_size_justified": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No sample of participants or examples is used. This is a theoretical paper."
     52       },
     53       "variance_reported": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No quantitative measurements are made across runs or trials. Not applicable to a theoretical/vision paper."
     57       }
     58     },
     59     "evaluation_design": {
     60       "baselines_included": {
     61         "applies": false,
     62         "answer": false,
     63         "justification": "This is a position/vision paper, not an empirical evaluation. There are no experimental results to compare against baselines."
     64       },
     65       "baselines_contemporary": {
     66         "applies": false,
     67         "answer": false,
     68         "justification": "No baselines exist in a position paper proposing a conceptual framework. Not applicable."
     69       },
     70       "ablation_study": {
     71         "applies": false,
     72         "answer": false,
     73         "justification": "No system with components is implemented or evaluated. Not applicable to a vision paper."
     74       },
     75       "multiple_metrics": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "No empirical evaluation is conducted. The paper proposes conceptual frameworks without measuring outcomes."
     79       },
     80       "human_evaluation": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "No system outputs are produced to evaluate. Human evaluation is structurally inapplicable to a pure vision paper."
     84       },
     85       "held_out_test_set": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "No empirical evaluation with training/test splits. Not applicable to a theoretical paper."
     89       },
     90       "per_category_breakdown": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "No empirical results exist to break down by category. Not applicable."
     94       },
     95       "failure_cases_discussed": {
     96         "applies": false,
     97         "answer": false,
     98         "justification": "No system is evaluated, so there are no failure cases. Not applicable to a position paper."
     99       },
    100       "negative_results_reported": {
    101         "applies": false,
    102         "answer": false,
    103         "justification": "No experiments were run that could yield negative results. Not applicable to a vision/position paper."
    104       }
    105     },
    106     "claims_and_evidence": {
    107       "abstract_claims_supported": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "The abstract claims the paper contributes (a) a 'whole of process' vision, (b) CRAFT values and principles, and (c) vocabulary guidance. All three are delivered in the body of the paper in Sections 3, 4, and 5 respectively. The claims are descriptive of the paper's contributions, not empirical, so they are internally consistent."
    111       },
    112       "causal_claims_justified": {
    113         "applies": false,
    114         "answer": false,
    115         "justification": "The paper makes no causal claims. It presents a vision and principles, citing prior empirical work by others for motivational context, but makes no causal claims of its own."
    116       },
    117       "generalization_bounded": {
    118         "applies": true,
    119         "answer": false,
    120         "justification": "The paper makes broad claims about agentic SE as a paradigm shift and advocates for a 'whole of process' vision without clearly bounding the scope of its claims. For example, it extrapolates from early empirical studies (some under review as of Jan 2026) to general principles for the entire field. The title 'Toward Agentic Software Engineering Beyond Code' implies comprehensive coverage but is grounded only in a selective literature review of a nascent field."
    121       },
    122       "alternative_explanations_discussed": {
    123         "applies": false,
    124         "answer": false,
    125         "justification": "The paper presents no empirical results, so there are no observed results requiring alternative explanations. This is a pure vision/position paper."
    126       }
    127     },
    128     "setup_transparency": {
    129       "model_versions_specified": {
    130         "applies": false,
    131         "answer": false,
    132         "justification": "The paper does not evaluate any AI model. It references existing agentic systems (Devin, Jules, Codex, Claude Code) descriptively, not as subjects of evaluation."
    133       },
    134       "prompts_provided": {
    135         "applies": false,
    136         "answer": false,
    137         "justification": "No prompting or LLM interaction is conducted in this paper. The paper is a theoretical position paper."
    138       },
    139       "hyperparameters_reported": {
    140         "applies": false,
    141         "answer": false,
    142         "justification": "No models are used or tuned. Not applicable to a position paper."
    143       },
    144       "scaffolding_described": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No agentic scaffolding is implemented in this paper. The paper proposes a vision for how scaffolding should be designed, but does not build or evaluate any."
    148       },
    149       "data_preprocessing_documented": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "No data collection or preprocessing is performed. The paper cites existing literature selectively but does not conduct a systematic review with documented inclusion/exclusion criteria."
    153       }
    154     },
    155     "limitations_and_scope": {
    156       "limitations_section_present": {
    157         "applies": true,
    158         "answer": false,
    159         "justification": "There is no dedicated limitations or threats-to-validity section. Section 6 ('A Deliberate and Desirable Paradigm Shift') is the conclusion and briefly acknowledges that 'proposed ideas end-up being defined by practice' and 'we may not know what agentic SE truly looks like until it is studied empirically in the wild,' but this is a single brief paragraph, not a substantive limitations section."
    160       },
    161       "threats_to_validity_specific": {
    162         "applies": true,
    163         "answer": false,
    164         "justification": "No specific threats to validity are discussed. The paper's acknowledgment that 'we may not know what agentic SE truly looks like until it is studied empirically in the wild' is a generic disclaimer, not a threat specific to the paper's methodology or conclusions."
    165       },
    166       "scope_boundaries_stated": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "The paper does not explicitly state what its vision does NOT cover or what claims it is NOT making. It acknowledges its vision is preliminary and 'not meant to be exhaustive or final,' but this is a generic hedge rather than specific scope boundaries of the kind the criterion requires."
    170       }
    171     },
    172     "data_integrity": {
    173       "raw_data_available": {
    174         "applies": false,
    175         "answer": false,
    176         "justification": "No data is collected or analyzed. This is a position paper based on a literature survey. No raw data exists to release."
    177       },
    178       "data_collection_described": {
    179         "applies": true,
    180         "answer": false,
    181         "justification": "The paper selectively cites prior work to motivate its vision but does not describe a systematic data collection procedure. There is no protocol for how literature was identified, screened, or selected — it reads as an opinionated synthesis rather than a structured review."
    182       },
    183       "recruitment_methods_described": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "No human participants are recruited. Not applicable to a position paper."
    187       },
    188       "data_pipeline_documented": {
    189         "applies": false,
    190         "answer": false,
    191         "justification": "No data pipeline exists. The paper does not systematically collect or transform any data. Not applicable."
    192       }
    193     },
    194     "conflicts_of_interest": {
    195       "funding_disclosed": {
    196         "applies": true,
    197         "answer": false,
    198         "justification": "There is no acknowledgment of funding in the paper. The acknowledgments section thanks specific individuals for feedback but mentions no funding source."
    199       },
    200       "affiliations_disclosed": {
    201         "applies": true,
    202         "answer": true,
    203         "justification": "The author's affiliation (Monash University, Melbourne, Australia) is clearly stated on the title page. No products of the affiliated institution are being evaluated, so there is no direct conflict."
    204       },
    205       "funder_independent_of_outcome": {
    206         "applies": false,
    207         "answer": false,
    208         "justification": "No funding is disclosed, so funder independence cannot be assessed. Treated as not applicable due to absence of identified funder."
    209       },
    210       "financial_interests_declared": {
    211         "applies": true,
    212         "answer": false,
    213         "justification": "There is no competing interests or financial disclosure statement in the paper. Absence of declaration means this criterion is not satisfied."
    214       }
    215     },
    216     "contamination": {
    217       "training_cutoff_stated": {
    218         "applies": false,
    219         "answer": false,
    220         "justification": "The paper does not evaluate any pre-trained model on a benchmark. It is a position/vision paper. Not applicable."
    221       },
    222       "train_test_overlap_discussed": {
    223         "applies": false,
    224         "answer": false,
    225         "justification": "No model training or benchmark evaluation is conducted. Not applicable to a position paper."
    226       },
    227       "benchmark_contamination_addressed": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "No benchmarks are used. Not applicable to a position/vision paper."
    231       }
    232     },
    233     "human_studies": {
    234       "pre_registered": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "No human participants are involved. This is a theoretical/vision paper with no empirical studies."
    238       },
    239       "irb_or_ethics_approval": {
    240         "applies": false,
    241         "answer": false,
    242         "justification": "No human participants are involved. Not applicable."
    243       },
    244       "demographics_reported": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants are involved. Not applicable."
    248       },
    249       "inclusion_exclusion_criteria": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants are involved. Not applicable."
    253       },
    254       "randomization_described": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants or experimental conditions. Not applicable."
    258       },
    259       "blinding_described": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants or experimental conditions. Not applicable."
    263       },
    264       "attrition_reported": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants are involved. Not applicable."
    268       }
    269     },
    270     "cost_and_practicality": {
    271       "inference_cost_reported": {
    272         "applies": false,
    273         "answer": false,
    274         "justification": "This is a theoretical/vision paper. No system is built or evaluated, so there are no inference costs to report."
    275       },
    276       "compute_budget_stated": {
    277         "applies": false,
    278         "answer": false,
    279         "justification": "No computational experiments are conducted. Not applicable to a theoretical position paper."
    280       }
    281     }
    282   },
    283   "claims": [
    284     {
    285       "claim": "Agentic SE must expand beyond code to encompass a 'whole of process' vision covering requirements engineering, design, operations, ethics, and socio-technical concerns.",
    286       "evidence": "Motivated by citing early empirical studies (Xiao et al. on teamwork limitations, Akbar et al. on cross-phase applicability, Wang et al. on socio-technical concerns) and a review of 395 LLM-SE papers (Hou et al.) showing coding dominates; Section 3 and Figure 2.",
    287       "supported": "weak"
    288     },
    289     {
    290       "claim": "A set of CRAFT (Comprehensive, Responsible, Adaptive, Foundational, Translational) values and principles can guide the agentic SE research community.",
    291       "evidence": "Presented in Section 4 and Table 1 as a preliminary set 'to spark community conversation.' No empirical validation of the framework is provided.",
    292       "supported": "weak"
    293     },
    294     {
    295       "claim": "Well-defined vocabulary is a precursor to formalizing taxonomy and knowledge areas for agentic SE, and current terminology is inconsistent.",
    296       "evidence": "Section 5 notes 'drifts and discrepancies in terminology usage are already apparent,' citing syntactic differences (e.g., 'agentic AI software engineer' vs. 'AI software engineer'). No systematic analysis of terminology usage is provided.",
    297       "supported": "weak"
    298     }
    299   ],
    300   "methodology_tags": ["theoretical", "qualitative"],
    301   "key_findings": "This position paper proposes expanding the vision of agentic software engineering beyond code-centric activities to a 'whole of process' paradigm covering requirements, design, development, operations, and ethical alignment. It introduces CRAFT (Comprehensive, Responsible, Adaptive, Foundational, Translational) values and principles as a preliminary framework to guide the agentic SE research community. The paper also provides guidance on designing consistent vocabulary for the nascent field, arguing that terminological stability is foundational to knowledge area formalization. All contributions are conceptual and call for future empirical validation.",
    302   "red_flags": [
    303     {
    304       "flag": "No empirical grounding for proposed framework",
    305       "detail": "The CRAFT values, 'whole of process' vision, and vocabulary principles are proposed without any empirical validation, user study, expert survey, or structured elicitation process. The framework is entirely the views of a single author presented as community guidance."
    306     },
    307     {
    308       "flag": "Selective literature citation without systematic review protocol",
    309       "detail": "The paper draws motivational support from selected empirical studies (many described as 'under review as of Jan 2026') without documenting inclusion/exclusion criteria or search methodology. This makes it impossible to assess whether the cited evidence is representative or cherry-picked."
    310     },
    311     {
    312       "flag": "Overly broad claims from limited evidence base",
    313       "detail": "The paper generalizes from a handful of early empirical studies (several not yet published) to prescriptive principles for an entire engineering discipline. The scope of the claims significantly outruns the evidence cited."
    314     },
    315     {
    316       "flag": "No limitations section",
    317       "detail": "The paper includes no dedicated limitations or threats-to-validity section. The only acknowledgment of uncertainty is a brief sentence noting that 'we may not know what agentic SE truly looks like until it is studied empirically in the wild.'"
    318     }
    319   ],
    320   "cited_papers": [
    321     {
    322       "title": "Software Engineering by and for Humans in an AI Era",
    323       "authors": ["Silvia Abrahão", "John Grundy", "Mauro Pezzè", "Margaret-Anne Storey", "Damian A Tamburri"],
    324       "year": 2025,
    325       "doi": "10.1145/3716868",
    326       "relevance": "Raises socio-technical questions about human-AI collaboration in software engineering, directly relevant to the survey's focus on agentic AI and SE methodology."
    327     },
    328     {
    329       "title": "Agentic AI in Software Engineering: Practitioner Perspectives Across the Software Development Life Cycle",
    330       "authors": ["Muhammad Azeem Akbar", "Arif Ali Khan", "Muhammad Hamza", "Abdullah Ghaffar", "Arash Hajikhani"],
    331       "year": 2025,
    332       "relevance": "Empirical study of practitioner perspectives on agentic AI in SE, relevant as an empirical grounding for claims about cross-phase applicability gaps."
    333     },
    334     {
    335       "title": "Agentic Software Engineering: Foundational Pillars and a Research Roadmap",
    336       "authors": ["Ahmed E Hassan", "Hao Li", "Dayi Lin", "Bram Adams", "Tse-Hsun Chen", "Yutaro Kashiwa", "Dong Qiu"],
    337       "year": 2025,
    338       "arxiv_id": "2509.06216",
    339       "relevance": "Proposes a structured agentic software engineering (SASE) framework; directly in scope as a competing/complementary vision paper for agentic SE."
    340     },
    341     {
    342       "title": "LLM-Based Multi-Agent Systems for Software Engineering: Literature Review, Vision, and the Road Ahead",
    343       "authors": ["Junda He", "Christoph Treude", "David Lo"],
    344       "year": 2025,
    345       "relevance": "Literature review of LLM-based multi-agent systems for SE; highly relevant to the survey's scope on agentic AI for software engineering."
    346     },
    347     {
    348       "title": "Large language models for software engineering: A systematic literature review",
    349       "authors": ["Xinyi Hou"],
    350       "year": 2024,
    351       "relevance": "Systematic review of 395 LLM-SE papers; directly relevant as evidence about the distribution of research activity in LLM-based software engineering."
    352     },
    353     {
    354       "title": "The Rise of AI Teammates in Software Engineering (SE) 3.0: How Autonomous Coding Agents Are Reshaping Software Engineering",
    355       "authors": ["Hao Li", "Haoxiang Zhang", "Ahmed E Hassan"],
    356       "year": 2025,
    357       "arxiv_id": "2507.15003",
    358       "relevance": "Introduces the AIDev dataset of 450K+ autonomous agent pull requests; directly relevant as an empirical resource for studying agentic coding behavior."
    359     },
    360     {
    361       "title": "Agentic AI software engineer: Programming with trust",
    362       "authors": ["Abhik Roychoudhury", "Corina Pasareanu", "Michael Pradel", "Baishakhi Ray"],
    363       "year": 2025,
    364       "arxiv_id": "2502.13767",
    365       "relevance": "Proposes the concept of an agentic AI software engineer; directly relevant as a competing vision for agentic SE."
    366     },
    367     {
    368       "title": "AI agents vs. agentic AI: A conceptual taxonomy, applications and challenges",
    369       "authors": ["Ranjan Sapkota", "Konstantinos I Roumeliotis", "Manoj Karkee"],
    370       "year": 2025,
    371       "arxiv_id": "2505.10468",
    372       "relevance": "Provides a conceptual taxonomy distinguishing AI agents from agentic AI; relevant to the survey's interest in definitional clarity around agentic systems."
    373     },
    374     {
    375       "title": "AI agentic programming: A survey of techniques, challenges, and opportunities",
    376       "authors": ["Huanting Wang", "Jingzhi Gong", "Huawei Zhang", "Jie Xu", "Zheng Wang"],
    377       "year": 2025,
    378       "arxiv_id": "2508.11126",
    379       "relevance": "Comprehensive survey of AI agentic programming techniques; directly relevant to the survey's scope on agentic AI for software engineering."
    380     },
    381     {
    382       "title": "Agents in software engineering: Survey, landscape, and vision",
    383       "authors": ["Yanlin Wang"],
    384       "year": 2025,
    385       "relevance": "Survey of agents in software engineering including a taxonomy; relevant to the survey's scope on agentic approaches to software engineering tasks."
    386     },
    387     {
    388       "title": "AI Hasn't Fixed Teamwork, But It Shifted Collaborative Culture: A Longitudinal Study in a Project-Based Software Development Organization (2023-2025)",
    389       "authors": ["Qing Xiao"],
    390       "year": 2025,
    391       "arxiv_id": "2509.10956",
    392       "relevance": "Longitudinal empirical study of AI's impact on software development teamwork; relevant as empirical evidence about socio-technical effects of AI in SE contexts."
    393     },
    394     {
    395       "title": "Augmented agile: Human-centered AI-assisted software management",
    396       "authors": ["Rashina Hoda", "Hoa Dam", "Chakkrit Tantithamthavorn", "Patanamon Thongtanunam", "Margaret-Anne Storey"],
    397       "year": 2023,
    398       "relevance": "Proposes an AI-powered agile management framework; relevant as a precursor work to agentic SE concepts and human-AI collaboration in SE."
    399     }
    400   ]
    401 }

Impressum · Datenschutz