ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (15146B)


      1 {
      2   "paper": {
      3     "title": "AI-Powered Software Development Life Cycle: From Requirements to Maintenance",
      4     "authors": ["Sandeep Burte"],
      5     "year": 2025,
      6     "venue": "AI Systems Engineering"
      7   },
      8   "checklist": {
      9     "artifacts": {
     10       "code_released": {
     11         "applies": false,
     12         "answer": false,
     13         "justification": "This is a viewpoint/opinion paper with no original code, experiments, or analysis scripts to release."
     14       },
     15       "data_released": {
     16         "applies": false,
     17         "answer": false,
     18         "justification": "No data was collected or analyzed. This is a narrative viewpoint paper."
     19       },
     20       "environment_specified": {
     21         "applies": false,
     22         "answer": false,
     23         "justification": "No computational experiments were conducted; no environment to specify."
     24       },
     25       "reproduction_instructions": {
     26         "applies": false,
     27         "answer": false,
     28         "justification": "No experiments or analyses to reproduce. This is a viewpoint paper."
     29       }
     30     },
     31     "statistical_methodology": {
     32       "confidence_intervals_or_error_bars": {
     33         "applies": false,
     34         "answer": false,
     35         "justification": "No quantitative results are reported. This is a narrative viewpoint paper with no experiments."
     36       },
     37       "significance_tests": {
     38         "applies": false,
     39         "answer": false,
     40         "justification": "No experiments or comparative claims backed by data."
     41       },
     42       "effect_sizes_reported": {
     43         "applies": false,
     44         "answer": false,
     45         "justification": "No empirical results to report effect sizes for."
     46       },
     47       "sample_size_justified": {
     48         "applies": false,
     49         "answer": false,
     50         "justification": "No sample or data collection of any kind."
     51       },
     52       "variance_reported": {
     53         "applies": false,
     54         "answer": false,
     55         "justification": "No experiments conducted."
     56       }
     57     },
     58     "evaluation_design": {
     59       "baselines_included": {
     60         "applies": false,
     61         "answer": false,
     62         "justification": "No evaluation or comparison is performed. This is a viewpoint paper describing potential AI applications."
     63       },
     64       "baselines_contemporary": {
     65         "applies": false,
     66         "answer": false,
     67         "justification": "No evaluation conducted."
     68       },
     69       "ablation_study": {
     70         "applies": false,
     71         "answer": false,
     72         "justification": "No system or experiments to ablate."
     73       },
     74       "multiple_metrics": {
     75         "applies": false,
     76         "answer": false,
     77         "justification": "No evaluation conducted."
     78       },
     79       "human_evaluation": {
     80         "applies": false,
     81         "answer": false,
     82         "justification": "No system outputs to evaluate. This is a viewpoint paper."
     83       },
     84       "held_out_test_set": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "No experiments conducted."
     88       },
     89       "per_category_breakdown": {
     90         "applies": false,
     91         "answer": false,
     92         "justification": "No results to break down."
     93       },
     94       "failure_cases_discussed": {
     95         "applies": true,
     96         "answer": false,
     97         "justification": "The paper discusses challenges at a high level in Section 10 but provides no concrete failure cases or evidence of where AI-powered SDLC approaches break down."
     98       },
     99       "negative_results_reported": {
    100         "applies": true,
    101         "answer": false,
    102         "justification": "No negative results are reported. Every claim about AI in SDLC is positive; Section 10 mentions challenges only in vague, general terms."
    103       }
    104     },
    105     "claims_and_evidence": {
    106       "abstract_claims_supported": {
    107         "applies": true,
    108         "answer": false,
    109         "justification": "The abstract claims AI demonstrates 'significant capabilities' in code generation, testing, deployment, and maintenance, but the paper provides no original evidence — only narrative descriptions and citations to papers that are largely unrelated to AI-powered software development (e.g., antibody discovery, drug formulation, ultrasonic testing)."
    110       },
    111       "causal_claims_justified": {
    112         "applies": true,
    113         "answer": false,
    114         "justification": "The paper makes numerous causal claims (e.g., 'AI can significantly improve accuracy of project planning', 'automation not only accelerates development but also ensures consistency') without any experimental evidence or causal identification strategy."
    115       },
    116       "generalization_bounded": {
    117         "applies": true,
    118         "answer": false,
    119         "justification": "Claims are made broadly about 'AI' capabilities across the entire SDLC without bounding to specific tools, models, domains, or contexts. No qualifications on when or where these claims apply."
    120       },
    121       "alternative_explanations_discussed": {
    122         "applies": true,
    123         "answer": false,
    124         "justification": "No alternative explanations are discussed. The paper presents a uniformly positive view of AI in SDLC without considering that observed benefits might be due to other factors or that the cited evidence may not support the claims."
    125       }
    126     },
    127     "setup_transparency": {
    128       "model_versions_specified": {
    129         "applies": false,
    130         "answer": false,
    131         "justification": "No models were used in experiments. The paper is a narrative viewpoint piece."
    132       },
    133       "prompts_provided": {
    134         "applies": false,
    135         "answer": false,
    136         "justification": "No prompting experiments conducted."
    137       },
    138       "hyperparameters_reported": {
    139         "applies": false,
    140         "answer": false,
    141         "justification": "No experiments conducted."
    142       },
    143       "scaffolding_described": {
    144         "applies": false,
    145         "answer": false,
    146         "justification": "No agentic system built or evaluated."
    147       },
    148       "data_preprocessing_documented": {
    149         "applies": false,
    150         "answer": false,
    151         "justification": "No data collected or processed."
    152       }
    153     },
    154     "limitations_and_scope": {
    155       "limitations_section_present": {
    156         "applies": true,
    157         "answer": false,
    158         "justification": "There is no dedicated limitations section. Section 10 ('Challenges and Considerations') discusses generic challenges of AI adoption but does not discuss limitations of the paper itself or its methodology."
    159       },
    160       "threats_to_validity_specific": {
    161         "applies": true,
    162         "answer": false,
    163         "justification": "No threats to validity are discussed. The paper does not acknowledge any weaknesses in its own analysis or methodology."
    164       },
    165       "scope_boundaries_stated": {
    166         "applies": true,
    167         "answer": false,
    168         "justification": "No explicit scope boundaries are stated. The paper covers the entire SDLC broadly without stating what it does NOT cover or what its claims do NOT extend to."
    169       }
    170     },
    171     "data_integrity": {
    172       "raw_data_available": {
    173         "applies": false,
    174         "answer": false,
    175         "justification": "No data was collected. This is a viewpoint paper."
    176       },
    177       "data_collection_described": {
    178         "applies": true,
    179         "answer": false,
    180         "justification": "The abstract mentions 'systematic analysis of recent empirical studies' but no methodology for selecting or analyzing these studies is described anywhere in the paper."
    181       },
    182       "recruitment_methods_described": {
    183         "applies": false,
    184         "answer": false,
    185         "justification": "No participants or systematic sample. This is a viewpoint paper."
    186       },
    187       "data_pipeline_documented": {
    188         "applies": false,
    189         "answer": false,
    190         "justification": "No data pipeline exists."
    191       }
    192     },
    193     "conflicts_of_interest": {
    194       "funding_disclosed": {
    195         "applies": true,
    196         "answer": false,
    197         "justification": "No funding information is provided. The author is affiliated with O9 Solutions Inc but no funding or support statement is included."
    198       },
    199       "affiliations_disclosed": {
    200         "applies": true,
    201         "answer": true,
    202         "justification": "The author's affiliation with O9 Solutions Inc, Bengaluru, India is listed."
    203       },
    204       "funder_independent_of_outcome": {
    205         "applies": false,
    206         "answer": false,
    207         "justification": "No funding is disclosed; the paper appears to be independent work by a single industry author."
    208       },
    209       "financial_interests_declared": {
    210         "applies": true,
    211         "answer": false,
    212         "justification": "No competing interests statement is present."
    213       }
    214     },
    215     "contamination": {
    216       "training_cutoff_stated": {
    217         "applies": false,
    218         "answer": false,
    219         "justification": "No model evaluation on any benchmark. This is a viewpoint paper."
    220       },
    221       "train_test_overlap_discussed": {
    222         "applies": false,
    223         "answer": false,
    224         "justification": "No benchmark evaluation conducted."
    225       },
    226       "benchmark_contamination_addressed": {
    227         "applies": false,
    228         "answer": false,
    229         "justification": "No benchmark evaluation conducted."
    230       }
    231     },
    232     "human_studies": {
    233       "pre_registered": {
    234         "applies": false,
    235         "answer": false,
    236         "justification": "No human participants."
    237       },
    238       "irb_or_ethics_approval": {
    239         "applies": false,
    240         "answer": false,
    241         "justification": "No human participants."
    242       },
    243       "demographics_reported": {
    244         "applies": false,
    245         "answer": false,
    246         "justification": "No human participants."
    247       },
    248       "inclusion_exclusion_criteria": {
    249         "applies": false,
    250         "answer": false,
    251         "justification": "No human participants."
    252       },
    253       "randomization_described": {
    254         "applies": false,
    255         "answer": false,
    256         "justification": "No human participants."
    257       },
    258       "blinding_described": {
    259         "applies": false,
    260         "answer": false,
    261         "justification": "No human participants."
    262       },
    263       "attrition_reported": {
    264         "applies": false,
    265         "answer": false,
    266         "justification": "No human participants."
    267       }
    268     },
    269     "cost_and_practicality": {
    270       "inference_cost_reported": {
    271         "applies": false,
    272         "answer": false,
    273         "justification": "Viewpoint paper with no method or system to cost."
    274       },
    275       "compute_budget_stated": {
    276         "applies": false,
    277         "answer": false,
    278         "justification": "No computation performed."
    279       }
    280     }
    281   },
    282   "claims": [
    283     {
    284       "claim": "AI demonstrates significant capabilities in automating code generation, enhancing testing strategies, optimizing deployment processes, and enabling proactive maintenance approaches.",
    285       "evidence": "No original evidence provided. The paper describes potential applications narratively and cites references that are largely unrelated to AI-powered software development (e.g., antibody discovery, drug formulation, ultrasonic testing of composites).",
    286       "supported": "unsupported"
    287     },
    288     {
    289       "claim": "Modern AI systems can analyze unstructured business descriptions and automatically generate comprehensive functional requirements.",
    290       "evidence": "Section 2.1 provides a hypothetical example ('We need a customer portal') but no empirical evidence, evaluation, or citation to a relevant study.",
    291       "supported": "unsupported"
    292     },
    293     {
    294       "claim": "Successful AI integration requires gradual implementation strategies, robust quality assurance mechanisms, and maintaining human oversight for critical decisions.",
    295       "evidence": "Section 9 discusses implementation strategies narratively. No empirical evidence supports these specific recommendations over alternatives.",
    296       "supported": "weak"
    297     }
    298   ],
    299   "methodology_tags": ["theoretical"],
    300   "key_findings": "This is a viewpoint paper that narratively describes potential AI applications across SDLC phases (requirements, design, coding, testing, deployment, maintenance). It provides no original experiments, data, or systematic review. The 25 references are almost entirely unrelated to the paper's topic — covering antibody discovery, drug formulation, ultrasonic testing, e-textiles, and plant disease prediction rather than AI-powered software development.",
    301   "red_flags": [
    302     {
    303       "flag": "Irrelevant references",
    304       "detail": "The vast majority of the 25 cited references have no connection to AI-powered software development. Examples include papers on antibody discovery (refs 8, 17), liposomal drug delivery (ref 20), ultrasonic testing of carbon fibre composites (ref 14), e-textile development (ref 22), and plant disease prediction (ref 23). This raises serious questions about whether the references were generated by an AI tool without verification."
    305     },
    306     {
    307       "flag": "No empirical evidence",
    308       "detail": "Despite claiming in the abstract to conduct a 'systematic analysis of recent empirical studies,' the paper presents no data, no experiments, no systematic review methodology, and no evidence beyond narrative assertions."
    309     },
    310     {
    311       "flag": "Unbounded claims",
    312       "detail": "The paper makes sweeping claims about AI capabilities across the entire SDLC without any qualification, evidence, or scope boundaries. Claims like 'paradigmatic shift' and 'significant capabilities' are presented as established facts."
    313     },
    314     {
    315       "flag": "Potential AI-generated content",
    316       "detail": "The combination of fluent but generic prose, irrelevant references that appear to be keyword-matched rather than content-relevant, and lack of any concrete technical depth suggests this paper may have been substantially generated by an AI tool."
    317     }
    318   ],
    319   "cited_papers": [
    320     {
    321       "title": "Reliable machine learning models for estimating effective software development efforts: A comparative analysis",
    322       "authors": ["A. Jadhav", "S. K. Shandilya"],
    323       "year": 2023,
    324       "doi": "10.1016/j.jer.2023.100150",
    325       "relevance": "Directly relevant to AI-assisted software effort estimation."
    326     },
    327     {
    328       "title": "Technical risk model of machine learning based software project development - A multinational empirical study using modified Delphi-AHP method",
    329       "authors": ["C. T. Lin", "S. J. Huang"],
    330       "year": 2024,
    331       "doi": "10.1016/j.infsof.2024.107449",
    332       "relevance": "Addresses risk modeling in ML-based software projects."
    333     },
    334     {
    335       "title": "A deep learning-based automated framework for functional user interface testing",
    336       "authors": ["Z. Khaliq", "S. U. Farooq", "D. A. Khan"],
    337       "year": 2022,
    338       "doi": "10.1016/j.infsof.2022.106969",
    339       "relevance": "Deep learning applied to automated UI testing."
    340     },
    341     {
    342       "title": "Predicting continuous integration build failures using evolutionary search",
    343       "authors": ["I. Saidani", "A. Ouni", "M. Chouchen", "M. W. Mkaouer"],
    344       "year": 2020,
    345       "doi": "10.1016/j.infsof.2020.106392",
    346       "relevance": "ML-based prediction of CI build failures, relevant to AI in DevOps."
    347     }
    348   ]
    349 }

Impressum · Datenschutz