ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (21529B)


      1 {
      2   "paper": {
      3     "title": "Integrating AI-Driven Automated Code Review in Agile Development: Benefits, Challenges, and Best Practices",
      4     "authors": ["Saad Ahmed"],
      5     "year": 2025,
      6     "venue": "International Journal of Advanced Engineering, Management and Science (IJAEMS)",
      7     "doi": "10.22161/ijaems.112.1"
      8   },
      9   "scan_version": 3,
     10   "active_modules": [],
     11   "methodology_tags": ["qualitative", "case-study"],
     12   "key_findings": "The paper claims that AI-driven automated code review tools reduce review time, improve code quality, and enhance developer productivity in Agile environments. It identifies accuracy limitations, legacy system integration difficulties, and developer resistance as key challenges. However, the paper provides no actual quantitative data, survey results, interview transcripts, or experimental measurements despite claiming a mixed-methods approach. The 'results' consist entirely of generic bullet points and qualitative summary tables with no empirical content.",
     13   "checklist": {
     14     "artifacts": {
     15       "code_released": {
     16         "applies": true,
     17         "answer": false,
     18         "justification": "No code, scripts, survey instruments, or analysis tools are released. No repository URL is provided."
     19       },
     20       "data_released": {
     21         "applies": true,
     22         "answer": false,
     23         "justification": "No survey data, interview transcripts, case study data, or experimental measurements are released or made available."
     24       },
     25       "environment_specified": {
     26         "applies": true,
     27         "answer": false,
     28         "justification": "No environment or tool specifications are provided. The paper does not name the specific AI code review tool used in the claimed experimental evaluation."
     29       },
     30       "reproduction_instructions": {
     31         "applies": true,
     32         "answer": false,
     33         "justification": "No reproduction instructions are provided. The methodology section describes intended methods at a high level but gives no actionable details."
     34       }
     35     },
     36     "statistical_methodology": {
     37       "confidence_intervals_or_error_bars": {
     38         "applies": true,
     39         "answer": false,
     40         "justification": "The methodology section claims quantitative analysis with 'descriptive analysis and inferential testing' but the results section contains no numerical data whatsoever — no confidence intervals, no error bars, no numbers at all."
     41       },
     42       "significance_tests": {
     43         "applies": true,
     44         "answer": false,
     45         "justification": "Section 3.3B claims 'hypothesis testing to compare the effectiveness of AI-driven and manual code review processes' but no hypothesis tests, p-values, or test statistics appear anywhere in the paper."
     46       },
     47       "effect_sizes_reported": {
     48         "applies": true,
     49         "answer": false,
     50         "justification": "No effect sizes are reported. The paper makes claims about 'significant' reductions in review time but provides no quantitative magnitude."
     51       },
     52       "sample_size_justified": {
     53         "applies": true,
     54         "answer": false,
     55         "justification": "Despite claiming surveys, interviews, and an experimental evaluation, no sample sizes are stated anywhere. No number of participants, respondents, or case study organizations is mentioned."
     56       },
     57       "variance_reported": {
     58         "applies": true,
     59         "answer": false,
     60         "justification": "No variance, standard deviation, or any measure of spread is reported. No quantitative results of any kind are presented."
     61       }
     62     },
     63     "evaluation_design": {
     64       "baselines_included": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "The methodology claims to compare AI-driven vs manual code review in a controlled experiment, but no actual comparison data is presented. Tables 1-3 contain only qualitative descriptions."
     68       },
     69       "baselines_contemporary": {
     70         "applies": true,
     71         "answer": false,
     72         "justification": "No baselines are presented at all. The paper does not name any specific AI code review tools evaluated or any specific manual review process compared against."
     73       },
     74       "ablation_study": {
     75         "applies": false,
     76         "answer": false,
     77         "justification": "The paper does not propose or evaluate a system with components that could be ablated."
     78       },
     79       "multiple_metrics": {
     80         "applies": true,
     81         "answer": false,
     82         "justification": "Section 3.2 mentions 'defect detection rates, review turnaround time, and developer productivity' as planned metrics, but none are actually reported with any values."
     83       },
     84       "human_evaluation": {
     85         "applies": true,
     86         "answer": false,
     87         "justification": "The paper claims surveys and interviews were conducted but provides no actual results from these — no response counts, no quotes, no thematic analysis output, no survey statistics."
     88       },
     89       "held_out_test_set": {
     90         "applies": false,
     91         "answer": false,
     92         "justification": "Not applicable — this is not a benchmark or ML evaluation study."
     93       },
     94       "per_category_breakdown": {
     95         "applies": true,
     96         "answer": false,
     97         "justification": "No per-category or per-task breakdowns are provided. Tables 1-3 list categories (benefits, challenges, best practices) but contain only prose descriptions, not data."
     98       },
     99       "failure_cases_discussed": {
    100         "applies": true,
    101         "answer": false,
    102         "justification": "Challenges are discussed generically (accuracy limitations, legacy systems, resistance) but no specific failure cases from the claimed studies are presented."
    103       },
    104       "negative_results_reported": {
    105         "applies": true,
    106         "answer": false,
    107         "justification": "No specific negative results from the authors' own claimed research are reported. The challenges section reads like a literature review, not empirical findings."
    108       }
    109     },
    110     "claims_and_evidence": {
    111       "abstract_claims_supported": {
    112         "applies": true,
    113         "answer": false,
    114         "justification": "The abstract claims 'surveys and interviews were conducted with software engineers, DevOps professionals, and Agile practitioners' and 'an experimental study measured performance indicators such as error detection rates, review efficiency, and developer workflow improvements.' None of these are substantiated in the paper — no survey results, no interview data, no experimental measurements appear."
    115       },
    116       "causal_claims_justified": {
    117         "applies": true,
    118         "answer": false,
    119         "justification": "The paper makes causal claims such as 'AI tools significantly reduced manual code review time' (Table 1) and 'AI tools significantly reduce code review time, enhance consistency' (abstract) without any causal design, data, or evidence. No controlled experiment results are shown despite claiming one was conducted."
    120       },
    121       "generalization_bounded": {
    122         "applies": true,
    123         "answer": false,
    124         "justification": "The paper makes broad claims about AI-driven code review in Agile development generally without bounding to any specific tools, organizations, programming languages, or tested settings. No specific context is described for the claimed studies."
    125       },
    126       "alternative_explanations_discussed": {
    127         "applies": true,
    128         "answer": false,
    129         "justification": "No alternative explanations for the claimed findings are discussed. The paper presents AI tool benefits and challenges as straightforward observations without considering confounds or alternatives."
    130       },
    131       "proxy_outcome_distinction": {
    132         "applies": true,
    133         "answer": false,
    134         "justification": "The paper claims to measure 'developer productivity' and 'code quality' but never defines what was actually measured as a proxy for these broad constructs. No distinction between proxy measures and claimed outcomes is made."
    135       }
    136     },
    137     "setup_transparency": {
    138       "model_versions_specified": {
    139         "applies": false,
    140         "answer": false,
    141         "justification": "The paper discusses AI code review tools generically without evaluating any specific model or tool. No specific AI system is named or tested."
    142       },
    143       "prompts_provided": {
    144         "applies": false,
    145         "answer": false,
    146         "justification": "No prompting is involved. The paper does not use or evaluate any specific AI system."
    147       },
    148       "hyperparameters_reported": {
    149         "applies": false,
    150         "answer": false,
    151         "justification": "No specific AI tools are configured or evaluated, so no hyperparameters are relevant."
    152       },
    153       "scaffolding_described": {
    154         "applies": false,
    155         "answer": false,
    156         "justification": "No agentic scaffolding is used or evaluated in this paper."
    157       },
    158       "data_preprocessing_documented": {
    159         "applies": true,
    160         "answer": false,
    161         "justification": "The methodology claims data collection through surveys, interviews, case studies, and experiments but provides no details on data preprocessing, coding procedures, or analysis steps."
    162       }
    163     },
    164     "limitations_and_scope": {
    165       "limitations_section_present": {
    166         "applies": true,
    167         "answer": false,
    168         "justification": "There is no dedicated limitations section. The paper has Introduction, Review of Literature, Methodology, Results, Conclusion, and Recommendations — none of which contain a limitations discussion."
    169       },
    170       "threats_to_validity_specific": {
    171         "applies": true,
    172         "answer": false,
    173         "justification": "No threats to validity are discussed anywhere in the paper."
    174       },
    175       "scope_boundaries_stated": {
    176         "applies": true,
    177         "answer": false,
    178         "justification": "No scope boundaries are stated. The paper does not specify what settings, organizations, tools, or populations its claims do or do not apply to."
    179       }
    180     },
    181     "data_integrity": {
    182       "raw_data_available": {
    183         "applies": true,
    184         "answer": false,
    185         "justification": "No raw data is available. No survey responses, interview recordings, case study documents, or experimental data are provided or referenced."
    186       },
    187       "data_collection_described": {
    188         "applies": true,
    189         "answer": false,
    190         "justification": "Section 3.2 describes planned data collection methods (surveys, interviews, case studies, experiments) at a very high level but provides no specifics: no survey instrument, no interview guide, no number of participants, no case study selection criteria, no experimental setup details."
    191       },
    192       "recruitment_methods_described": {
    193         "applies": true,
    194         "answer": false,
    195         "justification": "The paper claims surveys and interviews with 'software developers, DevOps engineers, and Agile practitioners across different sectors' but does not describe how participants were recruited, from which organizations, through what channels, or how many."
    196       },
    197       "data_pipeline_documented": {
    198         "applies": true,
    199         "answer": false,
    200         "justification": "No data pipeline is documented. The paper jumps from describing planned methods to presenting generic 'results' without showing any intermediate data processing steps."
    201       }
    202     },
    203     "conflicts_of_interest": {
    204       "funding_disclosed": {
    205         "applies": true,
    206         "answer": false,
    207         "justification": "No funding disclosure or acknowledgments section appears in the paper."
    208       },
    209       "affiliations_disclosed": {
    210         "applies": true,
    211         "answer": true,
    212         "justification": "The author's affiliation is listed: 'Department of Information Technology, Sir Syed University of Engineering and Technology.'"
    213       },
    214       "funder_independent_of_outcome": {
    215         "applies": true,
    216         "answer": false,
    217         "justification": "No funding information is provided, making it impossible to assess funder independence."
    218       },
    219       "financial_interests_declared": {
    220         "applies": true,
    221         "answer": false,
    222         "justification": "No competing interests or financial interests statement appears in the paper."
    223       }
    224     },
    225     "contamination": {
    226       "training_cutoff_stated": {
    227         "applies": false,
    228         "answer": false,
    229         "justification": "The paper does not evaluate any specific pre-trained model on a benchmark. It discusses AI code review tools generically."
    230       },
    231       "train_test_overlap_discussed": {
    232         "applies": false,
    233         "answer": false,
    234         "justification": "No pre-trained model is evaluated on any benchmark."
    235       },
    236       "benchmark_contamination_addressed": {
    237         "applies": false,
    238         "answer": false,
    239         "justification": "No benchmark evaluation is conducted."
    240       }
    241     },
    242     "human_studies": {
    243       "pre_registered": {
    244         "applies": true,
    245         "answer": false,
    246         "justification": "The paper claims surveys and interviews with human participants but no pre-registration is mentioned."
    247       },
    248       "irb_or_ethics_approval": {
    249         "applies": true,
    250         "answer": false,
    251         "justification": "No IRB or ethics board approval is mentioned despite claiming to have conducted surveys and interviews with human participants."
    252       },
    253       "demographics_reported": {
    254         "applies": true,
    255         "answer": false,
    256         "justification": "No participant demographics are reported. The paper mentions 'software developers, DevOps engineers, and Agile practitioners' as target populations but provides no actual demographic data."
    257       },
    258       "inclusion_exclusion_criteria": {
    259         "applies": true,
    260         "answer": false,
    261         "justification": "No inclusion or exclusion criteria for participants are stated."
    262       },
    263       "randomization_described": {
    264         "applies": true,
    265         "answer": false,
    266         "justification": "The methodology claims a 'controlled experiment' but no randomization procedure is described."
    267       },
    268       "blinding_described": {
    269         "applies": true,
    270         "answer": false,
    271         "justification": "No blinding procedures are described for any of the claimed studies."
    272       },
    273       "attrition_reported": {
    274         "applies": true,
    275         "answer": false,
    276         "justification": "No participant counts are reported at all, so attrition cannot be assessed."
    277       }
    278     },
    279     "cost_and_practicality": {
    280       "inference_cost_reported": {
    281         "applies": false,
    282         "answer": false,
    283         "justification": "This is a qualitative/survey-type paper that does not propose or evaluate its own AI method. Cost is not applicable."
    284       },
    285       "compute_budget_stated": {
    286         "applies": false,
    287         "answer": false,
    288         "justification": "This is a qualitative/survey-type paper with no computational methods of its own."
    289       }
    290     }
    291   },
    292   "claims": [
    293     {
    294       "claim": "AI tools significantly reduced manual code review time by automating repetitive tasks.",
    295       "evidence": "Stated in Table 1 and Section 4.1 as a finding, but no quantitative measurements, time data, or comparison data are presented.",
    296       "supported": "unsupported"
    297     },
    298     {
    299       "claim": "AI tools provided consistent suggestions, reducing errors and vulnerabilities, improving code quality.",
    300       "evidence": "Stated in Table 1 and Section 4.1 as a finding, but no error rates, vulnerability counts, or quality metrics are presented.",
    301       "supported": "unsupported"
    302     },
    303     {
    304       "claim": "Developers were freed from tedious reviews, focusing more on feature development, enhancing productivity.",
    305       "evidence": "Stated in Table 1 and Section 4.1 as a finding, but no productivity measurements are presented.",
    306       "supported": "unsupported"
    307     },
    308     {
    309       "claim": "AI tools occasionally missed complex issues or failed to offer context-sensitive suggestions.",
    310       "evidence": "Stated in Table 2 and Section 4.2 as a finding. Consistent with known limitations of AI tools from the literature, but no data from the claimed study is presented.",
    311       "supported": "weak"
    312     },
    313     {
    314       "claim": "A mixed-methods study was conducted combining surveys, interviews, case studies, and experimental evaluation.",
    315       "evidence": "Section 3 describes the planned methodology, but the results section (Section 4) contains no quantitative data, no interview quotes, no case study details, and no experimental measurements. The claimed methodology appears not to have been executed.",
    316       "supported": "unsupported"
    317     }
    318   ],
    319   "red_flags": [
    320     {
    321       "flag": "Ghost methodology",
    322       "detail": "The paper describes an elaborate mixed-methods approach (surveys, interviews, case studies, controlled experiments) in Section 3 but presents zero empirical data in the results. No sample sizes, no survey responses, no interview quotes, no experimental measurements, no statistical tests. The 'results' are entirely generic prose and qualitative summary tables."
    323     },
    324     {
    325       "flag": "Irrelevant references / citation padding",
    326       "detail": "At least 8 of 21 references are completely unrelated to AI, software engineering, or code review. References [2] and [3] are about probiotics and non-communicable disease prevention. References [4] and [7] are about customer experience in the health industry. References [5] and [8] are about bioenergy economics. Reference [6] is about business leadership in marketing. Reference [9] is about agricultural economics. Several are also duplicates ([5]=[8], [6]=[12], [4]=[7], [11]=[14]). This pattern is consistent with paper mill practices."
    327     },
    328     {
    329       "flag": "Claims significantly outrun evidence",
    330       "detail": "The paper makes strong claims ('AI tools significantly reduce code review time', 'enhance consistency', 'measured performance indicators') but provides no supporting data. Every claim in the results section is a restatement of generic knowledge from the literature, not an empirical finding."
    331     },
    332     {
    333       "flag": "No actual participants or sample",
    334       "detail": "Despite claiming to have conducted surveys and interviews with 'software engineers, DevOps professionals, and Agile practitioners,' the paper never states how many people participated, from which organizations, or what the response rates were."
    335     },
    336     {
    337       "flag": "Possible paper mill characteristics",
    338       "detail": "The combination of irrelevant and duplicate references, a described-but-not-executed methodology, generic results that read like a literature summary, and publication in a low-impact journal raises concerns about paper mill practices."
    339     }
    340   ],
    341   "cited_papers": [
    342     {
    343       "title": "Artificial Intelligence in Software Engineering: A Systematic Exploration of AI-Driven Development",
    344       "authors": ["S. Kumar"],
    345       "year": 2024,
    346       "relevance": "Systematic exploration of AI-driven software development practices, directly relevant to AI-assisted coding tools."
    347     },
    348     {
    349       "title": "AI-Driven Innovations in Software Engineering: A Review of Current Practices and Future Directions",
    350       "authors": ["M. Alenezi", "M. Akour"],
    351       "year": 2025,
    352       "relevance": "Review of AI innovations in software engineering including code review and quality assurance."
    353     },
    354     {
    355       "title": "AI-Driven Developer Performance Metrics: Enhancing Agile Software Development",
    356       "authors": ["R. Noor", "G. Talavera"],
    357       "year": 2025,
    358       "relevance": "Examines AI-driven metrics for developer performance in Agile, relevant to productivity measurement."
    359     },
    360     {
    361       "title": "AI-Driven Automation in Agile Development: Multi-Agent LLMs for Software Engineering",
    362       "authors": ["S. Khan", "M. Daviglus"],
    363       "year": 2025,
    364       "relevance": "Studies multi-agent LLM systems for automating Agile software engineering tasks including code review."
    365     },
    366     {
    367       "title": "Integrating AI into Agile Workflows: Opportunities and Challenges",
    368       "authors": ["Z. Jin"],
    369       "year": 2024,
    370       "relevance": "Directly examines AI integration into Agile development workflows."
    371     },
    372     {
    373       "title": "AI-Driven Automation in DevOps: Enhancing Continuous Integration and Deployment",
    374       "authors": ["B. C. Vadde", "V. B. Munagandla"],
    375       "year": 2022,
    376       "relevance": "Examines AI automation in CI/CD pipelines, relevant to automated code review in DevOps workflows."
    377     }
    378   ],
    379   "engagement_factors": {
    380     "practical_relevance": {
    381       "score": 1,
    382       "justification": "Discusses a practical topic (AI code review in Agile) but provides no actionable tools, data, or specific implementation guidance."
    383     },
    384     "surprise_contrarian": {
    385       "score": 0,
    386       "justification": "Confirms conventional wisdom that AI code review is useful but imperfect; nothing surprising or contrarian."
    387     },
    388     "fear_safety": {
    389       "score": 0,
    390       "justification": "No AI safety, security, or risk concerns are raised."
    391     },
    392     "drama_conflict": {
    393       "score": 0,
    394       "justification": "No controversy, no challenges to other work, no provocative claims."
    395     },
    396     "demo_ability": {
    397       "score": 0,
    398       "justification": "No code, tool, demo, or artifact of any kind is provided."
    399     },
    400     "brand_recognition": {
    401       "score": 0,
    402       "justification": "Unknown author from a lesser-known university, published in a low-impact journal (IJAEMS)."
    403     }
    404   }
    405 }

Impressum · Datenschutz