scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (21377B)
      1 {
      2   "paper": {
      3     "title": "Review of AI-Driven Approaches for Automated Defect Detection and Classification in Software Testing",
      4     "authors": ["Alex Thomas Thomas"],
      5     "year": 2025,
      6     "venue": "International Journal of Research and Review",
      7     "doi": "10.52403/ijrr.20250619"
      8   },
      9   "scan_version": 3,
     10   "active_modules": ["survey_methodology"],
     11   "methodology_tags": ["meta-analysis"],
     12   "key_findings": "This narrative review summarizes AI-driven approaches to automated software defect detection and classification, covering machine learning, deep learning, NLP, and cross-domain transfer learning techniques. The paper categorizes approaches by technique type (CNNs, RNNs, transformers), application domain (source code, PCB, test generation), and proposes future directions including explainable AI and transfer learning. However, the review is unsystematic, covers only 20 references, provides no quality assessment of sources, and multiple references contain fabricated page numbers (e.g., pp. 123456–123467, pp. 98765–98775), raising serious integrity concerns.",
     13   "checklist": {
     14     "artifacts": {
     15       "code_released": {
     16         "applies": true,
     17         "answer": false,
     18         "justification": "No code, analysis scripts, or repository links are provided. A survey can release analysis code or search corpus, but none is offered."
     19       },
     20       "data_released": {
     21         "applies": true,
     22         "answer": false,
     23         "justification": "No dataset, search corpus, or extracted data tables are released. The paper does not provide any downloadable or supplementary data."
     24       },
     25       "environment_specified": {
     26         "applies": true,
     27         "answer": false,
     28         "justification": "No environment or tool specifications are provided. There is no description of any software used for the review process."
     29       },
     30       "reproduction_instructions": {
     31         "applies": true,
     32         "answer": false,
     33         "justification": "No reproduction instructions are provided. There is no description of how to replicate the literature search or analysis."
     34       }
     35     },
     36     "statistical_methodology": {
     37       "confidence_intervals_or_error_bars": {
     38         "applies": false,
     39         "answer": false,
     40         "justification": "This is a narrative review with no quantitative analysis or experiments. No statistical results are reported."
     41       },
     42       "significance_tests": {
     43         "applies": false,
     44         "answer": false,
     45         "justification": "No experiments or comparative statistical claims are made by the authors themselves. The paper is a narrative summary."
     46       },
     47       "effect_sizes_reported": {
     48         "applies": false,
     49         "answer": false,
     50         "justification": "No experiments are conducted. The paper does not report any effect sizes from its own analysis."
     51       },
     52       "sample_size_justified": {
     53         "applies": false,
     54         "answer": false,
     55         "justification": "No experiments are conducted. The number of reviewed papers (20) is not justified or discussed."
     56       },
     57       "variance_reported": {
     58         "applies": false,
     59         "answer": false,
     60         "justification": "No experiments are conducted and no quantitative aggregation is performed."
     61       }
     62     },
     63     "evaluation_design": {
     64       "baselines_included": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "The paper does not compare its review against prior surveys or reviews in the same domain. No baseline comparison of any kind is provided."
     68       },
     69       "baselines_contemporary": {
     70         "applies": true,
     71         "answer": false,
     72         "justification": "No baselines of any kind are included, so contemporaneity cannot be assessed."
     73       },
     74       "ablation_study": {
     75         "applies": false,
     76         "answer": false,
     77         "justification": "This is a survey paper with no system or components to ablate."
     78       },
     79       "multiple_metrics": {
     80         "applies": false,
     81         "answer": false,
     82         "justification": "No experiments are conducted by the authors. No metrics are reported from the authors' own evaluation."
     83       },
     84       "human_evaluation": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "No experiments are conducted. Human evaluation is not applicable to this narrative review."
     88       },
     89       "held_out_test_set": {
     90         "applies": false,
     91         "answer": false,
     92         "justification": "No experiments are conducted. No datasets are used by the authors."
     93       },
     94       "per_category_breakdown": {
     95         "applies": true,
     96         "answer": true,
     97         "justification": "The paper organizes its review by approach category: ML/DL for defect detection, intelligent source code analysis, automated test generation, defect classification, cross-domain AI, NLP-based testing, and integrated frameworks. Table 1 compares AI vs. traditional approaches across five aspects."
     98       },
     99       "failure_cases_discussed": {
    100         "applies": true,
    101         "answer": false,
    102         "justification": "The paper mentions generic challenges (data availability, interpretability, integration complexity) but does not discuss specific failure cases of any reviewed approach."
    103       },
    104       "negative_results_reported": {
    105         "applies": true,
    106         "answer": false,
    107         "justification": "All reviewed work is presented positively. No negative results, failed approaches, or limitations of specific methods are reported."
    108       }
    109     },
    110     "claims_and_evidence": {
    111       "abstract_claims_supported": {
    112         "applies": true,
    113         "answer": true,
    114         "justification": "The abstract's claims (AI techniques enhance defect detection, VulDeePecker shows improvements, challenges persist regarding data availability and interpretability) are consistent with the narrative presented in the paper body. However, the support is entirely derivative — just restating what cited papers claim."
    115       },
    116       "causal_claims_justified": {
    117         "applies": true,
    118         "answer": false,
    119         "justification": "The paper makes numerous causal claims: 'these technologies enhance accuracy, efficiency, and scalability,' 'AI-based methods simplify the job by automatically producing viable test inputs.' These causal claims are asserted without critical evaluation of the causal evidence in the cited studies."
    120       },
    121       "generalization_bounded": {
    122         "applies": true,
    123         "answer": false,
    124         "justification": "The title claims a comprehensive 'Review of AI-Driven Approaches' but the paper covers only 20 references. No scope boundaries are stated regarding which domains, time periods, or publication venues were covered. The conclusions generalize broadly without bounding."
    125       },
    126       "alternative_explanations_discussed": {
    127         "applies": true,
    128         "answer": false,
    129         "justification": "No alternative explanations are considered for any of the findings discussed. The paper does not consider whether improvements attributed to AI could have other explanations (e.g., better engineering, more data, selection bias in reported results)."
    130       },
    131       "proxy_outcome_distinction": {
    132         "applies": true,
    133         "answer": false,
    134         "justification": "The paper freely equates proxy metrics with outcomes — e.g., claiming AI 'enhances accuracy, efficiency, and scalability' without distinguishing what was actually measured in the cited studies vs. the broader claims being made about software quality improvement."
    135       }
    136     },
    137     "setup_transparency": {
    138       "model_versions_specified": {
    139         "applies": false,
    140         "answer": false,
    141         "justification": "This is a survey paper. No AI models are used by the authors themselves."
    142       },
    143       "prompts_provided": {
    144         "applies": false,
    145         "answer": false,
    146         "justification": "This is a survey paper. No prompting is used."
    147       },
    148       "hyperparameters_reported": {
    149         "applies": false,
    150         "answer": false,
    151         "justification": "This is a survey paper. No experiments with hyperparameters are conducted."
    152       },
    153       "scaffolding_described": {
    154         "applies": false,
    155         "answer": false,
    156         "justification": "This is a survey paper. No agentic scaffolding is used."
    157       },
    158       "data_preprocessing_documented": {
    159         "applies": true,
    160         "answer": false,
    161         "justification": "No paper selection pipeline is documented. There is no description of which databases were searched, what search terms were used, what inclusion/exclusion criteria were applied, or how many papers were screened at each stage."
    162       }
    163     },
    164     "limitations_and_scope": {
    165       "limitations_section_present": {
    166         "applies": true,
    167         "answer": false,
    168         "justification": "There is no dedicated limitations section. The paper mentions generic challenges (data availability, interpretability, integration) scattered through the text, but these are presented as challenges in the field, not limitations of the review itself."
    169       },
    170       "threats_to_validity_specific": {
    171         "applies": true,
    172         "answer": false,
    173         "justification": "No threats to validity are discussed. The paper does not acknowledge any limitations of its review methodology, selection bias, or potential gaps in coverage."
    174       },
    175       "scope_boundaries_stated": {
    176         "applies": true,
    177         "answer": false,
    178         "justification": "No scope boundaries are explicitly stated. The paper does not specify which time periods, databases, or types of literature are included or excluded from the review."
    179       }
    180     },
    181     "data_integrity": {
    182       "raw_data_available": {
    183         "applies": true,
    184         "answer": false,
    185         "justification": "No raw data, search results, or extracted information is made available for verification."
    186       },
    187       "data_collection_described": {
    188         "applies": true,
    189         "answer": false,
    190         "justification": "The paper selection process is entirely undocumented. There is no description of how the 20 reviewed papers were identified or selected."
    191       },
    192       "recruitment_methods_described": {
    193         "applies": false,
    194         "answer": false,
    195         "justification": "No human participants are involved. This is a literature review."
    196       },
    197       "data_pipeline_documented": {
    198         "applies": true,
    199         "answer": false,
    200         "justification": "No data pipeline is documented. The paper goes directly from introduction to reviewing individual papers with no description of the review process."
    201       }
    202     },
    203     "conflicts_of_interest": {
    204       "funding_disclosed": {
    205         "applies": true,
    206         "answer": true,
    207         "justification": "The paper explicitly states 'Source of Funding: None' in the declaration section."
    208       },
    209       "affiliations_disclosed": {
    210         "applies": true,
    211         "answer": true,
    212         "justification": "The author's affiliation with 'Saransh Inc, Princeton, NJ, USA' is disclosed. The paper does not evaluate products from this company."
    213       },
    214       "funder_independent_of_outcome": {
    215         "applies": false,
    216         "answer": false,
    217         "justification": "The paper is unfunded (explicitly stated as 'Source of Funding: None'), so funder independence is not applicable."
    218       },
    219       "financial_interests_declared": {
    220         "applies": true,
    221         "answer": true,
    222         "justification": "The paper includes a declaration stating 'Conflict of Interest: No conflicts of interest declared.'"
    223       }
    224     },
    225     "contamination": {
    226       "training_cutoff_stated": {
    227         "applies": false,
    228         "answer": false,
    229         "justification": "This is a survey paper that does not evaluate any pre-trained model on a benchmark."
    230       },
    231       "train_test_overlap_discussed": {
    232         "applies": false,
    233         "answer": false,
    234         "justification": "This is a survey paper that does not evaluate any pre-trained model on a benchmark."
    235       },
    236       "benchmark_contamination_addressed": {
    237         "applies": false,
    238         "answer": false,
    239         "justification": "This is a survey paper that does not evaluate any pre-trained model on a benchmark."
    240       }
    241     },
    242     "human_studies": {
    243       "pre_registered": {
    244         "applies": false,
    245         "answer": false,
    246         "justification": "No human participants are involved in this survey paper."
    247       },
    248       "irb_or_ethics_approval": {
    249         "applies": false,
    250         "answer": false,
    251         "justification": "No human participants are involved in this survey paper."
    252       },
    253       "demographics_reported": {
    254         "applies": false,
    255         "answer": false,
    256         "justification": "No human participants are involved in this survey paper."
    257       },
    258       "inclusion_exclusion_criteria": {
    259         "applies": false,
    260         "answer": false,
    261         "justification": "No human participants are involved in this survey paper."
    262       },
    263       "randomization_described": {
    264         "applies": false,
    265         "answer": false,
    266         "justification": "No human participants are involved in this survey paper."
    267       },
    268       "blinding_described": {
    269         "applies": false,
    270         "answer": false,
    271         "justification": "No human participants are involved in this survey paper."
    272       },
    273       "attrition_reported": {
    274         "applies": false,
    275         "answer": false,
    276         "justification": "No human participants are involved in this survey paper."
    277       }
    278     },
    279     "cost_and_practicality": {
    280       "inference_cost_reported": {
    281         "applies": false,
    282         "answer": false,
    283         "justification": "This is a survey paper with no experiments or methods of its own to cost."
    284       },
    285       "compute_budget_stated": {
    286         "applies": false,
    287         "answer": false,
    288         "justification": "This is a survey paper with no computational experiments."
    289       }
    290     },
    291     "survey_methodology": {
    292       "prisma_or_structured_protocol": {
    293         "applies": true,
    294         "answer": false,
    295         "justification": "No PRISMA flow diagram, no structured review protocol, no reproducible search strategy, no registered protocol. The paper is an ad-hoc narrative review with no systematic methodology described."
    296       },
    297       "quality_assessment_of_sources": {
    298         "applies": true,
    299         "answer": false,
    300         "justification": "No quality assessment of any kind is applied to the reviewed papers. All 20 cited works are treated as equally valid without any evaluation of their methodological rigor or risk of bias."
    301       },
    302       "publication_bias_discussed": {
    303         "applies": true,
    304         "answer": false,
    305         "justification": "Publication bias is never mentioned. The paper does not consider that reviewed studies may skew positive or that negative results may be underrepresented in the literature."
    306       }
    307     }
    308   },
    309   "claims": [
    310     {
    311       "claim": "Deep learning frameworks like VulDeePecker have revealed significant vulnerability detection improvements through automated static and dynamic code analysis.",
    312       "evidence": "Cites Li et al. [2] (VulDeePecker) and Zhang et al. [14] as demonstrating that deep learning models can detect defects in source code. No independent verification or quantitative synthesis provided.",
    313       "supported": "weak"
    314     },
    315     {
    316       "claim": "AI-based test automation tools show measurable improvements in testing efficiency, especially in CI/CD pipelines.",
    317       "evidence": "Cites Garousi et al. [5] who 'conducted a systematic review and evaluation of such tools and showed measurable improvements in testing efficiency.' The claim is entirely derivative with no independent analysis.",
    318       "supported": "weak"
    319     },
    320     {
    321       "claim": "AI-driven approaches outperform traditional approaches across accuracy, automation, scalability, adaptability, and vulnerability detection.",
    322       "evidence": "Table 1 presents a qualitative comparison of AI vs. traditional approaches across five aspects. No quantitative data, benchmarks, or citations support the specific claims in the table.",
    323       "supported": "unsupported"
    324     },
    325     {
    326       "claim": "Cross-domain AI techniques from hardware defect detection (PCBs, welding) can be applied to software testing through model transfer.",
    327       "evidence": "Cites PCB defect detection [9,10] and welding quality assessment [13] papers, claiming approaches like Faster R-CNN 'have been ported to software artifacts.' No evidence of actual porting or transfer is provided.",
    328       "supported": "unsupported"
    329     }
    330   ],
    331   "red_flags": [
    332     {
    333       "flag": "Fabricated reference details",
    334       "detail": "Multiple references have obviously fabricated page numbers: ref [15] 'pp. 12345–12358', ref [16] 'pp. 123456–123467', ref [17] 'pp. 98765–98775', ref [19] 'pp. 112233–112245', ref [20] 'pp. 1234–1246'. These sequential/round numbers cannot correspond to real IEEE publications. Several of these references may be entirely fictional."
    335     },
    336     {
    337       "flag": "No systematic search methodology",
    338       "detail": "For a paper titled as a 'review,' there is no description of databases searched, search terms used, inclusion/exclusion criteria, screening process, or date range. The paper selection appears entirely ad hoc."
    339     },
    340     {
    341       "flag": "Extremely thin reference base",
    342       "detail": "The paper cites only 20 references while claiming to provide a 'comprehensive perspective' on AI-driven defect detection and classification. Systematic reviews in this space typically cite 50 to 200+ papers."
    343     },
    344     {
    345       "flag": "Uncritical narrative laundering",
    346       "detail": "The paper uncritically summarizes all cited work as successful without any quality assessment, so strong and weak sources are presented as equally credible and the signal-to-noise ratio of the underlying literature is obscured. No reviewed paper is critically evaluated or found to have limitations."
    347     },
    348     {
    349       "flag": "Claims significantly outrun evidence",
    350       "detail": "The paper makes sweeping claims about AI superiority over traditional testing (Table 1) and the feasibility of cross-domain transfer from hardware to software testing, but provides no original analysis, meta-analytic synthesis, or quantitative evidence to support these conclusions."
    351     }
    352   ],
    353   "cited_papers": [
    354     {
    355       "title": "VulDeePecker: A Deep Learning-Based System for Vulnerability Detection",
    356       "authors": ["Z. Li", "C. Sun", "Y. Liang"],
    357       "year": 2018,
    358       "arxiv_id": "1801.01681",
    359       "relevance": "Seminal work on deep learning for automated vulnerability detection in source code, directly relevant to AI-driven software quality."
    360     },
    361     {
    362       "title": "A Survey on Machine Learning Techniques for Source Code Analysis",
    363       "authors": ["T. Sharma", "A. K. Maurya", "P. Raj"],
    364       "year": 2021,
    365       "arxiv_id": "2110.09610",
    366       "relevance": "Survey of ML techniques applied to source code analysis, covering defect prediction and code understanding approaches."
    367     },
    368     {
    369       "title": "The Role of Artificial Intelligence and Machine Learning in Software Testing",
    370       "authors": ["A. Ramadan", "H. Yasin", "B. Pektas"],
    371       "year": 2024,
    372       "arxiv_id": "2409.02693",
    373       "relevance": "Review of AI/ML applications in software testing, relevant to understanding the landscape of AI-driven testing approaches."
    374     },
    375     {
    376       "title": "AI-powered test automation tools: A systematic review and empirical evaluation",
    377       "authors": ["V. Garousi", "N. Joy", "A. B. Keleş"],
    378       "year": 2024,
    379       "arxiv_id": "2409.00411",
    380       "relevance": "Systematic review with empirical evaluation of AI-powered test automation tools, directly relevant to the survey scope."
    381     },
    382     {
    383       "title": "AI-Powered Software Testing: A Novel Framework for Enhancing Bug Detection and Code Reliability",
    384       "authors": ["O. I. Al Mrayat", "M. Jawarneh", "D. Ibrahim", "A. Altrad"],
    385       "year": 2024,
    386       "relevance": "Proposes an AI-powered framework for software testing that aims to enhance bug detection and code reliability."
    387     },
    388     {
    389       "title": "Artificial Intelligence in Software Testing for Emerging Fields: A Review of Technical Applications and Developments",
    390       "authors": ["Y. Ding"],
    391       "year": 2024,
    392       "relevance": "Reviews AI applications in software testing for emerging domains like IoT and autonomous systems."
    393     }
    394   ],
    395   "engagement_factors": {
    396     "practical_relevance": {
    397       "score": 1,
    398       "justification": "Provides a high-level overview of AI testing approaches but no actionable tools, code, or specific implementation guidance."
    399     },
    400     "surprise_contrarian": {
    401       "score": 0,
    402       "justification": "Confirms the expected narrative that AI improves software testing without any surprising or contrarian findings."
    403     },
    404     "fear_safety": {
    405       "score": 0,
    406       "justification": "No AI risk, security, or safety concerns are raised."
    407     },
    408     "drama_conflict": {
    409       "score": 0,
    410       "justification": "No controversy, no critical evaluation of any work, entirely non-confrontational."
    411     },
    412     "demo_ability": {
    413       "score": 0,
    414       "justification": "No code, tools, demos, or reproducible artifacts of any kind are provided."
    415     },
    416     "brand_recognition": {
    417       "score": 0,
    418       "justification": "Unknown author from an unknown company (Saransh Inc), published in a low-tier venue (IJRR)."
    419     }
    420   }
    421 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs