scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (21048B)
      1 {
      2   "paper": {
      3     "title": "Generative AI in Software Development: An Overview and Evaluation of Modern Coding Tools",
      4     "authors": ["Aarti"],
      5     "year": 2024,
      6     "venue": "International Journal for Multidisciplinary Research (IJFMR)",
      7     "doi": "10.36948/ijfmr.2024.v06i03.23271"
      8   },
      9   "scan_version": 3,
     10   "active_modules": ["survey_methodology"],
     11   "methodology_tags": ["meta-analysis", "qualitative"],
     12   "key_findings": "This narrative review surveys seven AI-powered coding tools (GitHub Copilot, OpenAI Codex, DeepCode, Amazon CodeGuru, TabNine, Kite, IntelliCode) and provides a qualitative comparison of their capabilities, use cases, and accuracy. The paper identifies four key challenge areas: accuracy/reliability, contextual understanding, security/privacy, and ethical considerations. No original empirical evaluation is conducted; all assessments are drawn from secondary sources and tool documentation.",
     13   "checklist": {
     14     "artifacts": {
     15       "code_released": {
     16         "applies": true,
     17         "answer": false,
     18         "justification": "No code, analysis scripts, or repository links provided. A survey paper could release its analysis data or comparison methodology."
     19       },
     20       "data_released": {
     21         "applies": true,
     22         "answer": false,
     23         "justification": "No dataset or structured data released. The paper could have released its tool comparison data or the corpus of sources reviewed."
     24       },
     25       "environment_specified": {
     26         "applies": true,
     27         "answer": false,
     28         "justification": "No environment or tool specifications provided for reproducing the analysis."
     29       },
     30       "reproduction_instructions": {
     31         "applies": true,
     32         "answer": false,
     33         "justification": "No instructions for reproducing the survey methodology, tool selection process, or comparison framework."
     34       }
     35     },
     36     "statistical_methodology": {
     37       "confidence_intervals_or_error_bars": {
     38         "applies": false,
     39         "answer": false,
     40         "justification": "This is a narrative review paper with no quantitative analysis or experiments."
     41       },
     42       "significance_tests": {
     43         "applies": false,
     44         "answer": false,
     45         "justification": "No statistical analysis or comparative claims requiring significance tests."
     46       },
     47       "effect_sizes_reported": {
     48         "applies": false,
     49         "answer": false,
     50         "justification": "No quantitative results or effect sizes to report."
     51       },
     52       "sample_size_justified": {
     53         "applies": false,
     54         "answer": false,
     55         "justification": "No sample or quantitative data collected."
     56       },
     57       "variance_reported": {
     58         "applies": false,
     59         "answer": false,
     60         "justification": "No experimental runs or quantitative measurements."
     61       }
     62     },
     63     "evaluation_design": {
     64       "baselines_included": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "The survey does not compare against prior surveys or reviews of AI coding tools. No baseline comparison framework is used."
     68       },
     69       "baselines_contemporary": {
     70         "applies": true,
     71         "answer": false,
     72         "justification": "No baselines or prior surveys are compared against."
     73       },
     74       "ablation_study": {
     75         "applies": false,
     76         "answer": false,
     77         "justification": "No system or methodology with components to ablate."
     78       },
     79       "multiple_metrics": {
     80         "applies": false,
     81         "answer": false,
     82         "justification": "No experiments or metrics used; the paper is a qualitative narrative review."
     83       },
     84       "human_evaluation": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "No system outputs to evaluate; this is a qualitative review paper."
     88       },
     89       "held_out_test_set": {
     90         "applies": false,
     91         "answer": false,
     92         "justification": "No experiments requiring train/test separation."
     93       },
     94       "per_category_breakdown": {
     95         "applies": true,
     96         "answer": true,
     97         "justification": "Table 1 provides a per-tool comparison across LLM used, benefits, use cases, and accuracy. Section 4 gives per-tool recommendations for different development scenarios."
     98       },
     99       "failure_cases_discussed": {
    100         "applies": true,
    101         "answer": false,
    102         "justification": "Section 5 discusses generic challenges (accuracy, contextual understanding, security) but provides no specific failure case examples or instances where tools produced incorrect output."
    103       },
    104       "negative_results_reported": {
    105         "applies": true,
    106         "answer": false,
    107         "justification": "No negative findings reported. All tools are presented positively with only generic 'challenges' mentioned. No tool is found to be inadequate for any specific task."
    108       }
    109     },
    110     "claims_and_evidence": {
    111       "abstract_claims_supported": {
    112         "applies": true,
    113         "answer": false,
    114         "justification": "The abstract claims the paper provides an 'evaluation of modern AI-powered coding tools,' but the paper contains no actual evaluation — no testing, no metrics, no empirical comparison. The 'evaluation' is merely a qualitative description of tool features drawn from secondary sources."
    115       },
    116       "causal_claims_justified": {
    117         "applies": true,
    118         "answer": false,
    119         "justification": "The paper makes causal claims such as 'Generative AI has significantly transformed software development' (Section 1) and describes tools with phrases such as 'significantly accelerates development speed' (Section 3), without any causal evidence or empirical data to support these assertions."
    120       },
    121       "generalization_bounded": {
    122         "applies": true,
    123         "answer": false,
    124         "justification": "The title claims to cover 'Generative AI in Software Development' broadly but only examines 7 tools with no justification for this selection. No scope boundaries are stated — the paper does not acknowledge what populations, languages, or development contexts are excluded."
    125       },
    126       "alternative_explanations_discussed": {
    127         "applies": true,
    128         "answer": false,
    129         "justification": "No alternative interpretations are considered. The paper presents a uniformly positive view of AI tools' benefits without considering that reported productivity gains might be due to other factors or that tool evaluations might be biased by vendor marketing."
    130       },
    131       "proxy_outcome_distinction": {
    132         "applies": true,
    133         "answer": false,
    134         "justification": "Table 1 rates tool 'Accuracy' with vague qualitative labels like 'High for common tasks' without any measurement. The paper frames these subjective assessments as evaluative findings without acknowledging that no actual accuracy was measured."
    135       }
    136     },
    137     "setup_transparency": {
    138       "model_versions_specified": {
    139         "applies": false,
    140         "answer": false,
    141         "justification": "No models used in any experiment; this is a survey paper that describes tools narratively."
    142       },
    143       "prompts_provided": {
    144         "applies": false,
    145         "answer": false,
    146         "justification": "No prompting used; the paper does not interact with any AI models."
    147       },
    148       "hyperparameters_reported": {
    149         "applies": false,
    150         "answer": false,
    151         "justification": "No experiments conducted requiring hyperparameters."
    152       },
    153       "scaffolding_described": {
    154         "applies": false,
    155         "answer": false,
    156         "justification": "No agentic scaffolding used; this is a narrative review."
    157       },
    158       "data_preprocessing_documented": {
    159         "applies": true,
    160         "answer": false,
    161         "justification": "No paper selection pipeline is documented. The paper does not describe how the 7 tools or 12 references were selected, what search databases were used, or what inclusion/exclusion criteria were applied."
    162       }
    163     },
    164     "limitations_and_scope": {
    165       "limitations_section_present": {
    166         "applies": true,
    167         "answer": false,
    168         "justification": "Section 5 ('Future Scope and Challenges') discusses limitations of AI tools in general, but there is no discussion of limitations of the paper's own methodology or analysis. No threats-to-validity section exists."
    169       },
    170       "threats_to_validity_specific": {
    171         "applies": true,
    172         "answer": false,
    173         "justification": "No threats to validity of the paper's own survey methodology are discussed. The paper does not acknowledge selection bias in tool choice, reliance on non-academic sources, or the absence of empirical evaluation."
    174       },
    175       "scope_boundaries_stated": {
    176         "applies": true,
    177         "answer": false,
    178         "justification": "No explicit scope boundaries stated. The paper does not clarify why these 7 tools were selected, what development contexts are covered, or what the paper's claims do not extend to."
    179       }
    180     },
    181     "data_integrity": {
    182       "raw_data_available": {
    183         "applies": true,
    184         "answer": false,
    185         "justification": "No raw data or analysis corpus available for verification."
    186       },
    187       "data_collection_described": {
    188         "applies": true,
    189         "answer": false,
    190         "justification": "No description of how the 7 tools or 12 references were identified and selected. The literature review section references sources but provides no systematic collection methodology."
    191       },
    192       "recruitment_methods_described": {
    193         "applies": true,
    194         "answer": false,
    195         "justification": "The 'sample' of tools and papers is not described — no explanation of how these 7 tools were selected from the many available AI coding tools, or why these specific 12 references were chosen."
    196       },
    197       "data_pipeline_documented": {
    198         "applies": true,
    199         "answer": false,
    200         "justification": "No data pipeline documented. The paper goes directly from introduction to tool descriptions with no methodology section explaining the analysis process."
    201       }
    202     },
    203     "conflicts_of_interest": {
    204       "funding_disclosed": {
    205         "applies": true,
    206         "answer": false,
    207         "justification": "No funding information is disclosed anywhere in the paper. No acknowledgments section mentioning grants or sponsors."
    208       },
    209       "affiliations_disclosed": {
    210         "applies": true,
    211         "answer": true,
    212         "justification": "The author's affiliation is clearly stated: 'Assistant Professor, Apex Institute of Technology-CSE, Chandigarh University.' No evaluated product is affiliated with this institution."
    213       },
    214       "funder_independent_of_outcome": {
    215         "applies": true,
    216         "answer": false,
    217         "justification": "No funding statement is provided, so funder independence cannot be assessed."
    218       },
    219       "financial_interests_declared": {
    220         "applies": true,
    221         "answer": false,
    222         "justification": "No competing interests or financial interests statement is provided."
    223       }
    224     },
    225     "contamination": {
    226       "training_cutoff_stated": {
    227         "applies": false,
    228         "answer": false,
    229         "justification": "This is a survey paper that does not evaluate any pre-trained model's capability on a benchmark."
    230       },
    231       "train_test_overlap_discussed": {
    232         "applies": false,
    233         "answer": false,
    234         "justification": "This is a survey paper with no benchmark evaluation."
    235       },
    236       "benchmark_contamination_addressed": {
    237         "applies": false,
    238         "answer": false,
    239         "justification": "This is a survey paper with no benchmark evaluation."
    240       }
    241     },
    242     "human_studies": {
    243       "pre_registered": {
    244         "applies": false,
    245         "answer": false,
    246         "justification": "No human participants in this study."
    247       },
    248       "irb_or_ethics_approval": {
    249         "applies": false,
    250         "answer": false,
    251         "justification": "No human participants in this study."
    252       },
    253       "demographics_reported": {
    254         "applies": false,
    255         "answer": false,
    256         "justification": "No human participants in this study."
    257       },
    258       "inclusion_exclusion_criteria": {
    259         "applies": false,
    260         "answer": false,
    261         "justification": "No human participants in this study."
    262       },
    263       "randomization_described": {
    264         "applies": false,
    265         "answer": false,
    266         "justification": "No human participants in this study."
    267       },
    268       "blinding_described": {
    269         "applies": false,
    270         "answer": false,
    271         "justification": "No human participants in this study."
    272       },
    273       "attrition_reported": {
    274         "applies": false,
    275         "answer": false,
    276         "justification": "No human participants in this study."
    277       }
    278     },
    279     "cost_and_practicality": {
    280       "inference_cost_reported": {
    281         "applies": false,
    282         "answer": false,
    283         "justification": "Survey paper with no method or system to report costs for."
    284       },
    285       "compute_budget_stated": {
    286         "applies": false,
    287         "answer": false,
    288         "justification": "Survey paper with no computational experiments."
    289       }
    290     },
    291     "survey_methodology": {
    292       "prisma_or_structured_protocol": {
    293         "applies": true,
    294         "answer": false,
    295         "justification": "No structured review protocol followed. No PRISMA diagram, no systematic search strategy, no reproducible queries, no inclusion/exclusion criteria. The paper is an ad-hoc narrative review."
    296       },
    297       "quality_assessment_of_sources": {
    298         "applies": true,
    299         "answer": false,
    300         "justification": "No quality assessment of source papers. The paper treats Forbes blog posts, Medium articles, and peer-reviewed IEEE papers as equivalent sources without any quality differentiation."
    301       },
    302       "publication_bias_discussed": {
    303         "applies": true,
    304         "answer": false,
    305         "justification": "No discussion of publication bias. The paper does not consider whether its sources (largely vendor documentation and positive industry reports) skew toward favorable assessments of AI tools."
    306       }
    307     }
    308   },
    309   "claims": [
    310     {
    311       "claim": "Generative AI has significantly transformed software development by leveraging advanced machine learning models to automate coding tasks, generate code, and enhance productivity.",
    312       "evidence": "Stated in the abstract and Section 1 but supported only by citations to Forbes articles, a McKinsey report, and general references — no original empirical evidence.",
    313       "supported": "unsupported"
    314     },
    315     {
    316       "claim": "AI-powered tools like GitHub Copilot, Codex, DeepCode, CodeGuru, TabNine, Kite, and IntelliCode significantly accelerate development speed and reduce cognitive load.",
    317       "evidence": "Section 3 describes each tool's purported capabilities based on vendor documentation and secondary sources. Table 1 assigns qualitative accuracy ratings (e.g., 'High for common tasks') with no measurement methodology.",
    318       "supported": "weak"
    319     },
    320     {
    321       "claim": "These tools face challenges related to accuracy, contextual understanding, security, privacy, and ethical considerations.",
    322       "evidence": "Section 5 discusses four challenge categories with reasonable arguments, but all claims are generic and drawn from common knowledge rather than original analysis or empirical findings.",
    323       "supported": "weak"
    324     },
    325     {
    326       "claim": "The future of generative AI lies in enhanced model training, context-aware models, secure AI training methods, and ethical AI usage.",
    327       "evidence": "Section 5 proposes future directions including federated learning, hybrid approaches, and ethical guidelines, but these are speculative recommendations without evidence of feasibility or effectiveness.",
    328       "supported": "unsupported"
    329     }
    330   ],
    331   "red_flags": [
    332     {
    333       "flag": "No empirical evaluation despite 'evaluation' title",
    334       "detail": "The paper's title promises an 'Evaluation of Modern Coding Tools' but contains no actual testing, benchmarking, or empirical comparison. The 'evaluation' consists entirely of qualitative descriptions drawn from tool documentation and secondary sources."
    335     },
    336     {
    337       "flag": "No structured review methodology",
    338       "detail": "The paper presents as a review/survey but has no systematic search strategy, no inclusion/exclusion criteria, no explanation of how the 7 tools or 12 references were selected. This is an ad-hoc narrative with no reproducibility."
    339     },
    340     {
    341       "flag": "Non-academic sources treated as evidence",
    342       "detail": "Several references are Forbes articles (refs 1, 3), a Medium blog post (ref 6), a product website (ref 8, FuturePedia), and a Gartner report (ref 10). These are treated equivalently to peer-reviewed sources without quality differentiation."
    343     },
    344     {
    345       "flag": "Unsubstantiated accuracy claims in comparison table",
    346       "detail": "Table 1 rates each tool's 'Accuracy' with labels like 'High for common tasks' or 'Effective for standard code' without any measurement methodology, test data, or empirical basis. These appear to be subjective opinions presented as evaluative findings."
    347     },
    348     {
    349       "flag": "Uniformly positive framing with no critical analysis",
    350       "detail": "All 7 tools are presented in positive terms. No tool is found inadequate for any task. 'Challenges' in Section 5 are generic industry-wide concerns, not critical findings from the paper's analysis. This is characteristic of a promotional overview rather than a scholarly evaluation."
    351     },
    352     {
    353       "flag": "Extremely thin reference list",
    354       "detail": "Only 12 references for a survey paper, several of which are non-academic. A credible survey of AI coding tools would require engagement with the substantial academic literature on code generation, LLM evaluation, and developer productivity."
    355     },
    356     {
    357       "flag": "Survey launders source quality",
    358       "detail": "The paper summarizes vendor claims and industry reports without quality assessment, effectively laundering unvetted, uncritically accepted source material into a paper that appears to offer independent evaluation."
    359     }
    360   ],
    361   "cited_papers": [
    362     {
    363       "title": "Generative AI for software practitioners",
    364       "authors": ["C. Ebert", "P. Louridas"],
    365       "year": 2023,
    366       "doi": "10.1109/MS.2023.3265877",
    367       "relevance": "IEEE Software overview of generative AI capabilities for software practitioners, directly relevant to the survey's scope."
    368     },
    369     {
    370       "title": "Large Language Models as Tool Makers",
    371       "authors": [],
    372       "year": 2023,
    373       "arxiv_id": "2305.17126",
    374       "relevance": "Explores LLMs creating and using tools, relevant to understanding agentic AI capabilities in software development."
    375     },
    376     {
    377       "title": "Generative AI assistants in software development education: a vision for integrating generative AI into educational practice",
    378       "authors": ["C. Bull", "A. Kharrufa"],
    379       "year": 2023,
    380       "doi": "10.1109/MS.2023.3300574",
    381       "relevance": "Examines integration of generative AI in software development education, relevant to AI-assisted coding practices."
    382     },
    383     {
    384       "title": "Future of software development with generative AI",
    385       "authors": ["J. Sauvola", "S. Tarkoma", "M. Klemettinen", "J. Riekki", "D. Doermann"],
    386       "year": 2024,
    387       "doi": "10.1007/s10515-024-00426-z",
    388       "relevance": "Comprehensive review of generative AI's future in software development, directly overlapping with this paper's scope."
    389     },
    390     {
    391       "title": "Investigation of the interplay between developers and automation",
    392       "authors": ["O. Elazhary"],
    393       "year": 2021,
    394       "relevance": "Studies developer-automation interaction (published at ICSE); relevant to understanding human-AI collaboration in software development."
    395     }
    396   ],
    397   "engagement_factors": {
    398     "practical_relevance": {
    399       "score": 2,
    400       "justification": "The tool comparison table and per-scenario recommendations could help practitioners choose between AI coding tools, though the analysis is very surface-level."
    401     },
    402     "surprise_contrarian": {
    403       "score": 0,
    404       "justification": "Confirms conventional wisdom that AI tools help developers; no surprising or contrarian findings."
    405     },
    406     "fear_safety": {
    407       "score": 1,
    408       "justification": "Mentions security, privacy, and ethical concerns (job displacement) but with no novel insights beyond common discourse."
    409     },
    410     "drama_conflict": {
    411       "score": 0,
    412       "justification": "No controversy or conflict; uniformly positive framing of all tools discussed."
    413     },
    414     "demo_ability": {
    415       "score": 0,
    416       "justification": "No code, demo, or tool released; purely a written review."
    417     },
    418     "brand_recognition": {
    419       "score": 2,
    420       "justification": "Discusses well-known products (GitHub Copilot, OpenAI Codex, Amazon CodeGuru) from major tech companies."
    421     }
    422   }
    423 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs