scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (13650B)
      1 {
      2   "paper": {
      3     "title": "Advancements in software engineering using AI",
      4     "authors": ["Hazem W. Marar"],
      5     "year": 2024,
      6     "venue": "Computer Software and Media Applications",
      7     "doi": "10.24294/csma.v6i1.3906"
      8   },
      9   "checklist": {
     10     "artifacts": {
     11       "code_released": {
     12         "applies": false,
     13         "answer": false,
     14         "justification": "This is a narrative overview paper with no original code, experiments, or tools to release."
     15       },
     16       "data_released": {
     17         "applies": false,
     18         "answer": false,
     19         "justification": "No data was collected or analyzed. The paper is a descriptive overview of existing AI tools in software engineering."
     20       },
     21       "environment_specified": {
     22         "applies": false,
     23         "answer": false,
     24         "justification": "No experiments were conducted, so no environment specification is applicable."
     25       },
     26       "reproduction_instructions": {
     27         "applies": false,
     28         "answer": false,
     29         "justification": "No experiments or analyses to reproduce."
     30       }
     31     },
     32     "statistical_methodology": {
     33       "confidence_intervals_or_error_bars": {
     34         "applies": false,
     35         "answer": false,
     36         "justification": "No quantitative experiments or results are presented."
     37       },
     38       "significance_tests": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "No comparative empirical claims are made."
     42       },
     43       "effect_sizes_reported": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No empirical results are reported."
     47       },
     48       "sample_size_justified": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No data collection or sampling is performed."
     52       },
     53       "variance_reported": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No experimental runs are conducted."
     57       }
     58     },
     59     "evaluation_design": {
     60       "baselines_included": {
     61         "applies": false,
     62         "answer": false,
     63         "justification": "No evaluation is conducted. The paper describes existing tools narratively without comparative assessment."
     64       },
     65       "baselines_contemporary": {
     66         "applies": false,
     67         "answer": false,
     68         "justification": "No evaluation or comparison is performed."
     69       },
     70       "ablation_study": {
     71         "applies": false,
     72         "answer": false,
     73         "justification": "No system or method is proposed that could be ablated."
     74       },
     75       "multiple_metrics": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "No evaluation metrics are used."
     79       },
     80       "human_evaluation": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "No evaluation of any system outputs is conducted."
     84       },
     85       "held_out_test_set": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "No experiments are conducted."
     89       },
     90       "per_category_breakdown": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "No quantitative results to break down."
     94       },
     95       "failure_cases_discussed": {
     96         "applies": true,
     97         "answer": false,
     98         "justification": "The paper discusses AI tools but never addresses where they fail or their limitations in practice."
     99       },
    100       "negative_results_reported": {
    101         "applies": true,
    102         "answer": false,
    103         "justification": "No negative results or unsuccessful approaches are mentioned. The paper is entirely positive about AI in SE."
    104       }
    105     },
    106     "claims_and_evidence": {
    107       "abstract_claims_supported": {
    108         "applies": true,
    109         "answer": false,
    110         "justification": "The abstract claims AI 'reshapes traditional development processes' and 'enhance efficiency, improve software quality,' but the paper provides no empirical evidence — only narrative descriptions of existing tools without data."
    111       },
    112       "causal_claims_justified": {
    113         "applies": true,
    114         "answer": false,
    115         "justification": "The paper makes causal claims such as 'This results in a significant reduction in energy consumption' (Section 2.1) and 'Microsoft has improved developer productivity by leveraging AI' (Section 2.2) without any supporting data or study design."
    116       },
    117       "generalization_bounded": {
    118         "applies": true,
    119         "answer": false,
    120         "justification": "The paper makes sweeping claims about AI transforming software engineering based on anecdotal descriptions of a few commercial tools, with no bounding of scope."
    121       },
    122       "alternative_explanations_discussed": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "No alternative explanations are considered for any of the claimed benefits of AI in software engineering."
    126       }
    127     },
    128     "setup_transparency": {
    129       "model_versions_specified": {
    130         "applies": false,
    131         "answer": false,
    132         "justification": "No models are used in experiments. The paper describes tools narratively."
    133       },
    134       "prompts_provided": {
    135         "applies": false,
    136         "answer": false,
    137         "justification": "No prompting is used."
    138       },
    139       "hyperparameters_reported": {
    140         "applies": false,
    141         "answer": false,
    142         "justification": "No experiments are conducted."
    143       },
    144       "scaffolding_described": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No agentic scaffolding is used."
    148       },
    149       "data_preprocessing_documented": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "No data is collected or processed."
    153       }
    154     },
    155     "limitations_and_scope": {
    156       "limitations_section_present": {
    157         "applies": true,
    158         "answer": false,
    159         "justification": "There is no limitations section. Section 3 discusses 'Challenges' of AI in SE generally (bias, transparency) but these are about the field, not about this paper's own limitations."
    160       },
    161       "threats_to_validity_specific": {
    162         "applies": true,
    163         "answer": false,
    164         "justification": "No threats to validity are discussed for this paper."
    165       },
    166       "scope_boundaries_stated": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "The paper does not state what it does not cover or what its claims do not apply to."
    170       }
    171     },
    172     "data_integrity": {
    173       "raw_data_available": {
    174         "applies": false,
    175         "answer": false,
    176         "justification": "No data was collected. This is a narrative overview."
    177       },
    178       "data_collection_described": {
    179         "applies": false,
    180         "answer": false,
    181         "justification": "No data collection occurred."
    182       },
    183       "recruitment_methods_described": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "No participants or samples were recruited."
    187       },
    188       "data_pipeline_documented": {
    189         "applies": false,
    190         "answer": false,
    191         "justification": "No data pipeline exists."
    192       }
    193     },
    194     "conflicts_of_interest": {
    195       "funding_disclosed": {
    196         "applies": true,
    197         "answer": false,
    198         "justification": "No funding information is provided anywhere in the paper."
    199       },
    200       "affiliations_disclosed": {
    201         "applies": true,
    202         "answer": true,
    203         "justification": "The author's affiliation with Princess Sumaya University for Technology is stated in the header."
    204       },
    205       "funder_independent_of_outcome": {
    206         "applies": true,
    207         "answer": false,
    208         "justification": "No funding is disclosed, so independence cannot be assessed."
    209       },
    210       "financial_interests_declared": {
    211         "applies": true,
    212         "answer": true,
    213         "justification": "The paper includes a 'Conflict of interest' section stating 'The author declares no conflict of interest.'"
    214       }
    215     },
    216     "contamination": {
    217       "training_cutoff_stated": {
    218         "applies": false,
    219         "answer": false,
    220         "justification": "No pre-trained models are evaluated on any benchmark."
    221       },
    222       "train_test_overlap_discussed": {
    223         "applies": false,
    224         "answer": false,
    225         "justification": "No benchmark evaluation is conducted."
    226       },
    227       "benchmark_contamination_addressed": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "No benchmark evaluation is conducted."
    231       }
    232     },
    233     "human_studies": {
    234       "pre_registered": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "No human participants."
    238       },
    239       "irb_or_ethics_approval": {
    240         "applies": false,
    241         "answer": false,
    242         "justification": "No human participants."
    243       },
    244       "demographics_reported": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants."
    248       },
    249       "inclusion_exclusion_criteria": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants."
    253       },
    254       "randomization_described": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants."
    258       },
    259       "blinding_described": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants."
    263       },
    264       "attrition_reported": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants."
    268       }
    269     },
    270     "cost_and_practicality": {
    271       "inference_cost_reported": {
    272         "applies": false,
    273         "answer": false,
    274         "justification": "No method is proposed or evaluated. This is a narrative overview."
    275       },
    276       "compute_budget_stated": {
    277         "applies": false,
    278         "answer": false,
    279         "justification": "No computation is performed."
    280       }
    281     }
    282   },
    283   "claims": [
    284     {
    285       "claim": "AI-driven cooling optimization results in significant reduction in energy consumption for Google's data centers.",
    286       "evidence": "Section 2.1 describes DeepMind's reinforcement learning system for data center cooling, citing reference [7], but provides no quantitative data or measurements.",
    287       "supported": "unsupported"
    288     },
    289     {
    290       "claim": "Microsoft's IntelliCode improves developer productivity by predicting code patterns.",
    291       "evidence": "Section 2.2 describes IntelliCode features narratively, citing reference [8], but provides no productivity measurements or user studies.",
    292       "supported": "unsupported"
    293     },
    294     {
    295       "claim": "GitHub's CodeQL can detect complex vulnerabilities that other methods may miss.",
    296       "evidence": "Section 2.3 describes CodeQL capabilities narratively, citing reference [9], but no comparative evaluation or detection rates are provided.",
    297       "supported": "unsupported"
    298     }
    299   ],
    300   "methodology_tags": ["qualitative"],
    301   "key_findings": "This is a short narrative overview of AI applications in software engineering, describing five commercial tools (Google DeepMind for data centers, Microsoft IntelliCode, GitHub CodeQL, OpenAI Codex, IBM Watson) and four future trends (XAI, reinforcement/unsupervised learning, AI+DevOps, code generation/AutoML). No original experiments, data collection, or systematic review methodology is employed. All claims are descriptive summaries of existing products without empirical validation.",
    302   "red_flags": [
    303     {
    304       "flag": "No empirical evidence",
    305       "detail": "The paper makes numerous claims about AI improving efficiency, productivity, and software quality but provides zero quantitative data, experiments, or systematic evidence. All 'case studies' are narrative product descriptions."
    306     },
    307     {
    308       "flag": "Unsupported causal claims",
    309       "detail": "Claims like 'significant reduction in energy consumption' and 'improved developer productivity' are stated as facts without any supporting measurements or citations to empirical studies that demonstrate these effects."
    310     },
    311     {
    312       "flag": "No systematic methodology",
    313       "detail": "Despite being labeled 'Original Research Article,' the paper employs no research methodology — no literature search protocol, no selection criteria, no analytical framework. It reads as an opinion piece or tutorial."
    314     },
    315     {
    316       "flag": "Uncritical treatment",
    317       "detail": "Every AI tool is described in exclusively positive terms with no discussion of failures, limitations, or negative outcomes. This one-sided framing undermines credibility."
    318     }
    319   ],
    320   "cited_papers": [
    321     {
    322       "title": "Assessing the quality of GitHub copilot's code generation",
    323       "authors": ["B. Yetistiren", "I. Ozsoy", "E. Tuzun"],
    324       "year": 2022,
    325       "doi": "10.1145/3558489.3559072",
    326       "relevance": "Empirical evaluation of AI code generation quality, directly relevant to the survey's scope on LLM/AI coding tools."
    327     },
    328     {
    329       "title": "The robots are coming: Exploring the implications of OpenAI Codex on introductory programming",
    330       "authors": ["J. Finnie-Ansley", "P. Denny", "B.A. Becker"],
    331       "year": 2022,
    332       "doi": "10.1145/3511861.3511863",
    333       "relevance": "Study of AI code generation (Codex) impact on programming education, relevant to LLM programming capabilities."
    334     },
    335     {
    336       "title": "DevOps for AI—Challenges in development of AI-enabled applications",
    337       "authors": ["L.E. Lwakatare", "I. Crnkovic", "J. Bosch"],
    338       "year": 2020,
    339       "doi": "10.23919/softcom50211.2020.9238323",
    340       "relevance": "Addresses challenges of integrating AI into DevOps software development practices."
    341     }
    342   ]
    343 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs