scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (16353B)
      1 {
      2   "paper": {
      3     "title": "Towards AI-Augmented Software Engineering: A Theoretical Framework",
      4     "authors": ["Samia Akhtar", "Shabib Aftab"],
      5     "year": 2025,
      6     "venue": "ICCK Journal of Software Engineering",
      7     "doi": "10.62762/JSE.2025.407864"
      8   },
      9   "checklist": {
     10     "artifacts": {
     11       "code_released": {
     12         "applies": false,
     13         "answer": false,
     14         "justification": "Purely theoretical/conceptual paper with no code, experiments, or analysis scripts to release."
     15       },
     16       "data_released": {
     17         "applies": false,
     18         "answer": false,
     19         "justification": "No data was generated or analyzed. The paper states: 'No new data were generated or analyzed in this study.'"
     20       },
     21       "environment_specified": {
     22         "applies": false,
     23         "answer": false,
     24         "justification": "No computational experiments were run; no environment to specify."
     25       },
     26       "reproduction_instructions": {
     27         "applies": false,
     28         "answer": false,
     29         "justification": "No experiments to reproduce. This is a theoretical framework paper."
     30       }
     31     },
     32     "statistical_methodology": {
     33       "confidence_intervals_or_error_bars": {
     34         "applies": false,
     35         "answer": false,
     36         "justification": "No experiments or quantitative results are presented. Purely theoretical paper."
     37       },
     38       "significance_tests": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "No comparative empirical claims are made by the authors themselves."
     42       },
     43       "effect_sizes_reported": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No empirical results of the authors' own work to report effect sizes for."
     47       },
     48       "sample_size_justified": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No data collection or sampling was performed."
     52       },
     53       "variance_reported": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No experimental runs were conducted."
     57       }
     58     },
     59     "evaluation_design": {
     60       "baselines_included": {
     61         "applies": false,
     62         "answer": false,
     63         "justification": "No system or method is evaluated empirically. The paper proposes a conceptual framework without experimental validation."
     64       },
     65       "baselines_contemporary": {
     66         "applies": false,
     67         "answer": false,
     68         "justification": "No empirical evaluation is performed."
     69       },
     70       "ablation_study": {
     71         "applies": false,
     72         "answer": false,
     73         "justification": "No system with components to ablate; this is a theoretical framework."
     74       },
     75       "multiple_metrics": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "No empirical evaluation is conducted."
     79       },
     80       "human_evaluation": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "No system outputs to evaluate. The paper is a theoretical framework proposal."
     84       },
     85       "held_out_test_set": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "No data or experiments."
     89       },
     90       "per_category_breakdown": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "No quantitative results to break down."
     94       },
     95       "failure_cases_discussed": {
     96         "applies": true,
     97         "answer": false,
     98         "justification": "The paper discusses challenges (Section 8) at a high level but does not discuss specific failure cases of its proposed framework or any concrete limitations of the conceptual model."
     99       },
    100       "negative_results_reported": {
    101         "applies": false,
    102         "answer": false,
    103         "justification": "No experiments were conducted to yield negative results."
    104       }
    105     },
    106     "claims_and_evidence": {
    107       "abstract_claims_supported": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "The abstract claims the paper proposes a theoretical framework and illustrates it with case studies from the literature. The paper delivers on this: Sections 3-6 present the framework, with Section 5 supplying the case studies. No unsupported empirical claims are made in the abstract."
    111       },
    112       "causal_claims_justified": {
    113         "applies": true,
    114         "answer": false,
    115         "justification": "The paper makes numerous causal claims (e.g., 'AI transforms testing by enabling automated test case generation', 'AI improves PM through predictive analytics') without any empirical evidence from the authors' own work. These are presented as established facts but rely entirely on cited literature without systematic verification."
    116       },
    117       "generalization_bounded": {
    118         "applies": true,
    119         "answer": false,
    120         "justification": "The paper makes sweeping claims about AI transforming 'every phase of the software engineering lifecycle' (Section 4) without bounding these claims to specific contexts, domains, or scales. The title and abstract present the framework as broadly applicable without qualification."
    121       },
    122       "alternative_explanations_discussed": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "The paper presents AI augmentation as uniformly beneficial without considering alternative explanations for why AI integration might fail, produce worse outcomes, or be unnecessary in certain SE contexts. The challenges section (Section 8) lists obstacles but does not discuss alternatives to the proposed framework."
    126       }
    127     },
    128     "setup_transparency": {
    129       "model_versions_specified": {
    130         "applies": false,
    131         "answer": false,
    132         "justification": "No models are used or evaluated by the authors."
    133       },
    134       "prompts_provided": {
    135         "applies": false,
    136         "answer": false,
    137         "justification": "No prompting is used in this paper."
    138       },
    139       "hyperparameters_reported": {
    140         "applies": false,
    141         "answer": false,
    142         "justification": "No experiments are conducted."
    143       },
    144       "scaffolding_described": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No agentic scaffolding is used."
    148       },
    149       "data_preprocessing_documented": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "No data collection or preprocessing is performed. This is not a survey with a systematic search methodology either—it is a theoretical paper that cites literature narratively."
    153       }
    154     },
    155     "limitations_and_scope": {
    156       "limitations_section_present": {
    157         "applies": true,
    158         "answer": true,
    159         "justification": "Section 8 'Challenges and Limitations' discusses technical, organizational, and ethical hurdles of AI-augmented SE, presented in Table 3."
    160       },
    161       "threats_to_validity_specific": {
    162         "applies": true,
    163         "answer": false,
    164         "justification": "The challenges discussed in Section 8 are about the general field of AI in SE, not specific threats to the validity of this paper's framework or its claims. No self-critical assessment of the paper's own methodology or theoretical contributions."
    165       },
    166       "scope_boundaries_stated": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "The paper does not explicitly state what its framework does NOT cover or what claims it is NOT making. Section 2.4 states objectives but does not bound the scope to specific SE domains, organization types, or AI maturity levels."
    170       }
    171     },
    172     "data_integrity": {
    173       "raw_data_available": {
    174         "applies": false,
    175         "answer": false,
    176         "justification": "No data was collected or generated. The paper states: 'No new data were generated or analyzed in this study.'"
    177       },
    178       "data_collection_described": {
    179         "applies": false,
    180         "answer": false,
    181         "justification": "No data collection was performed."
    182       },
    183       "recruitment_methods_described": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "No participants or samples were recruited."
    187       },
    188       "data_pipeline_documented": {
    189         "applies": false,
    190         "answer": false,
    191         "justification": "No data pipeline exists in this theoretical paper."
    192       }
    193     },
    194     "conflicts_of_interest": {
    195       "funding_disclosed": {
    196         "applies": true,
    197         "answer": true,
    198         "justification": "The paper states: 'This work was supported without any funding.'"
    199       },
    200       "affiliations_disclosed": {
    201         "applies": true,
    202         "answer": true,
    203         "justification": "Authors are identified as affiliated with the Department of Computer Science, Virtual University of Pakistan."
    204       },
    205       "funder_independent_of_outcome": {
    206         "applies": false,
    207         "answer": false,
    208         "justification": "The paper is explicitly unfunded."
    209       },
    210       "financial_interests_declared": {
    211         "applies": true,
    212         "answer": true,
    213         "justification": "The paper states: 'The authors declare no conflicts of interest.'"
    214       }
    215     },
    216     "contamination": {
    217       "training_cutoff_stated": {
    218         "applies": false,
    219         "answer": false,
    220         "justification": "No pre-trained model is evaluated on any benchmark."
    221       },
    222       "train_test_overlap_discussed": {
    223         "applies": false,
    224         "answer": false,
    225         "justification": "No model evaluation is performed."
    226       },
    227       "benchmark_contamination_addressed": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "No benchmarks are used."
    231       }
    232     },
    233     "human_studies": {
    234       "pre_registered": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "No human participants in this study."
    238       },
    239       "irb_or_ethics_approval": {
    240         "applies": false,
    241         "answer": false,
    242         "justification": "No human participants. The paper states: 'Ethical Approval and Consent to Participate: Not applicable.'"
    243       },
    244       "demographics_reported": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants."
    248       },
    249       "inclusion_exclusion_criteria": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants."
    253       },
    254       "randomization_described": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants."
    258       },
    259       "blinding_described": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants."
    263       },
    264       "attrition_reported": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants."
    268       }
    269     },
    270     "cost_and_practicality": {
    271       "inference_cost_reported": {
    272         "applies": false,
    273         "answer": false,
    274         "justification": "Theoretical paper with no method that incurs inference costs."
    275       },
    276       "compute_budget_stated": {
    277         "applies": false,
    278         "answer": false,
    279         "justification": "No computation was performed."
    280       }
    281     }
    282   },
    283   "claims": [
    284     {
    285       "claim": "AI can be systematically embedded across all phases of the software engineering lifecycle (requirements, design, coding, testing, maintenance, project management, process improvement).",
    286       "evidence": "Sections 4.1-4.8 describe potential AI applications in each phase, citing existing literature and case studies from other researchers (Section 5).",
    287       "supported": "weak"
    288     },
    289     {
    290       "claim": "The proposed theoretical framework positions AI as a catalyst for greater automation, higher productivity, and improved predictive accuracy in SE practices.",
    291       "evidence": "Section 6 presents the framework with a high-level architecture (Figure 5) and key principles (Figure 6), but provides no empirical validation.",
    292       "supported": "unsupported"
    293     },
    294     {
    295       "claim": "AI-augmented SE represents a paradigm shift from rule-based practices to adaptive, predictive, and intelligent paradigms.",
    296       "evidence": "Table 1 contrasts traditional vs. intelligent SE. The claim is supported by a narrative literature review but by no original evidence.",
    297       "supported": "weak"
    298     }
    299   ],
    300   "methodology_tags": ["theoretical"],
    301   "key_findings": "This paper proposes a conceptual framework for AI-augmented software engineering, organized around four theoretical perspectives: cognitive augmentation, data-driven decision-making, systems adaptability, and socio-technical integration. The framework is a layered architecture (data, AI/ML processing, SE lifecycle, and feedback/adaptation layers) illustrated with case studies from the literature. No original empirical validation is provided; the paper explicitly states it presents theoretical perspectives rather than experimental results.",
    302   "red_flags": [
    303     {
    304       "flag": "No empirical validation",
    305       "detail": "The paper proposes a theoretical framework but provides zero empirical evidence for its utility, feasibility, or correctness. The case studies in Section 5 are all drawn from other researchers' work and are summarized narratively rather than systematically analyzed."
    306     },
    307     {
    308       "flag": "Unbounded generalizations",
    309       "detail": "Claims such as 'AI has the potential to influence every phase of the software engineering lifecycle' (Section 4), and the assertion that the framework applies from 'small agile teams to large enterprise-scale development' (Section 6.3), are not bounded by any evidence or caveats."
    310     },
    311     {
    312       "flag": "No systematic literature review methodology",
    313       "detail": "The paper cites literature narratively without a documented search strategy, inclusion/exclusion criteria, or systematic selection process. It is unclear how the cited case studies were selected or whether they are representative."
    314     },
    315     {
    316       "flag": "Low-quality venue",
    317       "detail": "Published in Vol. 1, No. 2 of the ICCK Journal of Software Engineering, a very new journal with no established impact factor or reputation. The paper was submitted, accepted, and published within two months."
    318     }
    319   ],
    320   "cited_papers": [
    321     {
    322       "title": "Generative artificial intelligence for software engineering—A research agenda",
    323       "authors": ["A. Nguyen-Duc", "B. Cabrero-Daniel", "A. Przybylek", "C. Arora", "D. Khanna", "T. Herda", "P. Abrahamsson"],
    324       "year": 2025,
    325       "relevance": "Research agenda for generative AI in SE, directly relevant to the survey scope."
    326     },
    327     {
    328       "title": "Application of large language models to software engineering tasks: Opportunities, risks, and implications",
    329       "authors": ["I. Ozkaya"],
    330       "year": 2023,
    331       "relevance": "Overview of LLM applications in SE tasks with discussion of risks."
    332     },
    333     {
    334       "title": "Software engineering for AI-based systems: a survey",
    335       "authors": ["S. Martínez-Fernández", "J. Bogner", "X. Franch"],
    336       "year": 2022,
    337       "relevance": "Comprehensive survey of SE practices for AI-based systems."
    338     },
    339     {
    340       "title": "Software engineering for machine learning: A case study",
    341       "authors": ["S. Amershi", "A. Begel", "C. Bird"],
    342       "year": 2019,
    343       "relevance": "Foundational case study of ML engineering practices at Microsoft."
    344     },
    345     {
    346       "title": "A survey on deep learning for software engineering",
    347       "authors": ["Y. Yang", "X. Xia", "D. Lo", "J. Grundy"],
    348       "year": 2022,
    349       "relevance": "Survey of deep learning applications across SE tasks."
    350     },
    351     {
    352       "title": "Software testing with large language models: Survey, landscape, and vision",
    353       "authors": ["J. Wang", "Y. Huang", "C. Chen"],
    354       "year": 2024,
    355       "relevance": "Survey of LLM-based software testing approaches."
    356     },
    357     {
    358       "title": "An analysis of the automatic bug fixing performance of ChatGPT",
    359       "authors": ["D. Sobania", "M. Briesch", "C. Hanna", "J. Petke"],
    360       "year": 2023,
    361       "relevance": "Empirical evaluation of ChatGPT for automated program repair."
    362     },
    363     {
    364       "title": "Machine learning for software engineering: A tertiary study",
    365       "authors": ["Z. Kotti", "R. Galanopoulou", "D. Spinellis"],
    366       "year": 2023,
    367       "relevance": "Tertiary study (survey of surveys) on ML for SE, relevant for methodology assessment."
    368     }
    369   ]
    370 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs