ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (15926B)


      1 {
      2   "paper": {
      3     "title": "AI Agents in Software Engineering Optimizing Software Development Processes and Enhancing Security Management in Learning Management Systems",
      4     "authors": ["Rajendra Varma"],
      5     "year": 2025,
      6     "venue": "International Journal for Research in Applied Science & Engineering Technology (IJRASET)",
      7     "doi": "10.22214/ijraset.2025.73299"
      8   },
      9   "checklist": {
     10     "artifacts": {
     11       "code_released": {
     12         "applies": true,
     13         "answer": false,
     14         "justification": "No source code, repository URL, or archive is provided anywhere in the paper."
     15       },
     16       "data_released": {
     17         "applies": true,
     18         "answer": false,
     19         "justification": "No dataset is released or referenced. The paper presents figures with results but does not identify any dataset used."
     20       },
     21       "environment_specified": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "No environment specifications, library versions, or dependency information is provided."
     25       },
     26       "reproduction_instructions": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "No reproduction instructions are provided. The methodology describes models conceptually but gives no implementation details."
     30       }
     31     },
     32     "statistical_methodology": {
     33       "confidence_intervals_or_error_bars": {
     34         "applies": true,
     35         "answer": false,
     36         "justification": "The figures show results but no confidence intervals or error bars are mentioned or visible."
     37       },
     38       "significance_tests": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "The paper makes comparative claims (before vs. after AI integration) but uses no statistical significance tests."
     42       },
     43       "effect_sizes_reported": {
     44         "applies": true,
     45         "answer": false,
     46         "justification": "No effect sizes are reported. Results are shown only in figures with no quantitative values stated in the text."
     47       },
     48       "sample_size_justified": {
     49         "applies": true,
     50         "answer": false,
     51         "justification": "No sample sizes are stated or justified. It is unclear how many data points underlie any of the figures."
     52       },
     53       "variance_reported": {
     54         "applies": true,
     55         "answer": false,
     56         "justification": "No variance, standard deviation, or spread measures are reported for any results."
     57       }
     58     },
     59     "evaluation_design": {
     60       "baselines_included": {
     61         "applies": true,
     62         "answer": true,
     63         "justification": "Figures compare 'before AI' vs. 'after AI' conditions (e.g., Figure 2 for bug incidence) and AI vs. traditional security solutions (Figure 4 for anomaly detection accuracy). These serve as baselines, though they are poorly described."
     64       },
     65       "baselines_contemporary": {
     66         "applies": true,
     67         "answer": false,
     68         "justification": "The 'traditional' baselines are not identified or described. No specific prior systems or methods are named as comparators."
     69       },
     70       "ablation_study": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "The system has multiple components (SDA, SMA, Learning Layer) but no ablation study examines their individual contributions."
     74       },
     75       "multiple_metrics": {
     76         "applies": true,
     77         "answer": true,
     78         "justification": "Multiple metrics are referenced: bug rates, development time, anomaly detection accuracy, false positive ratio, threat detection rates, response time, student performance."
     79       },
     80       "human_evaluation": {
     81         "applies": true,
     82         "answer": false,
     83         "justification": "No human evaluation of the system's outputs is conducted. All evaluation appears automated or unspecified."
     84       },
     85       "held_out_test_set": {
     86         "applies": true,
     87         "answer": false,
     88         "justification": "No mention of train/test splits or held-out evaluation sets."
     89       },
     90       "per_category_breakdown": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "Results are shown as aggregate trends in figures. No per-category or per-task breakdowns are provided."
     94       },
     95       "failure_cases_discussed": {
     96         "applies": true,
     97         "answer": false,
     98         "justification": "No failure cases or error analysis is discussed. All results show improvements."
     99       },
    100       "negative_results_reported": {
    101         "applies": true,
    102         "answer": false,
    103         "justification": "Every result shows improvement. No negative results or failed approaches are reported."
    104       }
    105     },
    106     "claims_and_evidence": {
    107       "abstract_claims_supported": {
    108         "applies": true,
    109         "answer": false,
    110         "justification": "The abstract claims AI agents can 'automate software engineering tasks, enhance system performance and guarantee secure security measures' but the results section provides only vague figure descriptions without quantitative support."
    111       },
    112       "causal_claims_justified": {
    113         "applies": true,
    114         "answer": false,
    115         "justification": "The paper repeatedly claims AI integration caused improvements (e.g., 'reducing bugs,' 'speed up development cycles') but provides no controlled experimental design to justify causal inference."
    116       },
    117       "generalization_bounded": {
    118         "applies": true,
    119         "answer": false,
    120         "justification": "Claims are made broadly about AI in software engineering and LMS with no bounding to specific systems, datasets, or contexts tested."
    121       },
    122       "alternative_explanations_discussed": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "No alternative explanations for the observed improvements are discussed."
    126       }
    127     },
    128     "setup_transparency": {
    129       "model_versions_specified": {
    130         "applies": true,
    131         "answer": false,
    132         "justification": "The paper mentions 'OpenAI's GPT or other transformer models' without specifying any version, model size, or snapshot date."
    133       },
    134       "prompts_provided": {
    135         "applies": true,
    136         "answer": false,
    137         "justification": "No prompts or system instructions are provided despite mentioning use of language models for code generation."
    138       },
    139       "hyperparameters_reported": {
    140         "applies": true,
    141         "answer": false,
    142         "justification": "No hyperparameters are reported for any of the ML models mentioned (GMM, CNN, RL agent, etc.)."
    143       },
    144       "scaffolding_described": {
    145         "applies": true,
    146         "answer": false,
    147         "justification": "The paper describes AI agents (SDA, SMA) at a high conceptual level but provides no implementation details of the agentic scaffolding, tool use, or workflow."
    148       },
    149       "data_preprocessing_documented": {
    150         "applies": true,
    151         "answer": false,
    152         "justification": "The data pipeline is mentioned as a component but no preprocessing steps, filtering criteria, or data transformations are documented."
    153       }
    154     },
    155     "limitations_and_scope": {
    156       "limitations_section_present": {
    157         "applies": true,
    158         "answer": false,
    159         "justification": "There is no limitations or threats-to-validity section. The conclusion mentions 'challenges' only in the context of future work."
    160       },
    161       "threats_to_validity_specific": {
    162         "applies": true,
    163         "answer": false,
    164         "justification": "No specific threats to validity are discussed anywhere in the paper."
    165       },
    166       "scope_boundaries_stated": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "No explicit scope boundaries are stated. The paper does not clarify what the results do not show."
    170       }
    171     },
    172     "data_integrity": {
    173       "raw_data_available": {
    174         "applies": true,
    175         "answer": false,
    176         "justification": "No raw data is available. The figures show trends but the underlying data is not provided or described."
    177       },
    178       "data_collection_described": {
    179         "applies": true,
    180         "answer": false,
    181         "justification": "No data collection procedure is described. It is entirely unclear where the data for the results came from."
    182       },
    183       "recruitment_methods_described": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "The paper does not involve human participants. The student performance data source is unspecified but appears to be from a system simulation, not a human study."
    187       },
    188       "data_pipeline_documented": {
    189         "applies": true,
    190         "answer": false,
    191         "justification": "A 'data pipeline' is mentioned as an architectural component but no actual pipeline from collection to analysis is documented."
    192       }
    193     },
    194     "conflicts_of_interest": {
    195       "funding_disclosed": {
    196         "applies": true,
    197         "answer": false,
    198         "justification": "No funding source is disclosed anywhere in the paper."
    199       },
    200       "affiliations_disclosed": {
    201         "applies": true,
    202         "answer": false,
    203         "justification": "The author's institutional affiliation is not provided beyond the author name."
    204       },
    205       "funder_independent_of_outcome": {
    206         "applies": false,
    207         "answer": false,
    208         "justification": "No funding is disclosed, so independence cannot be assessed. Given the single-author nature and lack of any funding mention, the work appears to be unfunded."
    209       },
    210       "financial_interests_declared": {
    211         "applies": true,
    212         "answer": false,
    213         "justification": "No competing interests or financial interests statement is present."
    214       }
    215     },
    216     "contamination": {
    217       "training_cutoff_stated": {
    218         "applies": false,
    219         "answer": false,
    220         "justification": "The paper does not evaluate a pre-trained model on any benchmark. It proposes a conceptual framework."
    221       },
    222       "train_test_overlap_discussed": {
    223         "applies": false,
    224         "answer": false,
    225         "justification": "No benchmark evaluation is performed, so train/test overlap is not applicable."
    226       },
    227       "benchmark_contamination_addressed": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "No benchmark evaluation is performed."
    231       }
    232     },
    233     "human_studies": {
    234       "pre_registered": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "No human participants are involved in this study."
    238       },
    239       "irb_or_ethics_approval": {
    240         "applies": false,
    241         "answer": false,
    242         "justification": "No human participants are involved."
    243       },
    244       "demographics_reported": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants are involved."
    248       },
    249       "inclusion_exclusion_criteria": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants are involved."
    253       },
    254       "randomization_described": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants are involved."
    258       },
    259       "blinding_described": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants are involved."
    263       },
    264       "attrition_reported": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants are involved."
    268       }
    269     },
    270     "cost_and_practicality": {
    271       "inference_cost_reported": {
    272         "applies": true,
    273         "answer": false,
    274         "justification": "No inference cost, latency, or API costs are reported despite proposing a system that uses multiple ML models."
    275       },
    276       "compute_budget_stated": {
    277         "applies": true,
    278         "answer": false,
    279         "justification": "No computational budget, GPU hours, or hardware specifications are stated."
    280       }
    281     }
    282   },
    283   "claims": [
    284     {
    285       "claim": "AI-based bug detection and correction actively reduces bugs over time in LMS development.",
    286       "evidence": "Figure 2 shows bug incidence before and after AI integration, but no quantitative values, dataset details, or statistical analysis are provided (Section IV).",
    287       "supported": "unsupported"
    288     },
    289     {
    290       "claim": "AI agents decrease development time across various stages of the SDLC.",
    291       "evidence": "Figure 3 shows a line graph of development time reduction, but no specific numbers, systems, or datasets are identified (Section IV).",
    292       "supported": "unsupported"
    293     },
    294     {
    295       "claim": "AI anomaly detection achieves higher accuracy and lower false positives than traditional security solutions.",
    296       "evidence": "Figure 4 compares AI vs. traditional solutions, but no accuracy values, datasets, or experimental details are provided (Section IV).",
    297       "supported": "unsupported"
    298     },
    299     {
    300       "claim": "AI-driven adaptive learning improves student performance.",
    301       "evidence": "Figure 6 shows student performance before and after AI adaptive learning, but no participant details, metrics, or statistical analysis are provided (Section IV).",
    302       "supported": "unsupported"
    303     }
    304   ],
    305   "methodology_tags": ["theoretical"],
    306   "key_findings": "The paper proposes a conceptual framework for integrating AI agents into LMS software development and security management, comprising a Software Development Agent, Security Management Agent, data pipeline, and reinforcement learning layer. Results are presented only as figures showing before/after comparisons with no quantitative values, datasets, experimental details, or statistical analysis provided. All claims of improvement are unsupported by verifiable evidence.",
    307   "red_flags": [
    308     {
    309       "flag": "Results without data",
    310       "detail": "The paper presents seven figures purporting to show experimental results (bug reduction, development time, anomaly detection accuracy, etc.) but never identifies the dataset, system under test, sample sizes, or any quantitative values. The origin of these results is entirely opaque."
    311     },
    312     {
    313       "flag": "No reproducibility information",
    314       "detail": "Zero implementation details are provided: no code, no data, no environment specs, no hyperparameters, no model versions. The methodology section describes models only with mathematical formulas at a textbook level."
    315     },
    316     {
    317       "flag": "Unsubstantiated causal claims",
    318       "detail": "The paper repeatedly claims AI integration caused improvements without any controlled experimental design, confound analysis, or statistical testing."
    319     },
    320     {
    321       "flag": "Irrelevant references",
    322       "detail": "Many references are tangentially related at best (e.g., Turing 1950, robotics in precision agriculture, AI image analysis in buildings, eye-tracking emotional intelligence). This suggests padding rather than genuine scholarly engagement."
    323     },
    324     {
    325       "flag": "No limitations discussion",
    326       "detail": "The paper has no limitations section and does not acknowledge any weaknesses in its methodology or results."
    327     }
    328   ],
    329   "cited_papers": [
    330     {
    331       "title": "Future of software development with generative AI",
    332       "authors": ["J. Sauvola", "S. Tarkoma", "M. Klemettinen", "J. Riekki", "D. Doermann"],
    333       "year": 2024,
    334       "relevance": "Directly addresses the future of AI-driven software development, a core survey topic."
    335     },
    336     {
    337       "title": "ChatGPT and Open-AI Models: A Preliminary Review",
    338       "authors": ["K.I. Roumeliotis", "N.D. Tselikas"],
    339       "year": 2023,
    340       "relevance": "Review of OpenAI models relevant to LLM capability assessment."
    341     },
    342     {
    343       "title": "Detecting latent topics and trends in software engineering research since 1980 using probabilistic topic modeling",
    344       "authors": ["F. Gurcan", "G.G.M. Dalveren", "N.E. Cagiltay", "A. Soylu"],
    345       "year": 2022,
    346       "relevance": "Maps software engineering research trends, relevant to understanding the field's evolution."
    347     }
    348   ]
    349 }

Impressum · Datenschutz