ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (24135B)


      1 {
      2   "paper": {
      3     "title": "A Review of Generative AI and DevOps Pipelines: CI/CD, Agentic Automation, MLOps Integration, and LLMs",
      4     "authors": ["Satyadhar Joshi"],
      5     "year": 2025,
      6     "venue": "International Journal of Innovative Research in Computer Science and Technology (IJIRCST)",
      7     "doi": "10.55524/ijircst.2025.13.4.1"
      8   },
      9   "scan_version": 3,
     10   "active_modules": ["survey_methodology"],
     11   "methodology_tags": ["meta-analysis"],
     12   "key_findings": "This review surveys approximately 50 publications and industry sources on generative AI applications in DevOps, covering CI/CD pipeline automation, Kubernetes management, cloud platform comparisons, and AI agent architectures. The paper identifies DevOps automation, AI agents, and containerization as key themes, and projects increasing AI autonomy in DevOps through 2029. However, the review relies predominantly on blog posts, Medium articles, and marketing materials rather than peer-reviewed research, and provides no quality assessment of its sources.",
     13   "checklist": {
     14     "artifacts": {
     15       "code_released": {
     16         "applies": true,
     17         "answer": false,
     18         "justification": "No code, analysis scripts, or repository is mentioned or linked anywhere in the paper. A survey can release its search corpus or analysis tooling."
     19       },
     20       "data_released": {
     21         "applies": true,
     22         "answer": false,
     23         "justification": "No dataset, search corpus, or structured extraction of reviewed papers is released. The paper does not provide any downloadable data."
     24       },
     25       "environment_specified": {
     26         "applies": true,
     27         "answer": false,
     28         "justification": "No environment specifications are provided. No tools, software, or analysis environment is described."
     29       },
     30       "reproduction_instructions": {
     31         "applies": true,
     32         "answer": false,
     33         "justification": "No reproduction instructions are provided. A reader could not reconstruct the search process or reproduce the review from the information given."
     34       }
     35     },
     36     "statistical_methodology": {
     37       "confidence_intervals_or_error_bars": {
     38         "applies": false,
     39         "answer": false,
     40         "justification": "Survey paper that does not run experiments or compute its own statistics. All quantitative claims are cited from other sources."
     41       },
     42       "significance_tests": {
     43         "applies": false,
     44         "answer": false,
     45         "justification": "Survey paper with no original statistical analysis. No comparative claims requiring significance tests."
     46       },
     47       "effect_sizes_reported": {
     48         "applies": false,
     49         "answer": false,
     50         "justification": "Survey paper that does not compute its own effect sizes. Numbers like '217% acceleration' and '68% PR cycle time reduction' are cited from blog posts, not computed by the authors."
     51       },
     52       "sample_size_justified": {
     53         "applies": false,
     54         "answer": false,
     55         "justification": "Survey paper with no experiments. The choice of 50 publications is not statistically justified but this is a literature review, not a statistical study."
     56       },
     57       "variance_reported": {
     58         "applies": false,
     59         "answer": false,
     60         "justification": "Survey paper that does not run experiments or report variance across any measurements."
     61       }
     62     },
     63     "evaluation_design": {
     64       "baselines_included": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "The survey does not compare itself against prior reviews or surveys on the same topic. No baseline or comparison with existing literature reviews is provided."
     68       },
     69       "baselines_contemporary": {
     70         "applies": true,
     71         "answer": false,
     72         "justification": "No baselines are included at all, so whether they are contemporary cannot be assessed. The paper does not reference or compare against any prior surveys of AI in DevOps."
     73       },
     74       "ablation_study": {
     75         "applies": false,
     76         "answer": false,
     77         "justification": "Survey paper with no system or components to ablate."
     78       },
     79       "multiple_metrics": {
     80         "applies": false,
     81         "answer": false,
     82         "justification": "Survey paper with no experiments requiring evaluation metrics."
     83       },
     84       "human_evaluation": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "Survey paper with no system outputs to evaluate."
     88       },
     89       "held_out_test_set": {
     90         "applies": false,
     91         "answer": false,
     92         "justification": "Survey paper with no experiments requiring train/test splits."
     93       },
     94       "per_category_breakdown": {
     95         "applies": true,
     96         "answer": true,
     97         "justification": "The paper organizes findings into multiple categories: CI/CD pipelines (Section IV), Kubernetes (Section V), cloud services (Section VI), AI agents (Section IX). Table 1 breaks down source types, Table 2 covers risks, Table 5 compares cloud providers."
     98       },
     99       "failure_cases_discussed": {
    100         "applies": true,
    101         "answer": true,
    102         "justification": "The paper discusses risks and challenges in multiple sections: Table 2 lists risks in AI-augmented CI/CD (security gaps 42%, configuration drift 31%, over-automation 27%), Section V.E discusses Kubernetes-AI challenges, and Section XI.A lists challenges including ethics, integration complexity, and reliability."
    103       },
    104       "negative_results_reported": {
    105         "applies": true,
    106         "answer": false,
    107         "justification": "The paper presents an entirely positive narrative about AI in DevOps. While it lists challenges and risks, it does not report any negative findings from the literature (e.g., cases where AI integration failed or reduced performance)."
    108       }
    109     },
    110     "claims_and_evidence": {
    111       "abstract_claims_supported": {
    112         "applies": true,
    113         "answer": true,
    114         "justification": "The abstract claims to cover 50 research works on generative AI in DevOps, identify top terms/theories/algorithms, and offer future projections. The paper does address these topics in Sections III, IV-IX, and X respectively. However, the quality of the underlying evidence is weak."
    115       },
    116       "causal_claims_justified": {
    117         "applies": true,
    118         "answer": false,
    119         "justification": "The paper makes numerous causal claims such as 'Code review automation reduces PR cycle time by 68%' and 'AI-driven autoscaling cuts costs by 37%' (Section II), but these are sourced from blog posts and marketing materials, not from rigorous causal studies. The paper presents these as established facts without evaluating the evidence quality."
    120       },
    121       "generalization_bounded": {
    122         "applies": true,
    123         "answer": false,
    124         "justification": "The paper makes extremely broad claims like 'Generative AI is fundamentally transforming DevOps' and projects '80% CI/CD pipelines will be AI-assisted' by 2026 (Table 7) without bounding these to specific contexts, industries, or organizational sizes. Future projections in Section X are entirely unbounded."
    125       },
    126       "alternative_explanations_discussed": {
    127         "applies": true,
    128         "answer": false,
    129         "justification": "The paper presents a single optimistic narrative about AI transforming DevOps. No alternative interpretations of the literature are considered — e.g., whether reported improvements are due to novelty effects, selection bias in industry reports, or marketing hype."
    130       },
    131       "proxy_outcome_distinction": {
    132         "applies": true,
    133         "answer": false,
    134         "justification": "The paper equates proxy measures with outcomes without distinction. For example, '68% PR cycle time reduction' is presented as evidence of 'efficiency' and '92% coverage' as evidence of testing quality, without discussing what these proxies actually capture or miss."
    135       }
    136     },
    137     "setup_transparency": {
    138       "model_versions_specified": {
    139         "applies": false,
    140         "answer": false,
    141         "justification": "Survey paper that does not use any AI models directly."
    142       },
    143       "prompts_provided": {
    144         "applies": false,
    145         "answer": false,
    146         "justification": "Survey paper that does not use prompting."
    147       },
    148       "hyperparameters_reported": {
    149         "applies": false,
    150         "answer": false,
    151         "justification": "Survey paper with no experiments requiring hyperparameters."
    152       },
    153       "scaffolding_described": {
    154         "applies": false,
    155         "answer": false,
    156         "justification": "Survey paper with no agentic scaffolding."
    157       },
    158       "data_preprocessing_documented": {
    159         "applies": true,
    160         "answer": false,
    161         "justification": "The paper mentions inclusion criteria ('Address DevOps-AI integration, Present empirical results, Be published between 2023–2025') in Section II.A but does not document the actual search process: which databases were searched, what search queries were used, how many initial results were found, or how the final 50 were selected from the initial pool."
    162       }
    163     },
    164     "limitations_and_scope": {
    165       "limitations_section_present": {
    166         "applies": true,
    167         "answer": false,
    168         "justification": "No dedicated limitations section exists. Section XI.A ('Challenges and Future Directions') discusses challenges of AI in DevOps generally, but not limitations of this review itself."
    169       },
    170       "threats_to_validity_specific": {
    171         "applies": true,
    172         "answer": false,
    173         "justification": "No threats to validity are discussed. The paper does not acknowledge any weaknesses in its own methodology, source selection, or analysis approach."
    174       },
    175       "scope_boundaries_stated": {
    176         "applies": true,
    177         "answer": false,
    178         "justification": "No explicit scope boundaries are stated. The paper claims to cover 2023-2025 publications but includes references from 2022 and 2018. No discussion of what topics, venues, or paper types were excluded, or what the review does NOT claim to show."
    179       }
    180     },
    181     "data_integrity": {
    182       "raw_data_available": {
    183         "applies": true,
    184         "answer": false,
    185         "justification": "No raw data is available. The list of reviewed papers, the screening decisions, and the extracted data points are not provided in any downloadable or structured format."
    186       },
    187       "data_collection_described": {
    188         "applies": true,
    189         "answer": false,
    190         "justification": "The paper mentions reviewing '50 peer-reviewed publications and industry white papers' with inclusion criteria, but does not describe which databases were searched, what search terms were used, when the search was conducted, or how papers were identified."
    191       },
    192       "recruitment_methods_described": {
    193         "applies": true,
    194         "answer": false,
    195         "justification": "The 'sample' for this survey is the set of reviewed papers. The paper does not describe how papers were found or recruited — no search databases, queries, or discovery process is documented. Table 1 shows source type distribution but not how sources were discovered."
    196       },
    197       "data_pipeline_documented": {
    198         "applies": true,
    199         "answer": false,
    200         "justification": "No data pipeline is documented. There are no screening stages with counts, no PRISMA flow diagram, and no explanation of how the initial pool was narrowed to the final set of reviewed papers."
    201       }
    202     },
    203     "conflicts_of_interest": {
    204       "funding_disclosed": {
    205         "applies": true,
    206         "answer": false,
    207         "justification": "No funding disclosure. The Declaration section states 'Work is done as a part of independent researcher' but does not formally disclose funding status. The author's bio notes 'currently working as AVP at BoFA USA' but no statement about whether employer resources were used."
    208       },
    209       "affiliations_disclosed": {
    210         "applies": true,
    211         "answer": true,
    212         "justification": "Author affiliation is listed as 'Alumus, International MBA, Bar Ilan University, Israel' and the About the Author section notes 'currently working as AVP at BoFA USA.' The paper is not evaluating products from these organizations."
    213       },
    214       "funder_independent_of_outcome": {
    215         "applies": false,
    216         "answer": false,
    217         "justification": "The work appears to be unfunded independent research per the Declaration section. No funder to assess independence of."
    218       },
    219       "financial_interests_declared": {
    220         "applies": true,
    221         "answer": false,
    222         "justification": "No competing interests or financial interests statement is present. The Declaration section addresses institutional views but not financial interests."
    223       }
    224     },
    225     "contamination": {
    226       "training_cutoff_stated": {
    227         "applies": false,
    228         "answer": false,
    229         "justification": "Survey paper that does not evaluate any pre-trained model's capability on a benchmark."
    230       },
    231       "train_test_overlap_discussed": {
    232         "applies": false,
    233         "answer": false,
    234         "justification": "Survey paper that does not evaluate any pre-trained model on a benchmark."
    235       },
    236       "benchmark_contamination_addressed": {
    237         "applies": false,
    238         "answer": false,
    239         "justification": "Survey paper that does not evaluate any pre-trained model on a benchmark."
    240       }
    241     },
    242     "human_studies": {
    243       "pre_registered": {
    244         "applies": false,
    245         "answer": false,
    246         "justification": "Survey paper with no human participants."
    247       },
    248       "irb_or_ethics_approval": {
    249         "applies": false,
    250         "answer": false,
    251         "justification": "Survey paper with no human participants."
    252       },
    253       "demographics_reported": {
    254         "applies": false,
    255         "answer": false,
    256         "justification": "Survey paper with no human participants."
    257       },
    258       "inclusion_exclusion_criteria": {
    259         "applies": false,
    260         "answer": false,
    261         "justification": "Survey paper with no human participants."
    262       },
    263       "randomization_described": {
    264         "applies": false,
    265         "answer": false,
    266         "justification": "Survey paper with no human participants."
    267       },
    268       "blinding_described": {
    269         "applies": false,
    270         "answer": false,
    271         "justification": "Survey paper with no human participants."
    272       },
    273       "attrition_reported": {
    274         "applies": false,
    275         "answer": false,
    276         "justification": "Survey paper with no human participants."
    277       }
    278     },
    279     "cost_and_practicality": {
    280       "inference_cost_reported": {
    281         "applies": false,
    282         "answer": false,
    283         "justification": "Survey paper with no own method or system to cost."
    284       },
    285       "compute_budget_stated": {
    286         "applies": false,
    287         "answer": false,
    288         "justification": "Survey paper with no computational experiments."
    289       }
    290     },
    291     "survey_methodology": {
    292       "prisma_or_structured_protocol": {
    293         "applies": true,
    294         "answer": false,
    295         "justification": "The paper claims to use 'systematic literature review (SLR) methodology' in Section II.A but provides no PRISMA flow diagram, no registered protocol, no reproducible search queries, and no documentation of search databases. The methodology description is a brief paragraph with inclusion criteria only."
    296       },
    297       "quality_assessment_of_sources": {
    298         "applies": true,
    299         "answer": false,
    300         "justification": "No quality assessment of reviewed sources is performed. Blog posts, Medium articles, LinkedIn posts, product documentation, and marketing materials (comprising the majority of the ~95 references) are treated with equal weight to the small number of peer-reviewed papers. Table 1 categorizes sources by type but does not assess their quality."
    301       },
    302       "publication_bias_discussed": {
    303         "applies": true,
    304         "answer": false,
    305         "justification": "No discussion of publication bias. The paper does not consider that its sources (heavily weighted toward industry marketing materials and blog posts) systematically skew toward positive portrayals of AI-DevOps integration."
    306       }
    307     }
    308   },
    309   "claims": [
    310     {
    311       "claim": "The integration of Generative AI into DevOps practices has accelerated by 217% since 2023.",
    312       "evidence": "Cited to reference [2], a TechBullion blog post (Section II.A). No primary data source or methodology for this figure is provided.",
    313       "supported": "unsupported"
    314     },
    315     {
    316       "claim": "Code review automation reduces PR cycle time by 68%.",
    317       "evidence": "Cited to reference [35], a Google Cloud blog post (Section II.B). The original source's methodology and sample are not evaluated.",
    318       "supported": "weak"
    319     },
    320     {
    321       "claim": "AI-generated test cases achieve 92% coverage.",
    322       "evidence": "Cited to reference [40], a Medium blog post '10 Ways to Use Generative AI for DevOps' (Section II.B). No peer-reviewed source or experimental details.",
    323       "supported": "weak"
    324     },
    325     {
    326       "claim": "Komodor's Klaudia reduces MTTR by 53%.",
    327       "evidence": "Cited to reference [28], a Cloud Native Now news article about a product launch (Section II.A). This is a vendor claim, not independently verified.",
    328       "supported": "weak"
    329     },
    330     {
    331       "claim": "80% of CI/CD pipelines will be AI-assisted by 2026.",
    332       "evidence": "Presented in Table 7 (Section X) and reiterated in the conclusion. No methodology for this projection is provided; it is not derived from trend data or modeling.",
    333       "supported": "unsupported"
    334     },
    335     {
    336       "claim": "First fully autonomous DevOps teams will emerge by 2029.",
    337       "evidence": "Presented in Table 7 (Section X). Speculative projection with no supporting evidence or trend analysis.",
    338       "supported": "unsupported"
    339     }
    340   ],
    341   "red_flags": [
    342     {
    343       "flag": "Sources predominantly blog posts and marketing materials",
    344       "detail": "Of ~95 references, the vast majority are blog posts (Medium, DEV Community, LinkedIn), product documentation (Docker, AWS, Azure, Google Cloud), marketing content (TechBullion, CloudWars), and training course pages. Only a handful are peer-reviewed publications. The paper claims to be a 'systematic literature review' but reviews mostly grey literature."
    345     },
    346     {
    347       "flag": "Self-citation padding",
    348       "detail": "References [13], [31], [32], [90]-[95] are all by the same author (Satyadhar Joshi), comprising 9 of ~95 references. These self-cited papers are about financial applications of AI, not DevOps, and are published in low-impact journals. They appear inserted to inflate the author's citation count."
    349     },
    350     {
    351       "flag": "Unsubstantiated quantitative claims",
    352       "detail": "Specific numbers like '217% acceleration,' '68% PR cycle time reduction,' '92% test coverage,' and '53% MTTR reduction' are presented as authoritative findings but are sourced from blog posts and vendor marketing materials. No verification or critical evaluation of these numbers is attempted."
    353     },
    354     {
    355       "flag": "Speculative projections presented as findings",
    356       "detail": "Section X and Table 7 present speculative milestones (e.g., 'K8s self-management reaches L5 autonomy' by 2027, 'DevOps Singularity: Human oversight becomes optional' by 2029) without any supporting methodology, trend data, or modeling. These are presented alongside empirical claims."
    357     },
    358     {
    359       "flag": "No quality assessment of reviewed sources",
    360       "detail": "The paper treats all sources equally regardless of provenance. A vendor marketing blog post claiming '68% reduction' carries the same evidentiary weight as a peer-reviewed study. This masks the poor signal-to-noise ratio of its sources."
    361     },
    362     {
    363       "flag": "Numerous typos and editorial errors",
    364       "detail": "The paper contains many typos: 'devleoperss deploey,' 'ingergration,' 'enhansing,' 'synergoes,' 'indepdendent,' 'intergration,' 'Microsfot,' 'Comaprisons,' 'survyed.' Internal inconsistencies include anonymized cloud providers in Table 3 (Cloud A/B/C) but named providers in Table 5. The SLR methodology description is repeated nearly verbatim in Section II.A. This suggests minimal editorial review."
    365     },
    366     {
    367       "flag": "Duplicate and contradictory structure",
    368       "detail": "The paper states its SLR methodology description twice in Section II.A (same inclusion criteria listed twice). The 'Intelligent Automation' bullet points appear in both Section II.A and II.B verbatim. Future projections appear in both Section X and the conclusion. This suggests copy-paste assembly rather than structured writing."
    369     },
    370     {
    371       "flag": "Predatory journal indicators",
    372       "detail": "The venue (IJIRCST) publishes in 'Volume-13, Issue-4' with a rapid turnaround (received May 23, revised June 6, accepted June 21 — 29 days from submission to acceptance). The extensive typos, self-citations, and lack of editorial polish are consistent with minimal peer review."
    373     }
    374   ],
    375   "cited_papers": [
    376     {
    377       "title": "Building AI Agents for Autonomous Clouds: Challenges and Design Principles",
    378       "authors": ["M. Shetty"],
    379       "year": 2024,
    380       "arxiv_id": "2407.12165",
    381       "relevance": "Discusses design principles for AI agents in autonomous cloud operations, directly relevant to agentic AI systems."
    382     },
    383     {
    384       "title": "Study on the interaction between big data and artificial intelligence",
    385       "authors": ["J. Li", "Z. Ye", "C. Zhang"],
    386       "year": 2022,
    387       "doi": "10.1002/sres.2878",
    388       "relevance": "Examines the foundational relationship between big data and AI systems relevant to understanding AI pipeline infrastructure."
    389     },
    390     {
    391       "title": "Ydata-profiling: Accelerating data-centric AI with high-quality data",
    392       "authors": ["F. Clemente", "G. M. Ribeiro", "A. Quemy", "M. S. Santos", "R. C. Pereira", "A. Barros"],
    393       "year": 2023,
    394       "doi": "10.1016/j.neucom.2023.126585",
    395       "relevance": "Tool for data quality in AI pipelines, relevant to data-centric AI development and MLOps practices."
    396     },
    397     {
    398       "title": "Platform power in AI: The evolution of cloud infrastructures in the political economy of artificial intelligence",
    399       "authors": ["D. Luitse"],
    400       "year": 2024,
    401       "relevance": "Analyzes platform lock-in and power dynamics in AI cloud infrastructure, relevant to understanding vendor dependencies in AI development."
    402     },
    403     {
    404       "title": "Big AI: Cloud infrastructure dependence and the industrialisation of artificial intelligence",
    405       "authors": ["F. van der Vlist", "A. Helmond", "F. Ferrari"],
    406       "year": 2024,
    407       "doi": "10.1177/20539517241232630",
    408       "relevance": "Studies cloud infrastructure dependence in AI development, relevant to understanding concentration risks in AI systems."
    409     }
    410   ],
    411   "engagement_factors": {
    412     "practical_relevance": {
    413       "score": 1,
    414       "justification": "Lists DevOps AI tools and trends at a high level but provides no actionable techniques, configurations, or implementation guidance a practitioner could use directly."
    415     },
    416     "surprise_contrarian": {
    417       "score": 0,
    418       "justification": "Entirely confirms conventional wisdom that AI is transforming DevOps; no contrarian findings or surprises."
    419     },
    420     "fear_safety": {
    421       "score": 0,
    422       "justification": "Mentions security risks briefly (Table 2) but does not raise novel safety concerns or demonstrate any attacks."
    423     },
    424     "drama_conflict": {
    425       "score": 0,
    426       "justification": "No controversy, critique of existing work, or conflict — the paper is uniformly positive about AI-DevOps integration."
    427     },
    428     "demo_ability": {
    429       "score": 0,
    430       "justification": "No code, demo, or tool is provided; the paper is purely descriptive."
    431     },
    432     "brand_recognition": {
    433       "score": 1,
    434       "justification": "Discusses well-known platforms (Docker, Kubernetes, AWS, Azure, GCP) but is not authored by or affiliated with those organizations."
    435     }
    436   }
    437 }

Impressum · Datenschutz