scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (23318B)
      1 {
      2   "paper": {
      3     "title": "Bridging MDE and AI: A Systematic Review of Domain-Specific Languages and Model-Driven Practices in AI Software Systems Engineering",
      4     "authors": [
      5       "Simon Rädler",
      6       "Luca Berardinelli",
      7       "Karolin Winter",
      8       "Abbas Rahimi",
      9       "Stefanie Rinderle-Ma"
     10     ],
     11     "year": 2023,
     12     "venue": "SoSyM Journal (preprint)",
     13     "arxiv_id": "2307.04599",
     14     "doi": null
     15   },
     16   "checklist": {
     17     "artifacts": {
     18       "code_released": {
     19         "applies": true,
     20         "answer": true,
     21         "justification": "The paper references an online repository at https://github.com/sraedler/Model-Driven-Engineering4Artificial-Intelligence containing the SLR protocol spreadsheet and bibliographic entries (Section 3.2, footnote 3)."
     22       },
     23       "data_released": {
     24         "applies": true,
     25         "answer": true,
     26         "justification": "The search terms, results, and extracted data are archived and available online at the GitHub repository referenced in footnote 3. The SLR protocol execution is documented in a spreadsheet available online."
     27       },
     28       "environment_specified": {
     29         "applies": true,
     30         "answer": false,
     31         "justification": "No environment or software dependency specifications are provided. This is an SLR so there is no computational environment per se, but the analysis tools/scripts (if any) are not documented."
     32       },
     33       "reproduction_instructions": {
     34         "applies": true,
     35         "answer": false,
     36         "justification": "While the SLR protocol is described in Section 3 and the search terms are given, there are no step-by-step reproduction instructions explaining how to use the online artifacts to replicate the study end-to-end."
     37       }
     38     },
     39     "statistical_methodology": {
     40       "confidence_intervals_or_error_bars": {
     41         "applies": false,
     42         "answer": false,
     43         "justification": "This is a systematic literature review that does not perform statistical experiments or report quantitative measurements requiring confidence intervals."
     44       },
     45       "significance_tests": {
     46         "applies": false,
     47         "answer": false,
     48         "justification": "This is a systematic literature review that does not make comparative claims requiring statistical significance tests."
     49       },
     50       "effect_sizes_reported": {
     51         "applies": false,
     52         "answer": false,
     53         "justification": "This is a systematic literature review without meta-analysis or statistical aggregation. No effect sizes are applicable."
     54       },
     55       "sample_size_justified": {
     56         "applies": false,
     57         "answer": false,
     58         "justification": "This is a systematic literature review. The 'sample size' (18 primary studies) is a result of the search/selection protocol, not a pre-determined sample requiring power analysis."
     59       },
     60       "variance_reported": {
     61         "applies": false,
     62         "answer": false,
     63         "justification": "This is a systematic literature review without quantitative experiments. No variance or standard deviation is applicable."
     64       }
     65     },
     66     "evaluation_design": {
     67       "baselines_included": {
     68         "applies": true,
     69         "answer": true,
     70         "justification": "The paper compares its scope and approach against prior surveys and secondary studies by Portugal et al. [68], Naveed et al. [64], Giray [38], Martínez-Fernández et al. [59], and Bucaioni et al. [18] in the Related Work section (Section 2.3), explaining how their work differs and complements these."
     71       },
     72       "baselines_contemporary": {
     73         "applies": true,
     74         "answer": true,
     75         "justification": "The related surveys compared against include Naveed et al. [64] (2024), Martínez-Fernández et al. [59] (2022), Giray [38] (2021), and Bucaioni et al. [18] (2022), which are contemporary works."
     76       },
     77       "ablation_study": {
     78         "applies": false,
     79         "answer": false,
     80         "justification": "This is a systematic literature review. There is no system with components to ablate."
     81       },
     82       "multiple_metrics": {
     83         "applies": false,
     84         "answer": false,
     85         "justification": "This is a systematic literature review, not a system evaluation. There are no performance metrics to report."
     86       },
     87       "human_evaluation": {
     88         "applies": false,
     89         "answer": false,
     90         "justification": "This is a systematic literature review that classifies and discusses published studies. Human evaluation of system outputs is not relevant."
     91       },
     92       "held_out_test_set": {
     93         "applies": false,
     94         "answer": false,
     95         "justification": "This is a systematic literature review with no test sets."
     96       },
     97       "per_category_breakdown": {
     98         "applies": true,
     99         "answer": true,
    100         "justification": "The paper provides detailed per-study breakdowns across MDE concerns (Table 5), AI concerns (Table 5), frameworks/tools (Table 7), and artifact availability/application domains (Table 8). Each of the 18 studies is individually assessed."
    101       },
    102       "failure_cases_discussed": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "The paper discusses gaps and weaknesses in the reviewed approaches. Section 5 (Discussion) and Section 5.6 (RQ6) explicitly identify shortcomings: lack of business understanding support, incomplete CRISP-DM coverage, case-specificity, lack of user studies, and missing runtime support."
    106       },
    107       "negative_results_reported": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "Section 5.6 reports several negative findings: no low-code/no-code contributions found for intelligent systems, lack of closed-loop processes, limited general applicability, and few user studies conducted by the reviewed approaches."
    111       }
    112     },
    113     "claims_and_evidence": {
    114       "abstract_claims_supported": {
    115         "applies": true,
    116         "answer": true,
    117         "justification": "The abstract claims are supported: (1) language workbenches are paramount (Table 5 shows 17/18 use LWs), (2) most prominent AI concerns are training and modeling (14/18 for Model Training in Table 5), (3) data preparation gets minor emphasis (8/18 for Feature Preparation), (4) business understanding is rarely reflected (5/18). All supported by data in Tables 5-8."
    118       },
    119       "causal_claims_justified": {
    120         "applies": false,
    121         "answer": false,
    122         "justification": "The paper is a systematic literature review that describes the state of the art. It does not make causal claims about why certain outcomes occur, only descriptive observations about what exists."
    123       },
    124       "generalization_bounded": {
    125         "applies": true,
    126         "answer": true,
    127         "justification": "The paper explicitly bounds its scope to MDE4AI (not AI4MDE), to DSLs with AI extensions as first-class entities, and acknowledges the limited volume of publications found (Section 5). The threats to validity section (Section 6) discusses limitations of generalizability."
    128       },
    129       "alternative_explanations_discussed": {
    130         "applies": true,
    131         "answer": true,
    132         "justification": "Section 5 discusses that the limited volume of publications may result from their selective exclusion criteria and the focus on MDE4AI excluding the complementary AI4MDE perspective. Section 6 (Threats to Validity) discusses how interdisciplinary terminology and various definitions could affect findings."
    133       }
    134     },
    135     "setup_transparency": {
    136       "model_versions_specified": {
    137         "applies": false,
    138         "answer": false,
    139         "justification": "This is a systematic literature review. No AI models are used in the research methodology."
    140       },
    141       "prompts_provided": {
    142         "applies": false,
    143         "answer": false,
    144         "justification": "This is a systematic literature review that does not use prompting or LLMs."
    145       },
    146       "hyperparameters_reported": {
    147         "applies": false,
    148         "answer": false,
    149         "justification": "This is a systematic literature review with no computational experiments requiring hyperparameters."
    150       },
    151       "scaffolding_described": {
    152         "applies": false,
    153         "answer": false,
    154         "justification": "No agentic scaffolding is used. This is a traditional systematic literature review."
    155       },
    156       "data_preprocessing_documented": {
    157         "applies": true,
    158         "answer": true,
    159         "justification": "The paper documents the filtering pipeline in detail: 1335 candidate studies from 5 databases, filtered to 130 by IC/EC on title/abstract, then to 18 after full-text reading, plus 4 from snowballing (Section 3.3, Figure 1, Table 2). The inclusion and exclusion criteria are explicitly stated in Table 2 with detailed explanations."
    160       }
    161     },
    162     "limitations_and_scope": {
    163       "limitations_section_present": {
    164         "applies": true,
    165         "answer": true,
    166         "justification": "Section 6 'Threats to Validity' provides a dedicated section discussing construct validity, internal validity, external validity, and conclusion validity."
    167       },
    168       "threats_to_validity_specific": {
    169         "applies": true,
    170         "answer": true,
    171         "justification": "The threats to validity section includes specific concerns: at least two researchers independently analyze each study with a third for disagreements (construct validity), the interdisciplinary nature of MDE and AI fields may cause terminology issues (conclusion validity), and the subjective nature of data extraction is mitigated by the protocol. These are specific to this study rather than purely generic."
    172       },
    173       "scope_boundaries_stated": {
    174         "applies": true,
    175         "answer": true,
    176         "justification": "The paper explicitly states scope boundaries: MDE4AI only (not AI4MDE), excludes DSLs without first-class AI concepts, excludes non-primary studies, vision papers, and theses (Table 2). Section 3.4 notes that deployment phase of CRISP-DM is beyond scope. The related work section (Section 2.3) explicitly differentiates the scope from prior surveys."
    177       }
    178     },
    179     "data_integrity": {
    180       "raw_data_available": {
    181         "applies": true,
    182         "answer": true,
    183         "justification": "The raw search results, selection spreadsheet, and bibliographic entries are available at the GitHub repository (footnote 3: https://github.com/sraedler/Model-Driven-Engineering4Artificial-Intelligence)."
    184       },
    185       "data_collection_described": {
    186         "applies": true,
    187         "answer": true,
    188         "justification": "Section 3.2 describes the search process in detail: 5 databases, keyword sets S1 (12 MDE terms) and S2 (10 AI terms) combined into 120 conjunctive propositions, executed as individual queries. The automated search was run in November 2022 and February 2024."
    189       },
    190       "recruitment_methods_described": {
    191         "applies": false,
    192         "answer": false,
    193         "justification": "No human participants are involved. The paper is a systematic literature review analyzing published studies, not conducting research with human subjects."
    194       },
    195       "data_pipeline_documented": {
    196         "applies": true,
    197         "answer": true,
    198         "justification": "Figure 1 and Section 3 document the full pipeline: 1335 collected from queries -> IC/EC applied to titles/abstracts -> 130 papers -> full-text reading -> 18 papers retained + 4 from snowballing. The data extraction template (Table 4) details what was extracted from each paper."
    199       }
    200     },
    201     "conflicts_of_interest": {
    202       "funding_disclosed": {
    203         "applies": true,
    204         "answer": true,
    205         "justification": "Funding is disclosed in the footnote on page 1: 'This project has been partially supported and funded by the AIDOaRt project, an ECSEL Joint Undertaking (JU) under grant agreement No. 101007350 and the Austrian Research Promotion Agency (FFG) via the Austrian Competence Center for Digital Production (CDP) under the contract number 881843.'"
    206       },
    207       "affiliations_disclosed": {
    208         "applies": true,
    209         "answer": true,
    210         "justification": "Author affiliations are listed: Technical University of Munich, Technical University of Vienna, Johannes Kepler University, and Eindhoven University of Technology. One author (Rädler) is both first author and author of primary study [71], which is acknowledged."
    211       },
    212       "funder_independent_of_outcome": {
    213         "applies": true,
    214         "answer": true,
    215         "justification": "The funders are the ECSEL Joint Undertaking (EU program) and the Austrian Research Promotion Agency via a competence center. These are public research funding bodies that do not have a financial stake in the specific outcomes of this systematic review."
    216       },
    217       "financial_interests_declared": {
    218         "applies": true,
    219         "answer": false,
    220         "justification": "No competing interests statement or financial interests declaration is present in the paper."
    221       }
    222     },
    223     "contamination": {
    224       "training_cutoff_stated": {
    225         "applies": false,
    226         "answer": false,
    227         "justification": "This is a systematic literature review that does not evaluate any pre-trained model on a benchmark."
    228       },
    229       "train_test_overlap_discussed": {
    230         "applies": false,
    231         "answer": false,
    232         "justification": "This is a systematic literature review that does not evaluate any pre-trained model on a benchmark."
    233       },
    234       "benchmark_contamination_addressed": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "This is a systematic literature review that does not evaluate any pre-trained model on a benchmark."
    238       }
    239     },
    240     "human_studies": {
    241       "pre_registered": {
    242         "applies": false,
    243         "answer": false,
    244         "justification": "No human participants are involved. This is a systematic literature review."
    245       },
    246       "irb_or_ethics_approval": {
    247         "applies": false,
    248         "answer": false,
    249         "justification": "No human participants are involved. This is a systematic literature review."
    250       },
    251       "demographics_reported": {
    252         "applies": false,
    253         "answer": false,
    254         "justification": "No human participants are involved. This is a systematic literature review."
    255       },
    256       "inclusion_exclusion_criteria": {
    257         "applies": false,
    258         "answer": false,
    259         "justification": "No human participants are involved. This is a systematic literature review. (Note: the paper does have inclusion/exclusion criteria for papers, addressed under data_preprocessing_documented.)"
    260       },
    261       "randomization_described": {
    262         "applies": false,
    263         "answer": false,
    264         "justification": "No human participants are involved. This is a systematic literature review."
    265       },
    266       "blinding_described": {
    267         "applies": false,
    268         "answer": false,
    269         "justification": "No human participants are involved. This is a systematic literature review."
    270       },
    271       "attrition_reported": {
    272         "applies": false,
    273         "answer": false,
    274         "justification": "No human participants are involved. This is a systematic literature review."
    275       }
    276     },
    277     "cost_and_practicality": {
    278       "inference_cost_reported": {
    279         "applies": false,
    280         "answer": false,
    281         "justification": "This is a systematic literature review. No computational method with inference costs is proposed."
    282       },
    283       "compute_budget_stated": {
    284         "applies": false,
    285         "answer": false,
    286         "justification": "This is a systematic literature review. No significant computational resources are required."
    287       }
    288     }
    289   },
    290   "claims": [
    291     {
    292       "claim": "Language workbenches are of paramount importance in dealing with all aspects of modeling language development and are leveraged in 17 of 18 selected studies.",
    293       "evidence": "Table 5 shows the Language Workbench (LW) row has a filled square for 17 of 18 approaches, with only [42] lacking a specific LW implementation.",
    294       "supported": "strong"
    295     },
    296     {
    297       "claim": "The most prominent AI-related concern addressed by MDE approaches is model training (14/18 studies), while data preparation receives less emphasis (8/18).",
    298       "evidence": "Table 5 quantifies AI concerns: Model Training (MTrain) is supported by 14 studies, Feature Preparation (FP) by 8, Business Understanding (BU) by only 5.",
    299       "supported": "strong"
    300     },
    301     {
    302       "claim": "Early project phases supporting interdisciplinary communication (CRISP-DM Business Understanding) are rarely reflected, with only 5 of 18 approaches supporting it.",
    303       "evidence": "Table 5 shows Business Understanding (BU) supported by 5 studies. Section 5.5 discusses the limited communication support.",
    304       "supported": "strong"
    305     },
    306     {
    307       "claim": "The use of MDE for AI is still in its early stages with no single widely-used tool or method.",
    308       "evidence": "Sections 5 and 7 discuss the fragmented landscape. Table 7 shows a wide variety of tools with no dominant one. Only 18 primary studies were found from 1335 candidates, and publication volumes are limited (Figure 2).",
    309       "supported": "strong"
    310     },
    311     {
    312       "claim": "Textual concrete syntax is preferred over graphical syntax in the reviewed MDE4AI approaches.",
    313       "evidence": "Table 5 shows 10 textual, 7 graphical, 2 XMI concrete syntaxes. Section 5.1 discusses this preference.",
    314       "supported": "strong"
    315     },
    316     {
    317       "claim": "No valuable contributions were identified for low-code/no-code development platforms targeting intelligent systems.",
    318       "evidence": "Section 5.6 states: 'this study did not identify any valuable contributions, despite the recent trends in low-code and no-code development platforms targeting intelligent systems.'",
    319       "supported": "moderate"
    320     }
    321   ],
    322   "methodology_tags": [
    323     "meta-analysis"
    324   ],
    325   "key_findings": "This systematic literature review of 18 primary studies (from 1335 candidates) finds that MDE approaches for AI software systems predominantly focus on the model training phase of CRISP-DM, while time-consuming tasks like data preparation and early business understanding phases are underserved. Language workbenches are nearly universally adopted (17/18 studies), with EMF and Xtext being the most common tooling, and textual syntax preferred over graphical. The field is fragmented with no dominant tool or method, and current approaches tend to be case-specific rather than generally applicable.",
    326   "red_flags": [
    327     {
    328       "flag": "Author self-inclusion",
    329       "detail": "First author Simon Rädler is also an author of primary study [71] (Raedler et al.), which is one of the 18 reviewed papers. While this is acknowledged in the text, it represents a potential bias in the review process."
    330     },
    331     {
    332       "flag": "No structured quality assessment of primary studies",
    333       "detail": "While the paper mentions using quality criteria from [56] (CASP tool), no quality scores or structured quality assessment results are presented for the 18 primary studies. The reader cannot assess the methodological rigor of the individual reviewed papers."
    334     },
    335     {
    336       "flag": "Small corpus",
    337       "detail": "Only 18 primary studies were retained from 1335 candidates, which is a very aggressive filtering ratio (1.3%). While this may reflect the narrow scope, it limits the generalizability of the findings about the MDE4AI field."
    338     }
    339   ],
    340   "cited_papers": [
    341     {
    342       "title": "Software engineering for ai-based systems: a survey",
    343       "authors": ["Silverio Martínez-Fernández", "Justus Bogner", "Xavier Franch", "Marc Oriol", "Julien Siebert", "Adam Trendowicz", "Anna Maria Vollmer", "Stefan Wagner"],
    344       "year": 2022,
    345       "relevance": "Comprehensive survey on software engineering practices for AI-based systems, directly relevant to understanding SE4AI methodology and quality."
    346     },
    347     {
    348       "title": "A software engineering perspective on engineering machine learning systems: State of the art and challenges",
    349       "authors": ["Görkem Giray"],
    350       "year": 2021,
    351       "relevance": "Systematic review of 141 studies on SE for ML systems, finding no mature tools/techniques and identifying challenges in ML systems engineering."
    352     },
    353     {
    354       "title": "Model driven engineering for machine learning components: A systematic literature review",
    355       "authors": ["Hira Naveed", "Chetan Arora", "Hourieh Khalajzadeh", "John Grundy", "Omar Haggag"],
    356       "year": 2024,
    357       "relevance": "Parallel systematic review on MDE4ML selecting 46 primary studies, providing a complementary viewpoint to this paper's analysis."
    358     },
    359     {
    360       "title": "Modelling in low-code development: a multi-vocal systematic review",
    361       "authors": ["Alessio Bucaioni", "Antonio Cicchetti", "Federico Ciccozzi"],
    362       "year": 2022,
    363       "relevance": "Multi-vocal review on low-code development platforms and MDE's role, relevant to understanding AI-augmented software development tooling."
    364     },
    365     {
    366       "title": "On the Engineering of AI-Powered Systems",
    367       "authors": ["Evgeny Kusmenko", "Svetlana Pavlitskaya", "Bernhard Rumpe", "Sebastian Stuber"],
    368       "year": 2019,
    369       "relevance": "Presents DSL-based MDE approach for deep learning using MontiAnna framework, directly relevant to AI-powered system engineering methodology."
    370     },
    371     {
    372       "title": "A model-driven approach to machine learning and software modeling for the IoT",
    373       "authors": ["Armin Moin", "Moharram Challenger", "Atta Badii", "Stephan Günnemann"],
    374       "year": 2022,
    375       "relevance": "MDE approach extending ThingML for IoT with ML/data analytics, demonstrating model-driven code generation for AI in cyber-physical systems."
    376     },
    377     {
    378       "title": "The next Evolution of MDE: A Seamless Integration of Machine Learning into Domain Modeling",
    379       "authors": ["Thomas Hartmann", "Assaad Moawad", "Francois Fouquet", "Yves Le Traon"],
    380       "year": 2019,
    381       "relevance": "Proposes integration of machine learning micro-learning units into domain modeling, one of the few approaches supporting runtime models."
    382     },
    383     {
    384       "title": "AIoTML: A Unified Modeling Language for AIoT-Based Cyber-Physical Systems",
    385       "authors": ["Ming Hu", "E. Cao", "Hongbing Huang", "Min Zhang", "Xiaohong Chen", "Mingsong Chen"],
    386       "year": 2023,
    387       "relevance": "Novel DSL extending ThingML for AI-IoT cyber-physical systems, relevant to understanding model-driven approaches for AI system development."
    388     },
    389     {
    390       "title": "Failure of AI projects: Understanding the critical factors",
    391       "authors": ["Jens Westenberger", "Kajetan Schuler", "Dennis Schlegel"],
    392       "year": 2022,
    393       "relevance": "Identifies critical factors in AI project failures including communication challenges, providing context for why MDE4AI approaches are needed."
    394     },
    395     {
    396       "title": "DescribeML: A tool for describing machine learning datasets",
    397       "authors": ["Joan Giner-Miguelez", "Abel Gómez", "Jordi Cabot"],
    398       "year": 2022,
    399       "relevance": "Proposes a DSL for ML dataset description using Langium, relevant to dataset documentation and reproducibility in AI development."
    400     }
    401   ]
    402 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs