scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (14555B)
      1 {
      2   "paper": {
      3     "title": "AI-guided Model-Driven Embedded Software Engineering",
      4     "authors": ["Padma Iyenghar", "Friedrich Otte", "Elke Pulvermueller"],
      5     "year": 2022,
      6     "venue": "MODELSWARD 2022 - 10th International Conference on Model-Driven Engineering and Software Development",
      7     "doi": "10.5220/0011006200003119"
      8   },
      9   "checklist": {
     10     "artifacts": {
     11       "code_released": {
     12         "applies": true,
     13         "answer": false,
     14         "justification": "No repository URL or code archive is provided. The paper describes a prototype built with Rasa but does not release the source code."
     15       },
     16       "data_released": {
     17         "applies": true,
     18         "answer": false,
     19         "justification": "No training data, NLU examples, or Rasa configuration files are released."
     20       },
     21       "environment_specified": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "No environment specifications, dependency lists, or version information for Rasa or Python are provided."
     25       },
     26       "reproduction_instructions": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "No step-by-step reproduction instructions are included. The paper describes design concepts but not how to rebuild the prototype."
     30       }
     31     },
     32     "statistical_methodology": {
     33       "confidence_intervals_or_error_bars": {
     34         "applies": false,
     35         "answer": false,
     36         "justification": "This is a design/prototype paper with no quantitative experiments or measurements."
     37       },
     38       "significance_tests": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "No comparative quantitative claims are made."
     42       },
     43       "effect_sizes_reported": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No quantitative experiments are conducted."
     47       },
     48       "sample_size_justified": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No experiments with samples are conducted."
     52       },
     53       "variance_reported": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No experimental runs are reported."
     57       }
     58     },
     59     "evaluation_design": {
     60       "baselines_included": {
     61         "applies": true,
     62         "answer": false,
     63         "justification": "No baselines or comparisons with alternative approaches are provided. The paper compares two internal design options (form vs. custom actions) qualitatively but does not compare against any external baseline."
     64       },
     65       "baselines_contemporary": {
     66         "applies": true,
     67         "answer": false,
     68         "justification": "No baselines are included at all."
     69       },
     70       "ablation_study": {
     71         "applies": false,
     72         "answer": false,
     73         "justification": "The system is a single prototype; ablation is not applicable to this design paper."
     74       },
     75       "multiple_metrics": {
     76         "applies": true,
     77         "answer": false,
     78         "justification": "No evaluation metrics of any kind are reported. The prototype is described but not measured."
     79       },
     80       "human_evaluation": {
     81         "applies": true,
     82         "answer": false,
     83         "justification": "The paper claims the chatbot would aid novices but includes no user study or human evaluation of the prototype."
     84       },
     85       "held_out_test_set": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "No dataset or test set is used; this is a design/prototype paper."
     89       },
     90       "per_category_breakdown": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "No quantitative results are reported to break down."
     94       },
     95       "failure_cases_discussed": {
     96         "applies": true,
     97         "answer": false,
     98         "justification": "No failure cases or limitations of the prototype's NLU performance are discussed."
     99       },
    100       "negative_results_reported": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "The paper reports that the form approach (Option 1) has disadvantages: 'adding content very time consuming and is in conflict with requirement R4.' This led to choosing Option 2 instead."
    104       }
    105     },
    106     "claims_and_evidence": {
    107       "abstract_claims_supported": {
    108         "applies": true,
    109         "answer": false,
    110         "justification": "The abstract claims the assistant 'would aid novices in MDE or even non-programmer to learn and adopt model-driven ESE with a not-so-steep learning curve.' No evidence supports this claim — no user study or evaluation is conducted."
    111       },
    112       "causal_claims_justified": {
    113         "applies": true,
    114         "answer": false,
    115         "justification": "The paper implies the chatbot would reduce the learning curve and costs of MDE adoption, but provides no evidence for these causal claims."
    116       },
    117       "generalization_bounded": {
    118         "applies": true,
    119         "answer": false,
    120         "justification": "The paper discusses AI-guided MDE broadly but only implements a prototype for one specific tool (SiSy). The title and framing suggest broad applicability without bounding claims to this single tool."
    121       },
    122       "alternative_explanations_discussed": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "No alternative explanations or confounding factors are discussed."
    126       }
    127     },
    128     "setup_transparency": {
    129       "model_versions_specified": {
    130         "applies": true,
    131         "answer": false,
    132         "justification": "No version of Rasa or SiSy is specified. References point to websites accessed Nov 2021 but no version numbers are given."
    133       },
    134       "prompts_provided": {
    135         "applies": false,
    136         "answer": false,
    137         "justification": "The paper does not use LLM prompting; it uses a Rasa NLU pipeline with intents and entities."
    138       },
    139       "hyperparameters_reported": {
    140         "applies": true,
    141         "answer": false,
    142         "justification": "No Rasa NLU pipeline configuration, training parameters, or model hyperparameters are reported."
    143       },
    144       "scaffolding_described": {
    145         "applies": true,
    146         "answer": true,
    147         "justification": "The paper describes the chatbot architecture in detail: NLU component, dialog manager, custom actions, forms, tutorial dispatcher, and slot management (Sections 3 and 4, Figures 1, 2, 3, 5)."
    148       },
    149       "data_preprocessing_documented": {
    150         "applies": true,
    151         "answer": false,
    152         "justification": "No information about how NLU training data was prepared, what intents were defined, or how many training examples were used."
    153       }
    154     },
    155     "limitations_and_scope": {
    156       "limitations_section_present": {
    157         "applies": true,
    158         "answer": false,
    159         "justification": "There is no limitations or threats-to-validity section. The conclusion mentions future improvements but does not discuss limitations."
    160       },
    161       "threats_to_validity_specific": {
    162         "applies": true,
    163         "answer": false,
    164         "justification": "No threats to validity are discussed."
    165       },
    166       "scope_boundaries_stated": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "The paper does not explicitly state what the results do not show or what settings are excluded. It acknowledges 'this is only the tip of the iceberg' but does not bound its claims."
    170       }
    171     },
    172     "data_integrity": {
    173       "raw_data_available": {
    174         "applies": true,
    175         "answer": false,
    176         "justification": "No raw data (NLU training data, conversation logs, etc.) is available."
    177       },
    178       "data_collection_described": {
    179         "applies": true,
    180         "answer": false,
    181         "justification": "No description of how NLU training data or intent examples were collected or created."
    182       },
    183       "recruitment_methods_described": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "No human participants; this is a prototype design paper."
    187       },
    188       "data_pipeline_documented": {
    189         "applies": true,
    190         "answer": false,
    191         "justification": "No data pipeline from training data creation to model training is documented."
    192       }
    193     },
    194     "conflicts_of_interest": {
    195       "funding_disclosed": {
    196         "applies": true,
    197         "answer": false,
    198         "justification": "No funding information or acknowledgments section is present."
    199       },
    200       "affiliations_disclosed": {
    201         "applies": true,
    202         "answer": true,
    203         "justification": "Author affiliations are clearly listed: University of Osnabrueck and innotec GmbH."
    204       },
    205       "funder_independent_of_outcome": {
    206         "applies": true,
    207         "answer": false,
    208         "justification": "No funding is disclosed, so independence cannot be assessed. One author is affiliated with innotec GmbH, which could have a commercial interest, but this is not discussed."
    209       },
    210       "financial_interests_declared": {
    211         "applies": true,
    212         "answer": false,
    213         "justification": "No competing interests statement is present. One author has an industry affiliation (innotec GmbH) but no financial interest declaration is made."
    214       }
    215     },
    216     "contamination": {
    217       "training_cutoff_stated": {
    218         "applies": false,
    219         "answer": false,
    220         "justification": "The paper does not evaluate a pre-trained model on a benchmark; it builds a chatbot with Rasa's NLU pipeline."
    221       },
    222       "train_test_overlap_discussed": {
    223         "applies": false,
    224         "answer": false,
    225         "justification": "No benchmark evaluation of a pre-trained model is conducted."
    226       },
    227       "benchmark_contamination_addressed": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "No benchmark evaluation is conducted."
    231       }
    232     },
    233     "human_studies": {
    234       "pre_registered": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "No human participants in this study."
    238       },
    239       "irb_or_ethics_approval": {
    240         "applies": false,
    241         "answer": false,
    242         "justification": "No human participants in this study."
    243       },
    244       "demographics_reported": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants in this study."
    248       },
    249       "inclusion_exclusion_criteria": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants in this study."
    253       },
    254       "randomization_described": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants in this study."
    258       },
    259       "blinding_described": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants in this study."
    263       },
    264       "attrition_reported": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants in this study."
    268       }
    269     },
    270     "cost_and_practicality": {
    271       "inference_cost_reported": {
    272         "applies": false,
    273         "answer": false,
    274         "justification": "This is a design/prototype paper; it does not propose a method with measurable inference costs."
    275       },
    276       "compute_budget_stated": {
    277         "applies": false,
    278         "answer": false,
    279         "justification": "This is a design/prototype paper with no significant compute requirements to report."
    280       }
    281     }
    282   },
    283   "claims": [
    284     {
    285       "claim": "Empowering MDE tools with AI assistants would aid novices in MDE or even non-programmers to learn and adopt model-driven ESE with a not-so-steep learning curve.",
    286       "evidence": "No empirical evidence is provided. This is stated as a motivation in the abstract and Section 1.",
    287       "supported": "unsupported"
    288     },
    289     {
    290       "claim": "The custom actions approach (Option 2) is preferable to the form approach (Option 1) for implementing step-by-step tutorials in Rasa.",
    291       "evidence": "Qualitative comparison in Section 3.2.1: the form approach requires creating individual slots and a FormValidationAction for each tutorial, which makes it time-consuming and conflicts with scalability requirement R4.",
    292       "supported": "moderate"
    293     },
    294     {
    295       "claim": "The prototype chatbot can assist the MDE tool (SiSy) user with tutorials and FAQs.",
    296       "evidence": "Screenshots in Figures 4, 6, and 7 show the chatbot interface and tutorial steps. Section 4 describes the implementation.",
    297       "supported": "moderate"
    298     }
    299   ],
    300   "methodology_tags": ["case-study"],
    301   "key_findings": "This short paper presents a prototype chatbot built with the Rasa conversational AI framework to assist users of the SiSy MDE tool for embedded software engineering. Two design approaches for implementing step-by-step tutorials (forms vs. custom actions) are compared qualitatively, with custom actions chosen for better scalability. No empirical evaluation of the chatbot's effectiveness is conducted; the contribution is limited to requirements, design concepts, and a prototype demonstration.",
    302   "red_flags": [
    303     {
    304       "flag": "No empirical evaluation",
    305       "detail": "The paper presents a prototype but includes no user study, no NLU accuracy measurements, no task completion metrics, and no comparison with alternative approaches. Claims about aiding novices are entirely unsupported."
    306     },
    307     {
    308       "flag": "Claims outrun evidence",
    309       "detail": "The abstract and title suggest broad applicability of AI-guided MDE, but the paper only describes a simple chatbot for one specific tool (SiSy) with no evidence of effectiveness."
    310     }
    311   ],
    312   "cited_papers": [
    313     {
    314       "title": "A comparison of natural language understanding platforms for chatbots in software engineering",
    315       "authors": ["A. Abdellatif", "K. Badran", "D. Costa", "E. Shihab"],
    316       "year": 2021,
    317       "relevance": "Evaluates NLU platforms (Dialogflow, LUIS, Watson, Rasa) for software engineering chatbots."
    318     },
    319     {
    320       "title": "Chatbots: History, technology, and applications",
    321       "authors": ["E. Adamopoulou", "L. Moussiades"],
    322       "year": 2020,
    323       "relevance": "Literature review on chatbot technology and applications relevant to AI-assisted software tools."
    324     },
    325     {
    326       "title": "A survey on modeling and model-driven engineering practices in the embedded software industry",
    327       "authors": ["D. Akdur", "V. Garousi", "O. Demirörs"],
    328       "year": 2018,
    329       "relevance": "Survey on MDE adoption in the embedded software industry, relevant to understanding AI integration in software engineering practices."
    330     }
    331   ]
    332 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs