ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (15214B)


      1 {
      2   "paper": {
      3     "title": "AI-Driven Software Engineering – The Role of Conceptual Modeling",
      4     "authors": ["Hans-Georg Fill", "Jordi Cabot", "Wolfgang Maass", "Marten van Sinderen"],
      5     "year": 2024,
      6     "venue": "Enterprise Modelling and Information Systems Architectures (EMISA), Vol. 19, No. 1",
      7     "doi": "10.18417/emisa.19.1"
      8   },
      9   "checklist": {
     10     "artifacts": {
     11       "code_released": {
     12         "applies": true,
     13         "answer": false,
     14         "justification": "No code or repository is referenced. This is a viewpoint paper, but analysis scripts or supplementary materials could have been released."
     15       },
     16       "data_released": {
     17         "applies": true,
     18         "answer": false,
     19         "justification": "No data or supplementary materials released. A viewpoint paper could still release structured data (e.g., comparison tables in machine-readable form)."
     20       },
     21       "environment_specified": {
     22         "applies": false,
     23         "answer": false,
     24         "justification": "No computational experiments were conducted; environment specification is structurally inapplicable."
     25       },
     26       "reproduction_instructions": {
     27         "applies": false,
     28         "answer": false,
     29         "justification": "No experiments to reproduce. This is a panel discussion summary/viewpoint paper."
     30       }
     31     },
     32     "statistical_methodology": {
     33       "confidence_intervals_or_error_bars": {
     34         "applies": false,
     35         "answer": false,
     36         "justification": "No quantitative experiments conducted. This is a theoretical viewpoint paper."
     37       },
     38       "significance_tests": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "No empirical comparisons made. Viewpoint paper with no statistical analysis."
     42       },
     43       "effect_sizes_reported": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No empirical results to report effect sizes for."
     47       },
     48       "sample_size_justified": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No data collection or sampling performed."
     52       },
     53       "variance_reported": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No experimental runs conducted."
     57       }
     58     },
     59     "evaluation_design": {
     60       "baselines_included": {
     61         "applies": false,
     62         "answer": false,
     63         "justification": "No evaluation or experiment is conducted. This is a viewpoint/position paper summarizing a panel discussion."
     64       },
     65       "baselines_contemporary": {
     66         "applies": false,
     67         "answer": false,
     68         "justification": "No baselines applicable; no evaluation performed."
     69       },
     70       "ablation_study": {
     71         "applies": false,
     72         "answer": false,
     73         "justification": "No system or method is proposed that could be ablated."
     74       },
     75       "multiple_metrics": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "No evaluation metrics used; no experiments."
     79       },
     80       "human_evaluation": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "No system outputs to evaluate. The paper is a panel discussion summary."
     84       },
     85       "held_out_test_set": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "No datasets or experiments involved."
     89       },
     90       "per_category_breakdown": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "No quantitative results to break down."
     94       },
     95       "failure_cases_discussed": {
     96         "applies": false,
     97         "answer": false,
     98         "justification": "No system or method evaluated; no failure cases applicable."
     99       },
    100       "negative_results_reported": {
    101         "applies": false,
    102         "answer": false,
    103         "justification": "No experiments conducted that could yield negative results."
    104       }
    105     },
    106     "claims_and_evidence": {
    107       "abstract_claims_supported": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "The paper is labeled as a 'Viewpoint' and the abstract/introduction states it summarizes a panel discussion. The claims are opinion-based and the paper delivers on that framing."
    111       },
    112       "causal_claims_justified": {
    113         "applies": false,
    114         "answer": false,
    115         "justification": "The paper makes no empirical causal claims. Statements like 'conceptual modeling could play a key role' are aspirational, not causal assertions backed by data."
    116       },
    117       "generalization_bounded": {
    118         "applies": true,
    119         "answer": true,
    120         "justification": "The paper frames its claims as perspectives and opinions from a panel discussion at ICSOFT 2023. It does not claim empirical generalizability."
    121       },
    122       "alternative_explanations_discussed": {
    123         "applies": false,
    124         "answer": false,
    125         "justification": "No empirical results are presented, so there are no findings requiring alternative explanations. The paper is a viewpoint/opinion piece."
    126       }
    127     },
    128     "setup_transparency": {
    129       "model_versions_specified": {
    130         "applies": false,
    131         "answer": false,
    132         "justification": "No models are used in experiments. References to GPT-4, PaLM, etc. are in discussion context, not experimental use."
    133       },
    134       "prompts_provided": {
    135         "applies": false,
    136         "answer": false,
    137         "justification": "No prompting is used; no experiments conducted."
    138       },
    139       "hyperparameters_reported": {
    140         "applies": false,
    141         "answer": false,
    142         "justification": "No experiments conducted requiring hyperparameter specification."
    143       },
    144       "scaffolding_described": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No agentic scaffolding used; this is a viewpoint paper."
    148       },
    149       "data_preprocessing_documented": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "No data collection or preprocessing performed."
    153       }
    154     },
    155     "limitations_and_scope": {
    156       "limitations_section_present": {
    157         "applies": true,
    158         "answer": false,
    159         "justification": "No limitations or threats-to-validity section is present. The paper acknowledges open challenges in Section 5 but does not discuss limitations of its own viewpoints or analysis."
    160       },
    161       "threats_to_validity_specific": {
    162         "applies": true,
    163         "answer": false,
    164         "justification": "No threats to validity discussed. The paper does not acknowledge potential biases in the panelists' perspectives or limitations of a single panel discussion as a basis for conclusions."
    165       },
    166       "scope_boundaries_stated": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "The paper does not explicitly state what its viewpoints do NOT cover or the boundaries of its analysis."
    170       }
    171     },
    172     "data_integrity": {
    173       "raw_data_available": {
    174         "applies": false,
    175         "answer": false,
    176         "justification": "No data collected; this is a viewpoint paper summarizing a panel discussion."
    177       },
    178       "data_collection_described": {
    179         "applies": true,
    180         "answer": true,
    181         "justification": "The introduction states the paper summarizes a panel discussion conducted on July 10, 2023 at ICSOFT in Rome, identifying the panelists and moderator."
    182       },
    183       "recruitment_methods_described": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "No human participants recruited for a study. The panelists are named conference participants, not research subjects."
    187       },
    188       "data_pipeline_documented": {
    189         "applies": false,
    190         "answer": false,
    191         "justification": "No data pipeline exists; this is a viewpoint paper."
    192       }
    193     },
    194     "conflicts_of_interest": {
    195       "funding_disclosed": {
    196         "applies": true,
    197         "answer": false,
    198         "justification": "No funding information or acknowledgments section is present in the paper."
    199       },
    200       "affiliations_disclosed": {
    201         "applies": true,
    202         "answer": true,
    203         "justification": "Author affiliations are clearly listed: University of Fribourg, Luxembourg Institute of Science and Technology/University of Luxembourg, Saarland University/DFKI, and University of Twente."
    204       },
    205       "funder_independent_of_outcome": {
    206         "applies": true,
    207         "answer": false,
    208         "justification": "No funding is disclosed, so independence cannot be assessed. Absence of disclosure is not absence of conflict."
    209       },
    210       "financial_interests_declared": {
    211         "applies": true,
    212         "answer": false,
    213         "justification": "No competing interests or financial interests statement is present in the paper."
    214       }
    215     },
    216     "contamination": {
    217       "training_cutoff_stated": {
    218         "applies": false,
    219         "answer": false,
    220         "justification": "No pre-trained model is evaluated on any benchmark. This is a viewpoint paper."
    221       },
    222       "train_test_overlap_discussed": {
    223         "applies": false,
    224         "answer": false,
    225         "justification": "No benchmark evaluation conducted."
    226       },
    227       "benchmark_contamination_addressed": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "No benchmark evaluation conducted."
    231       }
    232     },
    233     "human_studies": {
    234       "pre_registered": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "No human subjects study conducted. This is a panel discussion summary."
    238       },
    239       "irb_or_ethics_approval": {
    240         "applies": false,
    241         "answer": false,
    242         "justification": "No human subjects study conducted."
    243       },
    244       "demographics_reported": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human subjects study conducted."
    248       },
    249       "inclusion_exclusion_criteria": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human subjects study conducted."
    253       },
    254       "randomization_described": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human subjects study conducted."
    258       },
    259       "blinding_described": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human subjects study conducted."
    263       },
    264       "attrition_reported": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human subjects study conducted."
    268       }
    269     },
    270     "cost_and_practicality": {
    271       "inference_cost_reported": {
    272         "applies": false,
    273         "answer": false,
    274         "justification": "No method proposed or evaluated; this is a viewpoint paper."
    275       },
    276       "compute_budget_stated": {
    277         "applies": false,
    278         "answer": false,
    279         "justification": "No computation performed."
    280       }
    281     }
    282   },
    283   "claims": [
    284     {
    285       "claim": "Conceptual modeling will shift from a designing function to an explanatory function as AI models replace business logic implementation in software.",
    286       "evidence": "Section 3 (Wolfgang Maass) argues that business logic is being replaced by AI models, changing conceptual modeling's role. No empirical evidence provided.",
    287       "supported": "unsupported"
    288     },
    289     {
    290       "claim": "Data-driven software engineering (DDSE) and model-driven software engineering (MDSE) are complementary rather than alternative approaches to software engineering.",
    291       "evidence": "Section 4 (Marten van Sinderen) provides a conceptual comparison in Tables 1 and 2 and a driving analogy, but no empirical evidence.",
    292       "supported": "weak"
    293     },
    294     {
    295       "claim": "Data, conceptual models, and AI form a 'golden triangle' that reinforces each component.",
    296       "evidence": "Section 2 (Jordi Cabot) describes three combination scenarios (Data+AI→Models, Data+Models→AI, AI+Models→Data) but provides no empirical validation.",
    297       "supported": "weak"
    298     }
    299   ],
    300   "methodology_tags": ["theoretical"],
    301   "key_findings": "This viewpoint paper summarizes a panel discussion at ICSOFT 2023 on the role of conceptual modeling in AI-driven software engineering. The panelists argue that data-driven and model-driven software engineering are complementary, that conceptual modeling will shift from a design function to an explanatory function for AI systems, and that a 'golden triangle' of data, models, and AI can reinforce each component. The paper identifies open challenges including finding the right abstraction level for AI concepts, uncertainty modeling, explainability, and teaching integrated approaches.",
    302   "red_flags": [
    303     {
    304       "flag": "No empirical evidence",
    305       "detail": "All claims are based on opinion and analogy. The paper presents no experiments, case studies, or systematic evidence to support its assertions about the future role of conceptual modeling."
    306     },
    307     {
    308       "flag": "No structured methodology",
    309       "detail": "As a panel discussion summary, the paper lacks any systematic methodology for arriving at its conclusions. The views represent four researchers' opinions without broader validation."
    310     }
    311   ],
    312   "cited_papers": [
    313     {
    314       "title": "The Scope of ChatGPT in Software Engineering: A Thorough Investigation",
    315       "authors": ["W. Ma", "S. Liu", "W. Wang", "Q. Hu", "Y. Liu", "C. Zhang", "L. Nie", "Y. Liu"],
    316       "year": 2023,
    317       "arxiv_id": "2305.12138",
    318       "relevance": "Investigates ChatGPT's capabilities across software engineering tasks."
    319     },
    320     {
    321       "title": "The impact of AI on developer productivity: Evidence from GitHub Copilot",
    322       "authors": ["S. Peng", "E. Kalliamvakou", "P. Cihon", "M. Demirer"],
    323       "year": 2023,
    324       "arxiv_id": "2302.06590",
    325       "relevance": "Empirical study on AI-assisted developer productivity with GitHub Copilot."
    326     },
    327     {
    328       "title": "Grounded Copilot: How Programmers Interact with Code-Generating Models",
    329       "authors": ["S. Barke", "M. B. James", "N. Polikarpova"],
    330       "year": 2023,
    331       "relevance": "Qualitative study of programmer interaction modes with LLM-based code generation tools."
    332     },
    333     {
    334       "title": "ChatGPT Prompt Patterns for Improving Code Quality, Refactoring, Requirements Elicitation, and Software Design",
    335       "authors": ["J. White", "S. Hays", "Q. Fu", "J. Spencer-Smith", "D. C. Schmidt"],
    336       "year": 2023,
    337       "arxiv_id": "2303.07839",
    338       "relevance": "Proposes prompt patterns for using ChatGPT in software engineering tasks."
    339     },
    340     {
    341       "title": "On the assessment of generative AI in modeling tasks: an experience report with ChatGPT and UML",
    342       "authors": ["J. Cámara", "J. Troya", "L. Burgueño", "A. Vallecillo"],
    343       "year": 2023,
    344       "relevance": "Evaluates generative AI capabilities in conceptual modeling tasks with UML."
    345     },
    346     {
    347       "title": "Competition-level code generation with AlphaCode",
    348       "authors": ["Y. Li", "D. Choi", "J. Chung"],
    349       "year": 2022,
    350       "relevance": "Demonstrates AI code generation at competitive programming level."
    351     },
    352     {
    353       "title": "GPT-4 Technical Report",
    354       "authors": ["OpenAI"],
    355       "year": 2023,
    356       "arxiv_id": "2303.08774",
    357       "relevance": "Technical report for GPT-4, a foundational LLM discussed in the paper."
    358     }
    359   ]
    360 }

Impressum · Datenschutz