scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (27558B)
      1 {
      2   "paper": {
      3     "title": "Generative AI for Software Architecture. Applications, Challenges, and Future Directions",
      4     "authors": [
      5       "Matteo Esposito",
      6       "Xiaozhou Li",
      7       "Sergio Moreschini",
      8       "Noman Ahmad",
      9       "Tomas Cerny",
     10       "Karthik Vaidhyanathan",
     11       "Valentina Lenarduzzi",
     12       "Davide Taibi"
     13     ],
     14     "year": 2025,
     15     "venue": "Journal of Systems and Software",
     16     "arxiv_id": "2503.13310",
     17     "doi": "10.48550/arXiv.2503.13310"
     18   },
     19   "checklist": {
     20     "artifacts": {
     21       "code_released": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "The paper provides a replication package on Zenodo (doi:10.5281/zenodo.15032395) described as containing 'raw data, and the MLR workflow,' but no source code or analysis scripts are explicitly mentioned. The replication package appears to be data/documentation rather than executable code."
     25       },
     26       "data_released": {
     27         "applies": true,
     28         "answer": true,
     29         "justification": "Section 7 states 'We provide our raw data, and the MLR workflow in our replication package hosted on Zenodo' with a DOI link (10.5281/zenodo.15032395)."
     30       },
     31       "environment_specified": {
     32         "applies": true,
     33         "answer": false,
     34         "justification": "No environment or tool version specifications are provided. For an MLR the relevant 'environment' would be the tools used for search, screening, and analysis — none are specified."
     35       },
     36       "reproduction_instructions": {
     37         "applies": true,
     38         "answer": false,
     39         "justification": "While a replication package is provided on Zenodo, the paper does not include step-by-step reproduction instructions. The methodology section describes the process conceptually but not as a reproducible script or protocol with exact commands."
     40       }
     41     },
     42     "statistical_methodology": {
     43       "confidence_intervals_or_error_bars": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "This is a systematic literature review that reports descriptive counts and percentages of papers. No experimental results requiring confidence intervals are produced."
     47       },
     48       "significance_tests": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "The paper is a systematic literature review reporting descriptive statistics (counts, percentages) of its included studies. No comparative statistical claims requiring significance tests are made."
     52       },
     53       "effect_sizes_reported": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No experimental comparisons are performed. The paper reports descriptive frequencies of themes found across reviewed papers."
     57       },
     58       "sample_size_justified": {
     59         "applies": true,
     60         "answer": false,
     61         "justification": "The final sample of 46 papers (36 white, 10 gray) is not justified as sufficient for the breadth of claims made. No discussion of whether this sample adequately represents the field or whether saturation was reached."
     62       },
     63       "variance_reported": {
     64         "applies": false,
     65         "answer": false,
     66         "justification": "No experimental runs or repeated measurements are performed. The paper reports counts from a single systematic search process."
     67       }
     68     },
     69     "evaluation_design": {
     70       "baselines_included": {
     71         "applies": true,
     72         "answer": true,
     73         "justification": "Table 1 provides a detailed comparison of this MLR against 10 prior systematic studies (including Fan et al., Hou et al., Jiang et al., Wang et al., Marques et al., Santos et al., Saucedo & Rodriguez, Bucaioni et al., and Schmid et al.), situating this work relative to existing reviews."
     74       },
     75       "baselines_contemporary": {
     76         "applies": true,
     77         "answer": true,
     78         "justification": "The compared reviews in Table 1 include recent works: Bucaioni et al. (2025), Schmid et al. (2025), Santos et al. (2024), and others from 2023-2024. These represent the current state of related review literature."
     79       },
     80       "ablation_study": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "This is a survey with no system components to ablate."
     84       },
     85       "multiple_metrics": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "This is a survey paper that does not evaluate a system using metrics."
     89       },
     90       "human_evaluation": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "Human evaluation of a system's outputs is not applicable to a literature review."
     94       },
     95       "held_out_test_set": {
     96         "applies": false,
     97         "answer": false,
     98         "justification": "No experimental evaluation requiring train/test splits is performed."
     99       },
    100       "per_category_breakdown": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "Results are broken down extensively by category: purpose of LLM usage (Table 11), models used (Table 12), how GenAI is used (Table 13), SALC phases (Table 14), architectural styles (Table 15), modeling languages (Table 16), validation methods (Table 18), use cases (Table 19), future challenges (Table 21), and white vs. gray literature (Table 7)."
    104       },
    105       "failure_cases_discussed": {
    106         "applies": true,
    107         "answer": true,
    108         "justification": "The paper discusses significant gaps found in the reviewed literature: 93% of studies lack validation methods (Table 18), 68% omit architectural style details (Table 15), 74% lack formal modeling (Table 16), and 15% of studies don't mention future challenges (Section 4.4)."
    109       },
    110       "negative_results_reported": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "The paper reports several negative findings: rigorous testing of GenAI outputs is 'typically missing,' 93% of studies provide no validation, Architecture-to-Architecture transitions are nearly unexplored (3%), and there is a 'paucity of examples addressing system-level reasoning, trade-off analysis, or performance modeling.'"
    114       }
    115     },
    116     "claims_and_evidence": {
    117       "abstract_claims_supported": {
    118         "applies": true,
    119         "answer": true,
    120         "justification": "Abstract claims about architectural decision support dominance (38%, Table 11), GPT model prevalence (62%, Table 12), few-shot prompting and RAG usage (Table 13), initial SALC stages focus (Table 14), monolithic/microservice targets (Table 15), and missing rigorous testing (93%, Table 18) are all supported by the results tables."
    121       },
    122       "causal_claims_justified": {
    123         "applies": false,
    124         "answer": false,
    125         "justification": "The paper makes descriptive claims about the state of research ('GenAI has been applied mostly to...', 'OpenAI GPT models are predominantly applied'). No causal claims about what causes or improves architectural outcomes are made."
    126       },
    127       "generalization_bounded": {
    128         "applies": true,
    129         "answer": false,
    130         "justification": "The title 'Generative AI for Software Architecture. Applications, Challenges, and Future Directions' and conclusions like 'GenAI shows significant potential in software design' are broad, but the evidence comes from only 46 papers (36 peer-reviewed, 10 gray literature) searched through February 2025. The paper does not explicitly acknowledge that 46 papers may not represent the full landscape, especially given the rapidly evolving field."
    131       },
    132       "alternative_explanations_discussed": {
    133         "applies": true,
    134         "answer": false,
    135         "justification": "The paper does not discuss alternative explanations for its findings. For example, the dominance of OpenAI GPT (62%) could reflect publication bias, marketing, or API accessibility rather than actual superiority or preference. The paper presents percentages at face value without considering confounds in what gets published."
    136       },
    137       "proxy_outcome_distinction": {
    138         "applies": true,
    139         "answer": false,
    140         "justification": "The paper measures what is reported in published literature and frames it as how GenAI 'is utilized' in software architecture. The gap between publication patterns and actual industry practice is not explicitly discussed. The inclusion of gray literature partially addresses this but the proxy gap is not acknowledged."
    141       }
    142     },
    143     "setup_transparency": {
    144       "model_versions_specified": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "The paper does not use any AI models for its own analysis. It is a literature review. (The authors acknowledge using ChatGPT for language editing but this is not part of the methodology.)"
    148       },
    149       "prompts_provided": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "The paper does not use prompting as part of its methodology."
    153       },
    154       "hyperparameters_reported": {
    155         "applies": false,
    156         "answer": false,
    157         "justification": "No models are run as part of the study methodology."
    158       },
    159       "scaffolding_described": {
    160         "applies": false,
    161         "answer": false,
    162         "justification": "No agentic scaffolding is used in this literature review."
    163       },
    164       "data_preprocessing_documented": {
    165         "applies": true,
    166         "answer": true,
    167         "justification": "The paper selection pipeline is documented in detail: search terms (Section 3.2.1), databases (Section 3.2.2), inclusion/exclusion criteria with specific rules (Table 2), step-by-step counts at each filtering stage (Table 5: 1054 → title/abstract screening → full reading → snowballing → quality assessment → 46 final papers), and inter-rater agreement (Cohen's kappa reported at each stage)."
    168       }
    169     },
    170     "limitations_and_scope": {
    171       "limitations_section_present": {
    172         "applies": true,
    173         "answer": true,
    174         "justification": "Section 6 'Threats to Validity' provides a dedicated discussion structured by construct, internal, external, and conclusion validity, following Wohlin et al.'s framework."
    175       },
    176       "threats_to_validity_specific": {
    177         "applies": true,
    178         "answer": true,
    179         "justification": "Section 6 discusses specific threats: subjective analysis mitigated by dual-author extraction with third-author arbitration, potential non-inclusion of studies mitigated by searching eight digital libraries plus snowballing, inability to evaluate external validity of all included studies, and specific mention of applying inclusion/exclusion criteria to both title/abstract and full text."
    180       },
    181       "scope_boundaries_stated": {
    182         "applies": true,
    183         "answer": false,
    184         "justification": "The paper does not explicitly state what the results do NOT show. The search is bounded temporally (after March 2022) and by topic (GenAI in software architecture), but the paper makes no explicit statements about what claims it is NOT making or what populations/settings are excluded from its conclusions."
    185       }
    186     },
    187     "data_integrity": {
    188       "raw_data_available": {
    189         "applies": true,
    190         "answer": true,
    191         "justification": "The paper states 'We provide our raw data, and the MLR workflow in our replication package hosted on Zenodo' (doi:10.5281/zenodo.15032395), enabling independent verification of the extracted data."
    192       },
    193       "data_collection_described": {
    194         "applies": true,
    195         "answer": true,
    196         "justification": "Data collection is described in detail in Section 3: search terms (Section 3.2.1), four white-literature databases and three gray-literature search engines (Section 3.2.2), inclusion/exclusion criteria (Table 2), and the open coding approach for data extraction (Section 3.3, Table 6)."
    197       },
    198       "recruitment_methods_described": {
    199         "applies": false,
    200         "answer": false,
    201         "justification": "No human participants are involved. The paper sources are academic databases and search engines, which are standard and described in the search strategy."
    202       },
    203       "data_pipeline_documented": {
    204         "applies": true,
    205         "answer": true,
    206         "justification": "Table 5 documents the full pipeline: 621 white papers → title/abstract screening (-576) → full reading (-18) → snowballing (+11) → quality assessment (-2) → 36 final. Gray: 433 → -356 → -70 → +3 → 10 final. Cohen's kappa is reported at each screening stage."
    207       }
    208     },
    209     "conflicts_of_interest": {
    210       "funding_disclosed": {
    211         "applies": true,
    212         "answer": true,
    213         "justification": "The Acknowledgment section lists: 'Business Finland Project 6GSoft, Academy of Finland project MUFANO/349488, and National Science Foundation (NSF) Grant No. 2409933.'"
    214       },
    215       "affiliations_disclosed": {
    216         "applies": true,
    217         "answer": true,
    218         "justification": "Author affiliations are clearly listed: University of Oulu (Finland), Tampere University (Finland), University of Arizona (USA), and IIIT Hyderabad (India). None of the authors are affiliated with GenAI product companies."
    219       },
    220       "funder_independent_of_outcome": {
    221         "applies": true,
    222         "answer": true,
    223         "justification": "Funders are public research agencies (Business Finland, Academy of Finland, NSF) with no financial interest in any particular GenAI product or outcome."
    224       },
    225       "financial_interests_declared": {
    226         "applies": true,
    227         "answer": false,
    228         "justification": "No competing interests or financial interests declaration statement is present in the paper."
    229       }
    230     },
    231     "contamination": {
    232       "training_cutoff_stated": {
    233         "applies": false,
    234         "answer": false,
    235         "justification": "This is a literature review that does not evaluate any pre-trained model on a benchmark."
    236       },
    237       "train_test_overlap_discussed": {
    238         "applies": false,
    239         "answer": false,
    240         "justification": "This is a literature review that does not evaluate any pre-trained model on a benchmark."
    241       },
    242       "benchmark_contamination_addressed": {
    243         "applies": false,
    244         "answer": false,
    245         "justification": "This is a literature review that does not evaluate any pre-trained model on a benchmark."
    246       }
    247     },
    248     "human_studies": {
    249       "pre_registered": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants are involved in this literature review."
    253       },
    254       "irb_or_ethics_approval": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants are involved in this literature review."
    258       },
    259       "demographics_reported": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants are involved in this literature review."
    263       },
    264       "inclusion_exclusion_criteria": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants are involved. (The paper does have inclusion/exclusion criteria for papers, which is covered under data_preprocessing_documented.)"
    268       },
    269       "randomization_described": {
    270         "applies": false,
    271         "answer": false,
    272         "justification": "No human participants or experimental conditions are involved."
    273       },
    274       "blinding_described": {
    275         "applies": false,
    276         "answer": false,
    277         "justification": "No human participants or experimental conditions are involved."
    278       },
    279       "attrition_reported": {
    280         "applies": false,
    281         "answer": false,
    282         "justification": "No human participants are involved."
    283       }
    284     },
    285     "cost_and_practicality": {
    286       "inference_cost_reported": {
    287         "applies": false,
    288         "answer": false,
    289         "justification": "This is a survey paper with no inference or computational method of its own."
    290       },
    291       "compute_budget_stated": {
    292         "applies": false,
    293         "answer": false,
    294         "justification": "This is a survey paper with no computational experiments."
    295       }
    296     },
    297     "survey_methodology": {
    298       "prisma_or_structured_protocol": {
    299         "applies": true,
    300         "answer": true,
    301         "justification": "The paper follows the MLR protocol of Garousi et al. [5] and Kitchenham's SLR guidelines [20]. It includes a systematic search strategy (Figure 1), defined search terms (Section 3.2.1), four white-literature databases, three gray-literature search engines, structured inclusion/exclusion criteria (Table 2), inter-rater reliability measures (Cohen's kappa), and snowballing following Wohlin's guidelines [22]."
    302       },
    303       "quality_assessment_of_sources": {
    304         "applies": true,
    305         "answer": true,
    306         "justification": "White literature is assessed using 11 quality assessment criteria (Table 3) on a 5-point Likert scale, with a minimum score of 2 required for inclusion. Gray literature uses a separate quality rubric (Table 4) covering authority, methodology, objectivity, date, novelty, impact, and outlet control, with a minimum average score of 0.5. Two papers were excluded via quality assessment."
    307       },
    308       "publication_bias_discussed": {
    309         "applies": true,
    310         "answer": false,
    311         "justification": "Publication bias is not explicitly discussed. While the inclusion of gray literature partially addresses the gap between academic and industry perspectives, the paper does not discuss whether positive results are overrepresented in the reviewed studies, use funnel plots, or acknowledge systematic bias in what gets published about GenAI."
    312       }
    313     }
    314   },
    315   "scan_version": 3,
    316   "active_modules": [
    317     "survey_methodology"
    318   ],
    319   "claims": [
    320     {
    321       "claim": "Architectural decision support is the most frequent purpose for GenAI in software architecture (38% of studies).",
    322       "evidence": "Table 11 (RQ1.1) lists 18 studies under 'Architectural Decision Support' out of 47 total coded purposes.",
    323       "supported": "strong"
    324     },
    325     {
    326       "claim": "OpenAI GPT models dominate the research landscape at 62% of model usage.",
    327       "evidence": "Table 12 (RQ1.2) counts 105 GPT model instances across studies, representing 62% of all model usage.",
    328       "supported": "strong"
    329     },
    330     {
    331       "claim": "Few-shot prompting is the most common prompt engineering technique (31%).",
    332       "evidence": "Table 13 (RQ1.3) reports 16 studies using few-shot prompting out of 52 coded instances.",
    333       "supported": "strong"
    334     },
    335     {
    336       "claim": "93% of studies do not report any validation method for GenAI-generated architectural outputs.",
    337       "evidence": "Table 18 (RQ2.4) shows 43 of 46 studies listed as 'Unspecified' for architecture analysis method, with only ATAM, SAAM, and static analysis reported (1 study each).",
    338       "supported": "strong"
    339     },
    340     {
    341       "claim": "Requirement-to-Architecture is the most targeted SALC phase (40%) and Architecture-to-Code is second (32%).",
    342       "evidence": "Table 14 (RQ2.1) reports 24 studies targeting Req-to-Arch and 19 targeting Arch-to-Code.",
    343       "supported": "strong"
    344     },
    345     {
    346       "claim": "85% of studies involve some form of human interaction with the GenAI model.",
    347       "evidence": "Table 13 (RQ1.3) reports 39 of 46 studies involving human-model interaction.",
    348       "supported": "strong"
    349     },
    350     {
    351       "claim": "LLM accuracy (15%) and hallucinations (8%) are the most cited future challenges.",
    352       "evidence": "Table 21 (RQ3) lists 9 studies citing LLM accuracy and 5 citing hallucinations as future challenges.",
    353       "supported": "strong"
    354     }
    355   ],
    356   "methodology_tags": [
    357     "meta-analysis"
    358   ],
    359   "key_findings": "This MLR of 46 studies (36 peer-reviewed, 10 gray literature) finds that GenAI in software architecture is primarily used for architectural decision support (38%) and reverse engineering (19%), with OpenAI GPT models dominating at 62% of usage. GenAI is applied mostly in early SALC phases (Requirements-to-Architecture 40%, Architecture-to-Code 32%), while Architecture-to-Architecture transformation is nearly unexplored (3%). A critical finding is that 93% of studies lack any formal validation method for GenAI-generated architectural outputs, and most studies (85%) still require human interaction, indicating GenAI serves as an assistive tool rather than an autonomous decision-maker.",
    360   "red_flags": [
    361     {
    362       "flag": "Small sample for broad claims",
    363       "detail": "Only 46 papers (36 white, 10 gray) support claims about the entire landscape of 'Generative AI for Software Architecture.' The title and conclusions suggest comprehensive coverage but the evidence base is thin for such sweeping claims, especially given the rapidly evolving field."
    364     },
    365     {
    366       "flag": "Gray literature quality concerns",
    367       "detail": "10 of 46 included sources (22%) are gray literature including blog posts (4), a YouTube video (1), and theses (2). While the MLR methodology accounts for this, gray literature sources like Medium blog posts and YouTube videos may introduce anecdotal or biased perspectives that are weighted equally with peer-reviewed work in the frequency counts."
    368     },
    369     {
    370       "flag": "No quality-weighted synthesis",
    371       "detail": "The paper applies quality assessment criteria to filter sources but then treats all included sources equally in the synthesis. A case study from a blog post and a rigorous experiment from a top venue both count as '1' in the frequency tables. This launders quality differences across sources."
    372     },
    373     {
    374       "flag": "Percentage denominators shift across tables",
    375       "detail": "Many tables note 'One paper can have more than one [category],' meaning percentages are computed over total coded instances rather than total papers. This inflates apparent coverage and makes it difficult to assess how many unique papers contribute to each finding. For example, Table 12 (RQ1.2) counts 105 GPT model instances alone (reported as 62% of model usage, i.e., roughly 170 coded instances in total) across only 46 papers."
    376     },
    377     {
    378       "flag": "Missing publication bias analysis",
    379       "detail": "The paper does not discuss whether positive results about GenAI in software architecture are overrepresented in the literature. Given industry hype around GenAI, publication bias is a significant concern that could inflate reported benefits and understate challenges."
    380     }
    381   ],
    382   "cited_papers": [
    383     {
    384       "title": "Large language models for software engineering: Survey and open problems",
    385       "authors": ["A. Fan", "B. Gokkaya", "M. Harman", "M. Lyubarskiy", "S. Sengupta", "S. Yoo", "J. M. Zhang"],
    386       "year": 2023,
    387       "relevance": "Major survey on LLM applications across the SE lifecycle, finding emphasis on code generation with limited attention to requirements/design phases."
    388     },
    389     {
    390       "title": "Large language models for software engineering: A systematic literature review",
    391       "authors": ["X. Hou", "Y. Zhao", "Y. Liu", "Z. Yang", "K. Wang", "L. Li", "X. Luo", "D. Lo", "J. Grundy", "H. Wang"],
    392       "year": 2024,
    393       "relevance": "SLR analyzing 395 articles on LLMs in SE, concluding most applications focus on software development with limited design applications."
    394     },
    395     {
    396       "title": "A survey on large language models for code generation",
    397       "authors": ["J. Jiang", "F. Wang", "J. Shen", "S. Kim", "S. Kim"],
    398       "year": 2024,
    399       "arxiv_id": "2406.00515",
    400       "relevance": "Survey of LLMs specifically for code generation, developing a taxonomy and identifying research-practice gaps."
    401     },
    402     {
    403       "title": "Software testing with large language models: Survey, landscape, and vision",
    404       "authors": ["J. Wang", "Y. Huang", "C. Chen", "Z. Liu", "S. Wang", "Q. Wang"],
    405       "year": 2024,
    406       "relevance": "SLR on LLMs for software testing, analyzing 102 studies and identifying practical integration barriers."
    407     },
    408     {
    409       "title": "Using chatgpt in software requirements engineering: A comprehensive review",
    410       "authors": ["N. Marques", "R. R. Silva", "J. Bernardino"],
    411       "year": 2024,
    412       "relevance": "Comprehensive review of ChatGPT applications in requirements engineering, documenting challenges and future directions."
    413     },
    414     {
    415       "title": "Impacts of the usage of generative artificial intelligence on software development process",
    416       "authors": ["P. d. O. Santos", "A. C. Figueiredo", "P. Nuno Moura", "B. Diirr", "A. C. Alvim", "R. P. D. Santos"],
    417       "year": 2024,
    418       "relevance": "SLR on GenAI impact on SE lifecycle confirming dominance of development/testing applications."
    419     },
    420     {
    421       "title": "Artificial intelligence for software architecture: Literature review and the road ahead",
    422       "authors": ["A. Bucaioni", "M. Weyssow", "J. He", "Y. Lyu", "D. Lo"],
    423       "year": 2025,
    424       "arxiv_id": "2504.04334",
    425       "relevance": "SLR on AI integration with software architecture, identifying needs for real-time adaptation, automated documentation, and robust benchmarking."
    426     },
    427     {
    428       "title": "Software architecture meets llms: A systematic literature review",
    429       "authors": ["L. Schmid", "T. Hey", "M. Armbruster", "S. Corallo", "D. Fuchss", "J. Keim", "H. Liu", "A. Koziolek"],
    430       "year": 2025,
    431       "relevance": "SLR on LLMs in software architecture identifying four application areas and gaps in advanced prompting and evaluation."
    432     },
    433     {
    434       "title": "Application of large language models to software engineering tasks: Opportunities, risks, and implications",
    435       "authors": ["I. Ozkaya"],
    436       "year": 2023,
    437       "relevance": "Pragmatic overview of LLM opportunities and risks in SE, highlighting bias, data quality, privacy, and explainability challenges."
    438     },
    439     {
    440       "title": "Navigating the complexity of generative ai adoption in software engineering",
    441       "authors": ["D. Russo"],
    442       "year": 2024,
    443       "relevance": "Analysis of GenAI adoption complexity in software engineering with implications for how development practices are changing."
    444     },
    445     {
    446       "title": "LLMs for code: The potential, prospects, and problems",
    447       "authors": ["T. Sharma"],
    448       "year": 2024,
    449       "relevance": "Analysis of LLM potential and problems for code-related tasks including generation quality and maintainability concerns."
    450     }
    451   ],
    452   "engagement_factors": {
    453     "practical_relevance": {
    454       "score": 1,
    455       "justification": "Provides a taxonomy and research roadmap but no immediately usable tools or techniques for practitioners."
    456     },
    457     "surprise_contrarian": {
    458       "score": 1,
    459       "justification": "The finding that 93% of studies lack GenAI output validation is noteworthy but the overall message — that GenAI in architecture is immature — largely confirms expectations."
    460     },
    461     "fear_safety": {
    462       "score": 0,
    463       "justification": "No novel AI risk or security concerns are raised; challenges mentioned (hallucinations, accuracy) are well-known."
    464     },
    465     "drama_conflict": {
    466       "score": 0,
    467       "justification": "No controversy or provocative claims; the paper is a neutral synthesis of existing literature."
    468     },
    469     "demo_ability": {
    470       "score": 0,
    471       "justification": "No code, tool, or demo is provided — only a Zenodo data archive."
    472     },
    473     "brand_recognition": {
    474       "score": 1,
    475       "justification": "Authors from University of Oulu and University of Arizona; not famous AI labs but established SE research groups. Published in Journal of Systems and Software."
    476     }
    477   }
    478 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs