scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (20389B)
      1 {
      2   "paper": {
      3     "title": "Revolution or Hype? Seeking the Limits of Large Models in Hardware Design",
      4     "authors": ["Qiang Xu", "Leon Stok", "Rolf Drechsler", "Xi Wang", "Grace Li Zhang", "Igor L. Markov"],
      5     "year": 2025,
      6     "venue": "2025 IEEE/ACM International Conference On Computer Aided Design (ICCAD)",
      7     "arxiv_id": "2509.04905",
      8     "doi": "10.1109/ICCAD66269.2025.11240750"
      9   },
     10   "scan_version": 2,
     11   "active_modules": ["survey_methodology"],
     12   "methodology_tags": ["meta-analysis", "qualitative"],
     13   "key_findings": "This ICCAD 2025 panel paper surveys the state of Large Language Models (LLMs) and Large Circuit Models (LCMs) in electronic design automation. It identifies key opportunities (RTL generation, verification acceleration, PPA optimization) and challenges (hallucination, data scarcity, explainability). The panelists converge on a division of labor: LLMs for interpreting high-level intent ('the What') and LCMs for circuit-native optimization ('the How'), with formal verification as the indispensable trust anchor.",
     14   "checklist": {
     15     "artifacts": {
     16       "code_released": {
     17         "applies": true,
     18         "answer": false,
     19         "justification": "No code or repository is released. This is a survey/panel paper but could have released analysis scripts or curated reference lists."
     20       },
     21       "data_released": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "No dataset or curated corpus of surveyed papers is released."
     25       },
     26       "environment_specified": {
     27         "applies": false,
     28         "answer": false,
     29         "justification": "No computational experiments are performed; this is a qualitative survey/panel paper."
     30       },
     31       "reproduction_instructions": {
     32         "applies": false,
     33         "answer": false,
     34         "justification": "No experiments to reproduce; this is a position/panel paper."
     35       }
     36     },
     37     "statistical_methodology": {
     38       "confidence_intervals_or_error_bars": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "No quantitative experiments are conducted. The paper is a qualitative survey and panel synthesis."
     42       },
     43       "significance_tests": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No statistical comparisons are made; this is a qualitative overview paper."
     47       },
     48       "effect_sizes_reported": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No experiments with quantitative results are presented."
     52       },
     53       "sample_size_justified": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No empirical sample is collected or analyzed."
     57       },
     58       "variance_reported": {
     59         "applies": false,
     60         "answer": false,
     61         "justification": "No experimental runs are performed."
     62       }
     63     },
     64     "evaluation_design": {
     65       "baselines_included": {
     66         "applies": true,
     67         "answer": false,
     68         "justification": "The paper does not compare against prior surveys of LLMs/LCMs for EDA. It references prior work but does not position itself against comparable survey papers."
     69       },
     70       "baselines_contemporary": {
     71         "applies": false,
     72         "answer": false,
     73         "justification": "No baselines are included, so contemporaneity is inapplicable."
     74       },
     75       "ablation_study": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "No system or method with components to ablate."
     79       },
     80       "multiple_metrics": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "No quantitative evaluation is performed."
     84       },
     85       "human_evaluation": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "No system outputs to evaluate; this is a qualitative panel synthesis."
     89       },
     90       "held_out_test_set": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "No datasets or test sets are used."
     94       },
     95       "per_category_breakdown": {
     96         "applies": true,
     97         "answer": true,
     98         "justification": "The paper provides structured breakdowns by task type: Section II-A covers LLMs for hardware design (code generation, verification, optimization) and Section II-B covers LCMs. Fig. 2 and Fig. 3 provide detailed chronological breakdowns by application area."
     99       },
    100       "failure_cases_discussed": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "Section IV discusses challenges extensively: hallucination (IV-A), semantic gap in circuit representation (IV-B), data scarcity (IV-C), and explainability (IV-D). Section V-B further discusses failure modes and reliability concerns from each panelist."
    104       },
    105       "negative_results_reported": {
    106         "applies": true,
    107         "answer": true,
    108         "justification": "The paper extensively discusses limitations and negative aspects: Markov notes LLMs struggle with arithmetic (ref [43]), reasoning quality declines with task size (ref [44]), and productivity gains may be overstated (ref [45]). The entire framing is 'Revolution or Hype?' acknowledging potential failure of the paradigm."
    109       }
    110     },
    111     "claims_and_evidence": {
    112       "abstract_claims_supported": {
    113         "applies": true,
    114         "answer": true,
    115         "justification": "The abstract claims the paper 'critically examines the practical capabilities, fundamental limitations, and future prospects' and provides 'an authoritative overview.' The body delivers on this through structured sections on background (II), opportunities (III), challenges (IV), expert opinions (V), and recommendations (VI)."
    116       },
    117       "causal_claims_justified": {
    118         "applies": false,
    119         "answer": false,
    120         "justification": "The paper makes no causal claims. It surveys and synthesizes opinions about the state of a field without asserting causal relationships."
    121       },
    122       "generalization_bounded": {
    123         "applies": true,
    124         "answer": true,
    125         "justification": "The paper clearly bounds its scope to hardware design / EDA workflows. It distinguishes between LLM and LCM applicability for different stages of the design flow (Fig. 4) and notes limitations are specific to the EDA domain (data scarcity, IP concerns)."
    126       },
    127       "alternative_explanations_discussed": {
    128         "applies": false,
    129         "answer": false,
    130         "justification": "This is a qualitative survey/panel paper with no empirical results requiring alternative explanations."
    131       },
    132       "proxy_outcome_distinction": {
    133         "applies": false,
    134         "answer": false,
    135         "justification": "No measurements are taken; this is a qualitative position paper."
    136       }
    137     },
    138     "setup_transparency": {
    139       "model_versions_specified": {
    140         "applies": false,
    141         "answer": false,
    142         "justification": "No models are used in experiments; this is a survey paper."
    143       },
    144       "prompts_provided": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No prompting is used; this is a survey paper."
    148       },
    149       "hyperparameters_reported": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "No experiments are conducted."
    153       },
    154       "scaffolding_described": {
    155         "applies": false,
    156         "answer": false,
    157         "justification": "No agentic scaffolding is used in this paper."
    158       },
    159       "data_preprocessing_documented": {
    160         "applies": true,
    161         "answer": false,
    162         "justification": "The paper does not describe how the surveyed papers were selected. There is no description of a search strategy, inclusion/exclusion criteria, or filtering pipeline. Papers appear to be selected ad-hoc based on the panelists' expertise."
    163       }
    164     },
    165     "limitations_and_scope": {
    166       "limitations_section_present": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "There is no dedicated limitations section. Section IV discusses challenges of the technology being surveyed, not limitations of the paper itself."
    170       },
    171       "threats_to_validity_specific": {
    172         "applies": true,
    173         "answer": false,
    174         "justification": "No threats to the validity of this survey are discussed. The paper does not acknowledge that panelist selection, scope of surveyed work, or the qualitative nature of expert opinions may bias its conclusions."
    175       },
    176       "scope_boundaries_stated": {
    177         "applies": true,
    178         "answer": true,
    179         "justification": "The paper clearly states it focuses on LLMs and LCMs for hardware/EDA design, synthesizing perspectives from the ICCAD 2025 panel (Introduction). The scope is explicitly bounded to hardware design, not software or general AI."
    180       }
    181     },
    182     "data_integrity": {
    183       "raw_data_available": {
    184         "applies": true,
    185         "answer": false,
    186         "justification": "No raw data (e.g., list of all surveyed papers, selection criteria, or panelist transcripts) is made available."
    187       },
    188       "data_collection_described": {
    189         "applies": true,
    190         "answer": false,
    191         "justification": "The paper does not describe how the literature was collected or how panelist contributions were solicited and integrated."
    192       },
    193       "recruitment_methods_described": {
    194         "applies": false,
    195         "answer": false,
    196         "justification": "No human participants beyond the named panelists, who are introduced in biographies. This is not a study with recruited participants."
    197       },
    198       "data_pipeline_documented": {
    199         "applies": true,
    200         "answer": false,
    201         "justification": "No documentation of how the survey was assembled—no search queries, databases searched, or filtering stages described."
    202       }
    203     },
    204     "conflicts_of_interest": {
    205       "funding_disclosed": {
    206         "applies": true,
    207         "answer": false,
    208         "justification": "No funding or acknowledgments section is present in the paper."
    209       },
    210       "affiliations_disclosed": {
    211         "applies": true,
    212         "answer": true,
    213         "justification": "Author affiliations are clearly listed: CUHK, IBM, University of Bremen/DFKI, Southeast University, TU Darmstadt, and Synopsys. Industry affiliations (IBM, Synopsys) are transparent."
    214       },
    215       "funder_independent_of_outcome": {
    216         "applies": true,
    217         "answer": false,
    218         "justification": "No funding is disclosed. Two authors are from major EDA industry players (IBM, Synopsys) who have commercial interest in AI-for-EDA outcomes. This potential conflict is not acknowledged."
    219       },
    220       "financial_interests_declared": {
    221         "applies": true,
    222         "answer": false,
    223         "justification": "No competing interests statement. Markov is from Synopsys and references Synopsys products (DSO.ai, ChipArchitect, EUCLIDE, KNOWLEDGE ASSISTANT, RUN ASSISTANT) multiple times. Stok is from IBM. No disclosure of financial interests."
    224       }
    225     },
    226     "contamination": {
    227       "training_cutoff_stated": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "No pre-trained model is evaluated on any benchmark."
    231       },
    232       "train_test_overlap_discussed": {
    233         "applies": false,
    234         "answer": false,
    235         "justification": "No benchmark evaluation is performed."
    236       },
    237       "benchmark_contamination_addressed": {
    238         "applies": false,
    239         "answer": false,
    240         "justification": "No benchmark evaluation is performed."
    241       }
    242     },
    243     "human_studies": {
    244       "pre_registered": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human subjects study."
    248       },
    249       "irb_or_ethics_approval": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human subjects study."
    253       },
    254       "demographics_reported": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human subjects study."
    258       },
    259       "inclusion_exclusion_criteria": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human subjects study."
    263       },
    264       "randomization_described": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human subjects study."
    268       },
    269       "blinding_described": {
    270         "applies": false,
    271         "answer": false,
    272         "justification": "No human subjects study."
    273       },
    274       "attrition_reported": {
    275         "applies": false,
    276         "answer": false,
    277         "justification": "No human subjects study."
    278       }
    279     },
    280     "cost_and_practicality": {
    281       "inference_cost_reported": {
    282         "applies": false,
    283         "answer": false,
    284         "justification": "Survey/panel paper with no computational experiments."
    285       },
    286       "compute_budget_stated": {
    287         "applies": false,
    288         "answer": false,
    289         "justification": "Survey/panel paper with no computational experiments."
    290       }
    291     },
    292     "survey_methodology": {
    293       "prisma_or_structured_protocol": {
    294         "applies": true,
    295         "answer": false,
    296         "justification": "No structured review protocol is followed. Papers are selected ad-hoc by panelists. No search strategy, database queries, or PRISMA flow diagram is provided."
    297       },
    298       "quality_assessment_of_sources": {
    299         "applies": true,
    300         "answer": false,
    301         "justification": "The surveyed papers are not assessed for methodological quality. All referenced work is treated equally regardless of rigor. For example, arXiv preprints and peer-reviewed conference papers are cited without distinction."
    302       },
    303       "publication_bias_discussed": {
    304         "applies": true,
    305         "answer": false,
    306         "justification": "Publication bias is not discussed. Markov briefly mentions data leakage in benchmarks (Section VI) but publication bias in the surveyed literature itself is not addressed."
    307       }
    308     }
    309   },
    310   "claims": [
    311     {
    312       "claim": "LLMs and LCMs serve complementary roles: LLMs for interpreting high-level intent ('the What') and LCMs for circuit-native optimization ('the How').",
    313       "evidence": "Section V-A: Qiang Xu proposes a division of labor between LLMs (natural language interface for design intent) and LCMs (expert engineer for PPA optimization). Leon Stok reinforces this with practical use cases.",
    314       "supported": "moderate"
    315     },
    316     {
    317       "claim": "LLM hallucination is a critical barrier to industrial EDA adoption, as even 0.1% error rates can cause catastrophic silicon failures.",
    318       "evidence": "Sections IV-A and V-B: Multiple panelists (Zhang, Drechsler, Stok) identify hallucination as a key blocker. Drechsler notes that even a 0.1% error rate is intolerable in safety-critical design.",
    319       "supported": "moderate"
    320     },
    321     {
    322       "claim": "Standard transformer models are generally inefficient at representing and reasoning over high-precision numerical values needed for EDA.",
    323       "evidence": "Section V-A: Markov cites ref [43] (McLeish et al.) showing transformers struggle with arithmetic, and ref [44] showing sharp declines in output quality as task size increases.",
    324       "supported": "moderate"
    325     },
    326     {
    327       "claim": "LLMs are already in commercial use for targeted EDA tasks such as converting natural-language specs into structured formats and generating RTL code snippets.",
    328       "evidence": "Section V-B: Markov describes Synopsys KNOWLEDGE ASSISTANT, RUN ASSISTANT, and EUCLIDE IDE as commercially deployed LLM-based tools.",
    329       "supported": "moderate"
    330     },
    331     {
    332       "claim": "Perceived productivity gains from LLM code assistants may not match actual productivity, with engineers often rewriting or discarding generated code.",
    333       "evidence": "Section V-A: Markov cites ref [45] (the METR study by Becker et al.) showing a mismatch between perceived and actual productivity gains.",
    334       "supported": "moderate"
    335     }
    336   ],
    337   "red_flags": [
    338     {
    339       "flag": "Undisclosed conflicts of interest",
    340       "detail": "Igor Markov (Synopsys) references multiple Synopsys products (DSO.ai, ChipArchitect, KNOWLEDGE ASSISTANT, RUN ASSISTANT, EUCLIDE) without explicit conflict-of-interest disclosure. Leon Stok is VP of EDA at IBM. Both have commercial stakes in AI-for-EDA outcomes."
    341     },
    342     {
    343       "flag": "No structured review methodology",
    344       "detail": "The paper surveys dozens of papers but follows no systematic review protocol. Paper selection appears driven by panelist familiarity and self-citation rather than a reproducible search strategy."
    345     },
    346     {
    347       "flag": "No quality assessment of surveyed work",
    348       "detail": "All referenced papers are treated with equal weight. arXiv preprints and peer-reviewed publications are cited interchangeably without assessing their methodological rigor."
    349     },
    350     {
    351       "flag": "Self-promotion through survey",
    352       "detail": "Panelists frequently cite their own work: Xu cites DeepGate/DeepGate3/DeepCell/DeepRTL (his group), Wang cites ChatCPU/MEIC/UVLLM/VeriDebug (his group), Drechsler cites AutoBench/CorrectBench (his group). The survey structure gives each panelist a platform to advocate for their research agenda."
    353     }
    354   ],
    355   "cited_papers": [
    356     {
    357       "title": "Measuring the impact of early-2025 AI on experienced open-source developer productivity",
    358       "authors": ["J. Becker", "N. Rush", "E. Barnes", "D. Rein"],
    359       "year": 2025,
    360       "relevance": "RCT measuring actual vs. perceived AI coding assistant productivity gains, directly relevant to survey's methodology quality assessment."
    361     },
    362     {
    363       "title": "ChipNeMo: Domain-adapted LLMs for Chip Design",
    364       "authors": ["M. Liu"],
    365       "year": 2023,
    366       "arxiv_id": "2311.00176",
    367       "relevance": "Domain-adapted LLM for hardware design, key example of LLM specialization for EDA tasks."
    368     },
    369     {
    370       "title": "VerilogCoder: Autonomous Verilog coding agents with graph-based planning and abstract syntax tree (AST)-based waveform tracing tool",
    371       "authors": ["C.-T. Ho", "H. Ren", "B. Khailany"],
    372       "year": 2025,
    373       "relevance": "Agentic LLM pipeline for hardware code generation with tool-in-the-loop verification."
    374     },
    375     {
    376       "title": "The illusion of thinking: Understanding the strengths and limitations of reasoning models via the lens of problem complexity",
    377       "authors": ["P. Shojaee"],
    378       "year": 2025,
    379       "arxiv_id": "2506.06941",
    380       "relevance": "Demonstrates sharp declines in LLM reasoning quality as task complexity increases, relevant to AI capability evaluation."
    381     },
    382     {
    383       "title": "Transformers can do arithmetic with the right embeddings",
    384       "authors": ["S. McLeish"],
    385       "year": 2024,
    386       "relevance": "Shows that transformers struggle with arithmetic under standard embeddings but can perform it with suitably designed embeddings; relevant to LLM numerical-reasoning capability boundaries."
    387     },
    388     {
    389       "title": "RTLCoder: Fully open-source and efficient LLM-assisted RTL code generation technique",
    390       "authors": ["S. Liu", "W. Fang", "Y. Lu", "J. Wang", "Q. Zhang", "H. Zhang", "Z. Xie"],
    391       "year": 2024,
    392       "relevance": "Open-source LLM for RTL code generation, key benchmark in LLM-for-hardware-design evaluation."
    393     },
    394     {
    395       "title": "AutoChip: Automating HDL generation using LLM feedback",
    396       "authors": ["S. Thakur", "J. Blocklove", "H. Pearce", "B. Tan", "S. Garg", "R. Karri"],
    397       "year": 2023,
    398       "arxiv_id": "2311.04887",
    399       "relevance": "LLM-based hardware code generation with iterative feedback loops, early agentic approach to EDA."
    400     },
    401     {
    402       "title": "Large circuit models: opportunities and challenges",
    403       "authors": ["L. Chen", "Y. Chen", "Z. Chu"],
    404       "year": 2024,
    405       "relevance": "Foundation paper defining the Large Circuit Model concept for AI-native EDA."
    406     },
    407     {
    408       "title": "Accurate predictions on small data with a tabular foundation model",
    409       "authors": ["N. Hollmann"],
    410       "year": 2025,
    411       "relevance": "Demonstrates synthetic data strategy for foundation model training, cited as alternative to scarce domain data."
    412     },
    413     {
    414       "title": "A survey of research in large language models for electronic design automation",
    415       "authors": ["J. Pan", "G. Zhou", "C.-C. Chang", "I. Jacobson", "J. Hu", "Y. Chen"],
    416       "year": 2025,
    417       "relevance": "Comprehensive survey of LLM applications in EDA, direct comparator to this panel paper."
    418     }
    419   ]
    420 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs