scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (21588B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "position",
      4   "paper": {
      5     "title": "Generative AI for computational chemistry: A roadmap to predicting emergent phenomena",
      6     "authors": [
      7       "P. Tiwary",
      8       "Lukas Herron",
      9       "Richard John",
     10       "Suemin Lee",
     11       "Disha Sanwal"
     12     ],
     13     "year": 2025,
     14     "venue": "PNAS",
     15     "arxiv_id": "2409.03118",
     16     "doi": "10.1073/pnas.2415655121"
     17   },
     18   "checklist": {
     19     "claims_and_evidence": {
     20       "abstract_claims_supported": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "Claims about progress in molecular structure sampling, force field development, and protein/RNA structure prediction are backed by extensive literature citations. The central claim that current AI struggles with emergent phenomena is supported by Schaeffer et al. (2024) on LLMs and Biroli & Mezard (2023) on diffusion models.",
     24         "source": "haiku"
     25       },
     26       "causal_claims_justified": {
     27         "applies": false,
     28         "answer": false,
     29         "justification": "This is a perspective paper making prescriptive arguments rather than empirical causal claims requiring study design validation. Statements that integrating statistical mechanics 'will improve' AI are speculative recommendations, not tested causal hypotheses.",
     30         "source": "haiku"
     31       },
     32       "generalization_bounded": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "The paper makes sweeping generalizations about 'current AI approaches' primarily doing 'memorization and interpolation' without bounding these claims to specific architectures, datasets, or time windows. The domain scope to molecular simulation chemistry is stated but capability claims about all generative AI are overbroad.",
     36         "source": "haiku"
     37       },
     38       "alternative_explanations_discussed": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "The paper does not engage with alternative views — e.g., that scaling, architecture innovations, or curated data alone might overcome current limitations without explicit statistical mechanics integration. Only one research direction is presented as viable.",
     42         "source": "haiku"
     43       },
     44       "proxy_outcome_distinction": {
     45         "applies": false,
     46         "answer": false,
     47         "justification": "This is a perspective paper making theoretical arguments rather than reporting empirical measurements, so the proxy-outcome distinction criterion does not apply.",
     48         "source": "haiku"
     49       }
     50     },
     51     "limitations_and_scope": {
     52       "limitations_section_present": {
     53         "applies": true,
     54         "answer": false,
     55         "justification": "Section 5 is titled 'Critical assessment and outlook' but is a single short paragraph acknowledging 'significant obstacles remain' without systematic treatment. This does not constitute a dedicated limitations section.",
     56         "source": "haiku"
     57       },
     58       "threats_to_validity_specific": {
     59         "applies": true,
     60         "answer": false,
     61         "justification": "No specific threats to the paper's arguments or recommendations are identified. The paper does not address what would falsify its central claim that statistical mechanics integration is necessary for emergent-phenomena prediction.",
     62         "source": "haiku"
     63       },
     64       "scope_boundaries_stated": {
     65         "applies": true,
     66         "answer": true,
     67         "justification": "The introduction explicitly states: 'this Perspective will focus exclusively on molecular simulation driven computational chemistry' — a clear and specific scope boundary.",
     68         "source": "haiku"
     69       }
     70     },
     71     "conflicts_of_interest": {
     72       "funding_disclosed": {
     73         "applies": true,
     74         "answer": true,
     75         "justification": "Acknowledgments clearly state support from NIH/NIGMS (R35GM142719), NSF ACCESS (CHE180027P), and University of Maryland Institute for Health Computing with Montgomery County and UMD funding.",
     76         "source": "haiku"
     77       },
     78       "affiliations_disclosed": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "All author affiliations are disclosed in the author note: Department of Chemistry and Biochemistry, Biophysics Program, Department of Physics, and UMD Institute for Health Computing — all University of Maryland.",
     82         "source": "haiku"
     83       },
     84       "funder_independent_of_outcome": {
     85         "applies": true,
     86         "answer": true,
     87         "justification": "NIH/NIGMS and NSF are government funding agencies independent of the paper's conclusions about generative AI for chemistry.",
     88         "source": "haiku"
     89       },
     90       "financial_interests_declared": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "The paper includes no competing-interests or financial-interests statement. It prominently features the corresponding author's own methods (AF2RAVE, Thermodynamic Maps, SPIB) as exemplars of the recommended approach, without any disclosure of potential conflicts.",
     94         "source": "haiku"
     95       }
     96     },
     97     "scope_and_framing": {
     98       "key_terms_defined": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "Section 1 'The Theoretical Minimum' explicitly defines six computational chemistry terms (PES, force fields, thermodynamic ensemble, CVs, free energy surface, molecular simulations) and seven generative AI terms (latent variables, prior, loss function, training/testing/validation, regularization, embeddings, attention).",
    102         "source": "haiku"
    103       },
    104       "intended_contribution_clear": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "The introduction explicitly states: 'a clear Perspective is now essential to highlight progress and critically examine pitfalls.' The structured roadmap framing is explicit throughout.",
    108         "source": "haiku"
    109       },
    110       "engagement_with_prior_work": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "The paper cites 133 references and substantively discusses how each method family builds on, compares to, or improves upon prior work — not merely listing citations but analyzing relationships between approaches.",
    114         "source": "haiku"
    115       }
    116     }
    117   },
    118   "type_checklist": {
    119     "position": {
    120       "argument_quality": {
    121         "argument_internally_consistent": {
    122           "applies": true,
    123           "answer": true,
    124           "justification": "The paper consistently argues from the premise that emergent phenomena require physics-grounded AI, supports this through method reviews and application analyses, and concludes with aligned recommendations. No internal contradictions detected.",
    125           "source": "haiku"
    126         },
    127         "counterarguments_addressed": {
    128           "applies": true,
    129           "answer": false,
    130           "justification": "The paper never engages with the opposing view that data-driven scaling, architectural innovations, or larger training sets might overcome current limitations without physics priors. The advocacy for statistical mechanics integration is entirely one-directional.",
    131           "source": "haiku"
    132         },
    133         "analogies_appropriate": {
    134           "applies": true,
    135           "answer": true,
    136           "justification": "The Phil Anderson 'More Is Different' analogy for emergent phenomena is correctly cited (Science 1972) and apt. The framing of AI-chemistry integration as synergistic rather than substitutive is a fair characterization of the relationship.",
    137           "source": "haiku"
    138         },
    139         "prescriptions_proportional": {
    140           "applies": true,
    141           "answer": true,
    142           "justification": "The five prescriptions (physics integration, interpretability, OOD generalization, data rethinking, emergent phenomena coupling) are technical recommendations bounded to the scope of the review, not sweeping policy demands requiring stronger justification.",
    143           "source": "haiku"
    144         },
    145         "evidence_for_claims_cited": {
    146           "applies": true,
    147           "answer": true,
    148           "justification": "Factual claims are consistently backed by citations. Claims about AI limitations cite specific empirical papers (e.g., Schaeffer et al. on emergent abilities, Buel & Walters on AlphaFold mutation prediction limits).",
    149           "source": "haiku"
    150         },
    151         "alternatives_discussed": {
    152           "applies": true,
    153           "answer": false,
    154           "justification": "Alternative frameworks — pure scaling approaches, foundation model pre-training, improved architectures without physics priors — are never discussed as viable alternatives to the statistical mechanics integration pathway the paper advocates.",
    155           "source": "haiku"
    156         },
    157         "historical_context_accurate": {
    158           "applies": true,
    159           "answer": true,
    160           "justification": "Historical references appear accurate: Anderson 1972 is correctly cited and characterized, AlphaFold2/RoseTTAFold history is factually correct, and foundational ML citations (LSTM, Vaswani transformers, GANs) are correctly attributed.",
    161           "source": "haiku"
    162         }
    163       },
    164       "clarity_and_scope": {
    165         "key_terms_defined_precisely": {
    166           "applies": true,
    167           "answer": true,
    168           "justification": "Section 1 provides explicit, precise definitions of both disciplinary vocabularies before they are used. This is stronger than most perspective papers — terms like 'collective variables,' 'committor,' 'latent variables,' and 'mode collapse' are each given a dedicated definition.",
    169           "source": "haiku"
    170         },
    171         "engages_with_existing_literature": {
    172           "applies": true,
    173           "answer": true,
    174           "justification": "133 references are engaged substantively: each method's prior work is discussed, limitations identified by others are cited, and inter-method comparisons reference specific papers rather than generic claims.",
    175           "source": "haiku"
    176         },
    177         "intended_audience_clear": {
    178           "applies": true,
    179           "answer": false,
    180           "justification": "The intended audience — computational chemists, AI researchers, or both — is never explicitly stated. The dual-vocabulary framing suggests a cross-disciplinary audience but this is not declared.",
    181           "source": "haiku"
    182         },
    183         "assumptions_stated": {
    184           "applies": true,
    185           "answer": false,
    186           "justification": "The central assumption — that emergent phenomena are the definitive test of AI utility, and that statistical mechanics is necessary for achieving this — is asserted as obvious rather than stated as an assumption the argument depends on.",
    187           "source": "haiku"
    188         },
    189         "scope_of_applicability_discussed": {
    190           "applies": true,
    191           "answer": true,
    192           "justification": "The paper explicitly limits applicability to molecular simulation-driven computational chemistry and notes that this restriction prevents overgeneralization to materials science or drug discovery contexts not covered by the review.",
    193           "source": "haiku"
    194         }
    195       }
    196     }
    197   },
    198   "claims": [
    199     {
    200       "claim": "Current generative AI methods primarily excel at memorization and interpolation, not predicting emergent phenomena.",
    201       "evidence": "Cites Schaeffer et al. (2024) quantifying LLM emergent ability limitations and Biroli & Mezard (2023) on diffusion model generalization/memorization regimes.",
    202       "supported": "moderate"
    203     },
    204     {
    205       "claim": "GANs are going out of fashion for chemical applications in favor of diffusion models and RL-based approaches.",
    206       "evidence": "Cites Dhariwal & Nichol (2021) showing diffusion models outperform GANs on image synthesis, and describes inherent GAN limitations (mode collapse, training instability, OOD failure).",
    207       "supported": "moderate"
    208     },
    209     {
    210       "claim": "AlphaFold2 cannot reliably predict conformational ensembles or the impact of point mutations.",
    211       "evidence": "Cites Buel & Walters (2022) on AF2 missense mutation failures and Bowman (2024) arguing conformational ensembles are the frontier AF2 does not address.",
    212       "supported": "strong"
    213     },
    214     {
    215       "claim": "Machine learning force fields can achieve quantum-level accuracy at classical MD speeds.",
    216       "evidence": "Cites Noe et al. (2020), Tiwary (2024), and multiple application papers. Notes generalization beyond training data remains a known limitation.",
    217       "supported": "moderate"
    218     },
    219     {
    220       "claim": "Typical AI scaling laws do not hold in chemistry — more data is not always better.",
    221       "evidence": "Uses MD trajectories trapped in metastable states as an illustrative example where more data amplifies noise. No quantitative evidence provided; argument is qualitative.",
    222       "supported": "weak"
    223     },
    224     {
    225       "claim": "Integrating statistical mechanics into AI will enable prediction of novel emergent chemical phenomena.",
    226       "evidence": "Cites AF2RAVE and Thermodynamic Maps (both from the corresponding author's group) as proof-of-concept. No broad comparative validation across methods.",
    227       "supported": "weak"
    228     }
    229   ],
    230   "methodology_tags": [
    231     "theoretical"
    232   ],
    233   "key_findings": "This PNAS perspective argues that generative AI for computational chemistry has achieved impressive results in structure prediction and force field development but fails at the field's ultimate goal: predicting emergent chemical phenomena — behaviors arising from complex many-body interactions over time. The paper reviews five method families (autoencoders, GANs, RL, flow models, LLMs) and three application domains (quantum chemistry/force fields, protein structure, RNA structure), consistently finding limitations in out-of-distribution generalization and environmental coupling. The central prescription is that future AI models must deeply integrate statistical mechanics principles — thermodynamic ensembles, free energy surfaces, and non-equilibrium dynamics — rather than treating chemistry as another data domain for pattern matching.",
    234   "red_flags": [
    235     {
    236       "flag": "Self-citation advocacy",
    237       "detail": "The corresponding author's own group's methods are featured as the primary positive examples: AF2RAVE (refs 85, 87, 88), Thermodynamic Maps (ref 133), SPIB (ref 17), and enhanced sampling work (refs 4, 56). This represents advocacy for the author's own research program without explicit disclosure."
    238     },
    239     {
    240       "flag": "No competing interests statement",
    241       "detail": "No financial interests, patent, or competing interests statement is included despite the paper functioning partly as advocacy for the corresponding author's research directions."
    242     },
    243     {
    244       "flag": "Sweeping capability generalizations",
    245       "detail": "Claims that 'current AI approaches struggle with capturing emergent behaviors' and 'primarily excel at memorization and interpolation' are stated as field-wide facts without bounding to specific architectures, scales, or time periods in a rapidly evolving field."
    246     },
    247     {
    248       "flag": "No counterargument engagement",
    249       "detail": "The strongest counterargument — that scaling, architectural improvements, or foundation model pretraining could overcome current limitations without explicit physics priors — is never engaged. The perspective is entirely one-sided advocacy."
    250     }
    251   ],
    252   "cited_papers": [
    253     {
    254       "title": "Are emergent abilities of large language models a mirage?",
    255       "relevance": "Key citation supporting the paper's central claim that current AI cannot predict emergent phenomena — directly relevant to the paper's core argument."
    256     },
    257     {
    258       "title": "Highly accurate protein structure prediction with AlphaFold",
    259       "relevance": "Central example of generative AI success in chemistry, used as a benchmark for what has been achieved and what limitations remain."
    260     },
    261     {
    262       "title": "Accurate structure prediction of biomolecular interactions with AlphaFold 3",
    263       "relevance": "Represents current state-of-the-art discussed as still insufficient for conformational ensemble prediction."
    264     },
    265     {
    266       "title": "Enhanced sampling with machine learning (Annual Review Physical Chemistry)",
    267       "relevance": "Reviews enhanced MD sampling methods directly relevant to the paper's focus on accessing rare and emergent phenomena."
    268     },
    269     {
    270       "title": "Boltzmann generators: Sampling equilibrium states of many-body systems with deep learning",
    271       "relevance": "Key example of flow-based generative models applied to molecular simulation, central to the roadmap."
    272     },
    273     {
    274       "title": "Machine learning for molecular simulation (Annual Review Physical Chemistry)",
    275       "relevance": "Comprehensive review establishing the baseline landscape this perspective builds upon and critiques."
    276     },
    277     {
    278       "title": "GFlowNet foundations",
    279       "relevance": "RL framework highlighted as promising for integrating statistical mechanics concepts into generative AI for chemistry."
    280     },
    281     {
    282       "title": "Generative diffusion in very large dimensions",
    283       "relevance": "Cited to support the claim that diffusion models show memorization vs. generalization regimes — key empirical basis for the paper's critique."
    284     }
    285   ],
    286   "engagement_factors": {
    287     "practical_relevance": {
    288       "score": 2,
    289       "justification": "Provides a structured five-point roadmap with specific technical recommendations practitioners can act on, though no tools, code, or benchmarks are released."
    290     },
    291     "surprise_contrarian": {
    292       "score": 2,
    293       "justification": "Takes a skeptical stance against dominant AI enthusiasm in chemistry, arguing current methods are fundamentally limited to memorization — contrarian in a field dominated by AlphaFold triumphalism."
    294     },
    295     "fear_safety": {
    296       "score": 1,
    297       "justification": "Briefly warns that AI-generated unreliable molecular predictions could produce 'deepfakes' in chemistry, but this is peripheral and not developed as a safety argument."
    298     },
    299     "drama_conflict": {
    300       "score": 1,
    301       "justification": "Implicitly challenges AlphaFold triumphalism and LLM hype applied to chemistry, but does so in measured academic prose without naming specific claims or researchers to dispute."
    302     },
    303     "demo_ability": {
    304       "score": 0,
    305       "justification": "No code, tools, demos, or interactive components are provided or linked. This is a pure perspective paper."
    306     },
    307     "brand_recognition": {
    308       "score": 1,
    309       "justification": "Published in PNAS (high-profile venue), but the authors and their methods (AF2RAVE, Thermodynamic Maps) are not broadly recognized outside the computational chemistry subfield."
    310     }
    311   },
    312   "hn_data": {
    313     "threads": [
    314       {
    315         "hn_id": "40876840",
    316         "title": "LivePortrait: A fast, controllable portrait animation model",
    317         "points": 203,
    318         "comments": 25,
    319         "url": "https://news.ycombinator.com/item?id=40876840",
    320         "created_at": "2024-07-04T18:02:50Z"
    321       },
    322       {
    323         "hn_id": "24576451",
    324         "title": "It's Not Just Size That Matters: Small Models with Performance Similar to GPT-3",
    325         "points": 9,
    326         "comments": 0,
    327         "url": "https://news.ycombinator.com/item?id=24576451",
    328         "created_at": "2020-09-24T08:10:46Z"
    329       },
    330       {
    331         "hn_id": "26393219",
    332         "title": "It's Not Just Size That Matters Small Language Models Are Also Few-Shot Learners",
    333         "points": 4,
    334         "comments": 0,
    335         "url": "https://news.ycombinator.com/item?id=26393219",
    336         "created_at": "2021-03-08T23:43:29Z"
    337       },
    338       {
    339         "hn_id": "28436460",
    340         "title": "Hosting Industry Centralization and Consolidation",
    341         "points": 3,
    342         "comments": 0,
    343         "url": "https://news.ycombinator.com/item?id=28436460",
    344         "created_at": "2021-09-06T18:19:31Z"
    345       },
    346       {
    347         "hn_id": "37515238",
    348         "title": "Bayes' Rays: Uncertainty Quantification for Neural Radiance Fields",
    349         "points": 2,
    350         "comments": 0,
    351         "url": "https://news.ycombinator.com/item?id=37515238",
    352         "created_at": "2023-09-14T21:42:16Z"
    353       },
    354       {
    355         "hn_id": "35194358",
    356         "title": "Petals: Collaborative Inference and Fine-Tuning of Large Models",
    357         "points": 2,
    358         "comments": 0,
    359         "url": "https://news.ycombinator.com/item?id=35194358",
    360         "created_at": "2023-03-17T07:40:54Z"
    361       },
    362       {
    363         "hn_id": "40598084",
    364         "title": "Reconstructing Training Data from Document Understanding Models",
    365         "points": 1,
    366         "comments": 1,
    367         "url": "https://news.ycombinator.com/item?id=40598084",
    368         "created_at": "2024-06-06T14:50:43Z"
    369       },
    370       {
    371         "hn_id": "41655851",
    372         "title": "The WMDP Benchmark: Measuring and Reducing Malicious Use with Unlearning",
    373         "points": 1,
    374         "comments": 0,
    375         "url": "https://news.ycombinator.com/item?id=41655851",
    376         "created_at": "2024-09-26T08:20:13Z"
    377       },
    378       {
    379         "hn_id": "24551849",
    380         "title": "It's Not Just Size That Matters:Small Language Models Are Also Few-Shot Learners",
    381         "points": 1,
    382         "comments": 0,
    383         "url": "https://news.ycombinator.com/item?id=24551849",
    384         "created_at": "2020-09-22T07:23:22Z"
    385       }
    386     ],
    387     "top_points": 203,
    388     "total_points": 226,
    389     "total_comments": 26
    390   }
    391 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs