scan-v4.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v4.json (20766B)
      1 {
      2   "scan_version": 4,
      3   "paper_type": "position",
      4   "paper": {
      5     "title": "Generative AI for computational chemistry: A roadmap to predicting emergent phenomena",
      6     "authors": [
      7       "P. Tiwary",
      8       "Lukas Herron",
      9       "Richard John",
     10       "Suemin Lee",
     11       "Disha Sanwal"
     12     ],
     13     "year": 2025,
     14     "venue": "PNAS",
     15     "arxiv_id": "2409.03118",
     16     "doi": "10.1073/pnas.2415655121"
     17   },
     18   "checklist": {
     19     "claims_and_evidence": {
     20       "abstract_claims_supported": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "The abstract claims are appropriately hedged as perspective/opinion ('We believe', 'We suggest') and the paper provides a structured overview supporting these positions through its discussion of methods, applications, and desirables.",
     24         "source": "opus"
     25       },
     26       "causal_claims_justified": {
     27         "applies": false,
     28         "answer": false,
     29         "justification": "The paper makes no causal claims from empirical data; it offers perspective and reviews existing literature.",
     30         "source": "opus"
     31       },
     32       "generalization_bounded": {
     33         "applies": true,
     34         "answer": true,
     35         "justification": "The paper explicitly scopes itself: 'this Perspective will focus exclusively on molecular simulation driven computational chemistry' (Introduction). Scope boundaries are clearly stated throughout.",
     36         "source": "opus"
     37       },
     38       "alternative_explanations_discussed": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "This is a perspective/review paper with no original empirical results requiring alternative explanations.",
     42         "source": "opus"
     43       },
     44       "proxy_outcome_distinction": {
     45         "applies": false,
     46         "answer": false,
     47         "justification": "No measurements or proxies are used; this is a theoretical perspective paper.",
     48         "source": "opus"
     49       }
     50     },
     51     "limitations_and_scope": {
     52       "limitations_section_present": {
     53         "applies": true,
     54         "answer": true,
     55         "justification": "Section 5 'Critical assessment and outlook' serves as a limitations discussion, acknowledging 'significant obstacles remain before AI can fully integrate into the molecular simulation toolbox.'",
     56         "source": "opus"
     57       },
     58       "threats_to_validity_specific": {
     59         "applies": true,
     60         "answer": true,
     61         "justification": "The paper identifies specific concerns: AI models primarily excel at memorization/interpolation (citing Refs 6, 7), AlphaFold's pLDDT scores have 'shown limitations in providing reliable assessments' (Sec. 4), and training on synthetic MD data risks 'deepfakes' and 'unreliable outcomes' (Sec. 3B).",
     62         "source": "opus"
     63       },
     64       "scope_boundaries_stated": {
     65         "applies": true,
     66         "answer": true,
     67         "justification": "Explicitly stated: 'this Perspective will focus exclusively on molecular simulation driven computational chemistry' (Introduction). The paper also clearly scopes what remains to be desired from generative AI in Sec. 4.",
     68         "source": "opus"
     69       }
     70     },
     71     "conflicts_of_interest": {
     72       "funding_disclosed": {
     73         "applies": true,
     74         "answer": true,
     75         "justification": "Acknowledgments section states: 'This work was supported by NIH/NIGMS under award number R35GM142719' and mentions UMD HPC and NSF ACCESS resources.",
     76         "source": "opus"
     77       },
     78       "affiliations_disclosed": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "Author affiliations are listed: University of Maryland Department of Chemistry and Biochemistry, Institute for Health Computing, Biophysics Program, Department of Physics.",
     82         "source": "opus"
     83       },
     84       "funder_independent_of_outcome": {
     85         "applies": true,
     86         "answer": true,
     87         "justification": "NIH/NIGMS and NSF are government funding agencies with no financial stake in the conclusions of this perspective paper.",
     88         "source": "opus"
     89       },
     90       "financial_interests_declared": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "No competing interests or financial disclosure statement is present in the paper.",
     94         "source": "opus"
     95       }
     96     },
     97     "scope_and_framing": {
     98       "key_terms_defined": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "Section 1 'The Theoretical Minimum' explicitly defines both computational chemistry terms (PES, force fields, collective variables, free energy surface) and AI terms (latent variables, prior, loss function, attention) used throughout the paper.",
    102         "source": "haiku"
    103       },
    104       "intended_contribution_clear": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "The paper explicitly states its contribution: 'This Perspective offers a structured overview... highlights selected applications... and concludes with Critical Assessment and Outlook,' framing it as a roadmap for the field.",
    108         "source": "haiku"
    109       },
    110       "engagement_with_prior_work": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "The paper engages substantively with 133 references, explicitly positioning reviewed methods relative to each other (e.g., explaining why GANs are declining in favor of diffusion models, how AF2RAVE addresses AlphaFold2's limitations).",
    114         "source": "haiku"
    115       }
    116     }
    117   },
    118   "type_checklist": {
    119     "position": {
    120       "argument_quality": {
    121         "argument_internally_consistent": {
    122           "applies": true,
    123           "answer": true,
    124           "justification": "The core argument is coherent: current AI excels at interpolation not extrapolation → emergent phenomena require extrapolation → therefore AI needs grounding in statistical mechanics. The sections build on this logic consistently.",
    125           "source": "haiku"
    126         },
    127         "counterarguments_addressed": {
    128           "applies": true,
    129           "answer": false,
    130           "justification": "The paper does not seriously engage with the strongest counterargument—that scaling laws or data-driven approaches may eventually capture emergent phenomena without explicit physics integration. The citation of Schaeffer et al. is used to support the authors' view rather than steelman the opposition.",
    131           "source": "haiku"
    132         },
    133         "analogies_appropriate": {
    134           "applies": true,
    135           "answer": true,
    136           "justification": "The analogy comparing AI-generated chemistry to DALL-E/ChatGPT 'deepfakes' is apt and bounded to the specific concern about non-physical predictions; the statistical physics analogies for diffusion models are technically accurate.",
    137           "source": "haiku"
    138         },
    139         "prescriptions_proportional": {
    140           "applies": true,
    141           "answer": true,
    142           "justification": "The five desiderata (Section 4) are narrow and domain-specific, not sweeping policy demands; they are proportional to the reviewed evidence about limitations of current methods.",
    143           "source": "haiku"
    144         },
    145         "evidence_for_claims_cited": {
    146           "applies": true,
    147           "answer": true,
    148           "justification": "Factual claims throughout are well-cited (133 references); for example, claims about AlphaFold2 limitations cite Buel & Walters 2022, and claims about mode collapse cite specific GAN literature.",
    149           "source": "haiku"
    150         },
    151         "alternatives_discussed": {
    152           "applies": true,
    153           "answer": true,
    154           "justification": "The paper explicitly compares competing approaches (normalizing flows vs. diffusion models, reduced-MSA vs. full MSA approaches) and explains why some methods are being superseded, providing genuine comparison rather than only advocating one view.",
    155           "source": "haiku"
    156         },
    157         "historical_context_accurate": {
    158           "applies": true,
    159           "answer": true,
    160           "justification": "Historical references (Anderson's 1972 'More Is Different,' LSTM origins, GAN origins, classical MD/MC methods) are accurately attributed and contextualized.",
    161           "source": "haiku"
    162         }
    163       },
    164       "clarity_and_scope": {
    165         "key_terms_defined_precisely": {
    166           "applies": true,
    167           "answer": true,
    168           "justification": "Key terms are defined precisely in the 'Theoretical Minimum' section with mathematical definitions (e.g., committor function, free energy surface, normalizing flow Jacobian determinant identity).",
    169           "source": "haiku"
    170         },
    171         "engages_with_existing_literature": {
    172           "applies": true,
    173           "answer": true,
    174           "justification": "The paper substantively discusses, compares, and critiques existing methods rather than merely listing them; e.g., explaining why AlphaFold2 fails for metastable conformations and how AF2RAVE addresses this gap.",
    175           "source": "haiku"
    176         },
    177         "intended_audience_clear": {
    178           "applies": true,
    179           "answer": false,
    180           "justification": "The intended audience is never explicitly stated; the paper assumes deep familiarity with both quantum chemistry and ML, suggesting an expert audience, but this is never articulated.",
    181           "source": "haiku"
    182         },
    183         "assumptions_stated": {
    184           "applies": true,
    185           "answer": true,
    186           "justification": "The central assumption—that predicting emergent phenomena from chemical identity is the appropriate standard for AI tools—is stated explicitly in Section 4: 'We believe the ultimate predictive power of any tool... lies in starting from chemical identity and accurately predicting function.'",
    187           "source": "haiku"
    188         },
    189         "scope_of_applicability_discussed": {
    190           "applies": true,
    191           "answer": true,
    192           "justification": "The paper explicitly bounds itself to 'molecular simulation driven computational chemistry' and acknowledges the broad scope of generative AI while restricting discussion: 'While the scope of Generative AI's impact in chemistry is broad, this Perspective will focus exclusively on molecular simulation.'",
    193           "source": "haiku"
    194         }
    195       }
    196     }
    197   },
    198   "claims": [
    199     {
    200       "claim": "Generative AI methods have made significant progress in sampling molecular structures, developing transferable force fields, and speeding up molecular simulations.",
    201       "evidence": "Sections 2-3 review extensive literature including AlphaFold2, MLFFs, and enhanced sampling methods with 133 citations.",
    202       "supported": "strong"
    203     },
    204     {
    205       "claim": "Current AI tools primarily excel at memorization and interpolation, not predicting emergent phenomena.",
    206       "evidence": "Cites Schaeffer et al. (2024) on emergent abilities being a 'mirage' and Biroli & Mezard (2023) on limitations of diffusion models; argued in introduction and section 4.",
    207       "supported": "moderate"
    208     },
    209     {
    210       "claim": "Integrating statistical mechanics principles into AI models is necessary for predicting emergent chemical phenomena.",
    211       "evidence": "Argued through logical inference about what emergent phenomena require; demonstrated only weakly by examples like AF2RAVE and Thermodynamic Maps, not by controlled comparison.",
    212       "supported": "weak"
    213     },
    214     {
    215       "claim": "More data does not always improve performance in computational chemistry AI—additional MD data can amplify noise in metastable systems.",
    216       "evidence": "Argued in Section 4.4 with a specific example of MD trajectories trapped in metastable states, but no quantitative demonstration is provided.",
    217       "supported": "weak"
    218     },
    219     {
    220       "claim": "AlphaFold2 struggles to predict the impact of missense mutations and metastable non-native structures.",
    221       "evidence": "Cites Buel & Walters 2022 (Nat. Struct. & Mol. Biol.) and Wayment-Steele et al. 2024 for evidence of these limitations.",
    222       "supported": "strong"
    223     },
    224     {
    225       "claim": "GANs are gradually going out of fashion for chemical applications in favor of diffusion models.",
    226       "evidence": "Asserted with a citation to Dhariwal & Nichol 2021 (diffusion beats GANs on image synthesis) but no systematic survey of adoption trends in chemistry is provided.",
    227       "supported": "weak"
    228     }
    229   ],
    230   "methodology_tags": [
    231     "theoretical"
    232   ],
    233   "key_findings": "This perspective argues that while generative AI has made genuine progress in computational chemistry (force field development, protein structure prediction, molecular sampling), current methods are fundamentally limited to interpolation within training distributions and cannot reliably predict emergent chemical phenomena. The authors prescribe five desiderata for future AI: physics-grounded model design, better interpretability, out-of-distribution generalization, smarter data curation over raw data quantity, and explicit coupling to environmental variables. The proposed solution centers on integrating statistical mechanics—particularly thermodynamic ensembles, committor functions, and non-equilibrium dynamics—as inductive biases into generative models.",
    234   "red_flags": [
    235     {
    236       "flag": "Heavy self-citation",
    237       "detail": "A substantial fraction of citations reference the Tiwary lab's own prior work (refs 4, 15, 16, 17, 18, 19, 56, 60, 85, 87, 88, 133), creating potential selection bias in which methods are highlighted as exemplary."
    238     },
    239     {
    240       "flag": "No competing interests statement",
    241       "detail": "Despite the paper prescribing directions that would benefit the authors' own research program, no competing interests or financial interests disclosure is present."
    242     },
    243     {
    244       "flag": "Prescriptive claims not empirically validated",
    245       "detail": "The central prescription—that statistical mechanics integration is the key to predicting emergent phenomena—is argued by analogy and logic rather than demonstrated empirically with a controlled comparison."
    246     },
    247     {
    248       "flag": "Non-systematic literature coverage",
    249       "detail": "This is a narrative perspective, not a systematic review; no search methodology, inclusion/exclusion criteria, or coverage assessment is provided, making completeness claims unverifiable."
    250     }
    251   ],
    252   "cited_papers": [
    253     {
    254       "title": "Are emergent abilities of large language models a mirage?",
    255       "relevance": "Core evidence for the claim that AI appears to exhibit emergent behavior but primarily performs interpolation."
    256     },
    257     {
    258       "title": "Enhanced sampling with machine learning (Annu. Rev. Phys. Chem. 2024)",
    259       "relevance": "Foundational review of ML-enhanced sampling that this perspective builds upon."
    260     },
    261     {
    262       "title": "Highly accurate protein structure prediction with AlphaFold (Jumper et al., Nature 2021)",
    263       "relevance": "Primary example of generative AI success in computational chemistry, used to illustrate both progress and limitations."
    264     },
    265     {
    266       "title": "Boltzmann generators: Sampling equilibrium states of many-body systems with deep learning (Noe et al., Science 2019)",
    267       "relevance": "Key example of physics-grounded generative model for molecular systems."
    268     },
    269     {
    270       "title": "Machine learning for molecular simulation (Noe et al., Annu. Rev. Phys. Chem. 2020)",
    271       "relevance": "Broad review of ML in molecular simulation that this perspective updates and extends."
    272     },
    273     {
    274       "title": "AlphaFold2-RAVE: From sequence to Boltzmann ranking (Vani et al., JCTC 2023)",
    275       "relevance": "Example of integrating physics (Boltzmann weighting) with AI structure prediction—held up as a model for the proposed approach."
    276     },
    277     {
    278       "title": "On the dangers of stochastic parrots: Can language models be too big? (Bender et al., FAccT 2021)",
    279       "relevance": "Referenced for LLM limitations in extrapolating beyond training data—relevant to argument about AI interpolation vs. emergence."
    280     },
    281     {
    282       "title": "Dynamical regimes of diffusion models (Biroli et al., 2024)",
    283       "relevance": "Evidence that diffusion models can be divided into memorization vs. generalization regimes—supports claim about AI interpolation limits."
    284     },
    285     {
    286       "title": "Inferring phase transitions and critical exponents with thermodynamic maps (Herron et al., arXiv 2023)",
    287       "relevance": "Authors' own method proposed as an example of AI-physics integration for emergent phenomena (RNA structure ensembles)."
    288     },
    289     {
    290       "title": "Characterizing uncertainty in machine learning for chemistry (Heid et al., JCIM 2023)",
    291       "relevance": "Cited on data leakage and careful data curation requirements in chemistry AI—supports Section 4.4 on data quality."
    292     }
    293   ],
    294   "engagement_factors": {
    295     "practical_relevance": {
    296       "score": 2,
    297       "justification": "Offers a structured roadmap with five concrete desiderata for practitioners building AI tools for computational chemistry."
    298     },
    299     "surprise_contrarian": {
    300       "score": 2,
    301       "justification": "Argues against the prevailing hype that AI (especially AlphaFold-style) has 'solved' computational chemistry, citing memorization limitations and emergent phenomena failures."
    302     },
    303     "fear_safety": {
    304       "score": 0,
    305       "justification": "No AI safety or risk framing; limited to scientific reliability concerns about non-physical predictions ('deepfakes' in chemistry)."
    306     },
    307     "drama_conflict": {
    308       "score": 1,
    309       "justification": "Mild tension with the AlphaFold/LLM success narrative, but presented academically without adversarial framing."
    310     },
    311     "demo_ability": {
    312       "score": 0,
    313       "justification": "This is a perspective paper with no software, dataset, or demo artifact released."
    314     },
    315     "brand_recognition": {
    316       "score": 1,
    317       "justification": "Published in PNAS (high-prestige venue) but no famous lab brand; University of Maryland is well-regarded but not a household AI lab name."
    318     }
    319   },
    320   "hn_data": {
    321     "threads": [
    322       {
    323         "hn_id": "40876840",
    324         "title": "LivePortrait: A fast, controllable portrait animation model",
    325         "points": 203,
    326         "comments": 25,
    327         "url": "https://news.ycombinator.com/item?id=40876840",
    328         "created_at": "2024-07-04T18:02:50Z"
    329       },
    330       {
    331         "hn_id": "24576451",
    332         "title": "It's Not Just Size That Matters: Small Models with Performance Similar to GPT-3",
    333         "points": 9,
    334         "comments": 0,
    335         "url": "https://news.ycombinator.com/item?id=24576451",
    336         "created_at": "2020-09-24T08:10:46Z"
    337       },
    338       {
    339         "hn_id": "26393219",
    340         "title": "It's Not Just Size That Matters Small Language Models Are Also Few-Shot Learners",
    341         "points": 4,
    342         "comments": 0,
    343         "url": "https://news.ycombinator.com/item?id=26393219",
    344         "created_at": "2021-03-08T23:43:29Z"
    345       },
    346       {
    347         "hn_id": "28436460",
    348         "title": "Hosting Industry Centralization and Consolidation",
    349         "points": 3,
    350         "comments": 0,
    351         "url": "https://news.ycombinator.com/item?id=28436460",
    352         "created_at": "2021-09-06T18:19:31Z"
    353       },
    354       {
    355         "hn_id": "37515238",
    356         "title": "Bayes' Rays: Uncertainty Quantification for Neural Radiance Fields",
    357         "points": 2,
    358         "comments": 0,
    359         "url": "https://news.ycombinator.com/item?id=37515238",
    360         "created_at": "2023-09-14T21:42:16Z"
    361       },
    362       {
    363         "hn_id": "35194358",
    364         "title": "Petals: Collaborative Inference and Fine-Tuning of Large Models",
    365         "points": 2,
    366         "comments": 0,
    367         "url": "https://news.ycombinator.com/item?id=35194358",
    368         "created_at": "2023-03-17T07:40:54Z"
    369       },
    370       {
    371         "hn_id": "40598084",
    372         "title": "Reconstructing Training Data from Document Understanding Models",
    373         "points": 1,
    374         "comments": 1,
    375         "url": "https://news.ycombinator.com/item?id=40598084",
    376         "created_at": "2024-06-06T14:50:43Z"
    377       },
    378       {
    379         "hn_id": "41655851",
    380         "title": "The WMDP Benchmark: Measuring and Reducing Malicious Use with Unlearning",
    381         "points": 1,
    382         "comments": 0,
    383         "url": "https://news.ycombinator.com/item?id=41655851",
    384         "created_at": "2024-09-26T08:20:13Z"
    385       },
    386       {
    387         "hn_id": "24551849",
    388         "title": "It's Not Just Size That Matters:Small Language Models Are Also Few-Shot Learners",
    389         "points": 1,
    390         "comments": 0,
    391         "url": "https://news.ycombinator.com/item?id=24551849",
    392         "created_at": "2020-09-22T07:23:22Z"
    393       }
    394     ],
    395     "top_points": 203,
    396     "total_points": 226,
    397     "total_comments": 26
    398   }
    399 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs