scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (16873B)
      1 {
      2   "paper": {
      3     "title": "Generative artificial intelligence for computational chemistry: a roadmap to predicting emergent phenomena",
      4     "authors": ["Pratyush Tiwary", "Lukas Herron", "Richard John", "Suemin Lee", "Disha Sanwal", "Ruiyu Wang"],
      5     "year": 2024,
      6     "venue": "PNAS",
      7     "arxiv_id": "2409.03118",
      8     "doi": "10.1073/pnas.2415655121"
      9   },
     10   "scan_version": 2,
     11   "active_modules": [],
     12   "methodology_tags": ["theoretical", "qualitative"],
     13   "key_findings": "This perspective paper surveys generative AI methods (autoencoders, GANs, RL, flow models, LLMs) for computational chemistry, focusing on molecular simulation. It argues that current AI methods excel at interpolation and memorization but struggle to predict emergent chemical phenomena. The authors contend that integrating statistical mechanics principles into AI models is necessary for truly predictive tools, and that the field should hold generative AI to the same predictive standards as traditional simulation methods.",
     14   "checklist": {
     15     "artifacts": {
     16       "code_released": {
     17         "applies": false,
     18         "answer": false,
     19         "justification": "This is a perspective/review paper with no original experiments or code to release."
     20       },
     21       "data_released": {
     22         "applies": false,
     23         "answer": false,
     24         "justification": "This is a perspective/review paper with no original data to release."
     25       },
     26       "environment_specified": {
     27         "applies": false,
     28         "answer": false,
     29         "justification": "No computational experiments were conducted; this is a narrative review."
     30       },
     31       "reproduction_instructions": {
     32         "applies": false,
     33         "answer": false,
     34         "justification": "No experiments to reproduce; this is a perspective paper."
     35       }
     36     },
     37     "statistical_methodology": {
     38       "confidence_intervals_or_error_bars": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "No original quantitative results are reported; this is a perspective paper."
     42       },
     43       "significance_tests": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No statistical comparisons are made; this is a narrative review."
     47       },
     48       "effect_sizes_reported": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No original experiments or quantitative comparisons."
     52       },
     53       "sample_size_justified": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No experiments with samples; theoretical perspective paper."
     57       },
     58       "variance_reported": {
     59         "applies": false,
     60         "answer": false,
     61         "justification": "No experimental runs; this is a perspective paper."
     62       }
     63     },
     64     "evaluation_design": {
     65       "baselines_included": {
     66         "applies": false,
     67         "answer": false,
     68         "justification": "No system or method is evaluated; this is a perspective/review."
     69       },
     70       "baselines_contemporary": {
     71         "applies": false,
     72         "answer": false,
     73         "justification": "No evaluation is conducted."
     74       },
     75       "ablation_study": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "No system with components to ablate; this is a perspective paper."
     79       },
     80       "multiple_metrics": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "No evaluation metrics are used; this is a narrative review."
     84       },
     85       "human_evaluation": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "No system outputs to evaluate; perspective paper."
     89       },
     90       "held_out_test_set": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "No experiments; perspective paper."
     94       },
     95       "per_category_breakdown": {
     96         "applies": false,
     97         "answer": false,
     98         "justification": "No quantitative results to break down."
     99       },
    100       "failure_cases_discussed": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "The paper extensively discusses limitations and failure modes of current generative AI methods for chemistry, including mode collapse in GANs (Sec. 2B), curse of dimensionality in RL (Sec. 2C), and the inability of current models to predict emergent phenomena (Sec. 4, 5)."
    104       },
    105       "negative_results_reported": {
    106         "applies": true,
    107         "answer": true,
    108         "justification": "The paper highlights negative findings: LLMs have limitations in extrapolating beyond training data (Sec. 2E), diffusion models struggle with emergent behavior (Sec. 4), and AlphaFold's confidence scores have shown limitations (Sec. 4). References 6 and 7 are cited as showing AI tools primarily excel at memorization and interpolation."
    109       }
    110     },
    111     "claims_and_evidence": {
    112       "abstract_claims_supported": {
    113         "applies": true,
    114         "answer": true,
    115         "justification": "The abstract claims are appropriately hedged as perspective/opinion ('We believe', 'We suggest') and the paper provides a structured overview supporting these positions through its discussion of methods, applications, and desirables."
    116       },
    117       "causal_claims_justified": {
    118         "applies": false,
    119         "answer": false,
    120         "justification": "The paper makes no causal claims from empirical data; it offers perspective and reviews existing literature."
    121       },
    122       "generalization_bounded": {
    123         "applies": true,
    124         "answer": true,
    125         "justification": "The paper explicitly scopes itself: 'this Perspective will focus exclusively on molecular simulation driven computational chemistry' (Sec. 1). Scope boundaries are clearly stated throughout."
    126       },
    127       "alternative_explanations_discussed": {
    128         "applies": false,
    129         "answer": false,
    130         "justification": "This is a perspective/review paper with no original empirical results requiring alternative explanations."
    131       },
    132       "proxy_outcome_distinction": {
    133         "applies": false,
    134         "answer": false,
    135         "justification": "No measurements or proxies are used; this is a theoretical perspective paper."
    136       }
    137     },
    138     "setup_transparency": {
    139       "model_versions_specified": {
    140         "applies": false,
    141         "answer": false,
    142         "justification": "No models are run; this is a perspective paper reviewing the field."
    143       },
    144       "prompts_provided": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No prompting is used; this is a review paper."
    148       },
    149       "hyperparameters_reported": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "No experiments conducted; perspective paper."
    153       },
    154       "scaffolding_described": {
    155         "applies": false,
    156         "answer": false,
    157         "justification": "No agentic scaffolding used; perspective paper."
    158       },
    159       "data_preprocessing_documented": {
    160         "applies": false,
    161         "answer": false,
    162         "justification": "No data processing; this is a narrative perspective paper, not a systematic review with a search protocol."
    163       }
    164     },
    165     "limitations_and_scope": {
    166       "limitations_section_present": {
    167         "applies": true,
    168         "answer": true,
    169         "justification": "Section 5 'Critical assessment and outlook' serves as a limitations discussion, acknowledging 'significant obstacles remain before AI can fully integrate into the molecular simulation toolbox.'"
    170       },
    171       "threats_to_validity_specific": {
    172         "applies": true,
    173         "answer": true,
    174         "justification": "The paper identifies specific concerns: AI models primarily excel at memorization/interpolation (citing Refs 6, 7), AlphaFold's pLDDT scores have 'shown limitations in providing reliable assessments' (Sec. 4), and training on synthetic MD data risks 'deepfakes' and 'unreliable outcomes' (Sec. 3B)."
    175       },
    176       "scope_boundaries_stated": {
    177         "applies": true,
    178         "answer": true,
    179         "justification": "Explicitly stated: 'this Perspective will focus exclusively on molecular simulation driven computational chemistry' (Sec. 1). The paper also clearly scopes what remains to be desired from generative AI in Sec. 4."
    180       }
    181     },
    182     "data_integrity": {
    183       "raw_data_available": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "No original data; perspective paper."
    187       },
    188       "data_collection_described": {
    189         "applies": false,
    190         "answer": false,
    191         "justification": "No data collection; this is a narrative perspective, not a systematic review."
    192       },
    193       "recruitment_methods_described": {
    194         "applies": false,
    195         "answer": false,
    196         "justification": "No participants or sample recruitment; perspective paper."
    197       },
    198       "data_pipeline_documented": {
    199         "applies": false,
    200         "answer": false,
    201         "justification": "No data pipeline; perspective paper."
    202       }
    203     },
    204     "conflicts_of_interest": {
    205       "funding_disclosed": {
    206         "applies": true,
    207         "answer": true,
    208         "justification": "Acknowledgments section states: 'This work was supported by NIH/NIGMS under award number R35GM142719' and mentions UMD HPC and NSF ACCESS resources."
    209       },
    210       "affiliations_disclosed": {
    211         "applies": true,
    212         "answer": true,
    213         "justification": "Author affiliations are listed: University of Maryland Department of Chemistry and Biochemistry, Institute for Health Computing, Biophysics Program, Department of Physics."
    214       },
    215       "funder_independent_of_outcome": {
    216         "applies": true,
    217         "answer": true,
    218         "justification": "NIH/NIGMS and NSF are government funding agencies with no financial stake in the conclusions of this perspective paper."
    219       },
    220       "financial_interests_declared": {
    221         "applies": true,
    222         "answer": false,
    223         "justification": "No competing interests or financial disclosure statement is present in the paper."
    224       }
    225     },
    226     "contamination": {
    227       "training_cutoff_stated": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "No pre-trained model is evaluated on any benchmark; this is a perspective paper."
    231       },
    232       "train_test_overlap_discussed": {
    233         "applies": false,
    234         "answer": false,
    235         "justification": "No benchmark evaluation conducted."
    236       },
    237       "benchmark_contamination_addressed": {
    238         "applies": false,
    239         "answer": false,
    240         "justification": "No benchmark evaluation conducted."
    241       }
    242     },
    243     "human_studies": {
    244       "pre_registered": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants; perspective paper."
    248       },
    249       "irb_or_ethics_approval": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants."
    253       },
    254       "demographics_reported": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants."
    258       },
    259       "inclusion_exclusion_criteria": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants."
    263       },
    264       "randomization_described": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants."
    268       },
    269       "blinding_described": {
    270         "applies": false,
    271         "answer": false,
    272         "justification": "No human participants."
    273       },
    274       "attrition_reported": {
    275         "applies": false,
    276         "answer": false,
    277         "justification": "No human participants."
    278       }
    279     },
    280     "cost_and_practicality": {
    281       "inference_cost_reported": {
    282         "applies": false,
    283         "answer": false,
    284         "justification": "Perspective/review paper with no method to cost."
    285       },
    286       "compute_budget_stated": {
    287         "applies": false,
    288         "answer": false,
    289         "justification": "No computation performed; perspective paper."
    290       }
    291     }
    292   },
    293   "claims": [
    294     {
    295       "claim": "Current generative AI methods primarily excel at memorization and interpolation rather than predicting emergent phenomena.",
    296       "evidence": "Cites Schaeffer et al. (Ref 6) on emergent abilities of LLMs being a 'mirage' and Biroli & Mézard (Ref 7) on limitations of diffusion models in very large dimensions. Section 4 item 5 discusses how AI 'often struggles to produce novel physics or chemistry beyond its training data.'",
    297       "supported": "moderate"
    298     },
    299     {
    300       "claim": "Integrating statistical mechanics principles into AI models is necessary for predicting emergent chemical phenomena.",
    301       "evidence": "Argued throughout Sections 4 and 5. Specific examples given: MaxEnt RL (Sec. 2C), Thermodynamic Maps (Sec. 3C), AF2RAVE (Sec. 3B), physics-informed loss terms (Sec. 2A). However, this is primarily an opinion/perspective claim rather than an empirically demonstrated finding.",
    302       "supported": "weak"
    303     },
    304     {
    305       "claim": "AlphaFold2's confidence measures (pLDDT scores) have limitations in providing reliable assessments of predictions.",
    306       "evidence": "Section 4 item 2 cites Buel & Walters (Ref 79) showing AlphaFold2 struggles to predict effects of point mutations. The claim is supported by the cited reference.",
    307       "supported": "moderate"
    308     },
    309     {
    310       "claim": "GANs are gradually going out of fashion for chemical applications due to training instability, mode collapse, and data dependence.",
    311       "evidence": "Section 2B discusses these limitations and cites Dhariwal & Nichol (Ref 29) showing diffusion models beat GANs on image synthesis. The trend claim is a field observation without systematic evidence.",
    312       "supported": "moderate"
    313     }
    314   ],
    315   "red_flags": [
    316     {
    317       "flag": "Heavy self-citation",
    318       "detail": "Several of the highlighted 'selected applications' (AF2RAVE, Thermodynamic Maps, State Predictive Information Bottleneck, path sampling for LSTMs) are from the senior author's lab. While disclosed through authorship, the paper's framing as a field-wide perspective may overweight the importance of this specific research program relative to alternatives."
    319     },
    320     {
    321       "flag": "Non-systematic review methodology",
    322       "detail": "The paper is a narrative perspective with no systematic search strategy, no inclusion/exclusion criteria for cited work, and no quality assessment of reviewed methods. The 'selected applications' are curated without transparent selection criteria, which could introduce selection bias."
    323     }
    324   ],
    325   "cited_papers": [
    326     {
    327       "title": "Are emergent abilities of large language models a mirage?",
    328       "authors": ["R. Schaeffer", "B. Miranda", "S. Koyejo"],
    329       "year": 2024,
    330       "relevance": "Directly challenges the notion of emergent abilities in LLMs, relevant to evaluating AI capability claims."
    331     },
    332     {
    333       "title": "On the dangers of stochastic parrots: Can language models be too big?",
    334       "authors": ["E.M. Bender", "T. Gebru", "A. McMillan-Major", "S. Shmitchell"],
    335       "year": 2021,
    336       "relevance": "Foundational critique of large language models and their limitations, cited for LLM limitations in extrapolation."
    337     },
    338     {
    339       "title": "Attention is all you need",
    340       "authors": ["A. Vaswani"],
    341       "year": 2017,
    342       "arxiv_id": "1706.03762",
    343       "relevance": "Foundational transformer architecture paper underlying all LLM-based approaches discussed."
    344     },
    345     {
    346       "title": "Generative adversarial nets",
    347       "authors": ["I. Goodfellow"],
    348       "year": 2014,
    349       "relevance": "Foundational GAN paper; GANs are a major generative AI method evaluated for chemistry applications."
    350     },
    351     {
    352       "title": "Denoising diffusion probabilistic models",
    353       "authors": ["J. Ho", "A. Jain", "P. Abbeel"],
    354       "year": 2020,
    355       "relevance": "Core diffusion model paper; diffusion models are discussed extensively as alternatives to GANs for molecular generation."
    356     },
    357     {
    358       "title": "Highly accurate protein structure prediction with AlphaFold",
    359       "authors": ["J. Jumper"],
    360       "year": 2021,
    361       "relevance": "AlphaFold2 is a central case study in the paper for AI-driven structure prediction and its limitations."
    362     },
    363     {
    364       "title": "Accurate structure prediction of biomolecular interactions with AlphaFold 3",
    365       "authors": ["J. Abramson"],
    366       "year": 2024,
    367       "relevance": "AlphaFold3 represents the state-of-the-art in AI-driven biomolecular prediction discussed in the paper."
    368     },
    369     {
    370       "title": "Score-based generative modeling through stochastic differential equations",
    371       "authors": ["Y. Song"],
    372       "year": 2020,
    373       "arxiv_id": "2011.13456",
    374       "relevance": "Key score-based generative modeling paper underlying diffusion models discussed for chemistry applications."
    375     }
    376   ]
    377 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs