scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (19089B)
      1 {
      2   "paper": {
      3     "title": "The rise and potential opportunities of large language model agents in bioinformatics and biomedicine",
      4     "authors": ["Tiantian Yang", "Yihang Xiao", "Zhijie Bao", "Jianye Hao", "Jiajie Peng"],
      5     "year": 2025,
      6     "venue": "Briefings in Bioinformatics",
      7     "doi": "10.1093/bib/bbaf601"
      8   },
      9   "scan_version": 2,
     10   "active_modules": ["survey_methodology"],
     11   "methodology_tags": ["meta-analysis"],
     12   "key_findings": "This review surveys LLM agent applications across bioinformatics and biomedicine, covering drug discovery, protein engineering, clinical decision support, medical education, and multi-omics analysis. It catalogs ~60 agent systems in a structured table with functions and limitations. The paper identifies key challenges including data privacy, model hallucination, interpretability, knowledge update timeliness, and ethical/legal risks. No original experiments or systematic quality assessment of reviewed papers is performed.",
     13   "checklist": {
     14     "artifacts": {
     15       "code_released": {
     16         "applies": true,
     17         "answer": false,
     18         "justification": "No code or analysis scripts are released. The paper states 'Data sharing is not applicable to this article as no new data were created or analyzed.'"
     19       },
     20       "data_released": {
     21         "applies": true,
     22         "answer": false,
     23         "justification": "No dataset of reviewed papers, search results, or extracted data is released. A survey can release its corpus and extraction tables but this one did not."
     24       },
     25       "environment_specified": {
     26         "applies": false,
     27         "answer": false,
     28         "justification": "This is a narrative review with no computational experiments requiring an environment specification."
     29       },
     30       "reproduction_instructions": {
     31         "applies": true,
     32         "answer": false,
     33         "justification": "No instructions are provided for reproducing the literature search or paper selection process."
     34       }
     35     },
     36     "statistical_methodology": {
     37       "confidence_intervals_or_error_bars": {
     38         "applies": false,
     39         "answer": false,
     40         "justification": "This is a narrative review that does not run experiments or perform statistical aggregation."
     41       },
     42       "significance_tests": {
     43         "applies": false,
     44         "answer": false,
     45         "justification": "No experiments or meta-analytic aggregation is performed."
     46       },
     47       "effect_sizes_reported": {
     48         "applies": false,
     49         "answer": false,
     50         "justification": "No experiments conducted. The paper cites effect sizes from reviewed papers but does not compute its own."
     51       },
     52       "sample_size_justified": {
     53         "applies": false,
     54         "answer": false,
     55         "justification": "No statistical sampling performed; this is a narrative review."
     56       },
     57       "variance_reported": {
     58         "applies": false,
     59         "answer": false,
     60         "justification": "No experiments run; no variance to report."
     61       }
     62     },
     63     "evaluation_design": {
     64       "baselines_included": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "The paper does not compare its coverage or methodology against prior surveys of LLM agents in biomedicine. It mentions that 'reviews have discussed LLMs in medicine' but does not systematically compare against them."
     68       },
     69       "baselines_contemporary": {
     70         "applies": false,
     71         "answer": false,
     72         "justification": "No baselines are included, so contemporaneity is not assessable."
     73       },
     74       "ablation_study": {
     75         "applies": false,
     76         "answer": false,
     77         "justification": "Not applicable to a survey paper."
     78       },
     79       "multiple_metrics": {
     80         "applies": false,
     81         "answer": false,
     82         "justification": "No evaluation metrics are used; this is a narrative review."
     83       },
     84       "human_evaluation": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "Not applicable; no system outputs to evaluate."
     88       },
     89       "held_out_test_set": {
     90         "applies": false,
     91         "answer": false,
     92         "justification": "Not applicable to a survey."
     93       },
     94       "per_category_breakdown": {
     95         "applies": true,
     96         "answer": true,
     97         "justification": "Table 1 provides a per-domain breakdown of agent systems across drug discovery, medical diagnosis, and multi-omic analysis, listing function and limitations for each."
     98       },
     99       "failure_cases_discussed": {
    100         "applies": true,
    101         "answer": true,
    102         "justification": "The paper discusses limitations of each reviewed system in Table 1 and in the Challenges section, including hallucination, data privacy issues, and architectural limitations."
    103       },
    104       "negative_results_reported": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "The Challenges section discusses significant unsolved problems: hallucination, privacy risks, knowledge staleness, architectural limitations, and ethical/legal risks."
    108       }
    109     },
    110     "claims_and_evidence": {
    111       "abstract_claims_supported": {
    112         "applies": true,
    113         "answer": true,
    114         "justification": "The abstract claims the paper reviews technical foundations, applications, and challenges of LLM agents in bioinformatics/biomedicine. The paper body covers all these topics across its sections."
    115       },
    116       "causal_claims_justified": {
    117         "applies": false,
    118         "answer": false,
    119         "justification": "The paper is a narrative review that does not make causal claims of its own. It reports claims from reviewed papers."
    120       },
    121       "generalization_bounded": {
    122         "applies": true,
    123         "answer": false,
    124         "justification": "The paper makes broad claims about LLM agents being 'transformative' and 'essential partners in research and healthcare' without bounding these to the specific systems reviewed. The conclusion states agents are 'set to become essential partners' which overgeneralizes from the reviewed work."
    125       },
    126       "alternative_explanations_discussed": {
    127         "applies": false,
    128         "answer": false,
    129         "justification": "This is a narrative survey/taxonomy with no empirical results of its own requiring alternative explanations."
    130       },
    131       "proxy_outcome_distinction": {
    132         "applies": false,
    133         "answer": false,
    134         "justification": "Theoretical/survey paper with no measurements of its own."
    135       }
    136     },
    137     "setup_transparency": {
    138       "model_versions_specified": {
    139         "applies": false,
    140         "answer": false,
    141         "justification": "No models are used in this survey."
    142       },
    143       "prompts_provided": {
    144         "applies": false,
    145         "answer": false,
    146         "justification": "No prompting is used."
    147       },
    148       "hyperparameters_reported": {
    149         "applies": false,
    150         "answer": false,
    151         "justification": "No experiments conducted."
    152       },
    153       "scaffolding_described": {
    154         "applies": false,
    155         "answer": false,
    156         "justification": "No agentic scaffolding used by the authors."
    157       },
    158       "data_preprocessing_documented": {
    159         "applies": true,
    160         "answer": false,
    161         "justification": "The paper does not describe its literature search strategy, databases queried, search terms, date ranges, or filtering criteria. There is no description of how papers were selected for inclusion in the review."
    162       }
    163     },
    164     "limitations_and_scope": {
    165       "limitations_section_present": {
    166         "applies": true,
    167         "answer": true,
    168         "justification": "The 'Challenges and future directions' section devotes substantial discussion to challenges including data privacy, hallucination, interpretability, knowledge update timeliness, architectural limitations, and ethical/legal risks."
    169       },
    170       "threats_to_validity_specific": {
    171         "applies": true,
    172         "answer": false,
    173         "justification": "The challenges discussed are about the technology being reviewed, not threats to the validity of this survey itself. No discussion of selection bias in paper inclusion, potential gaps in coverage, or limitations of the review methodology."
    174       },
    175       "scope_boundaries_stated": {
    176         "applies": true,
    177         "answer": true,
    178         "justification": "The introduction explicitly states the scope: LLM agents in bioinformatics and biomedicine, with sections covering specific application domains. The paper distinguishes itself from prior reviews that covered 'LLMs in medicine' but not agent applications."
    179       }
    180     },
    181     "data_integrity": {
    182       "raw_data_available": {
    183         "applies": true,
    184         "answer": false,
    185         "justification": "No list of papers reviewed, search results, or extracted data tables beyond the summary in Table 1 are available for verification."
    186       },
    187       "data_collection_described": {
    188         "applies": true,
    189         "answer": false,
    190         "justification": "No description of how the literature was collected — no databases searched, no search queries, no date ranges, no inclusion/exclusion criteria for the review."
    191       },
    192       "recruitment_methods_described": {
    193         "applies": false,
    194         "answer": false,
    195         "justification": "Marked not applicable because there are no human participants; the data source is published literature rather than recruited subjects. The paper selection process should still be described, but that gap is covered under data_collection_described."
    196       },
    197       "data_pipeline_documented": {
    198         "applies": true,
    199         "answer": false,
    200         "justification": "No pipeline from initial search to final included papers is documented. The reader cannot determine how papers were found or why certain ones were included."
    201       }
    202     },
    203     "conflicts_of_interest": {
    204       "funding_disclosed": {
    205         "applies": true,
    206         "answer": true,
    207         "justification": "Funding section states: 'This paper was supported by the National Natural Science Foundation of China (grant nos. 92370106 and 62072376).'"
    208       },
    209       "affiliations_disclosed": {
    210         "applies": true,
    211         "answer": true,
    212         "justification": "All author affiliations are listed: Northwestern Polytechnical University, Fudan University, Tianjin University."
    213       },
    214       "funder_independent_of_outcome": {
    215         "applies": true,
    216         "answer": true,
    217         "justification": "NSFC is a government funding agency with no commercial stake in LLM agent outcomes."
    218       },
    219       "financial_interests_declared": {
    220         "applies": true,
    221         "answer": true,
    222         "justification": "The paper includes 'Conflict of interest: None declared.'"
    223       }
    224     },
    225     "contamination": {
    226       "training_cutoff_stated": {
    227         "applies": false,
    228         "answer": false,
    229         "justification": "This is a survey paper that does not evaluate any pre-trained model on a benchmark."
    230       },
    231       "train_test_overlap_discussed": {
    232         "applies": false,
    233         "answer": false,
    234         "justification": "No model evaluation is performed."
    235       },
    236       "benchmark_contamination_addressed": {
    237         "applies": false,
    238         "answer": false,
    239         "justification": "No benchmark evaluation is performed."
    240       }
    241     },
    242     "human_studies": {
    243       "pre_registered": {
    244         "applies": false,
    245         "answer": false,
    246         "justification": "No human participants in this survey."
    247       },
    248       "irb_or_ethics_approval": {
    249         "applies": false,
    250         "answer": false,
    251         "justification": "No human participants."
    252       },
    253       "demographics_reported": {
    254         "applies": false,
    255         "answer": false,
    256         "justification": "No human participants."
    257       },
    258       "inclusion_exclusion_criteria": {
    259         "applies": false,
    260         "answer": false,
    261         "justification": "No human participants."
    262       },
    263       "randomization_described": {
    264         "applies": false,
    265         "answer": false,
    266         "justification": "No human participants."
    267       },
    268       "blinding_described": {
    269         "applies": false,
    270         "answer": false,
    271         "justification": "No human participants."
    272       },
    273       "attrition_reported": {
    274         "applies": false,
    275         "answer": false,
    276         "justification": "No human participants."
    277       }
    278     },
    279     "cost_and_practicality": {
    280       "inference_cost_reported": {
    281         "applies": false,
    282         "answer": false,
    283         "justification": "Survey paper with no computational method of its own."
    284       },
    285       "compute_budget_stated": {
    286         "applies": false,
    287         "answer": false,
    288         "justification": "Survey paper with no computation."
    289       }
    290     },
    291     "survey_methodology": {
    292       "prisma_or_structured_protocol": {
    293         "applies": true,
    294         "answer": false,
    295         "justification": "No PRISMA diagram, no registered protocol, no structured search strategy with reproducible queries. The paper is a narrative review with no documented systematic methodology."
    296       },
    297       "quality_assessment_of_sources": {
    298         "applies": true,
    299         "answer": false,
    300         "justification": "Table 1 lists limitations of individual systems but there is no quality scoring rubric or risk-of-bias assessment applied to the reviewed papers. All papers are treated equally regardless of methodological rigor."
    301       },
    302       "publication_bias_discussed": {
    303         "applies": true,
    304         "answer": false,
    305         "justification": "No discussion of publication bias, no acknowledgment that reviewed papers may skew toward positive results, no funnel plots or related analysis."
    306       }
    307     }
    308   },
    309   "claims": [
    310     {
    311       "claim": "LLM agents have demonstrated remarkable potential in bioinformatics and biomedicine across drug discovery, clinical diagnosis, and multi-omics analysis",
    312       "evidence": "The paper catalogs ~60 agent systems in Table 1 with described functions across drug discovery, medical diagnosis, and multi-omic analysis domains",
    313       "supported": "moderate"
    314     },
    315     {
    316       "claim": "Multi-agent collaboration frameworks outperform single-agent systems in complex biomedical tasks",
    317       "evidence": "Cited examples include MEDAGENTS raising zero-shot reasoning accuracy by 17% on MedQA, BioMaster cutting task completion from 48 to 6 hours, and MEDCO improving diagnostic accuracy by 23%",
    318       "supported": "weak"
    319     },
    320     {
    321       "claim": "LLM agents are evolving from task-specific tools into 'AI scientists' that partner with researchers",
    322       "evidence": "No original evidence provided; this is a narrative framing claim supported by citing individual system capabilities from other papers",
    323       "supported": "unsupported"
    324     }
    325   ],
    326   "red_flags": [
    327     {
    328       "flag": "No systematic review methodology",
    329       "detail": "This is presented as a comprehensive review but uses no systematic methodology — no search strategy, no inclusion/exclusion criteria, no PRISMA flow. The reader cannot assess coverage completeness or selection bias."
    330     },
    331     {
    332       "flag": "No quality assessment of reviewed papers",
    333       "detail": "The survey treats all reviewed agent systems equally regardless of validation quality. Systems with only preprint descriptions and no experimental validation are listed alongside peer-reviewed work with empirical results."
    334     },
    335     {
    336       "flag": "Uncritical claims laundering",
    337       "detail": "The paper aggregates performance claims from individual papers (e.g., '23% improvement', '17% accuracy increase') without independently verifying these claims or assessing their methodological soundness. This launders potentially weak results into a narrative of transformative progress."
    338     },
    339     {
    340       "flag": "Promotional tone",
    341       "detail": "The conclusion describes LLM agents as 'transformative', 'essential partners', and promising to 'usher in a new era' — language that goes well beyond what a narrative review of ~60 systems with acknowledged serious challenges can support."
    342     }
    343   ],
    344   "cited_papers": [
    345     {
    346       "title": "The rise and potential of large language model based agents: a survey",
    347       "authors": ["Z. Xi", "W. Chen", "X. Guo"],
    348       "year": 2025,
    349       "arxiv_id": "2309.07864",
    350       "relevance": "Comprehensive survey of LLM-based agents covering architecture, capabilities, and applications."
    351     },
    352     {
    353       "title": "Large language model based multi-agents: a survey of progress and challenges",
    354       "authors": ["T. Guo", "X. Chen", "Y. Wang"],
    355       "year": 2024,
    356       "arxiv_id": "2402.01680",
    357       "relevance": "Survey of multi-agent LLM systems covering collaboration patterns and challenges."
    358     },
    359     {
    360       "title": "Empowering biomedical discovery with AI agents",
    361       "authors": ["S. Gao", "A. Fang", "Y. Huang"],
    362       "year": 2024,
    363       "doi": "10.1016/j.cell.2024.09.022",
    364       "relevance": "Cell paper on AI agents for biomedical discovery, framing agents as 'AI scientists'."
    365     },
    366     {
    367       "title": "Augmenting large language models with chemistry tools",
    368       "authors": ["A. M. Bran", "S. Cox", "O. Schilter"],
    369       "year": 2024,
    370       "doi": "10.1038/s42256-024-00832-8",
    371       "relevance": "ChemCrow system integrating 18 expert tools with GPT-4 for autonomous chemical synthesis planning."
    372     },
    373     {
    374       "title": "The virtual lab of AI agents designs new SARS-CoV-2 nanobodies",
    375       "authors": ["K. Swanson", "W. Wu", "N. L. Bulaong"],
    376       "year": 2025,
    377       "doi": "10.1038/s41586-025-09442-9",
    378       "relevance": "Nature paper demonstrating multi-agent collaboration for nanobody design with experimental validation."
    379     },
    380     {
    381       "title": "Toward expert-level medical question answering with large language models",
    382       "authors": ["K. Singhal", "T. Tu", "J. Gottweis"],
    383       "year": 2025,
    384       "doi": "10.1038/s41591-024-03423-7",
    385       "relevance": "Med-PaLM2 achieving 86.5% on MedQA, benchmark for medical LLM evaluation."
    386     },
    387     {
    388       "title": "MDAgents: an adaptive collaboration of LLMs for medical decision-making",
    389       "authors": ["Y. Kim", "C. Park", "H. Jeong"],
    390       "year": 2024,
    391       "relevance": "Multi-agent framework for clinical decision-making with multi-modal reasoning."
    392     },
    393     {
    394       "title": "CellAgent: an LLM-driven multi-agent framework for automated single-cell data analysis",
    395       "authors": ["Y. Xiao", "J. Liu", "Y. Zheng"],
    396       "year": 2024,
    397       "arxiv_id": "2407.09811",
    398       "relevance": "Multi-agent system automating single-cell RNA-seq analysis pipelines."
    399     },
    400     {
    401       "title": "Reflexion: language agents with verbal reinforcement learning",
    402       "authors": ["N. Shinn", "B. Labash", "A. Gopinath"],
    403       "year": 2023,
    404       "relevance": "Foundational framework for agent self-reflection and iterative improvement."
    405     },
    406     {
    407       "title": "Navigating the risks: a survey of security, privacy, and ethics threats in LLM-based agents",
    408       "authors": ["Y. Gan", "Y. Yang", "Z. Ma"],
    409       "year": 2024,
    410       "arxiv_id": "2411.09523",
    411       "relevance": "Survey of security and privacy threats in LLM agents relevant to safety assessment."
    412     }
    413   ]
    414 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs