ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (21654B)


      1 {
      2   "paper": {
      3     "title": "Beyond Single-Agent Safety: A Taxonomy of Risks in LLM-to-LLM Interactions",
      4     "authors": [
      5       "P. Bisconti",
      6       "M. Galisai",
      7       "F. Pierucci",
      8       "M. Bracale",
      9       "M. Prandi"
     10     ],
     11     "year": 2025,
     12     "venue": "arXiv",
     13     "arxiv_id": "2512.02682",
     14     "doi": "10.48550/arXiv.2512.02682"
     15   },
     16   "checklist": {
     17     "artifacts": {
     18       "code_released": {
     19         "applies": true,
     20         "answer": false,
     21         "justification": "No code repository, URL, or software artifact is mentioned anywhere in the paper. The paper is purely conceptual but could have released taxonomy data or analysis scripts."
     22       },
     23       "data_released": {
     24         "applies": true,
     25         "answer": false,
     26         "justification": "No dataset or structured data artifact is released. The taxonomy tables are presented only within the paper text. A machine-readable version of the taxonomy could have been provided."
     27       },
     28       "environment_specified": {
     29         "applies": false,
     30         "answer": false,
     31         "justification": "This is a purely theoretical/conceptual paper with no computational experiments, so environment specifications are structurally inapplicable."
     32       },
     33       "reproduction_instructions": {
     34         "applies": false,
     35         "answer": false,
     36         "justification": "No experiments are conducted, so reproduction instructions are structurally inapplicable. The paper is a conceptual framework proposal."
     37       }
     38     },
     39     "statistical_methodology": {
     40       "confidence_intervals_or_error_bars": {
     41         "applies": false,
     42         "answer": false,
     43         "justification": "No experiments or quantitative results are reported. This is a theoretical framework paper with no empirical data."
     44       },
     45       "significance_tests": {
     46         "applies": false,
     47         "answer": false,
     48         "justification": "No comparative claims based on empirical data are made. The paper proposes a conceptual taxonomy, not experimental results."
     49       },
     50       "effect_sizes_reported": {
     51         "applies": false,
     52         "answer": false,
     53         "justification": "No empirical measurements or effect sizes are relevant to a purely theoretical taxonomy paper."
     54       },
     55       "sample_size_justified": {
     56         "applies": false,
     57         "answer": false,
     58         "justification": "No samples or data collection of any kind. This is a theoretical paper."
     59       },
     60       "variance_reported": {
     61         "applies": false,
     62         "answer": false,
     63         "justification": "No experimental runs or quantitative measurements. Purely conceptual work."
     64       }
     65     },
     66     "evaluation_design": {
     67       "baselines_included": {
     68         "applies": true,
     69         "answer": true,
     70         "justification": "The paper explicitly positions its contribution against two prior works: MAST (Multi-Agent System failure Taxonomy, [3]) and MARAI (Multi-Agent Risks from Advanced AI, [6]). Section 1.2 describes how the paper extends and unifies these approaches."
     71       },
     72       "baselines_contemporary": {
     73         "applies": true,
     74         "answer": true,
     75         "justification": "Both comparison frameworks are from 2025: MAST (Cemri et al., 2025) and MARAI (Hammond et al., 2025), which are contemporary works."
     76       },
     77       "ablation_study": {
     78         "applies": false,
     79         "answer": false,
     80         "justification": "The paper proposes a conceptual taxonomy, not a system with components to ablate."
     81       },
     82       "multiple_metrics": {
     83         "applies": false,
     84         "answer": false,
     85         "justification": "No evaluation metrics are used. The paper proposes metrics (contagion velocity, misalignment diffusion, intent-opacity rate, goal drift) but does not measure them."
     86       },
     87       "human_evaluation": {
     88         "applies": false,
     89         "answer": false,
     90         "justification": "No system outputs are produced that could be evaluated by humans. This is a conceptual framework paper."
     91       },
     92       "held_out_test_set": {
     93         "applies": false,
     94         "answer": false,
     95         "justification": "No data or benchmarks are used. Purely theoretical work."
     96       },
     97       "per_category_breakdown": {
     98         "applies": true,
     99         "answer": true,
    100         "justification": "The taxonomy itself provides a structured breakdown across three levels (micro, meso, macro) with detailed risk types at each level (Tables 1-3), each with descriptions and observable effects."
    101       },
    102       "failure_cases_discussed": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "The entire paper is about failure modes. Tables 1-3 enumerate specific failure cases (semantic drift, prompt infection, false consensus, collusion, polarization, etc.) with descriptions and observable effects."
    106       },
    107       "negative_results_reported": {
    108         "applies": false,
    109         "answer": false,
    110         "justification": "No experiments are conducted, so there are no positive or negative experimental results to report."
    111       }
    112     },
    113     "claims_and_evidence": {
    114       "abstract_claims_supported": {
    115         "applies": true,
    116         "answer": true,
    117         "justification": "The abstract claims three contributions: (i) a theoretical account of collective risk, (ii) a taxonomy connecting micro/meso/macro failure modes, and (iii) a design proposal for Institutional AI. All three are delivered in Sections 2-4 respectively, though they are conceptual rather than empirically validated."
    118       },
    119       "causal_claims_justified": {
    120         "applies": true,
    121         "answer": false,
    122         "justification": "The paper makes implicit causal claims throughout, e.g., 'local compliance can aggregate into collective failure' (abstract), 'feedback loops that can magnify subtle deviations' (Section 1.1.1). These causal mechanisms are asserted based on analogy to multi-agent systems literature and a few cited empirical examples, but no causal evidence is presented for LLM-to-LLM settings specifically. The paper acknowledges this: 'experimental validation will follow' (Section 3.1)."
    123       },
    124       "generalization_bounded": {
    125         "applies": true,
    126         "answer": true,
    127         "justification": "The paper is careful to bound its claims. Section 3.2 states 'This taxonomy is conceptual and preliminary. Its function is not to claim empirical closure but to establish a common language.' Section 3.1 describes its dimensions as 'a working basis rather than an exhaustive set.' Section 4.3 explicitly positions the work as a 'starting point' requiring validation."
    128       },
    129       "alternative_explanations_discussed": {
    130         "applies": true,
    131         "answer": false,
    132         "justification": "The paper does not consider alternative frameworks or explanations for the phenomena it describes. It draws on multi-agent systems literature but does not discuss whether existing game theory, systems engineering, or distributed computing frameworks might already address these risks without the new Emergent Systemic Risk Horizon (ESRH) concept. No alternative explanations for the cited empirical observations (e.g., collusion in auctions) are considered."
    133       }
    134     },
    135     "setup_transparency": {
    136       "model_versions_specified": {
    137         "applies": false,
    138         "answer": false,
    139         "justification": "No models are used in experiments. The paper is purely conceptual."
    140       },
    141       "prompts_provided": {
    142         "applies": false,
    143         "answer": false,
    144         "justification": "No prompting is done. This is a theoretical framework paper."
    145       },
    146       "hyperparameters_reported": {
    147         "applies": false,
    148         "answer": false,
    149         "justification": "No experiments are conducted, so no hyperparameters are relevant."
    150       },
    151       "scaffolding_described": {
    152         "applies": false,
    153         "answer": false,
    154         "justification": "No agentic scaffolding is implemented. The paper discusses scaffolding conceptually but does not build or test any system."
    155       },
    156       "data_preprocessing_documented": {
    157         "applies": false,
    158         "answer": false,
    159         "justification": "No data is collected or processed. This is a purely theoretical paper."
    160       }
    161     },
    162     "limitations_and_scope": {
    163       "limitations_section_present": {
    164         "applies": true,
    165         "answer": false,
    166         "justification": "There is no dedicated limitations or threats-to-validity section. The paper repeatedly notes that 'experimental validation will follow' (Sections 3.1, 4.3, 5) but does not consolidate limitations into a dedicated section."
    167       },
    168       "threats_to_validity_specific": {
    169         "applies": true,
    170         "answer": false,
    171         "justification": "No specific threats to the validity of the proposed taxonomy are discussed. The paper does not address whether the three-tier structure might miss important risk categories, whether the dimensions are sufficient, or whether the proposed metrics would actually capture the phenomena."
    172       },
    173       "scope_boundaries_stated": {
    174         "applies": true,
    175         "answer": true,
    176         "justification": "The paper explicitly states scope boundaries in multiple places. Section 3.2: 'This taxonomy is conceptual and preliminary.' Section 3.1: 'They are intended as a working basis rather than an exhaustive set, and additional dimensions may be introduced as experiments advance.' Section 4.2: mechanisms are 'deliberately abstract: they describe the functions required for internal governance, not yet their concrete implementation.' Section 5: 'This work is a starting point.'"
    177       }
    178     },
    179     "data_integrity": {
    180       "raw_data_available": {
    181         "applies": false,
    182         "answer": false,
    183         "justification": "No data is collected. This is a purely theoretical paper proposing a conceptual framework."
    184       },
    185       "data_collection_described": {
    186         "applies": false,
    187         "answer": false,
    188         "justification": "No data collection takes place. The paper is a theoretical framework proposal."
    189       },
    190       "recruitment_methods_described": {
    191         "applies": false,
    192         "answer": false,
    193         "justification": "No participants or data samples are recruited. Purely conceptual work."
    194       },
    195       "data_pipeline_documented": {
    196         "applies": false,
    197         "answer": false,
    198         "justification": "No data pipeline exists. The paper presents a conceptual taxonomy without empirical data."
    199       }
    200     },
    201     "conflicts_of_interest": {
    202       "funding_disclosed": {
    203         "applies": true,
    204         "answer": false,
    205         "justification": "No funding information is mentioned anywhere in the paper. There is no acknowledgments section disclosing grants or funding sources."
    206       },
    207       "affiliations_disclosed": {
    208         "applies": true,
    209         "answer": true,
    210         "justification": "Author affiliations are clearly listed: DEXAI – Icaro Lab, Sapienza University of Rome, and Sant'Anna School of Advanced Studies. The paper does not evaluate any product from these organizations."
    211       },
    212       "funder_independent_of_outcome": {
    213         "applies": true,
    214         "answer": false,
    215         "justification": "Funding is not disclosed at all. Without knowing the funding source, independence cannot be assessed. The absence of a funding disclosure is itself a gap."
    216       },
    217       "financial_interests_declared": {
    218         "applies": true,
    219         "answer": false,
    220         "justification": "No competing interests or financial interests statement is present in the paper."
    221       }
    222     },
    223     "contamination": {
    224       "training_cutoff_stated": {
    225         "applies": false,
    226         "answer": false,
    227         "justification": "No pre-trained model is evaluated on any benchmark. The paper is a theoretical framework proposal."
    228       },
    229       "train_test_overlap_discussed": {
    230         "applies": false,
    231         "answer": false,
    232         "justification": "No models are evaluated on benchmarks, so train/test overlap is structurally inapplicable."
    233       },
    234       "benchmark_contamination_addressed": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "No benchmarks are used. Purely conceptual work."
    238       }
    239     },
    240     "human_studies": {
    241       "pre_registered": {
    242         "applies": false,
    243         "answer": false,
    244         "justification": "No human participants are involved. This is a theoretical framework paper."
    245       },
    246       "irb_or_ethics_approval": {
    247         "applies": false,
    248         "answer": false,
    249         "justification": "No human participants are involved."
    250       },
    251       "demographics_reported": {
    252         "applies": false,
    253         "answer": false,
    254         "justification": "No human participants are involved."
    255       },
    256       "inclusion_exclusion_criteria": {
    257         "applies": false,
    258         "answer": false,
    259         "justification": "No human participants are involved."
    260       },
    261       "randomization_described": {
    262         "applies": false,
    263         "answer": false,
    264         "justification": "No human participants are involved."
    265       },
    266       "blinding_described": {
    267         "applies": false,
    268         "answer": false,
    269         "justification": "No human participants are involved."
    270       },
    271       "attrition_reported": {
    272         "applies": false,
    273         "answer": false,
    274         "justification": "No human participants are involved."
    275       }
    276     },
    277     "cost_and_practicality": {
    278       "inference_cost_reported": {
    279         "applies": false,
    280         "answer": false,
    281         "justification": "No method is implemented or run. This is a purely theoretical/conceptual paper, so cost reporting is inapplicable."
    282       },
    283       "compute_budget_stated": {
    284         "applies": false,
    285         "answer": false,
    286         "justification": "No computation is performed. Purely conceptual work."
    287       }
    288     }
    289   },
    290   "claims": [
    291     {
    292       "claim": "Safety mechanisms designed for single-agent human-model interaction do not scale to LLM-to-LLM environments where outputs are recursively reused as inputs.",
    293       "evidence": "Section 1.1.1 cites the JAILJUDGE benchmark, which shows that multi-agent evaluation alters jailbreak success rates [8], and market-simulation experiments in which independently aligned agents spontaneously coordinate to supracompetitive equilibria [1,5]. However, these are third-party citations, not original experiments.",
    294       "supported": "moderate"
    295     },
    296     {
    297       "claim": "The ESRH framework formalizes how instability arises from interaction structure rather than isolated misbehavior, through three dimensions: interaction topology, cognitive opacity, and objective divergence.",
    298       "evidence": "Section 3.1 defines these three dimensions with proposed metrics (contagion velocity, misalignment diffusion, intent-opacity rate, goal drift). The framework is conceptually presented but not empirically validated. Section 3.2 explicitly acknowledges the taxonomy is 'conceptual and preliminary.'",
    299       "supported": "weak"
    300     },
    301     {
    302       "claim": "A three-tier taxonomy (micro, meso, macro) captures the progression from localized deviations to system-wide pathologies in multi-agent LLM systems.",
    303       "evidence": "Tables 1-3 present the taxonomy with 16 risk types across three levels, each with descriptions and observable effects. The taxonomy synthesizes MAST [3] and MARAI [6] frameworks. No empirical evidence is provided that these tiers actually correspond to distinct failure regimes.",
    304       "supported": "weak"
    305     },
    306     {
    307       "claim": "Institutional AI—embedding adaptive oversight, peer evaluation, and functional differentiation within multi-agent systems—can manage emergent collective risks.",
    308       "evidence": "Section 4 proposes the concept at an abstract level, describing three mechanisms (adaptive collective policy, peer evaluation and weighting, functional differentiation). No implementation, simulation, or evidence is provided. Section 4.3 states 'Later work will examine how these can be instantiated.'",
    309       "supported": "unsupported"
    310     }
    311   ],
    312   "methodology_tags": [
    313     "theoretical"
    314   ],
    315   "key_findings": "This paper argues that single-agent safety mechanisms (prompt engineering, RLHF, output moderation) are insufficient for multi-agent LLM ecosystems where feedback loops, imitation, and emergent coordination create systemic risks. It introduces the Emergent Systemic Risk Horizon (ESRH) framework with three dimensions (interaction topology, cognitive opacity, objective divergence) and a 16-risk-type taxonomy across micro, meso, and macro levels. It proposes Institutional AI as a governance architecture embedding adaptive oversight within multi-agent systems. The entire contribution is conceptual—no experiments, simulations, or empirical validation are presented.",
    316   "red_flags": [
    317     {
    318       "flag": "No empirical validation",
    319       "detail": "The paper proposes a taxonomy and framework but provides zero empirical evidence. All claimed risk types and mechanisms are asserted based on analogy and citation of third-party work, not demonstrated in any controlled setting. The paper repeatedly defers validation to later work (e.g., 'experimental validation will follow')."
    320     },
    321     {
    322       "flag": "Proposed metrics are undefined operationally",
    323       "detail": "The paper introduces metrics like 'contagion velocity', 'misalignment diffusion', 'intent-opacity rate', and 'goal drift' but provides no formal definitions, measurement procedures, or demonstrations of how they would be computed in practice."
    324     },
    325     {
    326       "flag": "No limitations section",
    327       "detail": "The paper acknowledges in passing that the taxonomy is 'conceptual and preliminary' (Section 3.2), but there is no dedicated discussion of limitations, alternative frameworks, or potential weaknesses of the proposed taxonomy. The paper does not consider whether existing systems engineering or distributed computing frameworks already address these risks."
    328     },
    329     {
    330       "flag": "Claims outrun evidence for Institutional AI",
    331       "detail": "Section 4 proposes 'Institutional AI' as a governance solution but provides only abstract mechanism descriptions with no implementation, formal analysis, or evidence of feasibility. The proposal is presented as a contribution but remains entirely speculative."
    332     }
    333   ],
    334   "cited_papers": [
    335     {
    336       "title": "Evaluating LLM agent collusion in double-auctions",
    337       "authors": ["K. Agrawal", "V. Teo", "J.J. Vazquez", "S. Kunnavakkam", "V. Srikanth", "A. Liu"],
    338       "year": 2025,
    339       "arxiv_id": "2507.01413",
    340       "relevance": "Empirical study of emergent collusion behavior in LLM agents, directly relevant to multi-agent safety risks."
    341     },
    342     {
    343       "title": "Why Do Multi-Agent LLM Systems Fail?",
    344       "authors": ["M. Cemri", "M.Z. Pan", "S. Yang", "L.A. Agrawal", "B. Chopra", "R. Tiwari"],
    345       "year": 2025,
    346       "arxiv_id": "2503.13657",
    347       "relevance": "MAST taxonomy of multi-agent system failures, directly relevant to understanding agentic AI reliability."
    348     },
    349     {
    350       "title": "AgentVerse: Facilitating Multi-Agent Collaboration and Exploring Emergent Behaviors",
    351       "authors": ["W. Chen", "Y. Su", "J. Zuo", "C. Yang", "C. Yuan", "C. Chan"],
    352       "year": 2024,
    353       "arxiv_id": "2308.10848",
    354       "relevance": "Multi-agent collaboration framework demonstrating emergent behaviors in LLM collectives."
    355     },
    356     {
    357       "title": "Algorithmic collusion by large language models",
    358       "authors": ["S. Fish", "Y.A. Gonczarowski", "R.I. Shorrer"],
    359       "year": 2024,
    360       "arxiv_id": "2404.00806",
    361       "relevance": "Empirical evidence of LLM agents spontaneously forming collusive strategies in market settings."
    362     },
    363     {
    364       "title": "Multi-Agent Risks from Advanced AI",
    365       "authors": ["L. Hammond", "A. Chan", "J. Clifton"],
    366       "year": 2025,
    367       "arxiv_id": "2502.14143",
    368       "relevance": "MARAI framework for multi-agent risks, a key comparison point for taxonomies of agentic AI safety."
    369     },
    370     {
    371       "title": "CAMEL: Communicative agents for 'mind' exploration of large language model society",
    372       "authors": ["G. Li", "H. Hammoud", "H. Itani", "D. Khizbullin", "B. Ghanem"],
    373       "year": 2023,
    374       "relevance": "Foundational multi-agent LLM communication framework relevant to understanding agentic collaboration patterns."
    375     },
    376     {
    377       "title": "JAILJUDGE: A Comprehensive Jailbreak Judge Benchmark with Multi-Agent Enhanced Explanation Evaluation Framework",
    378       "authors": ["F. Liu", "Y. Feng", "X. Zhao", "L. Su", "X. Ma", "D. Yin", "H. Liu"],
    379       "year": 2024,
    380       "arxiv_id": "2410.12855",
    381       "relevance": "Demonstrates that multi-agent evaluation alters jailbreak success rates, relevant to AI safety in multi-agent settings."
    382     },
    383     {
    384       "title": "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
    385       "authors": ["Q. Wu", "G. Bansal", "J. Zhang", "Y. Wu", "B. Li", "E. Zhu"],
    386       "year": 2024,
    387       "relevance": "Major multi-agent LLM orchestration framework relevant to understanding agentic programming architectures."
    388     },
    389     {
    390       "title": "SWE-agent: Agent-Computer Interfaces Enable Automated Software Engineering",
    391       "authors": ["J. Yang", "C. Jimenez", "A. Wettig", "K. Lieret", "S. Yao", "K. Narasimhan", "O. Press"],
    392       "year": 2024,
    393       "relevance": "Agentic software engineering system with continuous learning loops, relevant to AI-augmented development evaluation."
    394     },
    395     {
    396       "title": "Voyager: An open-ended embodied agent with large language models",
    397       "authors": ["G. Wang", "Y. Xie", "Y. Jiang", "A. Mandlekar", "C. Xiao", "Y. Zhu", "L. Fan", "A. Anandkumar"],
    398       "year": 2023,
    399       "arxiv_id": "2305.16291",
    400       "relevance": "Open-ended LLM agent demonstrating continuous learning and task generation in interactive environments."
    401     }
    402   ]
    403 }

Impressum · Datenschutz