ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (17137B)


      1 {
      2   "paper": {
      3     "title": "Insured Agents: A Decentralized Trust Insurance Mechanism for Agentic Economy",
      4     "authors": ["Botao 'Amber' Hu", "Bangdao Chen"],
      5     "year": 2025,
      6     "venue": "AAMAS 2026",
      7     "arxiv_id": "2512.08737",
      8     "doi": "10.48550/arXiv.2512.08737"
      9   },
     10   "scan_version": 2,
     11   "active_modules": [],
     12   "methodology_tags": ["theoretical"],
     13   "key_findings": "The paper proposes 'insured agents' as a protocol-native trust mechanism for open agent economies, where specialized insurer agents post slashable stake on behalf of operational agents and receive privileged audit access via TEEs. A game-theoretic model shows a subgame-perfect equilibrium exists where agents act honestly, insurers pay valid claims, and users escalate only valid disputes, under three conditions (access to justice, solvency, deterrence). The paper is a design proposal with a research agenda rather than an empirical contribution.",
     14   "checklist": {
     15     "artifacts": {
     16       "code_released": {
     17         "applies": true,
     18         "answer": false,
     19         "justification": "No code repository or implementation is mentioned. The paper is a mechanism design proposal without implementation."
     20       },
     21       "data_released": {
     22         "applies": false,
     23         "answer": false,
     24         "justification": "Purely theoretical paper with no data collection or experiments."
     25       },
     26       "environment_specified": {
     27         "applies": false,
     28         "answer": false,
     29         "justification": "No computational experiments are run; purely theoretical analysis."
     30       },
     31       "reproduction_instructions": {
     32         "applies": false,
     33         "answer": false,
     34         "justification": "No experiments to reproduce. The theorem proof is self-contained in the paper."
     35       }
     36     },
     37     "statistical_methodology": {
     38       "confidence_intervals_or_error_bars": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "No empirical results are reported; the paper is purely theoretical."
     42       },
     43       "significance_tests": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No empirical comparisons are made."
     47       },
     48       "effect_sizes_reported": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No empirical results."
     52       },
     53       "sample_size_justified": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No empirical data collection."
     57       },
     58       "variance_reported": {
     59         "applies": false,
     60         "answer": false,
     61         "justification": "No experimental runs."
     62       }
     63     },
     64     "evaluation_design": {
     65       "baselines_included": {
     66         "applies": true,
     67         "answer": true,
     68         "justification": "Section 2 discusses alternative trust mechanisms (escrow, reputation, slashing) as baselines and contrasts the insured-agent approach against them."
     69       },
     70       "baselines_contemporary": {
     71         "applies": true,
     72         "answer": true,
     73         "justification": "Compares against recent mechanisms including ERC-8004 (2025), A2A protocol, MCP, and agent-at-stake proposals — all contemporary."
     74       },
     75       "ablation_study": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "Theoretical mechanism with no system components to ablate."
     79       },
     80       "multiple_metrics": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "No empirical evaluation with metrics."
     84       },
     85       "human_evaluation": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "No system outputs to evaluate."
     89       },
     90       "held_out_test_set": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "No data or benchmarks used."
     94       },
     95       "per_category_breakdown": {
     96         "applies": false,
     97         "answer": false,
     98         "justification": "No empirical results to break down."
     99       },
    100       "failure_cases_discussed": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "Section 4 discusses failure modes including moral hazard, adverse selection, insurer-agent collusion, sybil strategies, TEE vulnerabilities, and privacy-accountability tradeoffs."
    104       },
    105       "negative_results_reported": {
    106         "applies": true,
    107         "answer": true,
    108         "justification": "Section 4 openly discusses limitations: TEE side-channel vulnerabilities, classic insurance pathologies (moral hazard, adverse selection), and the difficulty of defining 'misbehavior' for heterogeneous tasks."
    109       }
    110     },
    111     "claims_and_evidence": {
    112       "abstract_claims_supported": {
    113         "applies": true,
    114         "answer": true,
    115         "justification": "The abstract claims the mechanism 'calibrates stake through pricing, decentralizes verification via competitive underwriting, and yields incentive-compatible dispute resolution.' These are supported by the game-theoretic model (Theorem 3.1) and mechanism description in Section 3."
    116       },
    117       "causal_claims_justified": {
    118         "applies": false,
    119         "answer": false,
    120         "justification": "The paper makes no empirical causal claims. The game-theoretic results are derived deductively from stated assumptions, not inferred causally from data."
    121       },
    122       "generalization_bounded": {
    123         "applies": true,
    124         "answer": true,
    125         "justification": "The paper explicitly frames the contribution as 'a design pattern rather than a fully specified protocol' (Section 3) and the theorem relies on stated assumptions (risk-neutrality, deterministic oracle, etc.)."
    126       },
    127       "alternative_explanations_discussed": {
    128         "applies": false,
    129         "answer": false,
    130         "justification": "No empirical results to explain. The paper presents a theoretical mechanism."
    131       },
    132       "proxy_outcome_distinction": {
    133         "applies": false,
    134         "answer": false,
    135         "justification": "No measurements or proxies; purely theoretical paper."
    136       }
    137     },
    138     "setup_transparency": {
    139       "model_versions_specified": {
    140         "applies": false,
    141         "answer": false,
    142         "justification": "No LLM models are used in experiments."
    143       },
    144       "prompts_provided": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No prompting is used."
    148       },
    149       "hyperparameters_reported": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "No experiments with hyperparameters."
    153       },
    154       "scaffolding_described": {
    155         "applies": false,
    156         "answer": false,
    157         "justification": "No agentic scaffolding used in experiments."
    158       },
    159       "data_preprocessing_documented": {
    160         "applies": false,
    161         "answer": false,
    162         "justification": "No data preprocessing; purely theoretical."
    163       }
    164     },
    165     "limitations_and_scope": {
    166       "limitations_section_present": {
    167         "applies": true,
    168         "answer": true,
    169         "justification": "Section 4 ('Discussion: Open Challenges and Research Agenda') serves as a substantive limitations section, covering six categories of open challenges."
    170       },
    171       "threats_to_validity_specific": {
    172         "applies": true,
    173         "answer": true,
    174         "justification": "Section 4 discusses specific threats: moral hazard and adverse selection in insurance markets, TEE side-channel vulnerabilities, the difficulty of defining 'misbehavior' for heterogeneous agentic tasks, and insurer-agent collusion risks."
    175       },
    176       "scope_boundaries_stated": {
    177         "applies": true,
    178         "answer": true,
    179         "justification": "The paper explicitly states it introduces 'a minimal incentive analysis and a research agenda' (Section 1), frames the work as 'a design pattern rather than a fully specified protocol' (Section 3), and acknowledges the theorem relies on strong assumptions (risk-neutrality, deterministic oracle)."
    180       }
    181     },
    182     "data_integrity": {
    183       "raw_data_available": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "No data collected; purely theoretical."
    187       },
    188       "data_collection_described": {
    189         "applies": false,
    190         "answer": false,
    191         "justification": "No data collection."
    192       },
    193       "recruitment_methods_described": {
    194         "applies": false,
    195         "answer": false,
    196         "justification": "No human participants were recruited and no data was collected."
    197       },
    198       "data_pipeline_documented": {
    199         "applies": false,
    200         "answer": false,
    201         "justification": "No data pipeline."
    202       }
    203     },
    204     "conflicts_of_interest": {
    205       "funding_disclosed": {
    206         "applies": true,
    207         "answer": false,
    208         "justification": "No funding or acknowledgments section is present in the paper."
    209       },
    210       "affiliations_disclosed": {
    211         "applies": true,
    212         "answer": true,
    213         "justification": "Author affiliations are listed: University of Oxford and University College Oxford Blockchain Research Center."
    214       },
    215       "funder_independent_of_outcome": {
    216         "applies": true,
    217         "answer": false,
    218         "justification": "No funding disclosed, so independence cannot be assessed. The 'Blockchain Research Center' affiliation could represent a conflict given the paper proposes blockchain-adjacent mechanisms, but this is not discussed."
    219       },
    220       "financial_interests_declared": {
    221         "applies": true,
    222         "answer": false,
    223         "justification": "No competing interests statement is present. Given the blockchain/crypto-economic nature of the proposal, financial interests would be relevant to disclose."
    224       }
    225     },
    226     "contamination": {
    227       "training_cutoff_stated": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "No pre-trained model is evaluated on any benchmark."
    231       },
    232       "train_test_overlap_discussed": {
    233         "applies": false,
    234         "answer": false,
    235         "justification": "No model evaluation."
    236       },
    237       "benchmark_contamination_addressed": {
    238         "applies": false,
    239         "answer": false,
    240         "justification": "No benchmark evaluation."
    241       }
    242     },
    243     "human_studies": {
    244       "pre_registered": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants."
    248       },
    249       "irb_or_ethics_approval": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants."
    253       },
    254       "demographics_reported": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants."
    258       },
    259       "inclusion_exclusion_criteria": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants."
    263       },
    264       "randomization_described": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants."
    268       },
    269       "blinding_described": {
    270         "applies": false,
    271         "answer": false,
    272         "justification": "No human participants."
    273       },
    274       "attrition_reported": {
    275         "applies": false,
    276         "answer": false,
    277         "justification": "No human participants."
    278       }
    279     },
    280     "cost_and_practicality": {
    281       "inference_cost_reported": {
    282         "applies": false,
    283         "answer": false,
    284         "justification": "Purely theoretical paper."
    285       },
    286       "compute_budget_stated": {
    287         "applies": false,
    288         "answer": false,
    289         "justification": "Purely theoretical paper."
    290       }
    291     }
    292   },
    293   "claims": [
    294     {
    295       "claim": "Under three conditions (access to justice, solvency, deterrence), there exists a subgame-perfect equilibrium where agents act honestly, insurers pay valid claims, and users escalate only valid disputes.",
    296       "evidence": "Theorem 3.1 with backward-induction proof in Section 3.6.",
    297       "supported": "strong"
    298     },
    299     {
    300       "claim": "The insured-agent mechanism creates an 'optimistic' structure where the costly verifier is rarely invoked in equilibrium.",
    301       "evidence": "Follows from Theorem 3.1: in equilibrium, insurers accept valid claims and agents behave honestly, so escalation to verifier V is off the equilibrium path (Section 3.6).",
    302       "supported": "strong"
    303     },
    304     {
    305       "claim": "Hierarchical insurance supports specialization and composable trust without requiring agents to build standalone reputation.",
    306       "evidence": "Section 3.4 describes the hierarchical structure with domain-specific insurers (Safety Insurer, Financial Insurer, Master Insurer) but provides no formal analysis or empirical evidence for this specific claim.",
    307       "supported": "weak"
    308     },
    309     {
    310       "claim": "Privacy-preserving verification is achieved through voluntary privileged audit access via TEEs.",
    311       "evidence": "Section 3.3 describes the concept but acknowledges TEE vulnerabilities (Section 4) and provides no formal privacy guarantee.",
    312       "supported": "weak"
    313     }
    314   ],
    315   "red_flags": [
    316     {
    317       "flag": "Strong assumptions in game-theoretic model",
    318       "detail": "The theorem assumes risk-neutral players, a deterministic infallible oracle verifier V, and common knowledge of the mechanism. These assumptions are acknowledged but are far from realistic — real verifiers make errors, agents are not purely rational, and information asymmetries are the norm."
    319     },
    320     {
    321       "flag": "No empirical validation or simulation",
    322       "detail": "The paper proposes a complex economic mechanism but provides no simulation, agent-based modeling, or empirical evidence. The equilibrium result holds only under idealized conditions; without simulation, it is unclear how the mechanism behaves under realistic conditions (bounded rationality, noisy verifiers, repeated interactions)."
    323     },
    324     {
    325       "flag": "Potential undisclosed conflicts of interest",
    326       "detail": "Authors are affiliated with a 'Blockchain Research Center' and propose a blockchain-adjacent mechanism (slashable stakes, protocol registries). No funding or competing interests statement is provided."
    327     }
    328   ],
    329   "cited_papers": [
    330     {
    331       "title": "Why Do Multi-Agent LLM Systems Fail?",
    332       "authors": ["Mert Cemri", "Melissa Z. Pan", "Shuyi Yang"],
    333       "year": 2025,
    334       "arxiv_id": "2503.13657",
    335       "relevance": "Empirical analysis of failure modes in multi-agent LLM systems, directly relevant to AI agent reliability."
    336     },
    337     {
    338       "title": "LLM Agents Are Hypersensitive to Nudges",
    339       "authors": ["Manuel Cherep", "Pattie Maes", "Nikhil Singh"],
    340       "year": 2025,
    341       "arxiv_id": "2505.11584",
    342       "relevance": "Demonstrates vulnerability of LLM agents to behavioral manipulation through nudging."
    343     },
    344     {
    345       "title": "Sleeper Agents: Training Deceptive LLMs That Persist Through Safety Training",
    346       "authors": ["Evan Hubinger"],
    347       "year": 2024,
    348       "arxiv_id": "2401.05566",
    349       "relevance": "Shows deceptive behaviors can persist through safety training, relevant to AI safety and trust."
    350     },
    351     {
    352       "title": "Model Context Protocol (MCP): Landscape, Security Threats, and Future Research Directions",
    353       "authors": ["Xinyi Hou", "Yanjie Zhao", "Shenao Wang", "Haoyu Wang"],
    354       "year": 2025,
    355       "arxiv_id": "2503.23278",
    356       "relevance": "Security analysis of MCP, a key agent communication protocol."
    357     },
    358     {
    359       "title": "Prompt Injection Attack against LLM-integrated Applications",
    360       "authors": ["Yi Liu", "Gelei Deng", "Yuekang Li"],
    361       "year": 2024,
    362       "arxiv_id": "2306.05499",
    363       "relevance": "Foundational work on prompt injection attacks relevant to agentic AI security."
    364     },
    365     {
    366       "title": "Prompt Infection: LLM-to-LLM Prompt Injection within Multi-Agent Systems",
    367       "authors": ["Donghyun Lee", "Mo Tiwari"],
    368       "year": 2024,
    369       "arxiv_id": "2410.07283",
    370       "relevance": "Demonstrates prompt injection propagation across multi-agent systems."
    371     },
    372     {
    373       "title": "Hallucination Is Inevitable: An Innate Limitation of Large Language Models",
    374       "authors": ["Ziwei Xu", "Sanjay Jain", "Mohan Kankanhalli"],
    375       "year": 2025,
    376       "arxiv_id": "2401.11817",
    377       "relevance": "Theoretical argument that LLM hallucination is unavoidable, motivating trust mechanisms."
    378     },
    379     {
    380       "title": "Safe Systems with Unsafe Agents: Challenges and Opportunities",
    381       "authors": ["Jeremy Bellay", "J. Timothy Balint", "Stephen A. Boxwell", "Jeffrey Geppert"],
    382       "year": 2025,
    383       "relevance": "AAMAS paper on designing safe multi-agent systems despite unsafe individual agents."
    384     },
    385     {
    386       "title": "Universal and Transferable Adversarial Attacks on Aligned Language Models",
    387       "authors": ["Andy Zou", "Zifan Wang", "Nicholas Carlini"],
    388       "year": 2023,
    389       "arxiv_id": "2307.15043",
    390       "relevance": "Demonstrates universal adversarial attacks on LLMs, relevant to agent security."
    391     },
    392     {
    393       "title": "Agentic Web: Weaving the Next Web with AI Agents",
    394       "authors": ["Yingxuan Yang", "Mulei Ma"],
    395       "year": 2025,
    396       "arxiv_id": "2507.21206",
    397       "relevance": "Vision paper for the agentic web paradigm that this paper builds upon."
    398     }
    399   ]
    400 }

Impressum · Datenschutz