ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (24728B)


      1 {
      2   "paper": {
      3     "title": "SoK: Trust-Authorization Mismatch in LLM Agent Interactions",
      4     "authors": [
      5       "Guanquan Shi",
      6       "Haohua Du",
      7       "Zhiqiang Wang",
      8       "Xiaoyu Liang",
      9       "Weiwenpei Liu",
     10       "Song Bian",
     11       "Zhenyu Guan"
     12     ],
     13     "year": 2025,
     14     "venue": "arXiv",
     15     "arxiv_id": "2512.06914",
     16     "doi": "10.48550/arXiv.2512.06914"
     17   },
     18   "scan_version": 2,
     19   "active_modules": ["survey_methodology"],
     20   "methodology_tags": ["meta-analysis", "theoretical"],
     21   "key_findings": "The paper identifies a fundamental 'Trust-Authorization Mismatch' in LLM agent systems where static permissions are decoupled from runtime trustworthiness. It proposes the Belief-Intention-Permission (B-I-P) framework decomposing agent execution into Belief Formation, Intent Generation, and Permission Grant stages, with a Safety Margin Ratio (SMR) invariant. Surveying 81 papers (from 248 candidates), the authors find defenses are heavily skewed toward early stages (Belief/Intent) while neglecting the authorization bottleneck. Two case studies demonstrate the framework's analytical utility versus static permission baselines.",
     22   "checklist": {
     23     "artifacts": {
     24       "code_released": {
     25         "applies": true,
     26         "answer": true,
     27         "justification": "The paper provides anonymous artifact repositories at https://anonymous.4open.science/r/Paper-76DE/ and https://anonymous.4open.science/r/sok-76C8/ containing the dataset, CSV with bibliographic metadata, and access links."
     28       },
     29       "data_released": {
     30         "applies": true,
     31         "answer": true,
     32         "justification": "The paper states 'we have made our complete dataset and artifacts publicly available' including a 'comprehensive CSV file containing the raw bibliographic information and extracted metadata' and literature access links (Sec. 2.5 and Sec. 8)."
     33       },
     34       "environment_specified": {
     35         "applies": true,
     36         "answer": false,
     37         "justification": "No environment specifications, dependency files, or software requirements are mentioned. The artifacts are data-only (CSV, links)."
     38       },
     39       "reproduction_instructions": {
     40         "applies": true,
     41         "answer": false,
     42         "justification": "No step-by-step reproduction instructions are provided. The artifacts are released but there is no README or procedure described for reproducing the systematization or coding results."
     43       }
     44     },
     45     "statistical_methodology": {
     46       "confidence_intervals_or_error_bars": {
     47         "applies": false,
     48         "answer": false,
     49         "justification": "This is a survey/SoK paper that does not run quantitative experiments. Paper counts and distributions are reported but no statistical estimation is performed."
     50       },
     51       "significance_tests": {
     52         "applies": false,
     53         "answer": false,
     54         "justification": "No statistical hypothesis testing is conducted; the paper is a systematization of knowledge, not an experimental study."
     55       },
     56       "effect_sizes_reported": {
     57         "applies": false,
     58         "answer": false,
     59         "justification": "No effect sizes are relevant; the paper categorizes and maps existing literature rather than measuring effects."
     60       },
     61       "sample_size_justified": {
     62         "applies": false,
     63         "answer": false,
     64         "justification": "No quantitative experiments are conducted that would require sample size justification."
     65       },
     66       "variance_reported": {
     67         "applies": false,
     68         "answer": false,
     69         "justification": "No experimental runs are performed; variance reporting is not applicable to a survey paper."
     70       }
     71     },
     72     "evaluation_design": {
     73       "baselines_included": {
     74         "applies": true,
     75         "answer": true,
     76         "justification": "The paper compares the B-I-P framework against a 'Static Permission Baseline' (SPB) in both case studies (Sec. 5.1.1, 5.2.1), representing the prevailing access control paradigm."
     77       },
     78       "baselines_contemporary": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "The static permission baseline represents current Android/iOS access control paradigms, which are the contemporary state of practice the paper argues against."
     82       },
     83       "ablation_study": {
     84         "applies": false,
     85         "answer": false,
     86         "justification": "The B-I-P framework is a conceptual analytical lens, not a system with separable components to ablate."
     87       },
     88       "multiple_metrics": {
     89         "applies": false,
     90         "answer": false,
     91         "justification": "The paper does not run quantitative evaluations with metrics; the case studies are qualitative analyses using the B-I-P framework."
     92       },
     93       "human_evaluation": {
     94         "applies": false,
     95         "answer": false,
     96         "justification": "Human evaluation is not relevant to this systematization of knowledge paper."
     97       },
     98       "held_out_test_set": {
     99         "applies": false,
    100         "answer": false,
    101         "justification": "No quantitative evaluation is performed that would require train/test splits."
    102       },
    103       "per_category_breakdown": {
    104         "applies": true,
    105         "answer": true,
    106         "justification": "Fig. 4 provides a distribution of surveyed papers across B-I-P framework stages (A-S1 through A-S3, D-S1 through D-S3) with counts per category. Table 1 maps literature to trust dimensions."
    107       },
    108       "failure_cases_discussed": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "Section 6 ('Opportunities and Fundamental Challenges') discusses where current defenses fail, identifying that defenses are skewed toward early stages and neglect authorization (Sec. 6.3). Sec. 2.4 proves formal failure of static and semi-static permission models."
    112       },
    113       "negative_results_reported": {
    114         "applies": true,
    115         "answer": true,
    116         "justification": "The paper reports negative findings: current defenses fail to bridge the trust-authorization mismatch, defenses are skewed toward Belief/Intent stages while neglecting Authorization (Sec. 6), and semi-static permission models fundamentally reduce to the Halting Problem."
    117       }
    118     },
    119     "claims_and_evidence": {
    120       "abstract_claims_supported": {
    121         "applies": true,
    122         "answer": true,
    123         "justification": "The abstract claims: (1) surveying 200+ papers — the methodology section describes 248 candidates yielding 81 included; (2) B-I-P framework — formalized in Sec. 3; (3) gap analysis showing defenses skewed toward early stages — supported by Fig. 4 distribution. The 'more than 200 representative papers' claim in the abstract is somewhat inflated given that only 81 papers are included in the final corpus, but 'more than 200' may include the broader initial set."
    124       },
    125       "causal_claims_justified": {
    126         "applies": true,
    127         "answer": false,
    128         "justification": "The paper makes causal claims such as 'diverse threats...share a common root cause: the desynchronization between dynamic trust states and static authorization boundaries.' This is an analytical argument, not empirically tested. The B-I-P case studies claim the framework would prevent attacks, but these are hypothetical walk-throughs, not empirical demonstrations."
    129       },
    130       "generalization_bounded": {
    131         "applies": true,
    132         "answer": false,
    133         "justification": "The paper's scope statement (Sec. 2.5) bounds to 'runtime interaction security,' but claims like 'permission management is the most practical approach for verifiable constraints' (Sec. 8) and the B-I-P framework are presented as general solutions without bounding to the specific 81 papers analyzed or the specific protocols studied."
    134       },
    135       "alternative_explanations_discussed": {
    136         "applies": true,
    137         "answer": false,
    138         "justification": "The paper presents the trust-authorization mismatch as the root cause of agent security failures without considering alternative framings (e.g., that the problem might be better modeled as a capability control problem, an alignment problem, or an information flow problem that doesn't require the B-I-P decomposition)."
    139       },
    140       "proxy_outcome_distinction": {
    141         "applies": true,
    142         "answer": false,
    143         "justification": "The paper proposes the Safety Margin Ratio (SMR) as a quantifiable security metric but assigns specific numeric values in case studies (e.g., tprov=0.1, treturn=0.2) without discussing how these numbers would be obtained in practice or whether the proxy calculations reflect actual security. The gap between the formal metric and operational security is not acknowledged."
    144       }
    145     },
    146     "setup_transparency": {
    147       "model_versions_specified": {
    148         "applies": false,
    149         "answer": false,
    150         "justification": "The paper does not use any LLM or AI model in its own methodology; it surveys papers about agent security."
    151       },
    152       "prompts_provided": {
    153         "applies": false,
    154         "answer": false,
    155         "justification": "No prompting is used in the paper's methodology."
    156       },
    157       "hyperparameters_reported": {
    158         "applies": false,
    159         "answer": false,
    160         "justification": "No experiments requiring hyperparameters are conducted."
    161       },
    162       "scaffolding_described": {
    163         "applies": false,
    164         "answer": false,
    165         "justification": "No agentic scaffolding is used in the paper's methodology."
    166       },
    167       "data_preprocessing_documented": {
    168         "applies": true,
    169         "answer": true,
    170         "justification": "Sec. 2.5 and Appendix A describe the search strategy, inclusion/exclusion criteria, de-duplication process, PRISMA-style flow, and coding scheme. The filtering criteria at each stage are stated, although the reported counts differ between sections (248 → 184 unique → 81 included in Sec. 2.5 versus 279 → 211 → 102 in Appendix A)."
    171       }
    172     },
    173     "limitations_and_scope": {
    174       "limitations_section_present": {
    175         "applies": true,
    176         "answer": true,
    177         "justification": "Section 8 (Conclusion & Discussion) contains a dedicated 'Limitations' paragraph discussing grey literature reliance, time-sensitivity of ecosystem counts, and the framework's intended scope."
    178       },
    179       "threats_to_validity_specific": {
    180         "applies": true,
    181         "answer": true,
    182         "justification": "Appendix A.3 discusses specific threats: venue bias (security incidents in grey literature), temporal drift (ecosystem evolves quickly), and selection bias (mitigated via snowballing). These are specific to this study."
    183       },
    184       "scope_boundaries_stated": {
    185         "applies": true,
    186         "answer": true,
    187         "justification": "Section 2.5 explicitly states scope: 'We study runtime interaction security of LLM-based agents' and explicitly excludes 'Training-time poisoning/model theft...unless they directly mediate runtime decisions.' The threat model assumptions are also stated."
    188       }
    189     },
    190     "data_integrity": {
    191       "raw_data_available": {
    192         "applies": true,
    193         "answer": true,
    194         "justification": "The paper releases the complete CSV with raw bibliographic data, metadata, and access links at an anonymous repository, enabling independent verification of the corpus."
    195       },
    196       "data_collection_described": {
    197         "applies": true,
    198         "answer": true,
    199         "justification": "Appendix A.3 describes the search strategy: Google Scholar with specific keywords, venues searched (IEEE S&P, USENIX, CCS, NeurIPS, etc.), time window (Jan 2023-Nov 2025), and complementary snowballing."
    200       },
    201       "recruitment_methods_described": {
    202         "applies": false,
    203         "answer": false,
    204         "justification": "No human participants; the 'sample' is a corpus of academic papers, and data source description is covered under data_collection_described."
    205       },
    206       "data_pipeline_documented": {
    207         "applies": true,
    208         "answer": true,
    209         "justification": "Appendix A.3 documents the pipeline: 279 retrieved → automated de-duplication → 211 unique → title/abstract screening by two reviewers → full-text screening → 102 included → supplemented with 104 from prior surveys. Counts at each stage are provided."
    210       }
    211     },
    212     "conflicts_of_interest": {
    213       "funding_disclosed": {
    214         "applies": true,
    215         "answer": false,
    216         "justification": "No funding source or acknowledgments section is present in the paper."
    217       },
    218       "affiliations_disclosed": {
    219         "applies": true,
    220         "answer": true,
    221         "justification": "Author affiliations are listed (BUAA, USTC) with email addresses on the first page."
    222       },
    223       "funder_independent_of_outcome": {
    224         "applies": true,
    225         "answer": false,
    226         "justification": "No funding is disclosed, so independence cannot be assessed."
    227       },
    228       "financial_interests_declared": {
    229         "applies": true,
    230         "answer": false,
    231         "justification": "No competing interests or financial disclosure statement is present in the paper."
    232       }
    233     },
    234     "contamination": {
    235       "training_cutoff_stated": {
    236         "applies": false,
    237         "answer": false,
    238         "justification": "The paper does not evaluate any pre-trained model on benchmarks; it is a survey/SoK paper."
    239       },
    240       "train_test_overlap_discussed": {
    241         "applies": false,
    242         "answer": false,
    243         "justification": "No model evaluation is performed; contamination concerns do not apply to a survey."
    244       },
    245       "benchmark_contamination_addressed": {
    246         "applies": false,
    247         "answer": false,
    248         "justification": "No benchmark evaluation is conducted in this survey paper."
    249       }
    250     },
    251     "human_studies": {
    252       "pre_registered": {
    253         "applies": false,
    254         "answer": false,
    255         "justification": "No human participants in this survey paper."
    256       },
    257       "irb_or_ethics_approval": {
    258         "applies": false,
    259         "answer": false,
    260         "justification": "No human participants. The paper's ethics section confirms: 'This work does not involve human subjects at any stage.'"
    261       },
    262       "demographics_reported": {
    263         "applies": false,
    264         "answer": false,
    265         "justification": "No human participants in this study."
    266       },
    267       "inclusion_exclusion_criteria": {
    268         "applies": false,
    269         "answer": false,
    270         "justification": "No human participants. Paper inclusion/exclusion criteria are covered under data_integrity."
    271       },
    272       "randomization_described": {
    273         "applies": false,
    274         "answer": false,
    275         "justification": "No human participants."
    276       },
    277       "blinding_described": {
    278         "applies": false,
    279         "answer": false,
    280         "justification": "No human participants."
    281       },
    282       "attrition_reported": {
    283         "applies": false,
    284         "answer": false,
    285         "justification": "No human participants."
    286       }
    287     },
    288     "cost_and_practicality": {
    289       "inference_cost_reported": {
    290         "applies": false,
    291         "answer": false,
    292         "justification": "This is a survey/theoretical paper; no system is deployed or evaluated."
    293       },
    294       "compute_budget_stated": {
    295         "applies": false,
    296         "answer": false,
    297         "justification": "This is a survey/theoretical paper with no computational experiments."
    298       }
    299     },
    300     "survey_methodology": {
    301       "prisma_or_structured_protocol": {
    302         "applies": true,
    303         "answer": true,
    304         "justification": "Appendix A describes a PRISMA-style flow (279→211→102), reproducible search strings, venue coverage, inclusion/exclusion criteria, and coding scheme. The paper explicitly mentions 'PRISMA-style flowchart' in Sec. 2.5."
    305       },
    306       "quality_assessment_of_sources": {
    307         "applies": true,
    308         "answer": false,
    309         "justification": "The paper categorizes sources by B-I-P stage and records 'evidence level' in the coding scheme, but does not apply a quality scoring rubric or risk-of-bias assessment to included papers. All papers are treated as equally valid within their categories."
    310       },
    311       "publication_bias_discussed": {
    312         "applies": true,
    313         "answer": false,
    314         "justification": "No discussion of publication bias, funnel plots, or whether the surveyed literature skews toward successful attacks/defenses. The threats to validity (Appendix A.3) mention venue bias and selection bias but not publication bias specifically."
    315       }
    316     }
    317   },
    318   "claims": [
    319     {
    320       "claim": "Verifying safety properties of LLM agents under static permission sets is undecidable (Theorem 1).",
    321       "evidence": "Formal proof by reduction from the Halting Problem in Sec. 2.4.1. Constructs agent AM,w that executes DELETE_ROOT iff Turing machine M halts on input w.",
    322       "supported": "moderate"
    323     },
    324     {
    325       "claim": "Semi-static (context-aware) permission models also fail because they reduce to the Halting Problem for a specific input.",
    326       "evidence": "Argument in Sec. 2.4.2 that predicting multi-step action sequences requires simulating the full execution trajectory. Cites Rice's Theorem for the instruction-as-data problem.",
    327       "supported": "moderate"
    328     },
    329     {
    330       "claim": "Current defenses are skewed toward early B-I-P stages (Belief/Intent) while neglecting Authorization.",
    331       "evidence": "Fig. 4 shows paper distribution across stages. Table 1 shows 'N/A (Addressed by Policy)' for all three Stage III defense cells, indicating no concrete defense mechanisms exist for the permission stage.",
    332       "supported": "strong"
    333     },
    334     {
    335       "claim": "The B-I-P framework would have prevented the Doubao AI Phone account-freezing incident.",
    336       "evidence": "Case Study I (Sec. 5.1) walks through the scenario, computing SMR=0.2 < θsafe=1.2, which would have blocked the high-frequency clicks. However, this is a hypothetical analysis, not an empirical test.",
    337       "supported": "weak"
    338     },
    339     {
    340       "claim": "The B-I-P framework would neutralize visual indirect prompt injection attacks.",
    341       "evidence": "Case Study II (Sec. 5.2) computes SMR=0.027 < θcritical=1.5. Again hypothetical — the numeric values (tprov=0.1, TI=0.3) are assigned by the authors without empirical calibration.",
    342       "supported": "weak"
    343     },
    344     {
    345       "claim": "The paper surveyed more than 200 representative papers.",
    346       "evidence": "Abstract says 'more than 200 representative papers.' Sec. 2.5 reports 248 candidates, 184 after de-duplication, 81 included in final corpus. Appendix A.3 mentions supplementing with 104 from prior surveys to reach the consolidated set.",
    347       "supported": "moderate"
    348     }
    349   ],
    350   "red_flags": [
    351     {
    352       "flag": "Inflated survey count",
    353       "detail": "The abstract claims 'more than 200 representative papers' but the systematic search yielded only 81 included papers (from 248 candidates). The count reaches 200+ only by adding 104 papers from prior surveys and domain knowledge, which undermines the systematization methodology — these additions bypass the stated inclusion/exclusion criteria."
    354     },
    355     {
    356       "flag": "Hypothetical case studies presented as validation",
    357       "detail": "Both case studies (Sec. 5) are hypothetical walk-throughs where the authors assign numeric trust/risk values without empirical calibration. The SMR calculations (0.2, 0.027) use author-chosen parameters, making the 'validation' circular — the framework works because the authors chose values that make it work."
    358     },
    359     {
    360       "flag": "Underdefined formalism",
    361       "detail": "The B-I-P framework's core metrics (tprov, tconf, tcons, ttool, treturn, rconf, rrev, rscope) are defined conceptually but lack operational definitions for computation. The SMR equation T(B,I)/R(P) ≥ θ requires numeric values that the paper does not show how to derive from real systems."
    362     },
    363     {
    364       "flag": "No quality assessment of surveyed papers",
    365       "detail": "The SoK maps papers to B-I-P stages and records evidence levels but does not assess the methodological quality of included studies. This means weak demonstrations and rigorous empirical work are treated equally in the systematization."
    366     },
    367     {
    368       "flag": "Inconsistent corpus numbers",
    369       "detail": "Multiple conflicting corpus sizes appear: '248 candidates,' '184 unique items,' '81 papers in final corpus,' and '104 papers from prior surveys,' alongside 'more than 200 representative papers' and 'over 200 papers' in the abstract/conclusion. The appendix reports '279 retrieved records,' then '211 de-duplicated,' then '102 included.' These numbers do not reconcile across sections."
    370     }
    371   ],
    372   "cited_papers": [
    373     {
    374       "title": "Large multimodal agents: A survey",
    375       "authors": ["Junlin Xie", "Zhihong Chen", "Ruifei Zhang", "Xiang Wan", "Guanbin Li"],
    376       "year": 2024,
    377       "arxiv_id": "2402.15116",
    378       "relevance": "Survey of LLM-based multimodal agents, directly relevant to understanding agent capabilities and architectures."
    379     },
    380     {
    381       "title": "InjecAgent: Benchmarking indirect prompt injections in tool-integrated large language model agents",
    382       "authors": ["Qiusi Zhan", "Zhixiang Liang", "Zifan Ying", "Daniel Kang"],
    383       "year": 2024,
    384       "arxiv_id": "2403.02691",
    385       "relevance": "Benchmark for indirect prompt injection in tool-using agents, key evaluation resource for agent security."
    386     },
    387     {
    388       "title": "AgentDojo: A dynamic environment to evaluate prompt injection attacks and defenses for LLM agents",
    389       "authors": ["Edoardo Debenedetti", "Jie Zhang", "Mislav Balunovic"],
    390       "year": 2024,
    391       "relevance": "Dynamic evaluation environment for agent security, directly relevant to benchmarking agent defenses."
    392     },
    393     {
    394       "title": "AI agents under threat: A survey of key security challenges and future pathways",
    395       "authors": ["Zehang Deng", "Yongjian Guo", "Changzhou Han"],
    396       "year": 2025,
    397       "relevance": "Comprehensive survey of AI agent security challenges, primary related work for this SoK."
    398     },
    399     {
    400       "title": "Trustworthy LLMs: a survey and guideline for evaluating large language models' alignment",
    401       "authors": ["Yang Liu", "Yuanshun Yao", "Jean-Francois Ton"],
    402       "year": 2023,
    403       "arxiv_id": "2308.05374",
    404       "relevance": "Survey on LLM trustworthiness and alignment evaluation, foundational for the trust dimension of B-I-P."
    405     },
    406     {
    407       "title": "Exploiting programmatic behavior of LLMs: Dual-use through standard security attacks",
    408       "authors": ["Daniel Kang", "Xuechen Li", "Ion Stoica"],
    409       "year": 2024,
    410       "relevance": "Demonstrates Turing completeness of LLM agent loops, key theoretical foundation for the undecidability argument."
    411     },
    412     {
    413       "title": "MCP-Bench: Benchmarking tool-using LLM agents with complex real-world tasks via MCP servers",
    414       "authors": ["Zhenting Wang", "Qi Chang", "Hemani Patel"],
    415       "year": 2025,
    416       "arxiv_id": "2508.20453",
    417       "relevance": "Benchmark for MCP-based tool-using agents, used in the paper to instantiate tool provenance trust metrics."
    418     },
    419     {
    420       "title": "Gorilla: Large language model connected with massive APIs",
    421       "authors": ["Shishir G Patil", "Tianjun Zhang", "Xin Wang", "Joseph E Gonzalez"],
    422       "year": 2024,
    423       "relevance": "Foundational work on LLM-API integration relevant to tool invocation security."
    424     },
    425     {
    426       "title": "ChatInject: Abusing chat templates for prompt injection in LLM agents",
    427       "authors": ["Hwan Chang", "Yonghyun Jun", "Hwanhee Lee"],
    428       "year": 2025,
    429       "arxiv_id": "2509.22830",
    430       "relevance": "Demonstrates chat template exploitation for prompt injection, relevant to agent security evaluation."
    431     },
    432     {
    433       "title": "Not what you've signed up for: Compromising real-world LLM-integrated applications with indirect prompt injection",
    434       "authors": ["Kai Greshake", "Sahar Abdelnabi", "Shailesh Mishra"],
    435       "year": 2023,
    436       "relevance": "Seminal work on indirect prompt injection in LLM applications, foundational attack vector for agent security."
    437     },
    438     {
    439       "title": "Universal and transferable adversarial attacks on aligned language models",
    440       "authors": ["Andy Zou", "Zifan Wang", "Nicholas Carlini"],
    441       "year": 2023,
    442       "arxiv_id": "2307.15043",
    443       "relevance": "Key work on adversarial attacks against aligned LLMs, relevant to jailbreak and safety evaluation."
    444     },
    445     {
    446       "title": "HaluEval: A large-scale hallucination evaluation benchmark for large language models",
    447       "authors": ["Junyi Li", "Xiaoxue Cheng", "Wayne Xin Zhao"],
    448       "year": 2023,
    449       "arxiv_id": "2305.11747",
    450       "relevance": "Hallucination evaluation benchmark used to instantiate the cognitive certainty (tconf) metric."
    451     }
    452   ]
    453 }

Impressum · Datenschutz