scan.json (25667B)
1 { 2 "paper": { 3 "title": "Agentic AI Security: Threats, Defenses, Evaluation, and Open Challenges", 4 "authors": [ 5 "Anshuman Chhabra", 6 "Shrestha Datta", 7 "Shahriar Kabir Nahin", 8 "Prasant Mohapatra" 9 ], 10 "year": 2025, 11 "venue": "arXiv", 12 "arxiv_id": "2510.23883" 13 }, 14 "checklist": { 15 "artifacts": { 16 "code_released": { 17 "applies": true, 18 "answer": false, 19 "justification": "No GitHub link, Zenodo archive, or any code repository is mentioned in the paper. The survey presents taxonomy tables and narrative reviews but releases no analysis scripts or supporting code." 20 }, 21 "data_released": { 22 "applies": true, 23 "answer": false, 24 "justification": "The paper constructs no dataset of its own and provides no download link for the papers reviewed or the structured data extracted from them. The bibliography is the only artifact, and it is not released as a structured dataset." 25 }, 26 "environment_specified": { 27 "applies": true, 28 "answer": false, 29 "justification": "No environment specification is provided. The survey does not involve computational experiments, but it also does not specify any tooling or software used to conduct the review (e.g., reference management software, analysis tools)." 30 }, 31 "reproduction_instructions": { 32 "applies": true, 33 "answer": false, 34 "justification": "No reproduction instructions are provided. There is no description of how a reader could replicate the literature search, paper selection, or taxonomy construction." 35 } 36 }, 37 "statistical_methodology": { 38 "confidence_intervals_or_error_bars": { 39 "applies": false, 40 "answer": false, 41 "justification": "This is a narrative survey with no quantitative meta-analysis. Statistical uncertainty measures are not applicable." 42 }, 43 "significance_tests": { 44 "applies": false, 45 "answer": false, 46 "justification": "No comparative statistical claims are made by the survey authors themselves. NA for a narrative survey." 
47 }, 48 "effect_sizes_reported": { 49 "applies": false, 50 "answer": false, 51 "justification": "No effect sizes are computed by the survey authors. The paper reports statistics from cited works (e.g., '94.4% of agents vulnerable') but does not compute effect sizes itself." 52 }, 53 "sample_size_justified": { 54 "applies": false, 55 "answer": false, 56 "justification": "No empirical experiments are conducted, so sample size justification is not applicable." 57 }, 58 "variance_reported": { 59 "applies": false, 60 "answer": false, 61 "justification": "No experimental runs are conducted by the survey authors. NA for a narrative survey." 62 } 63 }, 64 "evaluation_design": { 65 "baselines_included": { 66 "applies": true, 67 "answer": false, 68 "justification": "The survey does not compare its taxonomy or coverage against prior survey papers. Section 2 briefly distinguishes this survey from existing surveys (references 47-52) but does not systematically compare coverage, methodology quality, or scope against them." 69 }, 70 "baselines_contemporary": { 71 "applies": false, 72 "answer": false, 73 "justification": "No baseline comparison experiments are conducted, so contemporariness of baselines is not applicable." 74 }, 75 "ablation_study": { 76 "applies": false, 77 "answer": false, 78 "justification": "No system with components to ablate is proposed; this is a survey paper." 79 }, 80 "multiple_metrics": { 81 "applies": false, 82 "answer": false, 83 "justification": "No empirical evaluation is performed by the survey itself that would require multiple metrics." 84 }, 85 "human_evaluation": { 86 "applies": false, 87 "answer": false, 88 "justification": "No system outputs are generated by this survey that would warrant human evaluation." 89 }, 90 "held_out_test_set": { 91 "applies": false, 92 "answer": false, 93 "justification": "No empirical evaluation with train/test splits is conducted." 
94 }, 95 "per_category_breakdown": { 96 "applies": true, 97 "answer": true, 98 "justification": "The survey provides per-category breakdowns of threats (Section 3: 5 major categories with subcategories), defenses (Table 1: coverage matrix across 7 defense dimensions), and benchmarks (Table 2: organized by capability vs. security-specific benchmarks)." 99 }, 100 "failure_cases_discussed": { 101 "applies": true, 102 "answer": true, 103 "justification": "The survey discusses failure modes and limitations of defense approaches throughout Section 4 (e.g., 'training-based defenses can degrade general-purpose capabilities,' adaptive attacks bypass defenses), and Section 6 explicitly discusses open challenges including failure modes of current benchmarks." 104 }, 105 "negative_results_reported": { 106 "applies": true, 107 "answer": true, 108 "justification": "The survey consistently notes where defenses fail (e.g., Section 4.1.4 notes training-free defenses 'remain fragile against adaptive attacks'; Section 4.1.1 notes fine-tuning 'can degrade the general-purpose capabilities of LLMs without providing significant defensive capabilities against adaptive attacks')." 109 } 110 }, 111 "claims_and_evidence": { 112 "abstract_claims_supported": { 113 "applies": true, 114 "answer": true, 115 "justification": "The abstract claims the paper 'outlines a taxonomy of threats specific to agentic AI, reviews recent benchmarks and evaluation methodologies, and discusses defense strategies.' All three are delivered in Sections 3, 5, and 4 respectively. No overclaiming is present in the abstract." 116 }, 117 "causal_claims_justified": { 118 "applies": false, 119 "answer": false, 120 "justification": "The survey makes no causal claims of its own; it summarizes causal findings reported in cited works. The framing is appropriately attributed ('Lupinacci et al. demonstrated...', 'Prior work has shown...')."
121 }, 122 "generalization_bounded": { 123 "applies": true, 124 "answer": true, 125 "justification": "The survey appropriately attributes claims to specific systems, models, or studies (e.g., 'GPT-4 achieving 87% success rate when given CVE descriptions'). The scope is stated as agentic AI security broadly, and limitations of generalization are noted in the open challenges section (e.g., physical world agents noted as underexplored in Section 6.5)." 126 }, 127 "alternative_explanations_discussed": { 128 "applies": false, 129 "answer": false, 130 "justification": "This is a taxonomy/survey paper that presents no original empirical results, so alternative explanations for observed results are not applicable at the survey level." 131 } 132 }, 133 "setup_transparency": { 134 "model_versions_specified": { 135 "applies": false, 136 "answer": false, 137 "justification": "The survey does not run any LLM experiments itself. When discussing results from cited papers, it uses the model names as reported in those papers (e.g., 'GPT-4'), which is appropriate for a survey." 138 }, 139 "prompts_provided": { 140 "applies": false, 141 "answer": false, 142 "justification": "The survey does not use prompting in its own methodology." 143 }, 144 "hyperparameters_reported": { 145 "applies": false, 146 "answer": false, 147 "justification": "No experiments are conducted by the survey authors." 148 }, 149 "scaffolding_described": { 150 "applies": false, 151 "answer": false, 152 "justification": "No agentic scaffolding is used by the survey itself." 153 }, 154 "data_preprocessing_documented": { 155 "applies": true, 156 "answer": false, 157 "justification": "No systematic search methodology is described. The paper does not explain how papers were identified for inclusion, what databases were searched, what search terms were used, or what inclusion/exclusion criteria were applied. There is no paper selection pipeline documented." 
158 } 159 }, 160 "limitations_and_scope": { 161 "limitations_section_present": { 162 "applies": true, 163 "answer": false, 164 "justification": "There is no dedicated limitations or threats-to-validity section. Section 6 is titled 'Open Challenges' and discusses gaps in the field, but this is forward-looking and does not discuss limitations of the survey itself (e.g., coverage gaps, selection bias, recency of sources)." 165 }, 166 "threats_to_validity_specific": { 167 "applies": true, 168 "answer": false, 169 "justification": "No threats to validity of the survey's own methodology are discussed. The paper does not address whether it missed relevant papers, whether its taxonomy is complete or exhaustive, or whether the cited statistics are representative." 170 }, 171 "scope_boundaries_stated": { 172 "applies": true, 173 "answer": true, 174 "justification": "Section 6.5 explicitly notes 'this survey primarily focuses on software-based agents' and that 'physically embodied agents introduce additional and largely unexplored security risks.' The paper also notes in the introduction that it focuses on security specifically, distinguishing from capability surveys." 175 } 176 }, 177 "data_integrity": { 178 "raw_data_available": { 179 "applies": true, 180 "answer": false, 181 "justification": "No structured dataset of reviewed papers is released. The bibliography provides references but not a machine-readable corpus with coded attributes, making independent verification of coverage claims impossible." 182 }, 183 "data_collection_described": { 184 "applies": true, 185 "answer": false, 186 "justification": "No description of how papers were collected is provided. The survey does not specify search databases, date ranges, search queries, or any systematic selection process. Papers appear to have been selected by the authors' awareness and judgment without a documented protocol." 
187 }, 188 "recruitment_methods_described": { 189 "applies": false, 190 "answer": false, 191 "justification": "No human participants are involved in this survey paper." 192 }, 193 "data_pipeline_documented": { 194 "applies": true, 195 "answer": false, 196 "justification": "There is no documented pipeline from paper collection to final taxonomy. The authors present a taxonomy and cite supporting papers, but the process by which papers were screened, included, or excluded is entirely opaque." 197 } 198 }, 199 "conflicts_of_interest": { 200 "funding_disclosed": { 201 "applies": true, 202 "answer": false, 203 "justification": "No acknowledgments section is present in the paper, and no funding sources are disclosed anywhere in the text." 204 }, 205 "affiliations_disclosed": { 206 "applies": true, 207 "answer": true, 208 "justification": "Author affiliations are clearly listed on the title page: 'Bellini College of AI, Cybersecurity, and Computing, University of South Florida.' Contact email addresses are also provided." 209 }, 210 "funder_independent_of_outcome": { 211 "applies": true, 212 "answer": false, 213 "justification": "No funding is disclosed, so funder independence cannot be assessed. The schema says 'NA if unfunded,' but the paper does not confirm it is unfunded—university researchers typically receive some form of funding support. The absence of a funding disclosure is not the same as confirmation of being unfunded." 214 }, 215 "financial_interests_declared": { 216 "applies": true, 217 "answer": false, 218 "justification": "There is no competing interests or financial disclosure statement in the paper. Absence of declaration is not the same as absence of conflict." 219 } 220 }, 221 "contamination": { 222 "training_cutoff_stated": { 223 "applies": false, 224 "answer": false, 225 "justification": "This is a survey paper that does not evaluate any pre-trained model's capabilities on benchmarks. Contamination questions are not applicable." 
226 }, 227 "train_test_overlap_discussed": { 228 "applies": false, 229 "answer": false, 230 "justification": "No model evaluation is conducted by the survey itself. NA." 231 }, 232 "benchmark_contamination_addressed": { 233 "applies": false, 234 "answer": false, 235 "justification": "No model evaluation is conducted. NA." 236 } 237 }, 238 "human_studies": { 239 "pre_registered": { 240 "applies": false, 241 "answer": false, 242 "justification": "No human participants are involved. This is a survey of AI security literature." 243 }, 244 "irb_or_ethics_approval": { 245 "applies": false, 246 "answer": false, 247 "justification": "No human participants are involved." 248 }, 249 "demographics_reported": { 250 "applies": false, 251 "answer": false, 252 "justification": "No human participants are involved." 253 }, 254 "inclusion_exclusion_criteria": { 255 "applies": false, 256 "answer": false, 257 "justification": "No human participants are involved." 258 }, 259 "randomization_described": { 260 "applies": false, 261 "answer": false, 262 "justification": "No human participants or experimental conditions are involved." 263 }, 264 "blinding_described": { 265 "applies": false, 266 "answer": false, 267 "justification": "No human participants or experimental conditions are involved." 268 }, 269 "attrition_reported": { 270 "applies": false, 271 "answer": false, 272 "justification": "No human participants are involved." 273 } 274 }, 275 "cost_and_practicality": { 276 "inference_cost_reported": { 277 "applies": false, 278 "answer": false, 279 "justification": "This is a survey paper and does not propose or evaluate a system with inference costs." 280 }, 281 "compute_budget_stated": { 282 "applies": false, 283 "answer": false, 284 "justification": "This is a survey paper requiring no computational budget." 
285 } 286 } 287 }, 288 "claims": [ 289 { 290 "claim": "94.4% of state-of-the-art LLM agents are vulnerable to prompt injection, 83.3% to retrieval-based backdoors, and 100% to inter-agent trust exploits.", 291 "evidence": "Attributed to Lupinacci et al. [38] (arXiv:2507.06850, 2025), cited in Section 2 (Motivation and Contributions).", 292 "supported": "weak" 293 }, 294 { 295 "claim": "Adaptive attacks achieve a 50% success rate in penetrating eight different defenses designed for indirect prompt injection attacks.", 296 "evidence": "Attributed to Zhan et al. [61] (NAACL 2025), cited in Section 3.1.1.", 297 "supported": "moderate" 298 }, 299 { 300 "claim": "GPT-4 achieves 87% success rate exploiting one-day vulnerabilities when given CVE descriptions, outperforming all other examined models and conventional vulnerability scanners like OWASP ZAP and Metasploit.", 301 "evidence": "Attributed to Fang et al. [98] (arXiv:2404.08144, 2024), cited in Section 3.2.1.", 302 "supported": "moderate" 303 }, 304 { 305 "claim": "CrossInject, a cross-modal prompt injection method embedding adversarial signals in both vision and text, boosts attack effectiveness by at least 30.1% across various tasks.", 306 "evidence": "Attributed to Wang et al. [77] (arXiv:2504.14348, 2025), cited in Section 3.1.3.", 307 "supported": "moderate" 308 }, 309 { 310 "claim": "Even advanced multimodal agents struggle with CAPTCHAs, achieving at best 40% success rate compared to nearly 100% for humans.", 311 "evidence": "Attributed to Luo et al. Open CaptchaWorld [164] (arXiv:2505.24878, 2025), cited in Section 3.4.3.", 312 "supported": "moderate" 313 }, 314 { 315 "claim": "Defensive fine-tuning can degrade the general-purpose capabilities of LLMs without providing significant defensive capabilities against adaptive attacks.", 316 "evidence": "Attributed to Jia et al. 
[179] (arXiv:2505.18333, 2025), cited in Section 4.1.1.", 317 "supported": "moderate" 318 } 319 ], 320 "methodology_tags": [ 321 "qualitative" 322 ], 323 "key_findings": "This survey provides a taxonomy of agentic AI security threats organized into five broad categories: prompt injection and jailbreaks, autonomous cyber-exploitation and tool abuse, multi-agent and protocol-level threats, interface and environment risks, and governance and autonomy concerns. It reviews defenses across four dimensions (prompt-injection-resistant designs, policy filtering, sandboxing, detection/monitoring), catalogs over 20 security-relevant benchmarks, and identifies six open challenges including long-horizon security, multi-agent trust, and adaptive attack evaluation. The survey concludes that no single defense is sufficient and practical deployments must combine complementary strategies.", 324 "red_flags": [ 325 { 326 "flag": "No systematic review methodology", 327 "detail": "The survey provides no description of how papers were identified, searched for, or selected. There is no search protocol, no databases listed, no date range, no inclusion/exclusion criteria, and no PRISMA-style flow diagram. This makes it impossible to verify whether coverage is representative or reproducible, and the paper set appears to be selected by the authors' awareness rather than a systematic process." 328 }, 329 { 330 "flag": "No limitations section", 331 "detail": "The paper has no dedicated limitations or threats-to-validity section. Section 6 discusses open challenges in the field, but not limitations of this survey itself. The survey does not acknowledge potential coverage gaps, recency bias (many citations are mid-2025), or selection bias in which papers were included." 332 }, 333 { 334 "flag": "No funding disclosure", 335 "detail": "There is no acknowledgments section and no funding source is disclosed anywhere in the paper. No competing interests statement is provided. 
This is a gap in transparency, especially as the corresponding author (Chhabra) cites two of his own prior works [145, 146] in the survey." 336 }, 337 { 338 "flag": "Narrative survey laundering weak evidence", 339 "detail": "The survey presents statistics from individual cited papers (e.g., '94.4% of agents vulnerable') as unqualified facts without noting the methodological limitations of those underlying studies. A survey that uncritically aggregates results from papers of varying methodological quality amplifies noise rather than extracting signal." 340 }, 341 { 342 "flag": "No quality assessment of cited papers", 343 "detail": "The survey makes no attempt to assess the methodological quality of the papers it cites and synthesizes. Claims from individual empirical papers are presented without noting sample sizes, whether they were replicated, or whether the studies were peer-reviewed. This means weak or unreliable results receive equal weight to robust findings." 344 } 345 ], 346 "cited_papers": [ 347 { 348 "title": "InjecAgent: Benchmarking Indirect Prompt Injections in Tool-Integrated Large Language Model Agents", 349 "authors": [ 350 "Qiusi Zhan", 351 "Zhixiang Liang", 352 "Zifan Ying", 353 "Daniel Kang" 354 ], 355 "year": 2024, 356 "relevance": "Key benchmark for indirect prompt injection attacks on tool-using LLM agents, directly relevant to survey scope on agentic AI evaluation." 357 }, 358 { 359 "title": "AgentHarm: A Benchmark for Measuring Harmfulness of LLM Agents", 360 "authors": [ 361 "Maksym Andriushchenko", 362 "Alexandra Souly", 363 "Mateusz Dziemian" 364 ], 365 "year": 2024, 366 "arxiv_id": "2410.09024", 367 "relevance": "Security-specific benchmark for measuring harmful behavior in tool-using LLM agents, central to agentic AI evaluation methodology." 
368 }, 369 { 370 "title": "AgentDojo: A Dynamic Environment to Evaluate Attacks and Defenses for LLM Agents", 371 "authors": [ 372 "Edoardo Debenedetti", 373 "Jie Zhang", 374 "Mislav Balunovic", 375 "Luca Beurer-Kellner", 376 "Marc Fischer", 377 "Florian Tramer" 378 ], 379 "year": 2024, 380 "arxiv_id": "2406.13352", 381 "relevance": "Dynamic benchmark framework for evaluating prompt injection attacks and defenses in tool-using agents." 382 }, 383 { 384 "title": "WebArena: A Realistic Web Environment for Building Autonomous Agents", 385 "authors": [ 386 "Shuyan Zhou", 387 "Frank F Xu", 388 "Hao Zhu" 389 ], 390 "year": 2023, 391 "arxiv_id": "2307.13854", 392 "relevance": "Major capability benchmark for web agents used extensively in agentic AI security evaluation and referenced throughout the survey." 393 }, 394 { 395 "title": "Identifying the Risks of LM Agents with an LM-Emulated Sandbox (ToolEmu)", 396 "authors": [ 397 "Yangjun Ruan", 398 "Honghua Dong", 399 "Andrew Wang", 400 "Silviu Pitis", 401 "Yongchao Zhou", 402 "Jimmy Ba", 403 "Tatsunori Hashimoto" 404 ], 405 "year": 2023, 406 "arxiv_id": "2309.15817", 407 "relevance": "Proposes LM-emulated sandbox for safety evaluation of LLM agents, a foundational methodology for agentic AI security evaluation." 408 }, 409 { 410 "title": "LLM Agents Can Autonomously Exploit One-Day Vulnerabilities", 411 "authors": [ 412 "Richard Fang", 413 "Rohan Bindu", 414 "Akul Gupta", 415 "Daniel Kang" 416 ], 417 "year": 2024, 418 "arxiv_id": "2404.08144", 419 "relevance": "Demonstrates that GPT-4 agents can autonomously exploit real-world CVEs, central to autonomous cyber-exploitation threat category." 
420 }, 421 { 422 "title": "LLM Agents Can Autonomously Hack Websites", 423 "authors": [ 424 "Richard Fang", 425 "Rohan Bindu", 426 "Akul Gupta", 427 "Qiusi Zhan", 428 "Daniel Kang" 429 ], 430 "year": 2024, 431 "arxiv_id": "2402.06664", 432 "relevance": "Shows GPT-4 agents autonomously breaking into sandboxed websites through multi-step exploits." 433 }, 434 { 435 "title": "Adaptive Attacks Break Defenses Against Indirect Prompt Injection Attacks on LLM Agents", 436 "authors": [ 437 "Qiusi Zhan", 438 "Richard Fang", 439 "Henil Shalin Panchal", 440 "Daniel Kang" 441 ], 442 "year": 2025, 443 "relevance": "Key finding that adaptive attacks undermine most existing defenses against indirect prompt injection, motivating open challenge on adaptive attack evaluation." 444 }, 445 { 446 "title": "Not What You've Signed Up For: Compromising Real-World LLM-Integrated Applications with Indirect Prompt Injection", 447 "authors": [ 448 "Kai Greshake", 449 "Sahar Abdelnabi", 450 "Shailesh Mishra", 451 "Christoph Endres", 452 "Thorsten Holz", 453 "Mario Fritz" 454 ], 455 "year": 2023, 456 "relevance": "Foundational paper on indirect prompt injection attacks against real-world LLM-integrated applications." 457 }, 458 { 459 "title": "AI Agents That Matter", 460 "authors": [ 461 "Sayash Kapoor", 462 "Benedikt Stroebl", 463 "Zachary S Siegel", 464 "Nitya Nadgir", 465 "Arvind Narayanan" 466 ], 467 "year": 2024, 468 "arxiv_id": "2407.01502", 469 "relevance": "Addresses evaluation methodology for LLM agents, including cost-performance tradeoffs, relevant to methodology quality in agentic AI research." 470 }, 471 { 472 "title": "The BrowserGym Ecosystem for Web Agent Research", 473 "authors": [ 474 "Thibault Le Sellier De Chezelles", 475 "Sahar Omidi Shayegan" 476 ], 477 "year": 2024, 478 "arxiv_id": "2412.05467", 479 "relevance": "Unified benchmark ecosystem for web agents that emphasizes reproducibility and comparability across models and environments."
480 }, 481 { 482 "title": "tau-bench: A Benchmark for Tool-Agent-User Interaction in Real-World Domains", 483 "authors": [ 484 "Shunyu Yao", 485 "Noah Shinn", 486 "Pedram Razavi", 487 "Karthik Narasimhan" 488 ], 489 "year": 2024, 490 "arxiv_id": "2406.12045", 491 "relevance": "Introduces pass^k metric for measuring consistency of tool-using agents, directly relevant to reliability evaluation methodology." 492 }, 493 { 494 "title": "OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments", 495 "authors": [ 496 "Tianbao Xie", 497 "Danyang Zhang", 498 "Jixuan Chen" 499 ], 500 "year": 2024, 501 "relevance": "Comprehensive benchmark for computer-use agents in realistic environments, used for both capability and security evaluation." 502 }, 503 { 504 "title": "Here Comes the AI Worm: Unleashing Zero-Click Worms That Target GenAI-Powered Applications", 505 "authors": [ 506 "Stav Cohen", 507 "Ron Bitton", 508 "Ben Nassi" 509 ], 510 "year": 2024, 511 "arxiv_id": "2403.02817", 512 "relevance": "Demonstrates autonomous propagating attacks (AI worms) in multi-agent systems, key to understanding propagation-orientation of agentic threats." 513 }, 514 { 515 "title": "Design Patterns for Securing LLM Agents against Prompt Injections", 516 "authors": [ 517 "Luca Beurer-Kellner", 518 "Beat Buesser", 519 "Ana-Maria Crestu" 520 ], 521 "year": 2025, 522 "arxiv_id": "2506.08837", 523 "relevance": "Systematic defense design patterns for LLM agents against prompt injection, with classification into agent-, user-, and system-focused approaches." 524 } 525 ] 526 }