scan.json (23941B)
1 { 2 "paper": { 3 "title": "Adversarial Threat Vectors and Risk Mitigation for Retrieval-Augmented Generation Systems", 4 "authors": [ 5 "Chris M. Ward", 6 "Josh Harguess" 7 ], 8 "year": 2025, 9 "venue": "SPIE (Proc. of SPIE)", 10 "arxiv_id": "2506.00281" 11 }, 12 "checklist": { 13 "artifacts": { 14 "code_released": { 15 "applies": true, 16 "answer": false, 17 "justification": "No code repository or archive is provided. The paper presents a threat modeling framework and qualitative risk analysis with no accompanying software artifacts." 18 }, 19 "data_released": { 20 "applies": true, 21 "answer": false, 22 "justification": "No datasets were collected or released. The paper conducts a qualitative analysis referencing existing frameworks (MITRE ATLAS, OWASP) rather than any original data collection." 23 }, 24 "environment_specified": { 25 "applies": false, 26 "answer": false, 27 "justification": "The paper is a theoretical/analytical framework paper with no computational experiments, so no environment specification is applicable." 28 }, 29 "reproduction_instructions": { 30 "applies": false, 31 "answer": false, 32 "justification": "No experiments were conducted that would require reproduction instructions. The paper presents a threat modeling methodology applicable conceptually, not computationally." 33 } 34 }, 35 "statistical_methodology": { 36 "confidence_intervals_or_error_bars": { 37 "applies": false, 38 "answer": false, 39 "justification": "The paper presents qualitative risk scores (likelihood × impact) derived from OWASP-style factor analysis, not statistical experiments. No confidence intervals are applicable." 40 }, 41 "significance_tests": { 42 "applies": false, 43 "answer": false, 44 "justification": "No statistical comparisons between conditions are made. The risk reduction figures are deterministic scoring calculations, not empirical measurements with distributional uncertainty." 45 }, 46 "effect_sizes_reported": { 47 "applies": false, 48 "answer": false, 49 "justification": "The paper uses a qualitative risk-scoring model (inherent vs. residual risk) rather than empirical experiments that would yield effect sizes." 50 }, 51 "sample_size_justified": { 52 "applies": false, 53 "answer": false, 54 "justification": "No sample of participants, examples, or cases is drawn. The analysis is applied to a generic hypothetical RAG system architecture." 55 }, 56 "variance_reported": { 57 "applies": false, 58 "answer": false, 59 "justification": "No experimental runs were conducted. The risk scores are deterministic assessments, not repeated measurements." 60 } 61 }, 62 "evaluation_design": { 63 "baselines_included": { 64 "applies": false, 65 "answer": false, 66 "justification": "The paper proposes a threat modeling framework and control prioritization scheme rather than evaluating a system empirically; there are no baselines to compare against." 67 }, 68 "baselines_contemporary": { 69 "applies": false, 70 "answer": false, 71 "justification": "No baseline comparison is conducted; this criterion does not apply to a framework/position paper." 72 }, 73 "ablation_study": { 74 "applies": false, 75 "answer": false, 76 "justification": "No implemented system is evaluated. The paper applies qualitative risk analysis to a generic RAG architecture, so component ablation is not applicable." 77 }, 78 "multiple_metrics": { 79 "applies": false, 80 "answer": false, 81 "justification": "The schema asks whether multiple evaluation metrics are used to evaluate a system's performance. This paper does not evaluate any system — it applies a qualitative risk scoring model (OWASP factors) to a hypothetical RAG architecture. The OWASP factors (Likelihood, Impact, Ease of Exploit, etc.) are dimensions of a risk assessment framework, not evaluation metrics for measuring system output quality. This criterion is structurally inapplicable to a framework paper with no experiments." 82 }, 83 "human_evaluation": { 84 "applies": false, 85 "answer": false, 86 "justification": "There are no AI system outputs being evaluated; the paper is a framework paper with qualitative risk analysis, making human evaluation of system outputs irrelevant." 87 }, 88 "held_out_test_set": { 89 "applies": false, 90 "answer": false, 91 "justification": "No machine learning evaluation is conducted. The paper does not train or test any model on held-out data." 92 }, 93 "per_category_breakdown": { 94 "applies": true, 95 "answer": true, 96 "justification": "The paper provides breakdowns by threat model (Sensitive Information Disclosure vs. RAG System Poisoning) and by attack stage (training, ingestion, retrieval, prompt engineering, downstream), with separate risk scoring figures for each." 97 }, 98 "failure_cases_discussed": { 99 "applies": true, 100 "answer": true, 101 "justification": "Section 4 discusses residual risks that remain after applying controls, including sophisticated insider threats and advanced supply chain compromises that 'will continue to challenge even well-defended RAG architectures.'" 102 }, 103 "negative_results_reported": { 104 "applies": false, 105 "answer": false, 106 "justification": "The paper does not run experiments, so there are no negative empirical results to report. The discussion of residual risk is conceptual, not experimental." 107 } 108 }, 109 "claims_and_evidence": { 110 "abstract_claims_supported": { 111 "applies": true, 112 "answer": true, 113 "justification": "The abstract claims to identify prominent attack vectors and propose a prioritized control list; the paper does provide qualitative analysis of prompt injection, data poisoning, and adversarial query manipulation (Section 3.3) and a ranked control table (Table 3). The claims are descriptive and matched by content." 114 }, 115 "causal_claims_justified": { 116 "applies": true, 117 "answer": false, 118 "justification": "The paper makes causal claims about control effectiveness, e.g., 'Adversarial Training... mitigate risks by increasing the Skill Level required' and shows numerical risk reductions (e.g., Overall Risk Severity drops from 19.5 to 10.41). These causal claims rely entirely on expert judgment applied to an OWASP scoring formula with no empirical validation; no experiments, red-team tests, or measurements support the specific numerical reductions." 119 }, 120 "generalization_bounded": { 121 "applies": true, 122 "answer": false, 123 "justification": "The paper frames its conclusions broadly ('organizations can better protect the integrity and reliability of their systems') without restricting claims to specific RAG architectures, deployment contexts, or adversary models. The generic RAG system used is hypothetical, and results are presented as generally applicable." 124 }, 125 "alternative_explanations_discussed": { 126 "applies": true, 127 "answer": false, 128 "justification": "The paper does not discuss alternative explanations for why the proposed controls would work or whether other factors could undermine the risk reduction estimates. Section 4 acknowledges residual risks but does not consider alternative threat models or reasons the proposed mitigation order might be wrong." 129 } 130 }, 131 "setup_transparency": { 132 "model_versions_specified": { 133 "applies": false, 134 "answer": false, 135 "justification": "No specific LLM is used or evaluated in experiments. The paper analyzes a generic, hypothetical RAG architecture, so model version specification is not applicable." 136 }, 137 "prompts_provided": { 138 "applies": false, 139 "answer": false, 140 "justification": "No prompts are used in experiments. The paper does not conduct any LLM prompting as part of its methodology." 141 }, 142 "hyperparameters_reported": { 143 "applies": false, 144 "answer": false, 145 "justification": "No models are trained or queried with configurable parameters. This is a framework/position paper with no computational experiments." 146 }, 147 "scaffolding_described": { 148 "applies": false, 149 "answer": false, 150 "justification": "The paper proposes a threat modeling methodology for RAG systems but does not itself implement or evaluate an agentic scaffold." 151 }, 152 "data_preprocessing_documented": { 153 "applies": false, 154 "answer": false, 155 "justification": "No data collection or preprocessing is performed by the authors. The risk scoring is based on expert judgment applied to a hypothetical system, not on processed datasets." 156 } 157 }, 158 "limitations_and_scope": { 159 "limitations_section_present": { 160 "applies": true, 161 "answer": false, 162 "justification": "There is no dedicated limitations or threats-to-validity section. Section 4 (Discussion and Future Work) briefly notes residual risks and that 'sophisticated insider threats or advanced supply chain compromises will continue to challenge even well-defended RAG architectures,' but this is a single paragraph embedded in the discussion, not a substantive limitations section." 163 }, 164 "threats_to_validity_specific": { 165 "applies": true, 166 "answer": false, 167 "justification": "No specific threats to the validity of the threat modeling analysis or the risk reduction calculations are discussed. The paper does not address whether the OWASP scoring factors are calibrated, whether the Pyramid of Pain framework applies to LLM-based systems, or whether the residual risk numbers are meaningful." 168 }, 169 "scope_boundaries_stated": { 170 "applies": true, 171 "answer": false, 172 "justification": "The paper does not explicitly state what the analysis does NOT cover. It focuses on a 'generic RAG system' for enterprise knowledge management but does not delineate which deployment contexts, adversary capabilities, or RAG variants are excluded from its conclusions." 173 } 174 }, 175 "data_integrity": { 176 "raw_data_available": { 177 "applies": false, 178 "answer": false, 179 "justification": "No empirical data was collected. The risk scores are constructed from expert judgment applied to OWASP scoring factors; there is no raw dataset to verify." 180 }, 181 "data_collection_described": { 182 "applies": false, 183 "answer": false, 184 "justification": "No primary data was collected. The analysis is based on applying existing frameworks (MITRE ATLAS, OWASP Top 10 for LLMs, AI Security Pyramid of Pain) to a generic hypothetical RAG architecture." 185 }, 186 "recruitment_methods_described": { 187 "applies": false, 188 "answer": false, 189 "justification": "No participants or samples were recruited. The paper is a framework analysis with no human subjects or empirical data collection." 190 }, 191 "data_pipeline_documented": { 192 "applies": false, 193 "answer": false, 194 "justification": "No data pipeline exists in this paper. The methodology is a five-stage qualitative threat modeling process applied to a hypothetical system, not a data processing pipeline." 195 } 196 }, 197 "conflicts_of_interest": { 198 "funding_disclosed": { 199 "applies": true, 200 "answer": false, 201 "justification": "There is an acknowledgments section that thanks Dr. Mike Tan for conversations, but no funding source or grant is disclosed. The authors are from Fire Mountain Labs, a private company, but no sponsoring entity is stated." 202 }, 203 "affiliations_disclosed": { 204 "applies": true, 205 "answer": true, 206 "justification": "Author affiliations are clearly stated on the first page: both authors are from Fire Mountain Labs, San Diego, CA. Reference [9] reveals that the AI Security Pyramid of Pain framework applied in this paper is prior work by the same authors (Ward, Harguess et al.), which is disclosed in the citation." 207 }, 208 "funder_independent_of_outcome": { 209 "applies": false, 210 "answer": false, 211 "justification": "No funding source is disclosed, so independence of funder cannot be assessed. The paper appears to be unfunded or self-funded by Fire Mountain Labs." 212 }, 213 "financial_interests_declared": { 214 "applies": true, 215 "answer": false, 216 "justification": "There is no competing interests statement or financial disclosure. The authors promote their own prior framework (AI Security Pyramid of Pain) as the central organizing structure of this paper without disclosing whether they have commercial interests in its adoption." 217 } 218 }, 219 "contamination": { 220 "training_cutoff_stated": { 221 "applies": false, 222 "answer": false, 223 "justification": "The paper does not evaluate any pre-trained model's benchmark performance. It proposes a threat modeling framework for generic RAG systems, so training data cutoffs are not relevant." 224 }, 225 "train_test_overlap_discussed": { 226 "applies": false, 227 "answer": false, 228 "justification": "No benchmark evaluation of a pre-trained model is conducted. Contamination of training data is not applicable to this framework paper." 229 }, 230 "benchmark_contamination_addressed": { 231 "applies": false, 232 "answer": false, 233 "justification": "No benchmark is used to evaluate a pre-trained model's knowledge. The paper is a threat modeling framework, not a capability evaluation." 234 } 235 }, 236 "human_studies": { 237 "pre_registered": { 238 "applies": false, 239 "answer": false, 240 "justification": "No human participants are involved. The paper presents a qualitative threat modeling framework." 241 }, 242 "irb_or_ethics_approval": { 243 "applies": false, 244 "answer": false, 245 "justification": "No human participants are involved. IRB approval is not applicable." 246 }, 247 "demographics_reported": { 248 "applies": false, 249 "answer": false, 250 "justification": "No human participants are involved. Demographic reporting is not applicable." 251 }, 252 "inclusion_exclusion_criteria": { 253 "applies": false, 254 "answer": false, 255 "justification": "No human participants are involved. Inclusion/exclusion criteria are not applicable." 256 }, 257 "randomization_described": { 258 "applies": false, 259 "answer": false, 260 "justification": "No human participants or experimental conditions are involved. Randomization is not applicable." 261 }, 262 "blinding_described": { 263 "applies": false, 264 "answer": false, 265 "justification": "No human participants or evaluators are involved. Blinding is not applicable." 266 }, 267 "attrition_reported": { 268 "applies": false, 269 "answer": false, 270 "justification": "No human participants are involved. Attrition reporting is not applicable." 271 } 272 }, 273 "cost_and_practicality": { 274 "inference_cost_reported": { 275 "applies": false, 276 "answer": false, 277 "justification": "The paper is a framework/position paper that proposes a threat modeling methodology, not an empirical system. No inference costs are incurred by the paper's method itself." 278 }, 279 "compute_budget_stated": { 280 "applies": false, 281 "answer": false, 282 "justification": "No computational experiments were run. The paper's methodology is expert-judgment-based qualitative analysis, requiring no compute budget to report." 283 } 284 } 285 }, 286 "claims": [ 287 { 288 "claim": "RAG systems face three primary adversarial threat vectors: prompt injection, data poisoning, and adversarial query manipulation.", 289 "evidence": "Section 3.3 identifies these threat vectors by mapping them against MITRE ATLAS and OWASP Top 10 for LLMs frameworks. The analysis is qualitative, relying on existing threat taxonomies rather than novel empirical data.", 290 "supported": "moderate" 291 }, 292 { 293 "claim": "Applying the proposed multi-layered mitigation strategy reduces inherent risk for Sensitive Information Disclosure from High (19.5) to Low (10.41).", 294 "evidence": "Section 3.5.8 and Figures 9-10 report these specific numerical reductions. However, the numbers derive from an expert-scored OWASP-style formula applied by the authors themselves with no independent validation or empirical testing.", 295 "supported": "weak" 296 }, 297 { 298 "claim": "RAG system poisoning risk severity can be reduced from High (19.88) to Low (6.94) through data governance and lifecycle controls.", 299 "evidence": "Section 3.5.8 states: 'For RAG System Poisoning (Threat Model II), the Overall Risk Severity was reduced from High (19.88), to Low (6.94).' This is derived from the same unvalidated expert scoring model.", 300 "supported": "weak" 301 }, 302 { 303 "claim": "The AI Security Pyramid of Pain provides a structured framework for prioritizing RAG security controls to maximize adversary disruption.", 304 "evidence": "Section 3.5.7 and Table 3 map controls to pyramid layers, arguing that upper-tier controls 'force adversaries to fundamentally alter their operational approach.' This is the authors' own prior framework (Reference [9]) applied here.", 305 "supported": "weak" 306 }, 307 { 308 "claim": "Enterprise adoption of RAG systems exceeded 50% in 2024, up from 31% the prior year.", 309 "evidence": "Introduction cites an industry survey (Reference [2]: Menlo Ventures Blog, Nov 2024). This is a secondary citation to an industry report, not independently verified.", 310 "supported": "moderate" 311 } 312 ], 313 "methodology_tags": [ 314 "theoretical", 315 "case-study" 316 ], 317 "key_findings": "This paper presents a qualitative threat modeling framework for RAG systems, identifying prompt injection, data poisoning, and adversarial query manipulation as the primary attack vectors. Using OWASP risk factors and the authors' own AI Security Pyramid of Pain framework, the paper estimates that applying six proposed controls (input validation, adversarial training, real-time monitoring, data governance, MLOps lifecycle management, and incident response) can reduce risk severity from 'High' to 'Low' for both analyzed threat models. The control prioritization scheme maps mitigations to pyramid tiers to maximize adversary disruption. No empirical experiments are conducted; all risk reductions are derived from expert judgment applied to a scoring formula.", 318 "red_flags": [ 319 { 320 "flag": "Self-promotional framework", 321 "detail": "The paper's central organizing framework, the AI Security Pyramid of Pain, is prior work by the same two authors (Reference [9]: Ward, Harguess et al., 2024). The paper essentially applies the authors' own framework to a new domain without critical evaluation of its limitations or comparison to alternative frameworks." 322 }, 323 { 324 "flag": "Unvalidated quantitative risk reductions", 325 "detail": "The paper reports specific numerical risk reductions (e.g., risk dropping from 19.5 to 10.41) that are derived entirely from the authors' own expert judgment applied to an OWASP-style scoring formula. No empirical red-team tests, measurements, or independent validation support these numbers. Presenting them as quantitative findings gives false precision." 326 }, 327 { 328 "flag": "No competing interests disclosure", 329 "detail": "The authors are from a private company (Fire Mountain Labs) that presumably offers security consulting services, and the paper promotes their proprietary framework. No competing interests or financial disclosure is provided." 330 }, 331 { 332 "flag": "Claims outrun evidence", 333 "detail": "The conclusion states organizations 'can better protect the integrity and reliability of their systems' by applying the proposed framework, but no empirical evidence (red-team exercises, measured attack success rates, before/after comparisons) is provided. All evidence is based on the authors' expert assessment of a hypothetical generic RAG system." 334 }, 335 { 336 "flag": "No limitations section", 337 "detail": "There is no dedicated limitations or threats-to-validity section. The paper does not address whether the OWASP scoring factors are calibrated for LLM systems, whether the generic RAG architecture studied reflects real deployments, or what assumptions underlie the risk reduction estimates." 338 } 339 ], 340 "cited_papers": [ 341 { 342 "title": "Retrieval-augmented generation for knowledge-intensive NLP tasks", 343 "authors": [ 344 "Lewis, P.", 345 "Perez, E.", 346 "Piktus, A.", 347 "Petroni, F.", 348 "Karpukhin, V." 349 ], 350 "year": 2020, 351 "relevance": "Foundational paper introducing RAG systems, directly relevant to the agentic AI/LLM architecture landscape the survey covers." 352 }, 353 { 354 "title": "BadRAG: Identifying vulnerabilities in retrieval augmented generation of large language models", 355 "authors": [ 356 "Xue, J.", 357 "Zheng, M.", 358 "Hu, Y.", 359 "Liu, F.", 360 "Chen, X.", 361 "Lou, Q." 362 ], 363 "year": 2024, 364 "arxiv_id": "2406.00083", 365 "relevance": "Empirical study of RAG poisoning attacks, directly relevant to adversarial robustness of LLM systems." 366 }, 367 { 368 "title": "Attention is all you need", 369 "authors": [ 370 "Vaswani, A.", 371 "Shazeer, N.", 372 "Parmar, N.", 373 "Uszkoreit, J.", 374 "Jones, L.", 375 "Gomez, A. N." 376 ], 377 "year": 2017, 378 "relevance": "Foundational transformer architecture paper underlying all LLM-based systems evaluated in the survey." 379 }, 380 { 381 "title": "A survey of large language models", 382 "authors": [ 383 "Zhao, W. X.", 384 "Zhou, K.", 385 "Li, J.", 386 "Tang, T.", 387 "Wang, X." 388 ], 389 "year": 2023, 390 "arxiv_id": "2303.18223", 391 "relevance": "Comprehensive LLM survey paper, relevant to the survey's coverage of foundational LLM research." 392 }, 393 { 394 "title": "The AI Security Pyramid of Pain", 395 "authors": [ 396 "Ward, C. M.", 397 "Harguess, J.", 398 "Tao, J.", 399 "Christman, D.", 400 "Tan, M.", 401 "Spicer, P.", 402 "Cranium, A." 403 ], 404 "year": 2024, 405 "relevance": "Prior work by the same authors providing the framework used in this paper; relevant as a security evaluation framework for agentic AI systems." 406 }, 407 { 408 "title": "Towards CRISP-ML(Q): a machine learning process model with quality assurance methodology", 409 "authors": [ 410 "Studer, S.", 411 "Bui, T. B.", 412 "Drescher, C.", 413 "Hanuschkin, A.", 414 "Winkler, L.", 415 "Peters, S.", 416 "Müller, K.-R." 417 ], 418 "year": 2021, 419 "relevance": "ML lifecycle quality assurance methodology cited as a foundation for secure AI deployment practices." 420 }, 421 { 422 "title": "OWASP Top 10 for LLM Applications", 423 "authors": [ 424 "OWASP Foundation" 425 ], 426 "year": 2025, 427 "relevance": "Widely referenced security classification framework for LLM applications, directly relevant to the survey's coverage of LLM safety and security methodology." 428 }, 429 { 430 "title": "MITRE ATLAS Adversarial Threat Landscape for Artificial-Intelligence Systems", 431 "authors": [ 432 "The MITRE Corporation" 433 ], 434 "year": 2023, 435 "relevance": "Industry-standard threat taxonomy for AI systems, used throughout the paper as the primary framework for identifying RAG attack vectors." 436 } 437 ] 438 }