scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (21840B)
      1 {
      2   "paper": {
      3     "title": "Prompt Injection Attacks on Agentic Coding Assistants: A Systematic Analysis of Vulnerabilities in Skills, Tools, and Protocol Ecosystems",
      4     "authors": [
      5       "Narek Maloyan",
      6       "Dmitry Namiot"
      7     ],
      8     "year": 2026,
      9     "arxiv_id": "2601.17548"
     10   },
     11   "scan_version": 2,
     12   "active_modules": [
     13     "survey_methodology"
     14   ],
     15   "methodology_tags": [
     16     "meta-analysis"
     17   ],
     18   "key_findings": "This SoK synthesizes 78 studies on prompt injection attacks against agentic coding assistants, proposing a three-dimensional taxonomy (delivery vector, modality, propagation). Key findings: 85%+ of attacks compromise at least one major platform, adaptive attacks bypass 90%+ of published defenses, and skill ecosystems (Claude Code, Copilot Extensions, MCP) lack adequate security review. The paper argues prompt injection is architecturally fundamental — unlike SQL injection, no parameterization fix exists.",
     19   "checklist": {
     20     "artifacts": {
     21       "code_released": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "No code repository or archive is mentioned. The paper states 'Attack code is not released; techniques are described at the conceptual level' (Section VIII.D)."
     25       },
     26       "data_released": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "No dataset of the 78 surveyed papers or extracted data is released. The paper could have released its corpus or analysis data but did not."
     30       },
     31       "environment_specified": {
     32         "applies": false,
     33         "answer": false,
     34         "justification": "This is a survey/SoK paper with no computational experiments requiring environment specification."
     35       },
     36       "reproduction_instructions": {
     37         "applies": true,
     38         "answer": false,
     39         "justification": "No reproduction instructions are provided. The search methodology is described (Section I.A) but no scripts, queries, or detailed steps for reproducing the literature review are given."
     40       }
     41     },
     42     "statistical_methodology": {
     43       "confidence_intervals_or_error_bars": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "Survey paper that synthesizes reported statistics from other studies; does not run its own experiments."
     47       },
     48       "significance_tests": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No statistical comparisons are made by this paper; it reports statistics from primary sources."
     52       },
     53       "effect_sizes_reported": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "Survey paper without its own empirical experiments."
     57       },
     58       "sample_size_justified": {
     59         "applies": false,
     60         "answer": false,
     61         "justification": "Survey paper; no experimental sample sizes."
     62       },
     63       "variance_reported": {
     64         "applies": false,
     65         "answer": false,
     66         "justification": "Survey paper; does not run its own experiments."
     67       }
     68     },
     69     "evaluation_design": {
     70       "baselines_included": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "The paper does not compare its taxonomy or framework against prior survey/SoK efforts in a structured way. Related work is discussed, but the proposed taxonomy is not benchmarked against it."
     74       },
     75       "baselines_contemporary": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "Not applicable — no experimental evaluation with baselines."
     79       },
     80       "ablation_study": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "Survey paper with no system components to ablate."
     84       },
     85       "multiple_metrics": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "No experimental evaluation performed."
     89       },
     90       "human_evaluation": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "No system outputs to evaluate; this is a literature synthesis."
     94       },
     95       "held_out_test_set": {
     96         "applies": false,
     97         "answer": false,
     98         "justification": "No experimental evaluation."
     99       },
    100       "per_category_breakdown": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "The paper provides per-platform vulnerability breakdowns (Table IV), per-defense bypass rates (Table III), and per-attack-category analysis throughout Section V."
    104       },
    105       "failure_cases_discussed": {
    106         "applies": true,
    107         "answer": true,
    108         "justification": "The paper extensively discusses where defenses fail — Table III shows defense bypass rates, and Section VI details fundamental limitations of each defense category."
    109       },
    110       "negative_results_reported": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "The paper's central finding is negative: all evaluated defenses are bypassable. Section VI.B documents that adaptive attacks bypass all 12 defenses with 78-93% success rates."
    114       }
    115     },
    116     "claims_and_evidence": {
    117       "abstract_claims_supported": {
    118         "applies": true,
    119         "answer": false,
    120         "justification": "The abstract claims '42 distinct attack techniques' and '18 defense mechanisms' but the body describes '31 attack techniques' (Section I contributions item 3) and '12 defense mechanisms' (Section I contributions item 4). These numbers are inconsistent between abstract and body."
    121       },
    122       "causal_claims_justified": {
    123         "applies": false,
    124         "answer": false,
    125         "justification": "The paper makes no causal claims; it synthesizes and categorizes existing findings."
    126       },
    127       "generalization_bounded": {
    128         "applies": true,
    129         "answer": true,
    130         "justification": "Section VIII.F explicitly bounds scope: 'Rapid Evolution' (findings may be outdated), 'Closed-Source Systems' (limited visibility), 'Benchmark Validity' (may not reflect real-world attacks), 'Selection Bias' in published attacks."
    131       },
    132       "alternative_explanations_discussed": {
    133         "applies": true,
    134         "answer": true,
    135         "justification": "Section VIII.F discusses alternative explanations: benchmark validity ('Existing benchmarks may not reflect real-world attack sophistication'), selection bias in published attacks, and the understudied state of adaptive defenses."
    136       },
    137       "proxy_outcome_distinction": {
    138         "applies": false,
    139         "answer": false,
    140         "justification": "This is a Systematization of Knowledge (SoK) survey paper that synthesizes findings from 78 studies. It reports statistics from primary sources (e.g., '85%+ attack success rates') rather than making claims based on its own measurements. No proxy-outcome gap applies to a literature synthesis."
    141       }
    142     },
    143     "setup_transparency": {
    144       "model_versions_specified": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "Survey paper; no models are run by the authors."
    148       },
    149       "prompts_provided": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "No prompting experiments conducted."
    153       },
    154       "hyperparameters_reported": {
    155         "applies": false,
    156         "answer": false,
    157         "justification": "No experiments conducted."
    158       },
    159       "scaffolding_described": {
    160         "applies": false,
    161         "answer": false,
    162         "justification": "No agentic system built or used."
    163       },
    164       "data_preprocessing_documented": {
    165         "applies": true,
    166         "answer": false,
    167         "justification": "Section I.A describes the pipeline: 183 initial results → 78 primary sources, with three inclusion criteria listed. However, the filtering criteria are generic ('addresses LLM-integrated systems', 'presents novel attacks/defenses', 'peer-reviewed or established venues') and counts per stage are not provided for intermediate steps."
    168       }
    169     },
    170     "limitations_and_scope": {
    171       "limitations_section_present": {
    172         "applies": true,
    173         "answer": true,
    174         "justification": "Section VIII.F 'Limitations of This Study' provides a dedicated limitations section with five specific bullet points."
    175       },
    176       "threats_to_validity_specific": {
    177         "applies": true,
    178         "answer": true,
    179         "justification": "Section VIII.F lists specific threats: rapid field evolution, closed-source system opacity, benchmark validity concerns, understudied adaptive defenses, and selection bias in published attacks. These are specific to this study."
    180       },
    181       "scope_boundaries_stated": {
    182         "applies": true,
    183         "answer": true,
    184         "justification": "The paper explicitly states scope: restricted to 2024-2025 publications (Section I.A), focused on agentic AI era, and Section VIII.F states what the results do not show (e.g., real-world attack sophistication, adaptive defense effectiveness)."
    185       }
    186     },
    187     "data_integrity": {
    188       "raw_data_available": {
    189         "applies": true,
    190         "answer": false,
    191         "justification": "The list of 78 surveyed papers is not released as a structured dataset. No supplementary materials or data files are mentioned."
    192       },
    193       "data_collection_described": {
    194         "applies": true,
    195         "answer": true,
    196         "justification": "Section I.A describes data collection: searched arXiv, IEEE Xplore, ACM DL, USENIX with specific query terms, restricted to Jan 2024–Dec 2025, 183 initial results yielding 78 primary sources."
    197       },
    198       "recruitment_methods_described": {
    199         "applies": false,
    200         "answer": false,
    201         "justification": "No human participants; data source is published literature."
    202       },
    203       "data_pipeline_documented": {
    204         "applies": true,
    205         "answer": false,
    206         "justification": "The pipeline from 183 to 78 papers is mentioned but intermediate filtering stages with counts are not documented. The abstract claims 78 studies but the body text also references works outside this scope without clarifying which are primary vs. supplementary sources."
    207       }
    208     },
    209     "conflicts_of_interest": {
    210       "funding_disclosed": {
    211         "applies": true,
    212         "answer": false,
    213         "justification": "No funding or acknowledgments section is present in the paper."
    214       },
    215       "affiliations_disclosed": {
    216         "applies": true,
    217         "answer": false,
    218         "justification": "Author names are listed but no institutional affiliations are provided in the extracted text."
    219       },
    220       "funder_independent_of_outcome": {
    221         "applies": true,
    222         "answer": false,
    223         "justification": "No funding information disclosed, so independence cannot be assessed."
    224       },
    225       "financial_interests_declared": {
    226         "applies": true,
    227         "answer": false,
    228         "justification": "No competing interests or financial disclosure statement is present."
    229       }
    230     },
    231     "contamination": {
    232       "training_cutoff_stated": {
    233         "applies": false,
    234         "answer": false,
    235         "justification": "Survey paper that does not evaluate any pre-trained model on benchmarks."
    236       },
    237       "train_test_overlap_discussed": {
    238         "applies": false,
    239         "answer": false,
    240         "justification": "No model evaluation conducted."
    241       },
    242       "benchmark_contamination_addressed": {
    243         "applies": false,
    244         "answer": false,
    245         "justification": "No model evaluation conducted."
    246       }
    247     },
    248     "human_studies": {
    249       "pre_registered": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants."
    253       },
    254       "irb_or_ethics_approval": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants."
    258       },
    259       "demographics_reported": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants."
    263       },
    264       "inclusion_exclusion_criteria": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants."
    268       },
    269       "randomization_described": {
    270         "applies": false,
    271         "answer": false,
    272         "justification": "No human participants."
    273       },
    274       "blinding_described": {
    275         "applies": false,
    276         "answer": false,
    277         "justification": "No human participants."
    278       },
    279       "attrition_reported": {
    280         "applies": false,
    281         "answer": false,
    282         "justification": "No human participants."
    283       }
    284     },
    285     "cost_and_practicality": {
    286       "inference_cost_reported": {
    287         "applies": false,
    288         "answer": false,
    289         "justification": "Survey paper with no computational method to cost."
    290       },
    291       "compute_budget_stated": {
    292         "applies": false,
    293         "answer": false,
    294         "justification": "Survey paper with no computational experiments."
    295       }
    296     },
    297     "survey_methodology": {
    298       "prisma_or_structured_protocol": {
    299         "applies": true,
    300         "answer": false,
    301         "justification": "Section I.A describes a structured search with query terms and inclusion criteria, but does not follow PRISMA, provide a flow diagram, or reference a registered protocol. The search methodology is described in one paragraph without reproducible query strings or database-specific search strategies."
    302       },
    303       "quality_assessment_of_sources": {
    304         "applies": true,
    305         "answer": false,
    306         "justification": "The paper does not assess the methodological quality of its 78 source papers. Attack success rates and defense evaluations are cited directly from primary sources without quality weighting or risk-of-bias assessment. All sources are treated equally regardless of methodology."
    307       },
    308       "publication_bias_discussed": {
    309         "applies": true,
    310         "answer": true,
    311         "justification": "Section VIII.F explicitly discusses selection bias: 'Published attacks may represent a biased sample. Successful attacks by sophisticated actors may never be disclosed.' This acknowledges publication bias in the attack literature."
    312       }
    313     }
    314   },
    315   "claims": [
    316     {
    317       "claim": "Attack success rates against state-of-the-art defenses exceed 85% when adaptive attack strategies are employed",
    318       "evidence": "Drawn from MCPSecBench [43] and Nasr et al. [70]; Table III shows adaptive bypass rates of 78-93% across 6 defense systems. Section VII.A reports 85%+ compromise rate.",
    319       "supported": "moderate"
    320     },
    321     {
    322       "claim": "42 distinct attack techniques are cataloged spanning input manipulation, tool poisoning, protocol exploitation, multimodal injection, and cross-origin context poisoning",
    323       "evidence": "The abstract claims 42 techniques, but Section I contribution item 3 states '31 attack techniques from the literature.' The taxonomy in Section IV lists categories but the count is inconsistent.",
    324       "supported": "weak"
    325     },
    326     {
    327       "claim": "Most defenses achieve less than 50% mitigation against sophisticated adaptive attacks",
    328       "evidence": "Table III shows that under adaptive attacks, reported low attack success rates (<3-12%) jump to 78-93%. However, this data is from Nasr et al. [70], not independent replication.",
    329       "supported": "moderate"
    330     },
    331     {
    332       "claim": "73% of tested platforms fail to adequately enforce at least one trust boundary",
    333       "evidence": "Stated in Section III.C but no methodology or data source is cited for this specific statistic. It appears to be the authors' assessment from platform analysis.",
    334       "supported": "weak"
    335     },
    336     {
    337       "claim": "Skill ecosystems are under-secured with inadequate security review and capability restriction",
    338       "evidence": "Table I compares platforms showing Claude Code and Cursor have no marketplace review. Section V.C documents concrete exploit chains. Supported by CVE evidence in Table II.",
    339       "supported": "strong"
    340     }
    341   ],
    342   "red_flags": [
    343     {
    344       "flag": "Abstract-body inconsistency",
    345       "detail": "Abstract claims '42 distinct attack techniques' and '18 defense mechanisms' but the body states '31 attack techniques' and '12 defense mechanisms.' These discrepancies undermine trust in the paper's precision."
    346     },
    347     {
    348       "flag": "No independent replication",
    349       "detail": "The paper explicitly states 'we did not conduct independent replication experiments' (Section I.A). All quantitative claims about attack success rates are drawn directly from primary sources, particularly MCPSecBench and Nasr et al."
    350     },
    351     {
    352       "flag": "No quality assessment of sources",
    353       "detail": "The 78 source papers are treated equally without methodological quality assessment. Attack success rates from different studies with varying rigor are synthesized as equivalent evidence."
    354     },
    355     {
    356       "flag": "Unsourced statistic",
    357       "detail": "The claim that '73% of tested platforms fail to adequately enforce at least one trust boundary' (Section III.C) lacks a citation or methodology description."
    358     }
    359   ],
    360   "cited_papers": [
    361     {
    362       "title": "Not what you've signed up for: Compromising real-world LLM-integrated applications with indirect prompt injection",
    363       "authors": [
    364         "K. Greshake",
    365         "S. Abdelnabi",
    366         "S. Mishra",
    367         "C. Endres",
    368         "T. Holz",
    369         "M. Fritz"
    370       ],
    371       "year": 2023,
    372       "relevance": "Foundational work on indirect prompt injection against LLM-integrated applications."
    373     },
    374     {
    375       "title": "MCPSecBench: A systematic security benchmark and playground for testing Model Context Protocols",
    376       "authors": [
    377         "Y. Yang",
    378         "D. Wu",
    379         "Y. Chen"
    380       ],
    381       "year": 2025,
    382       "arxiv_id": "2508.13220",
    383       "relevance": "Standardized benchmark for MCP security evaluation with 17 attack types across 4 surfaces."
    384     },
    385     {
    386       "title": "AgentDojo: A dynamic environment to evaluate prompt injection attacks and defenses for LLM agents",
    387       "authors": [
    388         "E. Debenedetti",
    389         "J. Zhang",
    390         "M. Balunovic",
    391         "L. Beurer-Kellner",
    392         "M. Fischer",
    393         "F. Tramèr"
    394       ],
    395       "year": 2024,
    396       "relevance": "Dynamic evaluation environment with 97 tasks and 629 security test cases for agent prompt injection."
    397     },
    398     {
    399       "title": "The attacker moves second: Stronger adaptive attacks bypass defenses against LLM jailbreaks and prompt injections",
    400       "authors": [
    401         "M. Nasr",
    402         "N. Carlini",
    403         "C. Sitawarin",
    404         "S. Schulhoff",
    405         "J. Hayes"
    406       ],
    407       "year": 2025,
    408       "arxiv_id": "2510.09023",
    409       "relevance": "Demonstrates all 12 evaluated defenses bypassed with 90%+ success, establishing lower bound on achievable security."
    410     },
    411     {
    412       "title": "Agent Security Bench (ASB): Formalizing and benchmarking attacks and defenses in LLM-based agents",
    413       "authors": [
    414         "H. Zhang",
    415         "J. Huang",
    416         "K. Mei"
    417       ],
    418       "year": 2025,
    419       "relevance": "Comprehensive framework for evaluating agent attack/defense effectiveness with attack success rates up to 84.3%."
    420     },
    421     {
    422       "title": "InjecAgent: Benchmarking indirect prompt injections in tool-integrated LLM agents",
    423       "authors": [
    424         "Q. Zhan",
    425         "R. Fang",
    426         "R. Bindu",
    427         "A. Gupta",
    428         "T. Hashimoto",
    429         "D. Kang"
    430       ],
    431       "year": 2024,
    432       "relevance": "Benchmark evaluating 30 LLM agents for indirect prompt injection vulnerability rates up to 47%."
    433     },
    434     {
    435       "title": "The instruction hierarchy: Training LLMs to prioritize privileged instructions",
    436       "authors": [
    437         "E. Wallace",
    438         "K. Xiao",
    439         "R. Leike",
    440         "L. Weng",
    441         "J. Heidecke",
    442         "A. Beutel"
    443       ],
    444       "year": 2024,
    445       "arxiv_id": "2404.13208",
    446       "relevance": "Proposes training LLMs to prioritize instruction sources as a defense against prompt injection."
    447     },
    448     {
    449       "title": "Defeating prompt injections by design",
    450       "authors": [
    451         "E. Debenedetti"
    452       ],
    453       "year": 2025,
    454       "arxiv_id": "2503.18813",
    455       "relevance": "CaMeL framework achieving provable security on 77% of AgentDojo tasks through capability-based isolation."
    456     },
    457     {
    458       "title": "Progent: Programmable privilege control for LLM agents",
    459       "authors": [
    460         "T. Shi",
    461         "J. He",
    462         "Z. Wang"
    463       ],
    464       "year": 2025,
    465       "arxiv_id": "2504.11703",
    466       "relevance": "Programmable privilege control reducing attack success from 41.2% to 2.2%."
    467     },
    468     {
    469       "title": "XOXO: Stealthy cross-origin context poisoning attacks against AI coding assistants",
    470       "authors": [
    471         "A. Storek",
    472         "M. Gupta",
    473         "N. Bhatt"
    474       ],
    475       "year": 2025,
    476       "arxiv_id": "2503.14281",
    477       "relevance": "Semantic attacks through cross-origin context poisoning specifically targeting AI coding assistants."
    478     },
    479     {
    480       "title": "IDEsaster: Security vulnerabilities in AI-powered integrated development environments",
    481       "authors": [
    482         "A. Marzouk"
    483       ],
    484       "year": 2025,
    485       "relevance": "Documented 30+ CVEs across major AI IDEs including Cursor, Copilot, and Codex CLI."
    486     },
    487     {
    488       "title": "Agentic AI security: Threats, defenses, evaluation, and open challenges",
    489       "authors": [
    490         "S. Datta",
    491         "S. K. Nahin",
    492         "A. Chhabra",
    493         "P. Mohapatra"
    494       ],
    495       "year": 2025,
    496       "arxiv_id": "2510.23883",
    497       "relevance": "Comprehensive survey of agentic AI security with threat modeling for autonomous systems."
    498     },
    499     {
    500       "title": "AgentHarm: A benchmark for measuring harmfulness of LLM agents",
    501       "authors": [
    502         "M. Andriushchenko"
    503       ],
    504       "year": 2025,
    505       "relevance": "Benchmark for evaluating harmful capabilities of LLM agents."
    506     }
    507   ]
    508 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs