scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (26468B)
      1 {
      2   "paper": {
      3     "title": "Cognitive Control Architecture (CCA): A Lifecycle Supervision Framework for Robustly Aligned AI Agents",
      4     "authors": ["Zhibo Liang", "Tianze Hu", "Zaiye Chen", "Mingjie Tang"],
      5     "year": 2025,
      6     "venue": "arXiv",
      7     "arxiv_id": "2512.06716"
      8   },
      9   "checklist": {
     10     "artifacts": {
     11       "code_released": {
     12         "applies": true,
     13         "answer": false,
     14         "justification": "No repository URL or code archive link is provided anywhere in the paper, footnotes, or appendices. The AgentDojo benchmark link is provided (https://agentdojo.spylab.ai) but this is the evaluation benchmark, not the CCA implementation code."
     15       },
     16       "data_released": {
     17         "applies": true,
     18         "answer": true,
     19         "justification": "The evaluation is conducted on the publicly available AgentDojo benchmark (Debenedetti et al., 2024), with access link provided at https://agentdojo.spylab.ai. The benchmark's 97 multi-turn tasks are publicly accessible."
     20       },
     21       "environment_specified": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "No requirements.txt, Dockerfile, or detailed environment specification is provided. The paper mentions using the all-MiniLM-L6-v2 model and specific LLM APIs, but does not provide a comprehensive environment setup with library versions."
     25       },
     26       "reproduction_instructions": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "No step-by-step reproduction instructions are provided. Appendix B describes hyperparameters and Appendix C provides prompts, but there is no README or reproduction section with commands to replicate the experiments."
     30       }
     31     },
     32     "statistical_methodology": {
     33       "confidence_intervals_or_error_bars": {
     34         "applies": true,
     35         "answer": false,
     36         "justification": "The main results in Tables 1-3 report only point estimates. Error bars (mean ± std dev) are reported only in Table 7 (Appendix E.2) for a single attack type on a newer model version (DeepSeek-V3.2), not for the main experimental results."
     37       },
     38       "significance_tests": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "No statistical significance tests are used. The paper claims CCA outperforms baselines based solely on comparing point estimates (e.g., CCA's 0.34% ASR vs MELON's 0.16% ASR) without any hypothesis tests."
     42       },
     43       "effect_sizes_reported": {
     44         "applies": true,
     45         "answer": true,
     46         "justification": "The paper reports percentage improvements with baseline context throughout, e.g., 'reducing the average ASR from 11.99% to 0.34%, achieving an attack mitigation rate of over 97%' (Section 4.2), and 'BU of 87.63%, representing only a 2.06% drop compared to the undefended baseline of 89.69%'."
     47       },
     48       "sample_size_justified": {
     49         "applies": true,
     50         "answer": false,
     51         "justification": "No justification is given for using 97 tasks from AgentDojo as the evaluation set. No power analysis or discussion of whether this sample size is sufficient for the claims made."
     52       },
     53       "variance_reported": {
     54         "applies": true,
     55         "answer": false,
     56         "justification": "Main results (Tables 1-3) appear to be single-run experiments with no variance reported. Standard deviations are provided only in the supplementary Table 7 (5 runs on DeepSeek-V3.2 for one attack type), not for the primary results."
     57       }
     58     },
     59     "evaluation_design": {
     60       "baselines_included": {
     61         "applies": true,
     62         "answer": true,
     63         "justification": "The paper compares CCA against four baseline defense methods (Repeat Prompt, Spotlight, DeBERTa classifier, MELON) plus a No Defense baseline, as shown in Table 1."
     64       },
     65       "baselines_contemporary": {
     66         "applies": true,
     67         "answer": true,
     68         "justification": "The baselines include recent methods: MELON (Zhu et al., 2025), Spotlight (Hines et al., 2024), DeBERTa-based detection (ProtectAI, 2024), and Repeat Prompt. These represent the current state of the art in indirect prompt injection (IPI) defense."
     69       },
     70       "ablation_study": {
     71         "applies": true,
     72         "answer": true,
     73         "justification": "Section 4.3 and Table 3 present a systematic ablation study removing each of the four score components (S_causal, S_risk, S_prov, S_sem) individually. Additionally, Table 8 ablates the dynamic graph update mechanism, and Figure 4c shows the effect of removing the Intent Graph entirely."
     74       },
     75       "multiple_metrics": {
     76         "applies": true,
     77         "answer": true,
     78         "justification": "The paper uses three evaluation metrics: Attack Success Rate (ASR), Utility Under Attack (UA), and Benign Utility (BU), plus efficiency overhead measured in token usage. These are defined in Section 4.1."
     79       },
     80       "human_evaluation": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "The paper evaluates a defense framework against automated attacks on a benchmark with deterministic code-based evaluation. Human evaluation of system outputs is not relevant here; the AgentDojo benchmark provides ground-truth automated evaluation."
     84       },
     85       "held_out_test_set": {
     86         "applies": true,
     87         "answer": false,
     88         "justification": "The paper evaluates on the full AgentDojo benchmark (97 tasks) without discussing any held-out split. The hyperparameters (e.g., weight settings, threshold of 0.5) appear to have been tuned on the same benchmark used for evaluation, with no separate validation set mentioned."
     89       },
     90       "per_category_breakdown": {
     91         "applies": true,
     92         "answer": true,
     93         "justification": "Results are broken down by attack type (Direct, Ignore Previous, System Message, Important Messages) in Table 1, by domain (Travel, Workspace, Slack, Banking) in Figure 4b-d, and per component in the ablation study (Table 3)."
     94       },
     95       "failure_cases_discussed": {
     96         "applies": true,
     97         "answer": false,
     98         "justification": "No qualitative analysis of failure cases is provided. The paper does not discuss the specific scenarios where CCA's ASR was non-zero (e.g., the 0.84% remaining attack success for Important Messages), nor does it analyze why these attacks succeeded."
     99       },
    100       "negative_results_reported": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "The ablation study (Table 3) shows that removing the causal score causes ASR to increase nearly tenfold (from 0.53% to 4.95%) and UA to drop from 82.51% to 66.49%. The static graph ablation (Table 8) shows degradation. These demonstrate where components fail or degrade."
    104       }
    105     },
    106     "claims_and_evidence": {
    107       "abstract_claims_supported": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "The abstract claims CCA 'effectively withstands sophisticated attacks' and 'achieves uncompromised security with notable efficiency and robustness.' Table 1 shows 0.34% average ASR with 86.43% UA, and Figure 4b shows 3.3x efficiency improvement over MELON. These results support the abstract's claims, though 'uncompromised security' is slightly hyperbolic given the non-zero ASR."
    111       },
    112       "causal_claims_justified": {
    113         "applies": true,
    114         "answer": true,
    115         "justification": "The paper's causal claims (e.g., 'CCA achieves security through its two-pillar design') are supported by controlled ablation studies (Table 3) that systematically remove individual components and measure the impact. The ablation design is adequate for these component-level causal claims."
    116       },
    117       "generalization_bounded": {
    118         "applies": true,
    119         "answer": false,
    120         "justification": "The paper's title claims a 'Lifecycle Supervision Framework for Robustly Aligned AI Agents' but results are only on the AgentDojo benchmark with specific models (DeepSeek-V3.1, Kimi K2). While Appendix E adds two more models, the paper does not bound its claims to these tested settings. The Limitations section (Section 6) partially addresses this for 'highly open-ended, exploratory scenarios' but does not restrict the generalization claims in the title and abstract."
    121       },
    122       "alternative_explanations_discussed": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "The paper does not discuss alternative explanations for its results. For example, it does not consider whether the performance gains are due to the specific characteristics of AgentDojo tasks, whether the deterministic evaluation favors certain defense types, or whether the LLM used for adjudication contributes differently across attack types."
    126       }
    127     },
    128     "setup_transparency": {
    129       "model_versions_specified": {
    130         "applies": true,
    131         "answer": false,
    132         "justification": "The paper uses 'DeepSeek-V3.1' and 'Kimi K2' but does not provide specific API snapshot dates or version identifiers. 'DeepSeek-V3.1' is a marketing name without a snapshot date. Similarly, GPT-4.1-mini and Qwen3-Next-80B in the appendix lack specific version identifiers."
    133       },
    134       "prompts_provided": {
    135         "applies": true,
    136         "answer": true,
    137         "justification": "Appendix C provides the full prompt text for both the Intent Graph Generation and the Adjudicator. These are complete prompts with all sections, not just natural language descriptions. The prompts include specific formatting rules, scoring guides, and output schemas."
    138       },
    139       "hyperparameters_reported": {
    140         "applies": true,
    141         "answer": true,
    142         "justification": "Appendix B.1 reports: temperature 0.0, alignment score weights (w_sem=0.1, w_causal=0.7, w_prov=0.1, w_risk=0.1), decision threshold of 0.5. Appendix B.2 provides ablation weight configurations. Table 5 provides all S_risk values."
    143       },
    144       "scaffolding_described": {
    145         "applies": true,
    146         "answer": true,
    147         "justification": "The paper describes the CCA scaffolding in detail: Algorithm 1 shows the full execution loop, Section 3.2 describes the Intent Graph generation and runtime monitoring, Section 3.3 describes the Tiered Adjudicator, and the two-layer architecture is fully specified with formulas (Equations 1-4)."
    148       },
    149       "data_preprocessing_documented": {
    150         "applies": true,
    151         "answer": false,
    152         "justification": "The paper does not describe how the AgentDojo benchmark tasks were selected or filtered for evaluation. It states '97 multi-turn tasks' but does not discuss whether any tasks were excluded or how the attack scenarios were instantiated across these tasks."
    153       }
    154     },
    155     "limitations_and_scope": {
    156       "limitations_section_present": {
    157         "applies": true,
    158         "answer": true,
    159         "justification": "Section 6 is a dedicated 'Limitations' section that discusses specific limitations of the framework."
    160       },
    161       "threats_to_validity_specific": {
    162         "applies": true,
    163         "answer": true,
    164         "justification": "The Limitations section identifies specific threats: (1) 'efficacy of our proactive defense is highest for tasks that are clearly decomposable into an Intent Graph; its utility may be reduced in highly open-ended, exploratory scenarios,' and (2) 'the Inherent Action Risk Score (Srisk) is currently static and parameter-agnostic, which prevents a more nuanced, context-aware assessment.' These are specific to this system."
    165       },
    166       "scope_boundaries_stated": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "The Limitations section mentions two specific weaknesses but does not explicitly state what the results do NOT show or which settings are excluded from the claims. The paper does not state, for example, that results may not generalize beyond the AgentDojo benchmark, or that the approach has not been tested against adaptive attackers who know about CCA."
    170       }
    171     },
    172     "data_integrity": {
    173       "raw_data_available": {
    174         "applies": true,
    175         "answer": false,
    176         "justification": "No raw experimental data (individual task outcomes, per-task scores, adjudicator decisions) is made available. Only aggregate metrics are reported."
    177       },
    178       "data_collection_described": {
    179         "applies": true,
    180         "answer": true,
    181         "justification": "The data collection procedure is described: experiments are run on AgentDojo benchmark with 97 multi-turn tasks across four domains, using four specific attack types (Section 4.1). The benchmark uses deterministic code evaluation."
    182       },
    183       "recruitment_methods_described": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "No human participants are involved. The evaluation uses the publicly available AgentDojo benchmark, which is a standard benchmark."
    187       },
    188       "data_pipeline_documented": {
    189         "applies": true,
    190         "answer": false,
    191         "justification": "The paper does not document the full pipeline from raw benchmark execution to final reported metrics. It does not explain how the aggregate ASR, UA, and BU percentages are computed from individual task outcomes, or whether any tasks were excluded from the aggregation."
    192       }
    193     },
    194     "conflicts_of_interest": {
    195       "funding_disclosed": {
    196         "applies": true,
    197         "answer": false,
    198         "justification": "No funding or acknowledgments section is present in the paper. There is no mention of grants, corporate sponsors, or funding agencies."
    199       },
    200       "affiliations_disclosed": {
    201         "applies": true,
    202         "answer": true,
    203         "justification": "Author affiliations are listed on the first page: Zhibo Liang and Mingjie Tang are affiliated with Sichuan University. Tianze Hu and Zaiye Chen have email addresses listed but no explicit institutional affiliation."
    204       },
    205       "funder_independent_of_outcome": {
    206         "applies": true,
    207         "answer": false,
    208         "justification": "No funding is disclosed, so independence cannot be assessed. The absence of a funding disclosure is itself a concern — the paper should state whether the work was funded or unfunded."
    209       },
    210       "financial_interests_declared": {
    211         "applies": true,
    212         "answer": false,
    213         "justification": "No competing interests or financial interests statement is present in the paper."
    214       }
    215     },
    216     "contamination": {
    217       "training_cutoff_stated": {
    218         "applies": false,
    219         "answer": false,
    220         "justification": "This paper evaluates a defense framework against indirect prompt injection attacks, not model capability on a benchmark. The LLMs are used as components of the system (agent and adjudicator), not evaluated on knowledge benchmarks. Contamination is not relevant to the security evaluation."
    221       },
    222       "train_test_overlap_discussed": {
    223         "applies": false,
    224         "answer": false,
    225         "justification": "Not applicable: the paper tests a defense architecture, not model knowledge on benchmark tasks. The evaluation measures whether attacks succeed or are blocked, which is not subject to training data contamination."
    226       },
    227       "benchmark_contamination_addressed": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "Not applicable: the evaluation measures defense effectiveness against injected attacks, not model capability. The benchmark tasks are tools for testing security, not knowledge."
    231       }
    232     },
    233     "human_studies": {
    234       "pre_registered": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "No human participants are involved in this study."
    238       },
    239       "irb_or_ethics_approval": {
    240         "applies": false,
    241         "answer": false,
    242         "justification": "No human participants are involved in this study."
    243       },
    244       "demographics_reported": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants are involved in this study."
    248       },
    249       "inclusion_exclusion_criteria": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants are involved in this study."
    253       },
    254       "randomization_described": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants are involved in this study."
    258       },
    259       "blinding_described": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants are involved in this study."
    263       },
    264       "attrition_reported": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants are involved in this study."
    268       }
    269     },
    270     "cost_and_practicality": {
    271       "inference_cost_reported": {
    272         "applies": true,
    273         "answer": true,
    274         "justification": "Figure 4b reports average token usage per task for CCA vs MELON across all domains (CCA: 3857 tokens, MELON: 12772 tokens). Figure 4d reports Intent Graph generation cost (~1956 tokens). While not in dollar amounts, token counts are a meaningful cost measure."
    275       },
    276       "compute_budget_stated": {
    277         "applies": true,
    278         "answer": false,
    279         "justification": "No total computational budget is stated. The paper does not report total API spend, wall-clock time for the full evaluation, or total tokens consumed across all experiments."
    280       }
    281     }
    282   },
    283   "claims": [
    284     {
    285       "claim": "CCA reduces the average Attack Success Rate from 11.99% (no defense) to 0.34%, achieving over 97% attack mitigation.",
    286       "evidence": "Table 1, Section 4.2: Average ASR across four attack types drops from 11.99% to 0.34% with CCA on DeepSeek-V3.1.",
    287       "supported": "moderate"
    288     },
    289     {
    290       "claim": "CCA maintains the highest Utility Under Attack (86.43%) among all defense methods while achieving near-zero ASR.",
    291       "evidence": "Table 1: CCA achieves 86.43% UA vs MELON's 43.76% UA, with comparable security (0.34% vs 0.16% ASR).",
    292       "supported": "moderate"
    293     },
    294     {
    295       "claim": "CCA is approximately 3.3x more token-efficient than MELON.",
    296       "evidence": "Figure 4b: CCA uses 3857 average tokens per task vs MELON's 12772, a 3.3x improvement across all scenarios.",
    297       "supported": "moderate"
    298     },
    299     {
    300       "claim": "The CCA framework is model-agnostic, demonstrating effectiveness with Kimi K2 (reducing ASR from 29.50% to 0.53%).",
    301       "evidence": "Table 2: With Kimi K2, CCA reduces ASR from 29.50% to 0.53% while improving UA from 74.18% to 82.51%.",
    302       "supported": "moderate"
    303     },
    304     {
    305       "claim": "Causal analysis (S_causal) is the foundational component; removing it increases ASR nearly tenfold to 4.95%.",
    306       "evidence": "Table 3: w/o S_causal shows ASR of 4.95% (vs 0.53% full CCA) and UA of 66.49% (vs 82.51%).",
    307       "supported": "moderate"
    308     },
    309     {
    310       "claim": "CCA's main findings are stable across repeated runs, with ASR of 0.72 ± 0.14% on DeepSeek-V3.2.",
    311       "evidence": "Table 7, Appendix E.2: 5-run evaluation on DeepSeek-V3.2 gives ASR 0.72 ± 0.14%, UA 86.47 ± 0.82%.",
    312       "supported": "moderate"
    313     }
    314   ],
    315   "methodology_tags": ["benchmark-eval"],
    316   "key_findings": "The Cognitive Control Architecture (CCA) proposes a dual-layer defense against indirect prompt injection attacks on LLM agents: a proactive Intent Graph for control-flow and data-flow integrity checking, and a reactive Tiered Adjudicator using a multi-faceted Intent Alignment Score. On the AgentDojo benchmark, CCA achieves 0.34% average attack success rate while maintaining 86.43% utility under attack, substantially outperforming MELON (0.16% ASR but only 43.76% UA) in the security-functionality tradeoff. The approach is also 3.3x more token-efficient than MELON and generalizes across multiple agent models (DeepSeek-V3.1, Kimi K2, Qwen3-Next-80B, GPT-4.1-mini).",
    317   "red_flags": [
    318     {
    319       "flag": "Main results lack error bars",
    320       "detail": "The primary results in Tables 1-3 are single-run experiments without confidence intervals or standard deviations. Error bars are only provided in Appendix E.2 for one attack type on a different model version (DeepSeek-V3.2), not for the main comparison. Small differences between methods (e.g., 0.34% vs 0.16% ASR) could be within noise."
    321     },
    322     {
    323       "flag": "No failure case analysis",
    324       "detail": "CCA achieves non-zero ASR in several conditions (e.g., 0.84% on Important Messages, 0.42% on Direct) but the paper never analyzes why these attacks succeeded. Understanding failure modes is critical for a security framework."
    325     },
    326     {
    327       "flag": "Threshold selection not validated",
    328       "detail": "The decision threshold of 0.5 for the alignment score and the weight configuration (w_causal=0.7) appear to have been chosen without a documented tuning process or sensitivity analysis. The same benchmark used for reporting may have been used for tuning."
    329     },
    330     {
    331       "flag": "No code release",
    332       "detail": "For a security framework paper, the absence of released implementation code prevents independent verification of the results and limits reproducibility."
    333     },
    334     {
    335       "flag": "Unfair MELON comparison in cross-model experiments",
    336       "detail": "In Table 7, the MELON baseline is cited from the original paper on DeepSeek-V3.1, while CCA is evaluated on DeepSeek-V3.2. This is an apples-to-oranges comparison that the paper acknowledges but does not resolve."
    337     }
    338   ],
    339   "cited_papers": [
    340     {
    341       "title": "AgentDojo: A Dynamic Environment to Evaluate Attacks and Defenses for LLM Agents",
    342       "authors": ["Edoardo Debenedetti", "Jie Zhang", "Mislav Balunovic", "Luca Beurer-Kellner", "Marc Fischer", "Florian Tramèr"],
    343       "year": 2024,
    344       "relevance": "The primary evaluation benchmark used in this paper; a dynamic, stateful environment for testing LLM agent security."
    345     },
    346     {
    347       "title": "MELON: Provable Defense Against Indirect Prompt Injection Attacks in AI Agents",
    348       "authors": ["Kaijie Zhu", "Xianjun Yang", "Jindong Wang", "Wenbo Guo", "William Yang Wang"],
    349       "year": 2025,
    350       "arxiv_id": "2502.05174",
    351       "relevance": "State-of-the-art runtime verification defense against IPI attacks; the primary comparison baseline in this paper."
    352     },
    353     {
    354       "title": "Not What You've Signed Up For: Compromising Real-World LLM-Integrated Applications with Indirect Prompt Injection",
    355       "authors": ["Kai Greshake", "Sahar Abdelnabi", "Shailesh Mishra", "Christoph Endres", "Thorsten Holz", "Mario Fritz"],
    356       "year": 2023,
    357       "relevance": "Seminal work on indirect prompt injection attacks against LLM-integrated applications."
    358     },
    359     {
    360       "title": "IPIGuard: A Novel Tool Dependency Graph-Based Defense Against Indirect Prompt Injection in LLM Agents",
    361       "authors": ["Hengyu An", "Jinghuai Zhang", "Tianyu Du", "Chunyi Zhou", "Qingming Li", "Tao Lin", "Shouling Ji"],
    362       "year": 2025,
    363       "arxiv_id": "2508.15310",
    364       "relevance": "Control-flow integrity defense using tool-dependency graphs; closely related approach that CCA builds upon."
    365     },
    366     {
    367       "title": "The Task Shield: Enforcing Task Alignment to Defend Against Indirect Prompt Injection in LLM Agents",
    368       "authors": ["Feiran Jia", "Tong Wu", "Xin Qin", "Anna Squicciarini"],
    369       "year": 2024,
    370       "arxiv_id": "2412.16682",
    371       "relevance": "Runtime task alignment defense against IPI attacks in LLM agents."
    372     },
    373     {
    374       "title": "IsolateGPT: An Execution Isolation Architecture for LLM-Based Agentic Systems",
    375       "authors": ["Yuhao Wu", "Franziska Roesner", "Tadayoshi Kohno", "Ning Zhang", "Umar Iqbal"],
    376       "year": 2024,
    377       "arxiv_id": "2403.04960",
    378       "relevance": "System-level isolation defense for LLM agents using sandboxing, representing the architectural defense paradigm."
    379     },
    380     {
    381       "title": "Defending Against Indirect Prompt Injection Attacks with Spotlighting",
    382       "authors": ["Keegan Hines", "Gary Lopez", "Matthew Hall", "Federico Zarfati", "Yonatan Zunger", "Emre Kiciman"],
    383       "year": 2024,
    384       "arxiv_id": "2403.14720",
    385       "relevance": "Prompt engineering defense against IPI; one of the baseline methods in CCA's evaluation."
    386     },
    387     {
    388       "title": "Formalizing and Benchmarking Prompt Injection Attacks and Defenses",
    389       "authors": ["Yupei Liu", "Yuqi Jia", "Runpeng Geng", "Jinyuan Jia", "Neil Zhenqiang Gong"],
    390       "year": 2024,
    391       "relevance": "Formalizes prompt injection attacks and defenses; provides taxonomy used in understanding attack evolution."
    392     },
    393     {
    394       "title": "The Instruction Hierarchy: Training LLMs to Prioritize Privileged Instructions",
    395       "authors": ["Eric Wallace", "Kai Xiao", "Reimar Leike", "Lilian Weng", "Johannes Heidecke", "Alex Beutel"],
    396       "year": 2024,
    397       "arxiv_id": "2404.13208",
    398       "relevance": "Training-time approach to making LLMs distinguish between trusted and untrusted instructions."
    399     },
    400     {
    401       "title": "Agent-SafetyBench: Evaluating the Safety of LLM Agents",
    402       "authors": ["Zhexin Zhang", "Shiyao Cui", "Yida Lu", "Jingzhuo Zhou", "Junxiao Yang", "Hongning Wang", "Minlie Huang"],
    403       "year": 2024,
    404       "arxiv_id": "2412.14470",
    405       "relevance": "Benchmark for evaluating LLM agent safety, relevant to the broader agent security evaluation landscape."
    406     },
    407     {
    408       "title": "Universal and Transferable Adversarial Attacks on Aligned Language Models",
    409       "authors": ["Andy Zou", "Zifan Wang", "Nicholas Carlini", "Milad Nasr", "J Zico Kolter", "Matt Fredrikson"],
    410       "year": 2023,
    411       "arxiv_id": "2307.15043",
    412       "relevance": "Introduces GCG adversarial attack method; demonstrates optimization-based attacks that training-time defenses struggle against."
    413     },
    414     {
    415       "title": "ReAct: Synergizing Reasoning and Acting in Language Models",
    416       "authors": ["Shunyu Yao", "Jeffrey Zhao", "Dian Yu", "Nan Du", "Izhak Shafran", "Karthik Narasimhan", "Yuan Cao"],
    417       "year": 2023,
    418       "relevance": "Foundational agent paradigm (reasoning + acting) that CCA's defense architecture builds upon."
    419     },
    420     {
    421       "title": "StruQ: Defending Against Prompt Injection with Structured Queries",
    422       "authors": ["Sizhe Chen", "Julien Piet", "Chawin Sitawarin", "David Wagner"],
    423       "year": 2025,
    424       "relevance": "Structural defense against prompt injection using structured queries; represents the structured defense paradigm."
    425     }
    426   ]
    427 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs