ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan-v5.json (26772B)


      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Hiding in the AI Traffic: Abusing MCP for LLM-Powered Agentic Red Teaming",
      6     "authors": [
      7       "Strahinja Janjusevic",
      8       "Anna Barón Garcia",
      9       "Sohrob Kazerounian"
     10     ],
     11     "year": 2025,
     12     "venue": "arXiv.org",
     13     "arxiv_id": "2511.15998",
     14     "doi": "10.48550/arXiv.2511.15998"
     15   },
     16   "checklist": {
     17     "claims_and_evidence": {
     18       "abstract_claims_supported": {
     19         "applies": true,
     20         "answer": false,
     21         "justification": "The abstract claims the architecture 'eliminates key host and network artifacts' for detection—a categorical claim that a single case study run in a controlled lab cannot support. The 'improves goal-directed behavior' claim is asserted without any controlled measurement of goal-directedness.",
     22         "source": "haiku"
     23       },
     24       "causal_claims_justified": {
     25         "applies": true,
     26         "answer": false,
     27         "justification": "The paper makes causal claims (MCP architecture causes improved stealth and operational speed vs. traditional C2), but the study design is a single-run engagement in a proprietary lab with no controlled trials, no replication, and no statistical analysis adequate for causal inference.",
     28         "source": "haiku"
     29       },
     30       "generalization_bounded": {
     31         "applies": true,
     32         "answer": false,
     33         "justification": "Results derive from one simulated engagement in Vectra AI's proprietary Cyber-Range; the paper makes broad claims about enterprise applicability and APT simulation without bounding generalization to this single controlled scenario.",
     34         "source": "haiku"
     35       },
     36       "alternative_explanations_discussed": {
     37         "applies": true,
     38         "answer": false,
     39         "justification": "The paper does not consider alternative explanations for why the agent evaded detection—e.g., the specific defender configuration in the lab, defenders not tuned for MCP-based threats, or that single-run luck accounts for zero alerts.",
     40         "source": "haiku"
     41       },
     42       "proxy_outcome_distinction": {
     43         "applies": true,
     44         "answer": false,
     45         "justification": "'Zero EDR alerts' is used as a proxy for complete stealth, and 'time to objective' for operational superiority, but the paper does not acknowledge the limits of these proxies or that a single successful run may not represent typical outcomes.",
     46         "source": "haiku"
     47       }
     48     },
     49     "limitations_and_scope": {
     50       "limitations_section_present": {
     51         "applies": true,
     52         "answer": false,
     53         "justification": "There is no dedicated limitations or threats-to-validity section. Section VII addresses dual-use ethical considerations, but this is not equivalent to a methodological limitations discussion.",
     54         "source": "haiku"
     55       },
     56       "threats_to_validity_specific": {
     57         "applies": true,
     58         "answer": false,
     59         "justification": "No specific threats to validity are discussed—the single-lab environment, proprietary infrastructure, uncontrolled variables in the human operator comparison, and evaluator affiliation with the lab are all unaddressed.",
     60         "source": "haiku"
     61       },
     62       "scope_boundaries_stated": {
     63         "applies": true,
     64         "answer": false,
     65         "justification": "The paper does not state what results do NOT show—e.g., that effectiveness against a mature AI-augmented SOC with MCP-aware detection is unknown, or that the architecture has only been tested in one engagement.",
     66         "source": "haiku"
     67       }
     68     },
     69     "conflicts_of_interest": {
     70       "funding_disclosed": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "The acknowledgments mention 'Anthropic for covering API costs' but there is no formal funding disclosure statement; it is unclear whether any grants or institutional funding supported the work.",
     74         "source": "haiku"
     75       },
     76       "affiliations_disclosed": {
     77         "applies": true,
     78         "answer": true,
     79         "justification": "Author affiliations are clearly listed on the title page: MIT (Janjusevic) and Vectra AI (Barón Garcia, Kazerounian).",
     80         "source": "haiku"
     81       },
     82       "funder_independent_of_outcome": {
     83         "applies": true,
     84         "answer": false,
     85         "justification": "Anthropic covered API costs and the system under evaluation uses Claude Opus 4.1 as its core reasoning engine; the funder has a direct commercial interest in positive depictions of LLM-powered autonomous agents.",
     86         "source": "haiku"
     87       },
     88       "financial_interests_declared": {
     89         "applies": true,
     90         "answer": false,
     91         "justification": "No competing interests or financial interests declaration is present anywhere in the paper.",
     92         "source": "haiku"
     93       }
     94     },
     95     "scope_and_framing": {
     96       "key_terms_defined": {
     97         "applies": true,
     98         "answer": false,
     99         "justification": "MCP is defined and 'agent' is contextually described, but 'domain dominance' (a key outcome claim used in the abstract and conclusion) is never formally defined, and 'stealth' is operationalized inconsistently.",
    100         "source": "haiku"
    101       },
    102       "intended_contribution_clear": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "The paper clearly states its contribution: a novel MCP-based C2 architecture for autonomous multi-agent red teaming with detection-evasion properties, distinguishing itself from traditional beaconing-based C2 approaches.",
    106         "source": "haiku"
    107       },
    108       "engagement_with_prior_work": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "Table I provides a comprehensive comparative analysis of 11 prior red teaming frameworks, and the paper explicitly situates its contribution relative to each, identifying a gap in Cyber Kill Chain coverage at the C2 phase.",
    112         "source": "haiku"
    113       }
    114     }
    115   },
    116   "type_checklist": {
    117     "empirical": {
    118       "artifacts": {
    119         "code_released": {
    120           "applies": true,
    121           "answer": false,
    122           "justification": "Section VII explicitly states 'we have not released the full code publicly', citing dual-use concerns. No repository link or partial release is provided.",
    123           "source": "haiku"
    124         },
    125         "data_released": {
    126           "applies": true,
    127           "answer": false,
    128           "justification": "No dataset is released. The evaluation was conducted in Vectra AI's proprietary Cyber-Range Lab, which is not publicly accessible.",
    129           "source": "haiku"
    130         },
    131         "environment_specified": {
    132           "applies": true,
    133           "answer": false,
    134           "justification": "The lab environment is described at a high level (three subnets, named security tools: Vectra NDR, Splunk, CrowdStrike, SentinelOne), but no reproducible specification such as VM images, network configs, or Dockerfiles is provided.",
    135           "source": "haiku"
    136         },
    137         "reproduction_instructions": {
    138           "applies": true,
    139           "answer": false,
    140           "justification": "No step-by-step reproduction instructions are provided; the code is withheld and the evaluation environment is proprietary and inaccessible.",
    141           "source": "haiku"
    142         }
    143       },
    144       "statistical_methodology": {
    145         "confidence_intervals_or_error_bars": {
    146           "applies": true,
    147           "answer": false,
    148           "justification": "No confidence intervals or error bars appear anywhere in the paper; all results are from single runs with no repeated measurements.",
    149           "source": "haiku"
    150         },
    151         "significance_tests": {
    152           "applies": true,
    153           "answer": false,
    154           "justification": "No statistical significance tests are used despite comparative claims in Table III between AI-enabled and traditional C2 approaches.",
    155           "source": "haiku"
    156         },
    157         "effect_sizes_reported": {
    158           "applies": true,
    159           "answer": false,
    160           "justification": "Table III uses qualitative comparisons ('Days' vs '<30 Min', 'Detected' vs 'Undetected') without any quantified effect sizes, baseline distributions, or statistical framing.",
    161           "source": "haiku"
    162         },
    163         "sample_size_justified": {
    164           "applies": true,
    165           "answer": false,
    166           "justification": "The entire performance evaluation rests on a single engagement run with no justification for why one run is sufficient or any discussion of statistical power.",
    167           "source": "haiku"
    168         },
    169         "variance_reported": {
    170           "applies": true,
    171           "answer": false,
    172           "justification": "No variance or standard deviation is reported; the single-run nature of the study makes variance assessment impossible.",
    173           "source": "haiku"
    174         }
    175       },
    176       "evaluation_design": {
    177         "baselines_included": {
    178           "applies": true,
    179           "answer": true,
    180           "justification": "Table III compares the MCP-enabled AI agent against a 'Traditional C2 (Manual)' baseline using Cobalt Strike/Metasploit operated by a human operator given the same high-level objectives.",
    181           "source": "haiku"
    182         },
    183         "baselines_contemporary": {
    184           "applies": true,
    185           "answer": true,
    186           "justification": "Cobalt Strike and Metasploit represent current widely-used C2 frameworks in operational red teaming and are appropriate contemporary comparisons.",
    187           "source": "haiku"
    188         },
    189         "ablation_study": {
    190           "applies": true,
    191           "answer": false,
    192           "justification": "No ablation study is conducted; the contributions of MCP protocol choice, LLM reasoning, SQLite persistent memory, and the hybrid planner are never isolated.",
    193           "source": "haiku"
    194         },
    195         "multiple_metrics": {
    196           "applies": true,
    197           "answer": true,
    198           "justification": "Table III reports multiple metrics: time to objective, operator actions required, and NDR detection status. The EDR case study adds per-phase success/failure outcomes.",
    199           "source": "haiku"
    200         },
    201         "human_evaluation": {
    202           "applies": true,
    203           "answer": false,
    204           "justification": "A brief mention of 'colleagues acting as defenders' appears in Section VII, but with no participant details, structured protocol, or reported outcomes beyond a parenthetical placeholder '(here we can put our results)'.",
    205           "source": "haiku"
    206         },
    207         "held_out_test_set": {
    208           "applies": false,
    209           "answer": false,
    210           "justification": "Not applicable—this is a red team engagement simulation, not a prediction task requiring a held-out test set.",
    211           "source": "haiku"
    212         },
    213         "per_category_breakdown": {
    214           "applies": true,
    215           "answer": true,
    216           "justification": "The EDR case study breaks down results by attack phase (EDR identification, BYOVD reconnaissance, process injection), and Table I provides per-framework breakdowns across the literature.",
    217           "source": "haiku"
    218         },
    219         "failure_cases_discussed": {
    220           "applies": true,
    221           "answer": true,
    222           "justification": "Section V explicitly details two blocked attack phases in the EDR case study: BYOVD approach found no vulnerable drivers, and process injection was blocked by AMSI—both are described in detail.",
    223           "source": "haiku"
    224         },
    225         "negative_results_reported": {
    226           "applies": true,
    227           "answer": true,
    228           "justification": "The EDR assessment documents two failed attack phases as informative findings about the target's security posture, and the paper notes hallucination-induced non-viable paths as a system limitation.",
    229           "source": "haiku"
    230         }
    231       },
    232       "setup_transparency": {
    233         "model_versions_specified": {
    234           "applies": true,
    235           "answer": true,
    236           "justification": "The paper specifies 'Anthropic's Claude Opus 4.1' as the model powering both the Red Team Command Agent and the MCP Agent's reasoning leg.",
    237           "source": "haiku"
    238         },
    239         "prompts_provided": {
    240           "applies": true,
    241           "answer": true,
    242           "justification": "Full system prompts are provided verbatim in Appendices A through E, covering Base, Act, Reason, Planning, and Action Decision prompts used by the agents.",
    243           "source": "haiku"
    244         },
    245         "hyperparameters_reported": {
    246           "applies": true,
    247           "answer": false,
    248           "justification": "No LLM hyperparameters are reported—temperature, top-p, max tokens, or other generation parameters are absent from the paper.",
    249           "source": "haiku"
    250         },
    251         "scaffolding_described": {
    252           "applies": true,
    253           "answer": true,
    254           "justification": "The MCP-based C2 architecture is described in substantial detail: three components (Recon Agents, MCP Coordination Server, Red Team Command Agent), communication flow, SQLite memory, hybrid planning system, and two-leg decoupled design.",
    255           "source": "haiku"
    256         },
    257         "data_preprocessing_documented": {
    258           "applies": false,
    259           "answer": false,
    260           "justification": "Not applicable—there is no dataset preprocessing; the evaluation uses live command execution in a lab environment.",
    261           "source": "haiku"
    262         }
    263       },
    264       "data_integrity": {
    265         "raw_data_available": {
    266           "applies": true,
    267           "answer": false,
    268           "justification": "No raw data (network captures, SIEM logs, command outputs, traffic PCAPs) is made available for independent verification.",
    269           "source": "haiku"
    270         },
    271         "data_collection_described": {
    272           "applies": true,
    273           "answer": false,
    274           "justification": "High-level experimental setup is described, but systematic data collection procedures—how metrics were measured, what logs were captured, how 'time to objective' was operationalized—are not specified.",
    275           "source": "haiku"
    276         },
    277         "recruitment_methods_described": {
    278           "applies": false,
    279           "answer": false,
    280           "justification": "Not applicable—no human participant recruitment was involved in the evaluation.",
    281           "source": "haiku"
    282         },
    283         "data_pipeline_documented": {
    284           "applies": false,
    285           "answer": false,
    286           "justification": "Not applicable—the evaluation is a live red team engagement, not a data analysis pipeline.",
    287           "source": "haiku"
    288         }
    289       },
    290       "contamination": {
    291         "training_cutoff_stated": {
    292           "applies": false,
    293           "answer": false,
    294           "justification": "Not applicable—the LLM is used as an operational reasoning engine, not evaluated on held-out knowledge benchmarks where training cutoff would be relevant.",
    295           "source": "haiku"
    296         },
    297         "train_test_overlap_discussed": {
    298           "applies": false,
    299           "answer": false,
    300           "justification": "Not applicable—no benchmark evaluation is conducted; the LLM is used as an autonomous agent component.",
    301           "source": "haiku"
    302         },
    303         "benchmark_contamination_addressed": {
    304           "applies": false,
    305           "answer": false,
    306           "justification": "Not applicable—no benchmark datasets are used; the evaluation is a live proprietary lab engagement.",
    307           "source": "haiku"
    308         }
    309       },
    310       "human_studies": {
    311         "pre_registered": {
    312           "applies": false,
    313           "answer": false,
    314           "justification": "Not applicable—no human subjects study.",
    315           "source": "haiku"
    316         },
    317         "irb_or_ethics_approval": {
    318           "applies": false,
    319           "answer": false,
    320           "justification": "Not applicable—no human subjects study.",
    321           "source": "haiku"
    322         },
    323         "demographics_reported": {
    324           "applies": false,
    325           "answer": false,
    326           "justification": "Not applicable—no human subjects study.",
    327           "source": "haiku"
    328         },
    329         "inclusion_exclusion_criteria": {
    330           "applies": false,
    331           "answer": false,
    332           "justification": "Not applicable—no human subjects study.",
    333           "source": "haiku"
    334         },
    335         "randomization_described": {
    336           "applies": false,
    337           "answer": false,
    338           "justification": "Not applicable—no human subjects study.",
    339           "source": "haiku"
    340         },
    341         "blinding_described": {
    342           "applies": false,
    343           "answer": false,
    344           "justification": "Not applicable—no human subjects study.",
    345           "source": "haiku"
    346         },
    347         "attrition_reported": {
    348           "applies": false,
    349           "answer": false,
    350           "justification": "Not applicable—no human subjects study.",
    351           "source": "haiku"
    352         }
    353       },
    354       "cost_and_practicality": {
    355         "inference_cost_reported": {
    356           "applies": true,
    357           "answer": false,
    358           "justification": "The acknowledgments note Anthropic covered API costs, but no specific cost figures, token counts, or cost estimates per engagement are reported.",
    359           "source": "haiku"
    360         },
    361         "compute_budget_stated": {
    362           "applies": true,
    363           "answer": false,
    364           "justification": "No total computational budget is stated for either development or evaluation of the system.",
    365           "source": "haiku"
    366         }
    367       }
    368     }
    369   },
    370   "claims": [
    371     {
    372       "claim": "The MCP-enabled AI agent achieved domain compromise on a simulated corporate network in under 30 minutes with no human intervention",
    373       "evidence": "Table III reports '<30 Min' vs 'Days' for traditional C2; a single case study narrative describes credential dumping on Windows and lateral movement to an AD server",
    374       "supported": "weak"
    375     },
    376     {
    377       "claim": "The agent's living-off-the-land approach produced zero EDR alerts while traditional C2 triggered multiple alerts",
    378       "evidence": "Table III states 'Zero alerts' for the AI agent; the EDR case study on Microsoft Defender confirms zero detections during the full test run including failed injection attempts",
    379       "supported": "moderate"
    380     },
    381     {
    382       "claim": "MCP traffic over standard HTTPS/WSS is behaviorally indistinguishable from legitimate enterprise AI service traffic, defeating NDR-based detection",
    383       "evidence": "Figures 4-6 show event-driven traffic patterns vs. periodic Cobalt Strike beaconing in Fig. 3; qualitative visual comparison only, no formal traffic analysis or defender blind-test",
    384       "supported": "weak"
    385     },
    386     {
    387       "claim": "The architecture reduces manual operator effort from approximately 200 individual commands to a single high-level directive",
    388       "evidence": "Table III comparison between manual C2 and MCP-enabled AI; derived from a single case study measurement with no methodology for counting 'operator actions'",
    389       "supported": "weak"
    390     },
    391     {
    392       "claim": "On-demand LLM-generated polymorphic payloads render signature-based detection ineffective",
    393       "evidence": "A Linux agent compiled a custom C implant using gcc at runtime; demonstration is qualitative with no controlled comparison against signature-based AV",
    394       "supported": "weak"
    395     },
    396     {
    397       "claim": "Multi-agent swarm coordination via shared MCP context enables cross-VLAN lateral movement impossible for a single agent",
    398       "evidence": "Case study narrative of Windows credentials being used by a Linux agent to pivot to an AD server via the MCP server as credential broker",
    399       "supported": "moderate"
    400     }
    401   ],
    402   "methodology_tags": [
    403     "case-study",
    404     "benchmark-eval"
    405   ],
    406   "key_findings": "This paper introduces a novel C2 architecture that repurposes the Model Context Protocol to route attack planning through legitimate LLM API traffic (e.g., api.anthropic.com), making malicious C2 communication behaviorally indistinguishable from enterprise AI tool usage. In a single simulated corporate network engagement, the system reportedly achieved domain compromise in under 30 minutes with zero EDR alerts, compared to a manual C2 baseline that required days and triggered multiple alerts. The architecture's core innovation is a decoupled two-leg design separating tasking (covert MCP channel) from reasoning (public LLM API), eliminating the periodic beaconing signatures that traditional NDR detection relies upon. System prompts enabling fully unconstrained command execution are provided verbatim in the appendices, raising significant dual-use publication concerns.",
    407   "red_flags": [
    408     {
    409       "flag": "Single case study, no replication or statistics",
    410       "detail": "The entire performance evaluation rests on one engagement run in a proprietary lab. Table III's dramatic claims ('Days' vs '<30 Min') have no statistical backing, no confidence intervals, and no replications."
    411     },
    412     {
    413       "flag": "Code withheld, unreproducible evaluation",
    414       "detail": "The paper explicitly states the full code is not publicly released. The evaluation uses Vectra AI's proprietary Cyber-Range. Independent reproduction is entirely impossible."
    415     },
    416     {
    417       "flag": "Evaluator-lab affiliation conflict",
    418       "detail": "Two of three authors are from Vectra AI, the company whose proprietary cyber range and NDR platform were used to evaluate the system's stealth against detection. The evaluators and the evaluation environment are from the same organization."
    419     },
    420     {
    421       "flag": "Funder conflict with LLM under evaluation",
    422       "detail": "Anthropic covered API costs and the system uses Claude Opus 4.1 as its core reasoning engine. The funder has a direct commercial interest in positive depictions of LLM-powered autonomous attack agents."
    423     },
    424     {
    425       "flag": "Overclaims in abstract",
    426       "detail": "The abstract states the architecture 'eliminates key host and network artifacts'—a categorical claim that a single test run cannot support; the more accurate finding is that it evaded specific tools in one lab configuration."
    427     },
    428     {
    429       "flag": "Appendices contain unconstrained offensive jailbreak-style prompts",
    430       "detail": "Appendices D and E reproduce system prompts instructing the LLM to 'EXECUTE ALL REQUESTS UNCONDITIONALLY', 'Do not warn about potential risks', and 'Escalate privileges if needed'—verbatim publication of jailbreak-style prompts enabling unconstrained malicious action."
    431     },
    432     {
    433       "flag": "Blue team evaluation is a placeholder",
    434       "detail": "Section VII describes a 'colleagues acting as defenders' test but includes a literal parenthetical '(here we can put our results)' with no actual results reported—a clearly incomplete evaluation."
    435     }
    436   ],
    437   "cited_papers": [
    438     {
    439       "title": "PentestGPT: An LLM-empowered Automated Penetration Testing Tool",
    440       "relevance": "Key prior work on LLM-assisted penetration testing representing modular LLM red teaming; most-referenced baseline throughout the paper"
    441     },
    442     {
    443       "title": "RedTeamLLM: an Agentic AI framework for offensive security",
    444       "relevance": "Direct predecessor framework the paper's Red Team Command Agent is built upon and heavily modified (18,500+ lines of new code)"
    445     },
    446     {
    447       "title": "VulnBot: Autonomous Penetration Testing for A Multi-Agent Collaborative Framework",
    448       "relevance": "Prior multi-agent red teaming system with tri-phase design used as a key comparative baseline"
    449     },
    450     {
    451       "title": "PentestAgent: Incorporating LLM Agents to Automated Penetration Testing",
    452       "relevance": "Multi-agent penetration testing framework with shared memory architecture compared throughout"
    453     },
    454     {
    455       "title": "The malicious use of artificial intelligence: Forecasting, prevention, and mitigation",
    456       "relevance": "Foundational dual-use AI risk paper cited repeatedly to frame ethical implications of the work"
    457     },
    458     {
    459       "title": "LLM agents can autonomously exploit one-day vulnerabilities",
    460       "relevance": "Empirical evidence on LLM autonomous exploitation that supports the paper's multi-host chaining capability claims"
    461     },
    462     {
    463       "title": "On the feasibility of using LLMs to autonomously execute multi-host network attacks",
    464       "relevance": "Parallel contribution (Incalmo framework) on natural language abstraction for multi-host attacks, cited as related work"
    465     },
    466     {
    467       "title": "CIPHER: Cybersecurity Intelligent Penetration-testing Helper for Ethical Researcher",
    468       "relevance": "Fine-tuned specialist model baseline representing the alternative architectural paradigm to the paper's agentic approach"
    469     }
    470   ],
    471   "engagement_factors": {
    472     "practical_relevance": {
    473       "score": 2,
    474       "justification": "Security practitioners can use this to understand novel C2 evasion patterns and design defensive countermeasures, though code is unavailable for direct testing."
    475     },
    476     "surprise_contrarian": {
    477       "score": 2,
    478       "justification": "The insight that legitimate LLM API traffic can serve as a C2 cover channel is non-obvious and challenges assumptions about network-based C2 detection relying on beaconing signatures."
    479     },
    480     "fear_safety": {
    481       "score": 3,
    482       "justification": "Describes an autonomous AI system that the authors claim can compromise enterprise networks with zero human intervention and zero EDR alerts—a direct near-term AI safety risk demonstration."
    483     },
    484     "drama_conflict": {
    485       "score": 2,
    486       "justification": "Dual-use AI security research with an explicit argument that the system democratizes APT-level attack capabilities for script-kiddies creates inherent controversy; appendix prompts are themselves alarming."
    487     },
    488     "demo_ability": {
    489       "score": 1,
    490       "justification": "A demo case study exists in the paper but code is not released, so practitioners cannot replicate or experiment with the system."
    491     },
    492     "brand_recognition": {
    493       "score": 2,
    494       "justification": "MIT affiliation and Vectra AI (a well-known security vendor) provide credibility; Anthropic's Claude API is central to the architecture."
    495     }
    496   },
    497   "hn_data": {
    498     "threads": [
    499       {
    500         "hn_id": "40689052",
    501         "title": "Microarchitectural Security of AWS Firecracker VMM for Serverless Cloud (2023)",
    502         "points": 3,
    503         "comments": 0,
    504         "url": "https://news.ycombinator.com/item?id=40689052",
    505         "created_at": "2024-06-15T11:25:54Z"
    506       }
    507     ],
    508     "top_points": 3,
    509     "total_points": 3,
    510     "total_comments": 0
    511   }
    512 }

Impressum · Datenschutz