scan.json (27167B)
1 { 2 "paper": { 3 "title": "The Hidden Dangers of Browsing AI Agents", 4 "authors": [ 5 "Mykyta Mudryi", 6 "Markiyan Chaklosh", 7 "Grzegorz Marcin Wójcik" 8 ], 9 "year": 2025, 10 "venue": "arXiv", 11 "arxiv_id": "2505.13076", 12 "doi": "10.48550/arXiv.2505.13076" 13 }, 14 "scan_version": 3, 15 "active_modules": [], 16 "methodology_tags": ["case-study", "qualitative"], 17 "key_findings": "The paper presents a white-box security assessment of Browser Use, a popular open-source LLM-powered browsing agent. Two vulnerabilities were identified: a critical domain validation bypass (CVE-2025-47241, CVSS 9.3) enabling SSRF through URL credential injection, and a high-severity credential exfiltration via prompt injection (CVSS 8.8) exploiting the agent's practice of appending parsed webpage HTML to prompts. The authors propose a defense-in-depth framework organized around initial access and post-exploitation mitigations, including planner-executor isolation, formal security analyzers, and session safeguards.", 18 "checklist": { 19 "artifacts": { 20 "code_released": { 21 "applies": true, 22 "answer": true, 23 "justification": "The paper references a public GitHub repository (arimlabs/agent-hijacking-poc) containing the proof-of-concept exploit code. Listing 2 provides the full PoC Python script, and the malicious payload is documented in the repository's Issues tab." 24 }, 25 "data_released": { 26 "applies": true, 27 "answer": true, 28 "justification": "The target system (Browser Use) is open source, and the exploit payloads and PoC code are published in the authors' GitHub repository. For a security assessment, the 'data' is the exploit code and target codebase, both publicly available." 
29 }, 30 "environment_specified": { 31 "applies": true, 32 "answer": false, 33 "justification": "The paper specifies 'Package Version: 0.1.44' for Browser Use and uses langchain_openai with ChatOpenAI(model='gpt-4o'), but provides no requirements.txt, Dockerfile, or detailed dependency listing. Library versions beyond the Browser Use version are not stated." 34 }, 35 "reproduction_instructions": { 36 "applies": true, 37 "answer": false, 38 "justification": "While Listing 2 provides the PoC script and the paper describes the attack flow, there are no explicit step-by-step reproduction instructions. The reader must infer setup steps (install Browser Use 0.1.44, set up the malicious GitHub issue, configure OpenAI API key)." 39 } 40 }, 41 "statistical_methodology": { 42 "confidence_intervals_or_error_bars": { 43 "applies": false, 44 "answer": false, 45 "justification": "This is a qualitative security assessment with no quantitative experimental results. The findings are binary vulnerability demonstrations (exploit succeeds/fails), not statistical measurements." 46 }, 47 "significance_tests": { 48 "applies": false, 49 "answer": false, 50 "justification": "No comparative quantitative claims are made. The paper demonstrates vulnerability existence through PoC exploits, not statistical comparisons." 51 }, 52 "effect_sizes_reported": { 53 "applies": false, 54 "answer": false, 55 "justification": "No quantitative effects are measured. The contribution is identifying specific security vulnerabilities, not measuring effect magnitudes." 56 }, 57 "sample_size_justified": { 58 "applies": false, 59 "answer": false, 60 "justification": "No statistical sample is drawn. The paper performs a white-box code review and demonstrates two specific vulnerabilities." 61 }, 62 "variance_reported": { 63 "applies": false, 64 "answer": false, 65 "justification": "No repeated measurements or experimental runs are conducted. Each vulnerability is demonstrated with a single proof-of-concept." 
66 } 67 }, 68 "evaluation_design": { 69 "baselines_included": { 70 "applies": true, 71 "answer": false, 72 "justification": "The paper mentions other browsing agents (Operator, Computer Use, Skyvern, Proxy) in Section 2.3 for benchmark performance comparison, but does not compare their security posture or test whether the same vulnerabilities exist in other agents." 73 }, 74 "baselines_contemporary": { 75 "applies": true, 76 "answer": false, 77 "justification": "No security baselines are compared. The paper lists contemporary agents in Table 1 but only assesses Browser Use. There is no comparison against how other agents handle domain validation or prompt injection." 78 }, 79 "ablation_study": { 80 "applies": false, 81 "answer": false, 82 "justification": "This is a security assessment identifying vulnerabilities, not a system with components to ablate. There is no proposed system whose components could be individually tested." 83 }, 84 "multiple_metrics": { 85 "applies": true, 86 "answer": false, 87 "justification": "Vulnerabilities are scored only via CVSS (9.3/10 and 8.8/10) with severity labels (Critical, High). No attack success rates, time-to-exploit, or other quantitative metrics are reported." 88 }, 89 "human_evaluation": { 90 "applies": false, 91 "answer": false, 92 "justification": "Human evaluation is not relevant to demonstrating the existence of code-level security vulnerabilities. The domain validation bypass is a deterministic code-level flaw, and the prompt-injection PoC, although dependent on GPT-4o output, has an objectively observable outcome (credentials are or are not submitted to the attacker-controlled endpoint) that requires no human raters." 93 }, 94 "held_out_test_set": { 95 "applies": false, 96 "answer": false, 97 "justification": "No benchmark or test set is used. The paper performs targeted code review and PoC demonstrations, not benchmark evaluation." 
98 }, 99 "per_category_breakdown": { 100 "applies": true, 101 "answer": true, 102 "justification": "The paper provides extensive per-layer threat breakdowns using the MAESTRO framework (Tables 4-11 in Appendix A), covering Foundation Models, Data Operations, Agent Frameworks, Deployment & Infrastructure, Evaluation & Observability, Security & Compliance, and Agent Ecosystem layers." 103 }, 104 "failure_cases_discussed": { 105 "applies": true, 106 "answer": false, 107 "justification": "The paper only reports successful exploits. There is no discussion of attack attempts that failed, partial mitigations that reduced attack effectiveness, or conditions under which the vulnerabilities could not be exploited." 108 }, 109 "negative_results_reported": { 110 "applies": true, 111 "answer": false, 112 "justification": "No negative results are reported. The paper does not mention attacks that did not work, mitigations that were effective against their exploits, or configurations where Browser Use was resilient." 113 } 114 }, 115 "claims_and_evidence": { 116 "abstract_claims_supported": { 117 "applies": true, 118 "answer": true, 119 "justification": "The abstract's main claims are supported: the threat model is presented in Section 3, defense-in-depth strategy in Section 4, and the CVE (CVE-2025-47241) and PoC are demonstrated in Section 5. The 'first end-to-end threat model' novelty claim is unverified but the paper's core empirical claims are substantiated." 120 }, 121 "causal_claims_justified": { 122 "applies": true, 123 "answer": true, 124 "justification": "The causal claim that 'untrusted web content can hijack agent behavior' is demonstrated through a working proof-of-concept exploit (Listing 2, Section 5.3.2). For security vulnerability claims, a working PoC is adequate causal evidence. The domain bypass is similarly demonstrated with concrete URL manipulation." 
125 }, 126 "generalization_bounded": { 127 "applies": true, 128 "answer": false, 129 "justification": "The title 'The Hidden Dangers of Browsing AI Agents' (plural, general) and abstract claim of 'comprehensive security evaluation of such agents' substantially overclaim. The actual assessment covers only Browser Use v0.1.44 with GPT-4o. Section 5.1 acknowledges that 'vulnerabilities discovered in Browser Use are likely to have practical relevance' to other agents, but this is speculative generalization, not bounded." 130 }, 131 "alternative_explanations_discussed": { 132 "applies": true, 133 "answer": false, 134 "justification": "The paper does not discuss alternative explanations for the vulnerabilities. For example, whether different LLMs would resist the prompt injection, whether the vulnerabilities are specific to Browser Use's architecture or inherent to all agent architectures, or whether configuration changes could mitigate the issues without code changes." 135 }, 136 "proxy_outcome_distinction": { 137 "applies": true, 138 "answer": true, 139 "justification": "The paper measures specific vulnerabilities (domain validation bypass, credential exfiltration via prompt injection) and claims specific security issues. The measurements directly match the claims — there is no proxy gap between what was measured and what is claimed." 140 } 141 }, 142 "setup_transparency": { 143 "model_versions_specified": { 144 "applies": true, 145 "answer": false, 146 "justification": "The PoC uses ChatOpenAI(model='gpt-4o') (Listing 2) without specifying a snapshot date or API version. 'gpt-4o' is a marketing name that maps to different model versions over time." 147 }, 148 "prompts_provided": { 149 "applies": true, 150 "answer": true, 151 "justification": "The paper provides the prompt template structure (Section 5.3.2), the injected malicious payload verbatim, and the full PoC task prompt in Listing 2. 
The attacker-controlled payload and the agent's base prompt construction are described in sufficient detail." 152 }, 153 "hyperparameters_reported": { 154 "applies": true, 155 "answer": false, 156 "justification": "No LLM hyperparameters (temperature, top-p, max tokens) are reported for the GPT-4o calls. The PoC uses default ChatOpenAI parameters without stating what those defaults are." 157 }, 158 "scaffolding_described": { 159 "applies": true, 160 "answer": true, 161 "justification": "Section 3.1 provides a detailed description of the browsing agent architecture including Perception, Reasoning, Planning, and External Tool Calls components. The prompt construction pipeline, feedback loop mechanism, and sensitive data handling are documented in Sections 3.1 and 5.2." 162 }, 163 "data_preprocessing_documented": { 164 "applies": true, 165 "answer": false, 166 "justification": "The paper does not describe the systematic methodology of the security assessment — how files were selected for review, what parts of the codebase were examined, or the process from initial code review to vulnerability identification. Specific files are named (browser_use/browser/context.py, browser_use/agent/prompts.py) but the assessment procedure is not documented." 167 } 168 }, 169 "limitations_and_scope": { 170 "limitations_section_present": { 171 "applies": true, 172 "answer": false, 173 "justification": "There is no dedicated limitations section. The conclusion briefly mentions 'the security landscape for AI browsing agents is still evolving' but does not discuss limitations of the study itself." 174 }, 175 "threats_to_validity_specific": { 176 "applies": true, 177 "answer": false, 178 "justification": "No threats to validity are discussed. The paper does not acknowledge that findings may be specific to Browser Use v0.1.44, to GPT-4o, or to the specific prompt construction pattern used." 
179 }, 180 "scope_boundaries_stated": { 181 "applies": true, 182 "answer": false, 183 "justification": "While Section 4.1 states that traditional cybersecurity for external tools is 'beyond the scope of this research,' the paper does not state what its security findings do NOT show — e.g., that results may not generalize to other agents, other LLMs, or production deployments with additional safeguards." 184 } 185 }, 186 "data_integrity": { 187 "raw_data_available": { 188 "applies": true, 189 "answer": true, 190 "justification": "The target system (Browser Use) is fully open source, and the PoC exploit code is published in the authors' GitHub repository (arimlabs/agent-hijacking-poc). The CVE (CVE-2025-47241) is publicly documented. All 'raw data' for this security assessment is independently verifiable." 191 }, 192 "data_collection_described": { 193 "applies": true, 194 "answer": false, 195 "justification": "The paper does not describe the systematic code review methodology — what tools were used, how the codebase was explored, or what criteria guided the vulnerability search. The specific vulnerability findings are presented without documenting the assessment process." 196 }, 197 "recruitment_methods_described": { 198 "applies": false, 199 "answer": false, 200 "justification": "No human participants are involved. The data source is an open-source codebase (Browser Use), which does not require recruitment methodology." 201 }, 202 "data_pipeline_documented": { 203 "applies": true, 204 "answer": false, 205 "justification": "The pipeline from initial code review to vulnerability identification to PoC development is not documented. The paper jumps from 'we conducted a security evaluation' to presenting specific findings without describing intermediate steps." 206 } 207 }, 208 "conflicts_of_interest": { 209 "funding_disclosed": { 210 "applies": true, 211 "answer": false, 212 "justification": "No funding source is disclosed anywhere in the paper. 
There is no acknowledgments section mentioning grants or sponsors." 213 }, 214 "affiliations_disclosed": { 215 "applies": true, 216 "answer": true, 217 "justification": "Author affiliations are clearly listed: ARIMLABS.AI, Polish-Japanese Academy of Information Technology, Maria Curie-Sklodowska University, and University of the National Education Commission in Kraków." 218 }, 219 "funder_independent_of_outcome": { 220 "applies": true, 221 "answer": false, 222 "justification": "Without disclosed funding, funder independence cannot be assessed. ARIMLABS.AI is a security-oriented company that could benefit commercially from publicizing AI agent vulnerabilities, but no funding relationship is stated." 223 }, 224 "financial_interests_declared": { 225 "applies": true, 226 "answer": false, 227 "justification": "No competing interests or financial interests statement is present. The authors' affiliation with ARIMLABS.AI (which appears to be a security consulting/research firm) is not discussed as a potential conflict." 228 } 229 }, 230 "contamination": { 231 "training_cutoff_stated": { 232 "applies": false, 233 "answer": false, 234 "justification": "This paper does not evaluate a pre-trained model's capability on any benchmark. It is a security assessment testing defense mechanisms and identifying vulnerabilities in agent infrastructure, not model knowledge." 235 }, 236 "train_test_overlap_discussed": { 237 "applies": false, 238 "answer": false, 239 "justification": "No benchmark evaluation of model capability is performed. The paper tests security vulnerabilities in agent architecture, not model performance on benchmarks." 240 }, 241 "benchmark_contamination_addressed": { 242 "applies": false, 243 "answer": false, 244 "justification": "No benchmark evaluation is conducted. The WebVoyager results cited in Table 1 are from other papers, not the authors' own evaluation." 
245 } 246 }, 247 "human_studies": { 248 "pre_registered": { 249 "applies": false, 250 "answer": false, 251 "justification": "No human participants are involved in this study. It is a white-box security assessment of software." 252 }, 253 "irb_or_ethics_approval": { 254 "applies": false, 255 "answer": false, 256 "justification": "No human participants are involved. The study analyzes open-source code and demonstrates software vulnerabilities." 257 }, 258 "demographics_reported": { 259 "applies": false, 260 "answer": false, 261 "justification": "No human participants are involved in this security assessment." 262 }, 263 "inclusion_exclusion_criteria": { 264 "applies": false, 265 "answer": false, 266 "justification": "No human participants are involved in this security assessment." 267 }, 268 "randomization_described": { 269 "applies": false, 270 "answer": false, 271 "justification": "No human participants or experimental conditions requiring randomization." 272 }, 273 "blinding_described": { 274 "applies": false, 275 "answer": false, 276 "justification": "No human participants or experimental conditions requiring blinding." 277 }, 278 "attrition_reported": { 279 "applies": false, 280 "answer": false, 281 "justification": "No human participants are involved in this security assessment." 282 } 283 }, 284 "cost_and_practicality": { 285 "inference_cost_reported": { 286 "applies": false, 287 "answer": false, 288 "justification": "This is a security assessment demonstrating vulnerability existence, not proposing a compute-intensive method. The cost of running the PoC is irrelevant to the contribution's validity." 289 }, 290 "compute_budget_stated": { 291 "applies": false, 292 "answer": false, 293 "justification": "This is a security assessment, not a method requiring significant compute. No computational budget is relevant to the paper's claims." 
294 } 295 } 296 }, 297 "claims": [ 298 { 299 "claim": "Browser Use's FQDN validation can be bypassed using URL credential injection, enabling navigation to unauthorized domains (CVE-2025-47241, CVSS 9.3).", 300 "evidence": "Section 5.3.1 demonstrates that the URL https://example.com:pass@localhost:8080 bypasses the allowed_domains check because the parser treats 'example.com' as the domain when the actual target is 'localhost'. The vulnerable code in browser_use/browser/context.py is shown in Listing 1.", 301 "supported": "strong" 302 }, 303 { 304 "claim": "Stored credentials can be exfiltrated via prompt injection through malicious webpage content (CVSS 8.8).", 305 "evidence": "Section 5.3.2 provides a full proof-of-concept (Listing 2) where a malicious GitHub issue injects a HumanMessage override into the agent's prompt, causing it to extract <secret> tokens and submit them to an attacker-controlled endpoint (c2-demo.arimlabs.ai).", 306 "supported": "strong" 307 }, 308 { 309 "claim": "This paper presents the first end-to-end threat model for browsing agents.", 310 "evidence": "Section 3 presents a MAESTRO-based threat model with per-layer analysis (Tables 4-11). 
However, the 'first' claim is asserted without systematic evidence that no prior end-to-end threat model exists for this class of agents.", 311 "supported": "weak" 312 }, 313 { 314 "claim": "The proposed defense-in-depth strategy (input sanitization, planner-executor isolation, formal analyzers, session safeguards) can protect against both initial access and post-exploitation attack vectors.", 315 "evidence": "Section 4 discusses these mitigations with citations to prior work (e.g., f-secure LLM system reducing injection success to 0%), but the authors do not empirically test any of these defenses against their own discovered vulnerabilities.", 316 "supported": "weak" 317 }, 318 { 319 "claim": "Placing untrusted content at the end of prompts increases prompt injection risk because LLMs disproportionately attend to tokens at the beginning and end.", 320 "evidence": "Section 5.2 cites Liu et al. [10] ('Lost in the Middle') and demonstrates that Browser Use appends parsed HTML at the prompt's end. The cited paper supports the attention pattern claim, and the PoC confirms the exploit succeeds in this configuration.", 321 "supported": "moderate" 322 } 323 ], 324 "red_flags": [ 325 { 326 "flag": "Single target, broad title", 327 "detail": "The paper is titled 'The Hidden Dangers of Browsing AI Agents' (general) but only tests Browser Use v0.1.44 with GPT-4o. No other agents are security-tested. The claim of 'comprehensive security evaluation' overclaims the actual scope." 328 }, 329 { 330 "flag": "No failure reporting", 331 "detail": "Only successful exploits are reported. There is no discussion of attacks that failed, LLMs that resisted the prompt injection, or configurations where Browser Use was resilient. This creates selection bias in the findings." 
332 }, 333 { 334 "flag": "Mitigations proposed but not tested", 335 "detail": "Section 4 proposes an extensive defense-in-depth strategy citing prior work, but none of the proposed mitigations are tested against the paper's own discovered vulnerabilities. The gap between the threat analysis and the mitigation recommendations is unbridged by evidence." 336 }, 337 { 338 "flag": "Potential commercial interest", 339 "detail": "Two authors are affiliated with ARIMLABS.AI, which appears to be a security consulting firm. The PoC exfiltrates data to c2-demo.arimlabs.ai. No competing interests statement is provided despite the potential commercial benefit of publicizing AI agent vulnerabilities." 340 }, 341 { 342 "flag": "No systematic assessment methodology", 343 "detail": "The paper does not describe how the security assessment was conducted — what tools were used, what parts of the codebase were examined, or what methodology guided the analysis. The reader cannot assess the completeness or rigor of the code review." 344 } 345 ], 346 "cited_papers": [ 347 { 348 "title": "WebGPT: Browser-assisted Question-Answering with Human Feedback", 349 "authors": ["R. Nakano"], 350 "year": 2021, 351 "arxiv_id": "2112.09332", 352 "relevance": "Early LLM-based web browsing agent that established the paradigm of browser-assisted question answering with human feedback." 353 }, 354 { 355 "title": "ReAct: Synergizing Reasoning and Acting in Language Models", 356 "authors": ["S. Yao"], 357 "year": 2022, 358 "arxiv_id": "2210.03629", 359 "relevance": "Foundational reasoning-and-acting framework for LLM agents that underpins the sense-plan-act loop discussed in the paper." 360 }, 361 { 362 "title": "Mind2Web: Towards a Generalist Agent for the Web", 363 "authors": ["X. Deng"], 364 "year": 2023, 365 "arxiv_id": "2306.06070", 366 "relevance": "Benchmark dataset for web agent evaluation using human action traces across real websites." 
367 }, 368 { 369 "title": "WebArena: A Realistic Web Environment for Building Autonomous Agents", 370 "authors": ["S. Zhou"], 371 "year": 2023, 372 "arxiv_id": "2307.13854", 373 "relevance": "Realistic browser environment benchmark for testing web agent capabilities with complex dynamic web tasks." 374 }, 375 { 376 "title": "WebVoyager: Building an End-to-End Web Agent with Large Multimodal Models", 377 "authors": ["H. He"], 378 "year": 2024, 379 "arxiv_id": "2401.13919", 380 "relevance": "End-to-end multimodal web agent benchmark used as the primary performance comparison for browsing agents in Table 1." 381 }, 382 { 383 "title": "AI Agents Under Threat: A Survey of Key Security Challenges and Future Pathways", 384 "authors": ["Zehang Deng"], 385 "year": 2024, 386 "arxiv_id": "2406.02630", 387 "relevance": "Survey of security challenges facing AI agents, covering knowledge gaps in multi-step user inputs and untrusted external entity interactions." 388 }, 389 { 390 "title": "Security of AI Agents", 391 "authors": ["Yifeng He"], 392 "year": 2024, 393 "arxiv_id": "2406.08689", 394 "relevance": "Discusses session management and isolation as security mechanisms for AI agents, relevant to post-exploitation defenses." 395 }, 396 { 397 "title": "AgentDojo: A Dynamic Environment to Evaluate Prompt Injection Attacks and Defenses for LLM Agents", 398 "authors": ["Edoardo Debenedetti"], 399 "year": 2024, 400 "arxiv_id": "2406.13352", 401 "relevance": "Framework for evaluating prompt injection attacks and defenses in LLM agents; cited for defense baseline reducing attack success from 25% to 8%." 402 }, 403 { 404 "title": "AI Agents with Formal Security Guarantees", 405 "authors": ["Mislav Balunović"], 406 "year": 2024, 407 "arxiv_id": null, 408 "relevance": "Proposes formal security analyzers that enforce hard constraints on agent actions using domain-specific security rules." 
409 }, 410 { 411 "title": "System-Level Defense against Indirect Prompt Injection Attacks: An Information Flow Control Perspective", 412 "authors": ["Fangzhou Wu"], 413 "year": 2024, 414 "arxiv_id": "2409.19091", 415 "relevance": "f-secure LLM system with planner-executor isolation that reportedly reduces prompt injection success to 0% while preserving functionality." 416 }, 417 { 418 "title": "Goal-guided Generative Prompt Injection Attack on Large Language Models", 419 "authors": ["Chong Zhang"], 420 "year": 2024, 421 "arxiv_id": "2404.07234", 422 "relevance": "Studies prompt injection attacks on LLMs, cited for the finding that larger models tend to perform better at mitigating prompt injection." 423 }, 424 { 425 "title": "Lost in the Middle: How Language Models Use Long Contexts", 426 "authors": ["Nelson F. Liu"], 427 "year": 2023, 428 "arxiv_id": "2307.03172", 429 "relevance": "Demonstrates that LLMs attend disproportionately to tokens at the beginning and end of prompts, directly relevant to the prompt injection attack vector." 430 } 431 ], 432 "engagement_factors": { 433 "practical_relevance": { 434 "score": 2, 435 "justification": "Directly actionable for Browser Use users (upgrade to v0.1.45) and provides a threat model framework applicable to agent developers building similar systems." 436 }, 437 "surprise_contrarian": { 438 "score": 1, 439 "justification": "Prompt injection vulnerabilities in LLM agents are widely expected; the URL parsing bypass is a known class of vulnerability (SSRF via credential injection in URLs)." 440 }, 441 "fear_safety": { 442 "score": 3, 443 "justification": "Demonstrates credential exfiltration via prompt injection (CVSS 8.8) from a 60K-star AI agent with a working PoC, alongside a published critical CVE for a domain validation bypass (CVE-2025-47241, CVSS 9.3), directly feeding AI safety and security anxieties." 
444 }, 445 "drama_conflict": { 446 "score": 1, 447 "justification": "Responsible disclosure was followed and the vulnerability was patched; no adversarial relationship or controversy with Browser Use maintainers." 448 }, 449 "demo_ability": { 450 "score": 2, 451 "justification": "PoC code is provided in-paper and in a public GitHub repo, but requires installing a specific (now outdated) Browser Use version and OpenAI API access to reproduce." 452 }, 453 "brand_recognition": { 454 "score": 1, 455 "justification": "Browser Use has 60K GitHub stars but is not a mainstream consumer product; ARIMLABS.AI is not widely recognized." 456 } 457 } 458 }