ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan-v5.json (29572B)


      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Manipulating LLM Web Agents with Indirect Prompt Injection Attack via HTML Accessibility Tree",
      6     "authors": [
      7       "Sam Johnson",
      8       "Viet Pham",
      9       "Thai Le"
     10     ],
     11     "year": 2025,
     12     "venue": "arXiv.org",
     13     "arxiv_id": "2507.14799",
     14     "doi": "10.48550/arXiv.2507.14799"
     15   },
     16   "checklist": {
     17     "claims_and_evidence": {
     18       "abstract_claims_supported": {
     19         "applies": true,
     20         "answer": true,
     21         "justification": "All abstract claims are substantiated: IPI vulnerability demonstrated on 5 real websites (Section 3), universal triggers optimized via GCG (Section 2.3, Eq. 3), high success rates shown (Figures 5–6: 0.83–0.99 ASR for targeted attacks, 0.55 for universal login attack), credential exfiltration demonstrated (Section 3.3).",
     22         "source": "haiku"
     23       },
     24       "causal_claims_justified": {
     25         "applies": true,
     26         "answer": true,
     27         "justification": "Paper establishes causal link between optimized triggers and agent actions: GCG is designed to find triggers that maximize target output probability (Eq. 2–3). Attack success rate (ASR) isolates trigger effect. However, evaluation is on synthetic attack scenarios (authors control HTML) and only one agent framework (Browser Gym + Llama-3.1), limiting generality of causal claims.",
     28         "source": "haiku"
     29       },
     30       "generalization_bounded": {
     31         "applies": true,
     32         "answer": false,
     33         "justification": "Claims in title and abstract ('LLM-based web agents' broadly vulnerable) exceed scope tested: only 5 websites, single framework (Browser Gym), single base model (Llama-3.1-8B-Instruct). Transferability section (4) shows triggers do NOT transfer to Llama-2 or Mistral. Scope should be bounded to 'Browser Gym agents powered by Llama-3.1' not 'LLM-based web agents' generally.",
     34         "source": "haiku"
     35       },
     36       "alternative_explanations_discussed": {
     37         "applies": true,
     38         "answer": true,
     39         "justification": "Failure Analysis (Section 4) identifies two clusters of failure modes: (1) instructions with only one obvious action give the correct response a high prior probability, (2) chain-of-thought reasoning overrides the adversarial trigger. Together these explain the ~17% failure rate in the TWUI scenario.",
     40         "source": "haiku"
     41       },
     42       "proxy_outcome_distinction": {
     43         "applies": true,
     44         "answer": true,
     45         "justification": "Paper clearly distinguishes measured outcomes from claims: ASR (syntactic validity of target action) vs. ASRV (exact target sequence). For login attacks, separately reports whether both credentials leaked vs. either one. Measured outcomes (ASR, time-to-completion) directly correspond to claimed attack success.",
     46         "source": "haiku"
     47       }
     48     },
     49     "limitations_and_scope": {
     50       "limitations_section_present": {
     51         "applies": true,
     52         "answer": true,
     53         "justification": "Dedicated Section 6 (Limitations) identifies three concrete threats: (1) attacker must control HTML content, (2) triggers are model-specific (failed transfer to Llama-2, Mistral), (3) triggers optimized for specific target sequence, invalid for different action spaces.",
     54         "source": "haiku"
     55       },
     56       "threats_to_validity_specific": {
     57         "applies": true,
     58         "answer": true,
     59         "justification": "Threats are specific and actionable: 'attacker must have access to some part of the HTML that will be consumed by the navigation agent' (enables defense via content control); 'triggers are trained for a particular LLM or set of LLMs' (open-source dependency weakness); 'closed-source framework that rotates action-space scheme will be much less susceptible.' Contrasts with boilerplate.",
     60         "source": "haiku"
     61       },
     62       "scope_boundaries_stated": {
     63         "applies": true,
     64         "answer": false,
     65         "justification": "Section 6 discusses defenses (closed-source frameworks) rather than explicit scope boundaries. Paper does NOT clearly state what was tested: '5 websites' (implicit in Section 3), 'Llama-3.1 only' (Figure 5 notes), 'Browser Gym only.' Claims about 'LLM-based web agents' lack explicit boundary to tested scope.",
     66         "source": "haiku"
     67       }
     68     },
     69     "conflicts_of_interest": {
     70       "funding_disclosed": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "No funding source, grant number, or financial support statement present. Per the scan instructions, the absence of a funding statement counts as 'no mention of funding' and is scored FALSE.",
     74         "source": "haiku"
     75       },
     76       "affiliations_disclosed": {
     77         "applies": true,
     78         "answer": true,
     79         "justification": "Author affiliations clearly listed: Sam Johnson (Indiana University), Viet Pham (University of Science, Ho Chi Minh City), Thai Le (Indiana University). No affiliation with Browser Gym, Llama, or evaluated products disclosed.",
     80         "source": "haiku"
     81       },
     82       "funder_independent_of_outcome": {
     83         "applies": false,
     84         "answer": false,
     85         "justification": "No funding disclosed; this question is NA. No potential funder conflict evident.",
     86         "source": "haiku"
     87       },
     88       "financial_interests_declared": {
     89         "applies": true,
     90         "answer": false,
     91         "justification": "No competing interests statement, patent disclosures, or financial conflict declaration present. Code released under MIT License (open source), suggesting no commercial interest, but no formal declaration.",
     92         "source": "haiku"
     93       }
     94     },
     95     "scope_and_framing": {
     96       "key_terms_defined": {
     97         "applies": true,
     98         "answer": true,
     99         "justification": "Key terms defined: 'Indirect Prompt Injection' cited to Greshake et al. (2023) and explained; 'accessibility tree' defined as 'HTML parsed for readability'; 'universal adversarial triggers' explained via GCG algorithm (Section 2.3); 'web navigation agent' described in System Design (Section 2.1).",
    100         "source": "haiku"
    101       },
    102       "intended_contribution_clear": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "Four contributions explicitly identified: (1) first practical demonstration of IPI attacks on web navigation agents (Introduction); (2) extension of GCG to universal triggers across multiple prompts (Eq. 3, Section 2.3); (3) empirical evaluation on real websites and frameworks; (4) code and demo website released.",
    106         "source": "haiku"
    107       },
    108       "engagement_with_prior_work": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "Related Work (Section 4.1) engages with prior IPI work (Greshake et al.), prompt injection attacks (Liu et al., Zhan et al.), adversarial attacks on NLP (Jin et al., Boucher et al., Wallace et al.). Paper shows how it extends Zou et al.'s GCG and differs from Imprompter (Fu et al.). Situates work in attack/defense ecosystem.",
    112         "source": "haiku"
    113       }
    114     }
    115   },
    116   "type_checklist": {
    117     "empirical": {
    118       "artifacts": {
    119         "code_released": {
    120           "applies": true,
    121           "answer": true,
    122           "justification": "Source code released under MIT License at https://github.com/sej2020/manipulating-web-agents. Modified NanoGCG implementation and Browser Gym integration provided.",
    123           "source": "haiku"
    124         },
    125         "data_released": {
    126           "applies": true,
    127           "answer": false,
    128           "justification": "Discovered triggers not systematically released. UWTI login triggers withheld 'to take into account the high profile of such an attack that can enable unauthorized access' (Ethical Consideration). Demo website shows cached results but not trigger sequences. No dataset of instructions or website snapshots released.",
    129           "source": "haiku"
    130         },
    131         "environment_specified": {
    132           "applies": true,
    133           "answer": false,
    134           "justification": "Model versions specified (Llama-3.1-8B-Instruct, NanoGCG), but no requirements.txt, environment.yml, or dependency list provided. No Python version, Browser Gym version, or CUDA version specified. Missing critical environment documentation.",
    135           "source": "haiku"
    136         },
    137         "reproduction_instructions": {
    138           "applies": true,
    139           "answer": false,
    140           "justification": "No step-by-step reproduction instructions in paper. GitHub link provided but instructions not shown. Would need to infer: clone repo, install dependencies (unlisted), install Browser Gym, run GCG optimization. Insufficient detail in paper for reproduction.",
    141           "source": "haiku"
    142         }
    143       },
    144       "statistical_methodology": {
    145         "confidence_intervals_or_error_bars": {
    146           "applies": true,
    147           "answer": false,
    148           "justification": "Figures 3–4 show error shading for optimization time-to-completion (average over 10 runs), but Figures 5–6 report ASR as single point estimates (0.83, 0.99, 0.55) with no confidence intervals. No statistical test for significance of differences across conditions.",
    149           "source": "haiku"
    150         },
    151         "significance_tests": {
    152           "applies": true,
    153           "answer": false,
    154           "justification": "No statistical significance tests performed. No comparison of ASR across different conditions (e.g., search width 256 vs 128), no p-values, no hypothesis tests. ASR differences treated descriptively.",
    155           "source": "haiku"
    156         },
    157         "effect_sizes_reported": {
    158           "applies": true,
    159           "answer": true,
    160           "justification": "Effect sizes reported as ASR (proportion of successful attacks: 0.83–0.99 for TWUI, 0.55 for UWTI) and time-to-completion (minutes to hours). Provides practical magnitude of attack effectiveness.",
    161           "source": "haiku"
    162         },
    163         "sample_size_justified": {
    164           "applies": true,
    165           "answer": false,
    166           "justification": "Sample sizes not justified: Why 5 websites (chess.com, penjee, citybrewtours, norway.no, google translate)? Why 200 test instructions per website in TWUI? Why 25 training contexts? Why 8 training login pages, 11 test? No power analysis or justification provided.",
    167           "source": "haiku"
    168         },
    169         "variance_reported": {
    170           "applies": true,
    171           "answer": false,
    172           "justification": "Variance reported for optimization time (Figures 3–4: error bars from 10 runs). Variance NOT reported for ASR—only single proportions given. No standard deviation, confidence interval, or repeated runs for attack success rate.",
    173           "source": "haiku"
    174         }
    175       },
    176       "evaluation_design": {
    177         "baselines_included": {
    178           "applies": true,
    179           "answer": false,
    180           "justification": "No baselines. Paper does not compare GCG-optimized triggers to: (1) random triggers, (2) simple hand-crafted triggers, (3) other optimization algorithms (genetic algorithm, random search, etc.). Absence of baseline comparison weakens claims about GCG necessity.",
    181           "source": "haiku"
    182         },
    183         "baselines_contemporary": {
    184           "applies": false,
    185           "answer": false,
    186           "justification": "No baselines included; question is N/A.",
    187           "source": "haiku"
    188         },
    189         "ablation_study": {
    190           "applies": true,
    191           "answer": true,
    192           "justification": "Partial ablation: varies search width (Figure 3), includes/excludes target string in initial trigger (Figure 4), trigger length (Appendix B, null result), CW loss vs cross-entropy (Appendix B, null result). Shows practical optimizations but does not ablate GCG vs alternatives or necessity of accessibility tree.",
    193           "source": "haiku"
    194         },
    195         "multiple_metrics": {
    196           "applies": true,
    197           "answer": true,
    198           "justification": "Multiple metrics used: ASR (syntactic validity of action), ASRV (exact target sequence match), time-to-completion, for login scenario also measures username+password exfiltration vs partial credential leak. Comprehensive evaluation of attack dimensions.",
    199           "source": "haiku"
    200         },
    201         "human_evaluation": {
    202           "applies": false,
    203           "answer": false,
    204           "justification": "No human evaluation needed; attacks on agents are objective (ASR), not subjective. N/A.",
    205           "source": "haiku"
    206         },
    207         "held_out_test_set": {
    208           "applies": true,
    209           "answer": true,
    210           "justification": "Train/test split implicit in TWUI and UWTI scenarios: TWUI trains on 25 instructions, tests on 200 (1:8 train-to-test ratio). UWTI trains on 8 login pages, tests on 11. Separation prevents overfitting to training distribution.",
    211           "source": "haiku"
    212         },
    213         "per_category_breakdown": {
    214           "applies": true,
    215           "answer": true,
    216           "justification": "Breakdowns by: attack type (TWTI, TWUI, UWTI—three scenarios), by website (5 sites with individual results), by instruction type (login pages separate from general navigation). Figure 5 shows per-site ASR.",
    217           "source": "haiku"
    218         },
    219         "failure_cases_discussed": {
    220           "applies": true,
    221           "answer": true,
    222           "justification": "Failure Analysis (Section 4) identifies two failure clusters: (1) concrete instructions with obvious single action (high prior probability), (2) chain-of-thought reasoning overriding trigger. Explains ~17% failure rate in TWUI.",
    223           "source": "haiku"
    224         },
    225         "negative_results_reported": {
    226           "applies": true,
    227           "answer": true,
    228           "justification": "Negative results clearly reported: (1) trigger transferability fails (Llama-3.1 triggers do not work on Llama-2 or Mistral); (2) UWTI universal login trigger achieves only 0.55 ASR on held-out pages vs 0.83–0.99 for TWUI; (3) trigger length, CW loss, top-k variations show null effects (Appendix B).",
    229           "source": "haiku"
    230         }
    231       },
    232       "setup_transparency": {
    233         "model_versions_specified": {
    234           "applies": true,
    235           "answer": true,
    236           "justification": "Model versions explicitly specified: primary model 'Llama-3.1-8B-Instruct' (Section 2.1). Transferability tests use 'Llama-2-7b-chat-hf' and 'Mistral-7B-Instruct-v0.3' (Section 4). Snapshot versions provided.",
    237           "source": "haiku"
    238         },
    239         "prompts_provided": {
    240           "applies": true,
    241           "answer": false,
    242           "justification": "Prompts NOT provided. Paper describes Browser Gym prompt template structure: 'comprises context for web navigation setting, goal or chat messages from user, accessibility tree, and description of actions available.' But actual system prompt, instruction format, and example outputs not shown.",
    243           "source": "haiku"
    244         },
    245         "hyperparameters_reported": {
    246           "applies": true,
    247           "answer": false,
    248           "justification": "Hyperparameters inconsistently reported. 'Standard GCG hyperparameters' mentioned (Section 3.1) but not defined. Variations tested (search width, top-k) but default/initial values not specified. NanoGCG library used but no documentation of parameter settings provided.",
    249           "source": "haiku"
    250         },
    251         "scaffolding_described": {
    252           "applies": true,
    253           "answer": true,
    254           "justification": "Browser Gym agentic scaffolding well described (Section 2.1): (1) HTML extraction as accessibility tree, (2) prompt compilation from context + goal + tree + actions, (3) LLM query, (4) response parsing for action syntax, (5) browser action execution, (6) loop repeat. Sufficient detail to understand system flow.",
    255           "source": "haiku"
    256         },
    257         "data_preprocessing_documented": {
    258           "applies": true,
    259           "answer": false,
    260           "justification": "Preprocessing underspecified. HTML 'processed by Browser Gym template' (Section 2.3) but specific steps not documented. How are 200 test instructions generated? How are website HTML snapshots selected/normalized? No data cleaning or filtering steps detailed.",
    261           "source": "haiku"
    262         }
    263       },
    264       "data_integrity": {
    265         "raw_data_available": {
    266           "applies": true,
    267           "answer": false,
    268           "justification": "Raw data partially available: code released to re-generate triggers, demo website shows cached results. But original website HTML snapshots not released, discovered trigger sequences not all public (UWTI withheld), and test instruction sets not provided. Cannot independently verify raw attack success.",
    269           "source": "haiku"
    270         },
    271         "data_collection_described": {
    272           "applies": true,
    273           "answer": false,
    274           "justification": "Data collection minimally described. Website selection: '5 real websites' chosen (chess.com, penjee, citybrewtours, norway.no, google translate) but criteria for selection not stated. Instructions: '200 prompts' generated from Browser Gym but construction method not documented. Login pages 'copied' from real sites (Section 3.3) but not detailed.",
    275           "source": "haiku"
    276         },
    277         "recruitment_methods_described": {
    278           "applies": false,
    279           "answer": false,
    280           "justification": "No human participants; N/A.",
    281           "source": "haiku"
    282         },
    283         "data_pipeline_documented": {
    284           "applies": true,
    285           "answer": false,
    286           "justification": "Attack pipeline documented (HTML → Accessibility Tree → Prompt → Trigger Optimization → ASR). But data collection pipeline incomplete: How are websites selected? How are instructions created? How are test sets constructed? No data version control, no artifact hashes.",
    287           "source": "haiku"
    288         }
    289       },
    290       "contamination": {
    291         "training_cutoff_stated": {
    292           "applies": true,
    293           "answer": false,
    294           "justification": "Llama-3.1 training cutoff not stated in paper. (The Llama-3.1 model card lists a December 2023 knowledge cutoff, but this is external knowledge.) Websites tested (chess.com, google translate, etc.) are current/live in 2025, likely post-training, so contamination risk low but not addressed.",
    295           "source": "haiku"
    296         },
    297         "train_test_overlap_discussed": {
    298           "applies": true,
    299           "answer": false,
    300           "justification": "Train/test overlap (data contamination) not discussed. Paper does not address whether test websites were in Llama-3.1 training data, or whether test instructions resemble fine-tuning examples in Browser Gym.",
    301           "source": "haiku"
    302         },
    303         "benchmark_contamination_addressed": {
    304           "applies": false,
    305           "answer": false,
    306           "justification": "Not evaluating standardized benchmark; testing on live websites. Benchmark contamination N/A.",
    307           "source": "haiku"
    308         }
    309       },
    310       "cost_and_practicality": {
    311         "inference_cost_reported": {
    312           "applies": true,
    313           "answer": false,
    314           "justification": "Inference cost (latency per agent action) not reported. Optimization cost (time-to-completion for trigger search) reported: 'roughly several hours' reduced to '<1 hour' or '<10 minutes' (Section 3.1, Figures 3–4). No per-token cost, no API charges, no inference latency for actual attacks.",
    315           "source": "haiku"
    316         },
    317         "compute_budget_stated": {
    318           "applies": true,
    319           "answer": false,
    320           "justification": "Total computational budget not stated. Optimization time reported (hours to minutes) but no GPU hours, no memory usage, no batch cost. Hardware not specified (CPU/GPU, model size). Practical deployment cost unclear.",
    321           "source": "haiku"
    322         }
    323       }
    324     }
    325   },
    326   "claims": [
    327     {
    328       "claim": "Adversaries can embed optimized adversarial triggers in HTML to hijack web navigation agent behavior across multiple user instructions.",
    329       "evidence": "TWUI scenario: universal triggers optimized on 25 instructions, tested on 200 new instructions per website. ASR 0.83–0.99 across 5 sites (Figure 5).",
    330       "supported": "strong"
    331     },
    332     {
    333       "claim": "GCG algorithm can be extended to optimize universal triggers across multiple prompts (Equation 3).",
    334       "evidence": "Equation 3 formulates universal trigger optimization over n contexts. Implemented and tested in TWUI and UWTI scenarios.",
    335       "supported": "strong"
    336     },
    337     {
    338       "claim": "Triggers discovered on one website can exfiltrate login credentials universally across different login pages.",
    339       "evidence": "UWTI scenario: trigger trained on 8 login pages achieves 0.55 ASR (leaked both credentials) and 0.55 ASR (leaked either username or password) on 11 held-out pages. Success rate drops significantly from 0.83–0.99 in TWUI.",
    340       "supported": "moderate"
    341     },
    342     {
    343       "claim": "Optimized triggers do not transfer to different LLM models.",
    344       "evidence": "Section 4, Transferability: triggers learned for Llama-3.1 'were unsuccessful' when transferred to Llama-2-7b-chat-hf and Mistral-7B-Instruct-v0.3.",
    345       "supported": "strong"
    346     },
    347     {
    348       "claim": "Trigger optimization time can be reduced by using smaller search width and including target string in initial trigger.",
    349       "evidence": "Figures 3–4: search width 128 keeps optimization <3 hours; including target string reduces to <1 hour or <10 minutes.",
    350       "supported": "strong"
    351     },
    352     {
    353       "claim": "Accessibility tree parsing makes web agents vulnerable to IPI attacks embedded in HTML.",
    354       "evidence": "System evaluation (Section 3) demonstrates attacks work via HTML injection targeting accessibility tree. Paper does not compare to raw HTML or alternative parsing methods.",
    355       "supported": "moderate"
    356     }
    357   ],
    358   "methodology_tags": [
    359     "case-study",
    360     "benchmark-eval"
    361   ],
    362   "key_findings": "LLM-based web navigation agents are vulnerable to indirect prompt injection (IPI) attacks via optimized adversarial triggers embedded in HTML. Using the GCG algorithm extended to universal triggers (Equation 3), attackers achieve 83–99% attack success rates when a trigger is optimized for a specific website and evaluated on unseen user instructions, though success drops to 55% when a single trigger is applied across different websites (login scenario). Attacks are model-specific and do not transfer to other LLMs (Llama-2, Mistral) without retraining.",
    363   "red_flags": [
    364     {
    365       "flag": "No baseline comparisons",
    366       "detail": "Paper does not compare GCG-optimized triggers to random triggers, simple hand-crafted triggers, or alternative optimization algorithms. Necessity of GCG unclear."
    367     },
    368     {
    369       "flag": "Limited evaluation scope",
    370       "detail": "Only 5 websites, single agent framework (Browser Gym), single model (Llama-3.1-8B). Claims generalize to 'LLM-based web agents' but scope is narrow."
    371     },
    372     {
    373       "flag": "Triggers withheld for ethical reasons",
    374       "detail": "UWTI login attack results not fully reproducible; triggers intentionally withheld from demo website to prevent misuse. Limits independent verification."
    375     },
    376     {
    377       "flag": "No statistical significance testing",
    378       "detail": "ASR reported as point proportions (0.83, 0.99) without confidence intervals. No p-values for differences across conditions (search width, loss function)."
    379     },
    380     {
    381       "flag": "Transferability failure under-explored",
    382       "detail": "Triggers fail to transfer to Llama-2, Mistral, but paper does not investigate why. Is this fundamental or an implementation detail? Could joint optimization fix it?"
    383     },
    384     {
    385       "flag": "Training data cutoff not disclosed",
    386       "detail": "Llama-3.1 training cutoff not stated in paper. No discussion of whether test websites/instructions could be in training data."
    387     },
    388     {
    389       "flag": "Sample sizes not justified",
    390       "detail": "Why 5 websites, 200 test instructions, 25 training contexts, 8 training login pages? No justification or power analysis provided."
    391     }
    392   ],
    393   "cited_papers": [
    394     {
    395       "title": "Not What You've Signed Up for: Compromising Real-World LLM-Integrated Applications with Indirect Prompt Injection",
    396       "authors": "Greshake et al.",
    397       "year": 2023,
    398       "relevance": "Foundational IPI concept; defines attack class and threat model."
    399     },
    400     {
    401       "title": "Universal and Transferable Adversarial Attacks on Aligned Language Models",
    402       "authors": "Zou et al.",
    403       "year": 2023,
    404       "relevance": "GCG algorithm; extended here to universal trigger optimization across multiple prompts."
    405     },
    406     {
    407       "title": "Imprompter: Tricking LLM Agents into Improper Tool Use",
    408       "authors": "Fu et al.",
    409       "year": 2024,
    410       "relevance": "Similar GCG-based attack on LLM agents; demonstrates transferability to black-box systems."
    411     },
    412     {
    413       "title": "Adaptive Attacks Break Defenses Against Indirect Prompt Injection Attacks on LLM Agents",
    414       "authors": "Zhan et al.",
    415       "year": 2025,
    416       "relevance": "Defense evasion; shows IPI defenses can be defeated by adaptive attacks."
    417     },
    418     {
    419       "title": "Prompt Injection Attack Against LLM-Integrated Applications",
    420       "authors": "Liu et al.",
    421       "year": 2023,
    422       "relevance": "Direct prompt injection attacks on LLM systems; motivates indirect injection vulnerability."
    423     },
    424     {
    425       "title": "Is BERT Really Robust? A Strong Baseline for Natural Language Attack",
    426       "authors": "Jin et al.",
    427       "year": 2020,
    428       "relevance": "Adversarial attacks on NLP models; early precedent for perturbation-based attacks."
    429     }
    430   ],
    431   "engagement_factors": {
    432     "practical_relevance": {
    433       "score": 2,
    434       "justification": "Web navigation agents (OpenAI Operator, Gemini Deep Research) are emerging but not yet mainstream. Attack requires understanding agent internals, so its relevance is currently more academic than immediately practical."
    435     },
    436     "surprise_contrarian": {
    437       "score": 2,
    438       "justification": "IPI attacks were known abstractly (Greshake et al. 2023), but this is the first concrete demonstration on web agents using GCG. Somewhat novel but expected given prior work."
    439     },
    440     "fear_safety": {
    441       "score": 3,
    442       "justification": "Direct security threat: login credential exfiltration, forced ad clicks, malware distribution, data theft. Raises urgent concerns as autonomous agents scale."
    443     },
    444     "drama_conflict": {
    445       "score": 2,
    446       "justification": "Tension between deploying autonomous agents vs. security risks. Ethical disclosure (withholding full attack). Moderate controversy potential."
    447     },
    448     "demo_ability": {
    449       "score": 3,
    450       "justification": "Working demo website (lethaiq.github.io/attack-web-llm-agent) with visual side-by-side comparison of attacks. Code released (GitHub). Users can explore attacks without running inference."
    451     },
    452     "brand_recognition": {
    453       "score": 1,
    454       "justification": "Authors from Indiana University and HCMUS (not top-tier labs). Llama-3.1 and Browser Gym are well-known but not main contribution. Limited prestige halo."
    455     }
    456   },
    457   "hn_data": {
    458     "threads": [
    459       {
    460         "hn_id": "43422084",
    461         "title": "Measuring AI Ability to Complete Long Tasks",
    462         "points": 5,
    463         "comments": 0,
    464         "url": "https://news.ycombinator.com/item?id=43422084"
    465       },
    466       {
    467         "hn_id": "43433880",
    468         "title": "Measuring AI Ability to Complete Long Tasks",
    469         "points": 4,
    470         "comments": 0,
    471         "url": "https://news.ycombinator.com/item?id=43433880"
    472       },
    473       {
    474         "hn_id": "44496861",
    475         "title": "Measuring AI Ability to Complete Long Tasks",
    476         "points": 3,
    477         "comments": 0,
    478         "url": "https://news.ycombinator.com/item?id=44496861"
    479       },
    480       {
    481         "hn_id": "42855137",
    482         "title": "Why a Race to Artificial Superintelligence Is Self-Defeating [pdf]",
    483         "points": 3,
    484         "comments": 0,
    485         "url": "https://news.ycombinator.com/item?id=42855137"
    486       },
    487       {
    488         "hn_id": "43569873",
    489         "title": "Measuring AI Ability to Complete Long Tasks",
    490         "points": 2,
    491         "comments": 0,
    492         "url": "https://news.ycombinator.com/item?id=43569873"
    493       },
    494       {
    495         "hn_id": "43562986",
    496         "title": "Perceiver IO: A General Architecture for Structured Inputs and Outputs (2021)",
    497         "points": 2,
    498         "comments": 0,
    499         "url": "https://news.ycombinator.com/item?id=43562986"
    500       },
    501       {
    502         "hn_id": "42984225",
    503         "title": "Leveraging Multimodal LLM for Inspirational User Interface Search",
    504         "points": 2,
    505         "comments": 0,
    506         "url": "https://news.ycombinator.com/item?id=42984225"
    507       },
    508       {
    509         "hn_id": "36905285",
    510         "title": "[PDF] Scaling TransNormer to 175B Parameters",
    511         "points": 2,
    512         "comments": 0,
    513         "url": "https://news.ycombinator.com/item?id=36905285"
    514       },
    515       {
    516         "hn_id": "36903196",
    517         "title": "Scaling TransNormer to 175B Parameters",
    518         "points": 2,
    519         "comments": 0,
    520         "url": "https://news.ycombinator.com/item?id=36903196"
    521       },
    522       {
    523         "hn_id": "44650583",
    524         "title": "Safety Evaluations of 20 LLMs",
    525         "points": 1,
    526         "comments": 1,
    527         "url": "https://news.ycombinator.com/item?id=44650583"
    528       }
    529     ],
    530     "top_points": 5,
    531     "total_points": 26,
    532     "total_comments": 1
    533   }
    534 }

Impressum · Datenschutz