scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (28049B)
      1 {
      2   "paper": {
      3     "title": "PINA: Prompt Injection Attack against Navigation Agents",
      4     "authors": [
      5       "Jiani Liu",
      6       "Yixin He",
      7       "Lanlan Fan",
      8       "Qidi Zhong",
      9       "Yushi Cheng",
     10       "Meng Zhang",
     11       "Yanjiao Chen",
     12       "Wenyuan Xu"
     13     ],
     14     "year": 2026,
     15     "venue": "arXiv",
     16     "arxiv_id": "2601.13612",
     17     "doi": "10.48550/arXiv.2601.13612"
     18   },
     19   "checklist": {
     20     "artifacts": {
     21       "code_released": {
     22         "applies": true,
     23         "answer": true,
     24         "justification": "The paper states 'Our code is available at https://github.com/nikikiki6/PINA' in Section 4.1 (Implementation)."
     25       },
     26       "data_released": {
     27         "applies": true,
     28         "answer": true,
     29         "justification": "The paper relies on publicly available resources: the R2R dataset [16] for instruction sets and the NavGPT agent [1], both of which are public. They state 'random 100 examples from R2R as our instruction set for training.'"
     30       },
     31       "environment_specified": {
     32         "applies": true,
     33         "answer": false,
     34         "justification": "The paper mentions '8 NVIDIA H800 GPUs (80 GB, CUDA 12.2, Python 3.9.5)' but provides no requirements.txt, Dockerfile, or detailed library version list. Hardware and Python version alone are not enough to recreate the environment."
     35       },
     36       "reproduction_instructions": {
     37         "applies": true,
     38         "answer": false,
     39         "justification": "No step-by-step reproduction instructions are provided in the paper. A GitHub link is given but the paper itself contains no reproduction guide or commands."
     40       }
     41     },
     42     "statistical_methodology": {
     43       "confidence_intervals_or_error_bars": {
     44         "applies": true,
     45         "answer": false,
     46         "justification": "Tables 1, 2, and 3 report only point estimates for all metrics (ASR, NE, SPL, nDTW, CLS, TL) with no confidence intervals or error bars."
     47       },
     48       "significance_tests": {
     49         "applies": true,
     50         "answer": false,
     51         "justification": "The paper claims PINA 'surpasses all baselines by a clear margin' and 'outperforming baseline methods by over 20%' but no statistical significance tests are reported to support these comparative claims."
     52       },
     53       "effect_sizes_reported": {
     54         "applies": true,
     55         "answer": true,
     56         "justification": "The paper reports clean baselines alongside attack results, providing context for effect magnitude. E.g., 'SPL drops from 14.30 in the clean setting to 3.56 under our attack' and 'outperforming baseline methods by over 20%' on ASR."
     57       },
     58       "sample_size_justified": {
     59         "applies": true,
     60         "answer": false,
     61         "justification": "The paper uses '100 examples from R2R' for training but never justifies this choice. The number of test examples is never explicitly stated, and no power analysis is provided."
     62       },
     63       "variance_reported": {
     64         "applies": true,
     65         "answer": false,
     66         "justification": "No standard deviations, variance, or multi-run spread measures are reported anywhere. All results appear to be from single runs."
     67       }
     68     },
     69     "evaluation_design": {
     70       "baselines_included": {
     71         "applies": true,
     72         "answer": true,
     73         "justification": "Table 1 compares PINA against four baselines: Naive Attack, Escape Characters, Context Ignoring, and Combined Attack, plus a clean (no attack) condition."
     74       },
     75       "baselines_contemporary": {
     76         "applies": true,
     77         "answer": true,
     78         "justification": "The baselines follow the setup from Liu et al. [5] published at USENIX Security 2024, which represents standard prompt injection attack categories in the current literature."
     79       },
     80       "ablation_study": {
     81         "applies": true,
     82         "answer": true,
     83         "justification": "Table 2 presents an ablation study on NavGPT with GPT-3.5, systematically removing KL Divergence Measurement and Key Token Identification components of the Distribution Analyzer."
     84       },
     85       "multiple_metrics": {
     86         "applies": true,
     87         "answer": true,
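     88         "justification": "Six evaluation metrics are used: Attack Success Rate (ASR), Navigation Error (NE), Success weighted by Path Length (SPL), normalized Dynamic Time Warping (nDTW), Trajectory Length (TL), and Coverage weighted by Length Score (CLS)."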
     89       },
     90       "human_evaluation": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "No human evaluation is conducted. All evaluation is automated via simulators and computed metrics. Human assessment of attack severity or real-world navigational danger would have strengthened the claims."
     94       },
     95       "held_out_test_set": {
     96         "applies": true,
     97         "answer": false,
     98         "justification": "The paper uses '100 examples from R2R' for training the attack but never explicitly describes a separate held-out test set. It is unclear whether evaluation is performed on the same examples used for optimization."
     99       },
    100       "per_category_breakdown": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "Results are broken down by victim system (NavGPT with GPT-3.5, NavGPT with GPT-4, Balcı et al. outdoor agent) in Table 1, providing per-system performance."
    104       },
    105       "failure_cases_discussed": {
    106         "applies": true,
    107         "answer": false,
    108         "justification": "No failure cases are discussed. Fig. 3 shows successful attack examples, but the paper never analyzes cases where PINA failed (e.g., the 25% of indoor cases where the attack did not succeed)."
    109       },
    110       "negative_results_reported": {
    111         "applies": true,
    112         "answer": false,
    113         "justification": "No negative results are reported. Every experiment shows PINA outperforming baselines. The ablation shows expected degradation when components are removed, but no genuinely negative or unexpected results."
    114       }
    115     },
    116     "claims_and_evidence": {
    117       "abstract_claims_supported": {
    118         "applies": true,
    119         "answer": true,
    120         "justification": "The abstract claims 'average ASR of 87.5%' which matches (75%+100%)/2 from Table 1 (GPT-3.5 indoor + outdoor). Claims of surpassing baselines and robustness under ablation/defense are supported by Tables 1-3."
    121       },
    122       "causal_claims_justified": {
    123         "applies": true,
    124         "answer": true,
    125         "justification": "Causal claims are made via the ablation study (Table 2): 'removing the entire Distribution Analyzer reduces ASR from 75.00% to 69.25%.' The ablation uses controlled single-variable manipulation, which is adequate for these claims."
    126       },
    127       "generalization_bounded": {
    128         "applies": true,
    129         "answer": false,
    130         "justification": "The title claims 'Prompt Injection Attack against Navigation Agents' broadly, but experiments cover only 2 agents (1 indoor NavGPT, 1 outdoor Balcı et al.) with 2 LLMs (GPT-3.5, GPT-4). The abstract's 'indoor and outdoor navigation agents' (plural) overstates the scope."
    131       },
    132       "alternative_explanations_discussed": {
    133         "applies": true,
    134         "answer": false,
    135         "justification": "No alternative explanations are discussed. The paper does not consider whether the high ASR could be due to inherent weakness of these specific systems, the simplicity of the navigation tasks, or other confounding factors."
    136       },
    137       "proxy_outcome_distinction": {
    138         "applies": true,
    139         "answer": true,
    140         "justification": "The paper measures ASR and navigation metrics (NE, SPL, nDTW, CLS, TL), and claims are about attack effectiveness measured by exactly these metrics. No proxy gap exists between what is measured and what is claimed."
    141       }
    142     },
    143     "setup_transparency": {
    144       "model_versions_specified": {
    145         "applies": true,
    146         "answer": false,
    147         "justification": "The paper uses 'GPT-3.5-turbo' and 'GPT-4' without specific version snapshots (e.g., gpt-3.5-turbo-0613, gpt-4-0613). 'Llama2-7b' is mentioned for the surrogate but also lacks a specific checkpoint identifier."
    148       },
    149       "prompts_provided": {
    150         "applies": true,
    151         "answer": false,
    152         "justification": "No actual prompt text is provided in the paper. The framework components are described algorithmically (Algorithm 1, Fig. 2), but neither the injection prompts, system prompts, nor feedback generator prompts are shown."
    153       },
    154       "hyperparameters_reported": {
    155         "applies": true,
    156         "answer": false,
    157         "justification": "Only α=0.5 for the distribution score balance is reported. Critical hyperparameters including number of optimization rounds R, threshold τ, weight vector w, and LLM API parameters (temperature, top-p) are not specified."
    158       },
    159       "scaffolding_described": {
    160         "applies": true,
    161         "answer": true,
    162         "justification": "The PINA framework is described in detail in Section 3 with Algorithm 1 and Figure 2, covering the Attack Evaluator, Distribution Analyzer, and Adaptive Prompt Refinement loop with their interactions."
    163       },
    164       "data_preprocessing_documented": {
    165         "applies": true,
    166         "answer": false,
    167         "justification": "The paper states 'random 100 examples from R2R as our instruction set for training' but does not describe how these were selected, whether any filtering was applied, or how test examples were chosen."
    168       }
    169     },
    170     "limitations_and_scope": {
    171       "limitations_section_present": {
    172         "applies": true,
    173         "answer": false,
    174         "justification": "There is no dedicated limitations section. The conclusion mentions 'two promising directions' for future work but does not discuss any limitations of the current study."
    175       },
    176       "threats_to_validity_specific": {
    177         "applies": true,
    178         "answer": false,
    179         "justification": "No threats to validity are discussed anywhere in the paper. There is no consideration of the small sample sizes, limited system coverage, or potential confounds."
    180       },
    181       "scope_boundaries_stated": {
    182         "applies": true,
    183         "answer": false,
    184         "justification": "No explicit scope boundaries are stated. The paper does not say what its results do NOT show or what populations/settings are excluded from its claims."
    185       }
    186     },
    187     "data_integrity": {
    188       "raw_data_available": {
    189         "applies": true,
    190         "answer": false,
    191         "justification": "No raw data (per-example trajectories, per-example attack outcomes, generated injection prompts) is available. Only aggregate metrics are reported in tables."
    192       },
    193       "data_collection_described": {
    194         "applies": true,
    195         "answer": false,
    196         "justification": "The paper mentions using R2R and two victim agents but never states the number of test examples used for evaluation. The test set size is left unstated, a critical omission."
    197       },
    198       "recruitment_methods_described": {
    199         "applies": false,
    200         "answer": false,
    201         "justification": "No human participants. Data sources are standard benchmarks (R2R dataset) and publicly available navigation agents."
    202       },
    203       "data_pipeline_documented": {
    204         "applies": true,
    205         "answer": false,
    206         "justification": "The pipeline from R2R examples to final evaluation metrics is not documented. How training vs test examples were split, how many evaluation episodes were run, and how aggregate metrics were computed from per-example results are all unstated."
    207       }
    208     },
    209     "conflicts_of_interest": {
    210       "funding_disclosed": {
    211         "applies": true,
    212         "answer": true,
    213         "justification": "Funding is disclosed: 'This paper was supported by National Natural Science Foundation of China Grant 62271280 and 62572433.'"
    214       },
    215       "affiliations_disclosed": {
    216         "applies": true,
    217         "answer": true,
    218         "justification": "Author affiliations (Zhejiang University, Southeast University) are clearly listed. Authors are not affiliated with the systems being attacked."
    219       },
    220       "funder_independent_of_outcome": {
    221         "applies": true,
    222         "answer": true,
    223         "justification": "The National Natural Science Foundation of China is a government research funding agency with no financial stake in the outcome of prompt injection attack research."
    224       },
    225       "financial_interests_declared": {
    226         "applies": true,
    227         "answer": false,
    228         "justification": "No competing interests statement or financial interest disclosure is present in the paper."
    229       }
    230     },
    231     "contamination": {
    232       "training_cutoff_stated": {
    233         "applies": false,
    234         "answer": false,
    235         "justification": "This is a red-teaming/attack study testing prompt injection methods against navigation agents, not evaluating a pre-trained model's knowledge on a benchmark. Contamination is structurally inapplicable."
    236       },
    237       "train_test_overlap_discussed": {
    238         "applies": false,
    239         "answer": false,
    240         "justification": "This paper tests attack methods rather than model knowledge. Whether GPT-3.5/GPT-4 have seen R2R data does not affect the validity of the attack effectiveness claims."
    241       },
    242       "benchmark_contamination_addressed": {
    243         "applies": false,
    244         "answer": false,
    245         "justification": "The paper evaluates attack effectiveness, not model capability on benchmarks. Contamination considerations are not structurally applicable."
    246       }
    247     },
    248     "human_studies": {
    249       "pre_registered": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants in this study. All experiments are conducted in simulation."
    253       },
    254       "irb_or_ethics_approval": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants. The study uses automated navigation agents and simulators."
    258       },
    259       "demographics_reported": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants in this study."
    263       },
    264       "inclusion_exclusion_criteria": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants in this study."
    268       },
    269       "randomization_described": {
    270         "applies": false,
    271         "answer": false,
    272         "justification": "No human participants in this study."
    273       },
    274       "blinding_described": {
    275         "applies": false,
    276         "answer": false,
    277         "justification": "No human participants in this study."
    278       },
    279       "attrition_reported": {
    280         "applies": false,
    281         "answer": false,
    282         "justification": "No human participants in this study."
    283       }
    284     },
    285     "cost_and_practicality": {
    286       "inference_cost_reported": {
    287         "applies": true,
    288         "answer": false,
    289         "justification": "No inference cost, API cost, or latency figures are reported. The iterative optimization process involves repeated calls to GPT-3.5, Llama2-7b, and navigation simulators, but no cost accounting is provided."
    290       },
    291       "compute_budget_stated": {
    292         "applies": true,
    293         "answer": false,
    294         "justification": "The paper mentions '8 NVIDIA H800 GPUs' as hardware but does not state total GPU hours, API spend, optimization wall-clock time, or total compute budget."
    295       }
    296     },
    297     "experimental_rigor": {
    298       "seed_sensitivity_reported": {
    299         "applies": true,
    300         "answer": false,
    301         "justification": "No mention of multiple random seeds or seed sensitivity analysis. Results appear to be from single runs."
    302       },
    303       "number_of_runs_stated": {
    304         "applies": true,
    305         "answer": false,
    306         "justification": "The number of experimental runs is never stated. It is unclear whether results are from a single run or averaged over multiple runs."
    307       },
    308       "hyperparameter_search_budget": {
    309         "applies": true,
    310         "answer": false,
    311         "justification": "No hyperparameter search budget is reported. α=0.5 is set 'for convenience' with no comparison to alternatives. The weight vector w and other parameters are not searched."
    312       },
    313       "best_config_selection_justified": {
    314         "applies": true,
    315         "answer": false,
    316         "justification": "Configuration choices (α=0.5, weight vector w, number of rounds) are not justified through systematic comparison. α is chosen 'for convenience' without ablation."
    317       },
    318       "multiple_comparison_correction": {
    319         "applies": true,
    320         "answer": false,
    321         "justification": "Multiple comparisons are made across 5 attack methods, 3 target systems, and 6 metrics, but no significance tests are performed at all, let alone corrections for multiple comparisons."
    322       },
    323       "self_comparison_bias_addressed": {
    324         "applies": true,
    325         "answer": false,
    326         "justification": "The authors implement all baseline attacks following [5] and compare against their own PINA method without acknowledging that their implementations of baselines may systematically underperform."
    327       },
    328       "compute_budget_vs_performance": {
    329         "applies": true,
    330         "answer": false,
    331         "justification": "PINA uses iterative multi-round optimization with a surrogate LLM and simulator, while baselines are single-shot prompt modifications. This massive compute asymmetry is never discussed or controlled for."
    332       },
    333       "benchmark_construct_validity": {
    334         "applies": true,
    335         "answer": false,
    336         "justification": "The paper does not discuss whether R2R navigation tasks in simulation adequately represent real-world navigation vulnerability. The gap between simulated and physical navigation environments is not addressed."
    337       },
    338       "scaffold_confound_addressed": {
    339         "applies": true,
    340         "answer": false,
    341         "justification": "PINA uses an iterative optimization scaffold (surrogate LLM + simulator + feedback loop) while baselines use no scaffold at all. This fundamental asymmetry in methodology is not acknowledged as a confound."
    342       }
    343     },
    344     "data_leakage": {
    345       "temporal_leakage_addressed": {
    346         "applies": true,
    347         "answer": false,
    348         "justification": "No discussion of whether GPT-3.5/GPT-4 training data includes R2R dataset examples, which were published in 2018 and are certainly within training cutoffs."
    349       },
    350       "feature_leakage_addressed": {
    351         "applies": true,
    352         "answer": false,
    353         "justification": "No discussion of whether the evaluation setup provides information that would not be available in a real attack scenario."
    354       },
    355       "non_independence_addressed": {
    356         "applies": true,
    357         "answer": false,
    358         "justification": "No discussion of independence between the 100 R2R training examples and the test examples used for evaluation. The train/test split procedure is never described."
    359       },
    360       "leakage_detection_method": {
    361         "applies": true,
    362         "answer": false,
    363         "justification": "No leakage detection or prevention methods are employed or discussed."
    364       }
    365     }
    366   },
    367   "scan_version": 3,
    368   "active_modules": ["experimental_rigor", "data_leakage"],
    369   "claims": [
    370     {
    371       "claim": "PINA achieves an average ASR of 87.5% across indoor and outdoor navigation agents.",
    372       "evidence": "Table 1 shows 75% ASR on NavGPT with GPT-3.5 and 100% ASR on Balcı et al. outdoor agent, averaging 87.5%. Section 4.2.",
    373       "supported": "moderate"
    374     },
    375     {
    376       "claim": "PINA surpasses all baseline prompt injection methods by over 20% ASR.",
    377       "evidence": "Table 1: the Combined Attack baseline achieves 50% on indoor (GPT-3.5) and 84.21% on outdoor, and the strongest outdoor baseline reaches 84.62%. PINA achieves 75% and 100% respectively. The >20% margin holds for indoor (25 points) but not outdoor (about 15 points). Section 4.2.",
    378       "supported": "moderate"
    379     },
    380     {
    381       "claim": "PINA transfers effectively from GPT-3.5 to GPT-4, achieving 75% ASR.",
    382       "evidence": "Table 1 shows PINA optimized on NavGPT with GPT-3.5 achieves 75% ASR when transferred to NavGPT with GPT-4, with largest drops in SPL (5.63) and nDTW (28.11). Section 4.2.",
    383       "supported": "moderate"
    384     },
    385     {
    386       "claim": "Both KL divergence measurement and key token identification are essential, and their combination maximizes attack success.",
    387       "evidence": "Table 2 ablation: full PINA 75.00% ASR vs 72.90% with either component alone vs 69.25% without either. Section 4.3.",
    388       "supported": "weak"
    389     },
    390     {
    391       "claim": "PINA remains effective against self-reminder adaptive defense, maintaining 68.8% ASR.",
    392       "evidence": "Table 3 shows ASR drops from 75.00% to 68.80% with the self-reminder defense of [19], while navigation metrics partially recover but remain below clean settings. Section 4.4.",
    393       "supported": "moderate"
    394     }
    395   ],
    396   "methodology_tags": ["benchmark-eval"],
    397   "key_findings": "PINA is an adaptive prompt optimization framework for attacking LLM-based navigation agents, achieving 75% ASR on indoor agents (NavGPT) and 100% on outdoor agents under black-box conditions. The framework uses a Distribution Analyzer (KL divergence + key token identification) combined with an Attack Evaluator to iteratively refine injection prompts. Attacks transfer from GPT-3.5 to GPT-4 (75% ASR) and partially bypass self-reminder defenses (68.8% ASR). However, evaluations are conducted on only two navigation systems with unstated test set sizes and no statistical rigor.",
    398   "red_flags": [
    399     {
    400       "flag": "Implied tiny sample sizes",
    401       "detail": "ASR percentages in Table 1 are multiples of 6.25% for indoor (suggesting ~16 test examples) and irregular fractions for outdoor (75.86% ≈ 22/29, 84.62% ≈ 11/13). The number of test examples is never explicitly stated, and sample sizes appear very small for the claims being made."
    402     },
    403     {
    404       "flag": "No error bars or uncertainty quantification",
    405       "detail": "All results are point estimates with no confidence intervals, standard deviations, or multi-run variance. With potentially 16 or fewer test examples, the uncertainty around the reported ASR is very large."
    406     },
    407     {
    408       "flag": "Compute-asymmetric comparison",
    409       "detail": "PINA uses iterative multi-round optimization with a surrogate LLM (Llama2-7b) and navigation simulator on 8 H800 GPUs, while all baselines are single-shot prompt modifications. This massive compute asymmetry makes the comparison fundamentally unfair — PINA's advantage may be due to compute expenditure rather than the specific framework design."
    410     },
    411     {
    412       "flag": "100% ASR on outdoor agent may indicate trivially vulnerable target",
    413       "detail": "The outdoor agent (Balcı et al.) has a clean NE of 0.00 and always succeeds, meaning any deviation counts as a navigation failure and therefore as a successful attack. Baseline attacks already achieve 62.5-84.6% ASR. The 100% result for PINA may reflect a trivially exploitable system rather than PINA's effectiveness."
    414     },
    415     {
    416       "flag": "No limitations section",
    417       "detail": "The paper lacks any discussion of limitations, threats to validity, or scope boundaries. Small sample sizes, limited system coverage (only 2 agents), simulation-only testing, and compute asymmetry are all unacknowledged."
    418     },
    419     {
    420       "flag": "Inconsistent outdoor test set sizes across baselines",
    421       "detail": "The outdoor baseline ASR percentages (62.50%, 75.86%, 84.62%, 84.21%) correspond to the fractions 5/8, 22/29, 11/13, and 16/19, implying a different denominator for each baseline method (8, 29, 13, 19); PINA's 100% is consistent with any denominator. If the test set size varies per attack method, this is a serious methodological issue that is never explained."
    422     }
    423   ],
    424   "cited_papers": [
    425     {
    426       "title": "NavGPT: Explicit Reasoning in Vision-and-Language Navigation with Large Language Models",
    427       "authors": ["Gengze Zhou"],
    428       "year": 2024,
    429       "relevance": "Primary victim agent — demonstrates LLM-based indoor navigation using GPT for planning."
    430     },
    431     {
    432       "title": "Prompting Large Language Models for Aerial Navigation",
    433       "authors": ["Emirhan Balcı"],
    434       "relevance": "Second victim agent — LLM-based outdoor/aerial navigation, illustrating prompt-driven drone control."
    435     },
    436     {
    437       "title": "ChatGPT for Robotics: Design Principles and Model Abilities",
    438       "authors": ["Sai H Vemprala", "Rogerio Bonatti", "Arthur Bucker", "Ashish Kapoor"],
    439       "year": 2024,
    440       "relevance": "Foundational work on using LLMs for robot control, establishing the paradigm PINA attacks."
    441     },
    442     {
    443       "title": "Prompt Injection Attack against LLM-Integrated Applications",
    444       "authors": ["Yi Liu", "Gelei Deng", "Yuekang Li"],
    445       "year": 2023,
    446       "arxiv_id": "2306.05499",
    447       "relevance": "Defines the prompt injection attack framework that PINA adapts for navigation agents."
    448     },
    449     {
    450       "title": "Formalizing and Benchmarking Prompt Injection Attacks and Defenses",
    451       "authors": ["Yupei Liu"],
    452       "year": 2024,
    453       "relevance": "Provides the baseline attack taxonomy and evaluation methodology that PINA builds upon (USENIX Security 2024)."
    454     },
    455     {
    456       "title": "Benchmarking and Defending against Indirect Prompt Injection Attacks on Large Language Models",
    457       "authors": ["Jingwei Yi", "Yueqi Xie", "Bin Zhu"],
    458       "year": 2025,
    459       "relevance": "Systematic benchmark of indirect prompt injection attacks and defenses on LLM agents."
    460     },
    461     {
    462       "title": "BadRobot: Jailbreaking Embodied LLMs in the Physical World",
    463       "authors": ["Hangtao Zhang", "Chenyu Zhu", "Xianlong Wang"],
    464       "year": 2024,
    465       "arxiv_id": "2407.20242",
    466       "relevance": "Related work on security attacks against embodied LLM agents with physical-world consequences."
    467     },
    468     {
    469       "title": "Not What You've Signed Up For: Compromising Real-World LLM-Integrated Applications with Indirect Prompt Injection",
    470       "authors": ["Kai Greshake", "Sahar Abdelnabi", "Shailesh Mishra"],
    471       "year": 2023,
    472       "relevance": "Seminal work on indirect prompt injection in real-world LLM applications."
    473     },
    474     {
    475       "title": "WASP: Benchmarking Web Agent Security against Prompt Injection Attacks",
    476       "authors": ["Ivan Evtimov"],
    477       "year": 2025,
    478       "arxiv_id": "2504.18575",
    479       "relevance": "Benchmark for prompt injection attacks on web agents, sharing the surrogate-based black-box attack assumption with PINA."
    480     },
    481     {
    482       "title": "Llama 2: Open Foundation and Fine-Tuned Chat Models",
    483       "authors": ["Hugo Touvron", "Louis Martin", "Kevin Stone"],
    484       "year": 2023,
    485       "arxiv_id": "2307.09288",
    486       "relevance": "Llama2-7b is used as the surrogate LLM in PINA's Distribution Analyzer."
    487     },
    488     {
    489       "title": "Defending ChatGPT against Jailbreak Attack via Self-Reminders",
    490       "authors": ["Yueqi Xie"],
    491       "year": 2023,
    492       "relevance": "Provides the adaptive defense (self-reminder strategy) used to evaluate PINA's robustness."
    493     }
    494   ],
    495   "engagement_factors": {
    496     "practical_relevance": {
    497       "score": 1,
    498       "justification": "Security researchers may use the framework, but practitioners cannot directly apply it to defend their own systems."
    499     },
    500     "surprise_contrarian": {
    501       "score": 1,
    502       "justification": "Demonstrating that navigation agents are vulnerable to prompt injection is unsurprising given known LLM vulnerabilities; the novelty is in the specific attack method."
    503     },
    504     "fear_safety": {
    505       "score": 3,
    506       "justification": "Demonstrates attacks that can misguide physical navigation agents into unsafe routes with real-world harm potential, directly raising embodied AI safety concerns."
    507     },
    508     "drama_conflict": {
    509       "score": 1,
    510       "justification": "Highlights security gaps but does not call out specific companies or directly challenge a widely-held belief."
    511     },
    512     "demo_ability": {
    513       "score": 2,
    514       "justification": "Code is released on GitHub, but running it requires setting up NavGPT, R2R dataset, and navigation simulators."
    515     },
    516     "brand_recognition": {
    517       "score": 1,
    518       "justification": "Uses GPT-3.5/GPT-4 which are well-known, but the research lab (Zhejiang University) is not a household name in Western AI discourse."
    519     }
    520   }
    521 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs