ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan-v5.json (26033B)


      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "DeRAG: Black-box Adversarial Attacks on Multiple Retrieval-Augmented Generation Applications via Prompt Injection",
      6     "authors": ["Jerry Wang", "Fang Yu"],
      7     "year": 2025,
      8     "venue": "KDD Workshop on Prompt Optimization",
      9     "arxiv_id": "2507.15042",
     10     "doi": "10.48550/arXiv.2507.15042"
     11   },
     12   "checklist": {
     13     "claims_and_evidence": {
     14       "abstract_claims_supported": {
     15         "applies": true,
     16         "answer": true,
     17         "justification": "All abstract claims are backed by experimental results: DE vs. GGPP/PRADA comparisons in Tables 1-2, ≤5 token budgets confirmed, Welch's t-test for readability in Table 16, and AUROC 0.2023 for detector evasion in Table 4.",
     18         "source": "haiku"
     19       },
     20       "causal_claims_justified": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "Causal claims such as 'early stopping cuts query cost by ~40%' are supported by controlled comparisons of DE variants across multiple datasets with consistent ablations (Figure 2, Table 2).",
     24         "source": "haiku"
     25       },
     26       "generalization_bounded": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "The title and conclusion claim DeRAG attacks 'multiple RAG applications,' but experiments only cover BERT-base-uncased (dense) and BM25 (sparse) on 1,000-document subsets; modern embedding models and production-scale corpora are untested.",
     30         "source": "haiku"
     31       },
     32       "alternative_explanations_discussed": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "The paper attributes MS MARCO's high success to corpus redundancy but does not explore whether BERT-base-uncased is unusually vulnerable compared to modern retrievers, nor whether the artificially small corpus (1,000 docs) inflates success rates.",
     36         "source": "haiku"
     37       },
     38       "proxy_outcome_distinction": {
     39         "applies": true,
     40         "answer": true,
     41         "justification": "The paper distinguishes retrieval rank manipulation (Success@K) from downstream answer quality and validates the connection in Table 5, showing EM/F1/ROUGE-L/BERTScore degradation stratified by attack outcome.",
     42         "source": "haiku"
     43       }
     44     },
     45     "limitations_and_scope": {
     46       "limitations_section_present": {
     47         "applies": true,
     48         "answer": false,
     49         "justification": "There is no dedicated limitations or threats-to-validity section; the conclusion briefly mentions future defenses but does not enumerate limitations of the current work.",
     50         "source": "haiku"
     51       },
     52       "threats_to_validity_specific": {
     53         "applies": true,
     54         "answer": false,
     55         "justification": "No specific threats to validity are discussed — e.g., the impact of corpus size (1,000 docs vs. production scale), retriever model choice, or the small query count (100) on result generalizability.",
     56         "source": "haiku"
     57       },
     58       "scope_boundaries_stated": {
     59         "applies": true,
     60         "answer": false,
     61         "justification": "The paper does not explicitly bound what the results do NOT show — for example, whether attacks transfer to instruction-tuned embedding models, larger corpora, or API-based retrieval services.",
     62         "source": "haiku"
     63       }
     64     },
     65     "conflicts_of_interest": {
     66       "funding_disclosed": {
     67         "applies": true,
     68         "answer": false,
     69         "justification": "No funding source is disclosed anywhere in the paper.",
     70         "source": "haiku"
     71       },
     72       "affiliations_disclosed": {
     73         "applies": true,
     74         "answer": true,
     75         "justification": "Both authors' affiliations (Department of Management Information Systems, National ChengChi University, Taipei, Taiwan) are disclosed on the title page.",
     76         "source": "haiku"
     77       },
     78       "funder_independent_of_outcome": {
     79         "applies": false,
     80         "answer": false,
     81         "justification": "No funder is identified, so independence cannot be assessed.",
     82         "source": "haiku"
     83       },
     84       "financial_interests_declared": {
     85         "applies": true,
     86         "answer": false,
     87         "justification": "No competing interests or financial interests statement is present in the paper.",
     88         "source": "haiku"
     89       }
     90     },
     91     "scope_and_framing": {
     92       "key_terms_defined": {
     93         "applies": true,
     94         "answer": true,
     95         "justification": "Key terms are defined: RAG (Section 1), Differential Evolution (Section 2.2), dense/sparse retrievers (Section 2.4), and all evaluation metrics (Success@K, ΔMRR, ΔnDCG, Δcos) are formally defined in Section 4.1.",
     96         "source": "haiku"
     97       },
     98       "intended_contribution_clear": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "The paper clearly states its contribution: a gradient-free, black-box adversarial attack (DeRAG) using Differential Evolution to generate short adversarial suffixes that manipulate RAG retrieval rankings without model internals.",
    102         "source": "haiku"
    103       },
    104       "engagement_with_prior_work": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "A four-subsection related work covers adversarial prompts, evolutionary optimization, detection methods, and retrieval mechanisms; the paper explicitly compares against GGPP (white-box) and PRADA (sparse black-box) throughout.",
    108         "source": "haiku"
    109       }
    110     }
    111   },
    112   "type_checklist": {
    113     "empirical": {
    114       "artifacts": {
    115         "code_released": {
    116           "applies": true,
    117           "answer": true,
    118           "justification": "Source code is released at https://github.com/pen9rum/Rag_attack_DeRag, explicitly referenced in Section 4.1.",
    119           "source": "haiku"
    120         },
    121         "data_released": {
    122           "applies": true,
    123           "answer": true,
    124           "justification": "All datasets used (MS MARCO, SciFact, FiQA, FEVER, SQuAD, NQ-Open) are standard publicly available benchmarks accessible via the BEIR framework.",
    125           "source": "haiku"
    126         },
    127         "environment_specified": {
    128           "applies": true,
    129           "answer": false,
    130           "justification": "No requirements.txt, Dockerfile, or version-pinned dependency specifications are provided; only the model name (BERT-base-uncased) is mentioned without library or Python version details.",
    131           "source": "haiku"
    132         },
    133         "reproduction_instructions": {
    134           "applies": true,
    135           "answer": false,
    136           "justification": "The algorithm is described via pseudocode (Algorithm 1) but no step-by-step instructions for reproducing the specific experimental results (table entries, figures) are provided in the paper.",
    137           "source": "haiku"
    138         }
    139       },
    140       "statistical_methodology": {
    141         "confidence_intervals_or_error_bars": {
    142           "applies": true,
    143           "answer": false,
    144           "justification": "Main attack success results (Tables 1-2) report point estimates only; standard deviations appear only for MLM NLL in Table 15, not for primary Success@K metrics.",
    145           "source": "haiku"
    146         },
    147         "significance_tests": {
    148           "applies": true,
    149           "answer": false,
    150           "justification": "Welch's t-test is used only for the MLM NLL readability comparison (Table 16); the primary comparative claims — DE vs. GGPP vs. PRADA attack success — are made without any statistical significance testing.",
    151           "source": "haiku"
    152         },
    153         "effect_sizes_reported": {
    154           "applies": true,
    155           "answer": true,
    156           "justification": "Effect sizes are reported across multiple metrics: Success@K, ΔMRR, ΔnDCG, Δcos, and downstream percentage drops in EM/F1/ROUGE-L in Table 5.",
    157           "source": "haiku"
    158         },
    159         "sample_size_justified": {
    160           "applies": true,
    161           "answer": false,
    162           "justification": "The choice of 100 queries and 1,000 documents per dataset is not justified through power analysis or reasoning about statistical adequacy.",
    163           "source": "haiku"
    164         },
    165         "variance_reported": {
    166           "applies": true,
    167           "answer": false,
    168           "justification": "Main results tables (Tables 1-2) report no variance; standard deviations appear only for MLM NLL (Table 15), not for primary attack success metrics.",
    169           "source": "haiku"
    170         }
    171       },
    172       "evaluation_design": {
    173         "baselines_included": {
    174           "applies": true,
    175           "answer": true,
    176           "justification": "Three baselines are included: GGPP (gradient-based white-box), PRADA (sparse black-box), and random suffix.",
    177           "source": "haiku"
    178         },
    179         "baselines_contemporary": {
    180           "applies": true,
    181           "answer": true,
    182           "justification": "GGPP (2024) and PRADA (2022) are the most relevant contemporary methods for the dense and sparse retriever attack settings respectively.",
    183           "source": "haiku"
    184         },
    185         "ablation_study": {
    186           "applies": true,
    187           "answer": true,
    188           "justification": "Extensive ablations: DE variants (seq_stop vs. fixed_stop vs. seq), suffix length effects (Figure 4, Appendix D), loss function comparison (Appendix E), prefix vs. suffix positioning (Table 3), and candidate pool size effects (Table 6).",
    189           "source": "haiku"
    190         },
    191         "multiple_metrics": {
    192           "applies": true,
    193           "answer": true,
    194           "justification": "Multiple metrics: Success@K (K=1,10,20), Avg Tok, Avg Iter, ΔMRR, ΔnDCG, Δcos, EM, F1, ROUGE-L, BERTScore, AUROC, AUPRC, MLM NLL.",
    195           "source": "haiku"
    196         },
    197         "human_evaluation": {
    198           "applies": false,
    199           "answer": false,
    200           "justification": "Human evaluation is not applicable; attack effectiveness is measured through automated retrieval and QA metrics.",
    201           "source": "haiku"
    202         },
    203         "held_out_test_set": {
    204           "applies": false,
    205           "answer": false,
    206           "justification": "This is an adversarial optimization task, not a supervised prediction task; the held-out test set concept does not apply.",
    207           "source": "haiku"
    208         },
    209         "per_category_breakdown": {
    210           "applies": true,
    211           "answer": true,
    212           "justification": "Results are broken down per dataset (SciFact, FiQA, FEVER, MS MARCO) and per retrieval threshold (K=1, 10, 20) across all main tables.",
    213           "source": "haiku"
    214         },
    215         "failure_cases_discussed": {
    216           "applies": true,
    217           "answer": true,
    218           "justification": "Table 3 explicitly tabulates queries where both prefix and suffix attacks fail; Section 4.5 stratifies outcomes into Top-1 success, Top-10-only, and Fail with corresponding quality metrics.",
    219           "source": "haiku"
    220         },
    221         "negative_results_reported": {
    222           "applies": true,
    223           "answer": true,
    224           "justification": "Negative results include: cosine loss underperforming hinge loss (Table 12/Appendix E), monotonic suffix length schedule not improving results (Appendix D), and low Succ@1 rates (~10-20%) on most datasets.",
    225           "source": "haiku"
    226         }
    227       },
    228       "setup_transparency": {
    229         "model_versions_specified": {
    230           "applies": true,
    231           "answer": false,
    232           "justification": "BERT-base-uncased is specified for retrieval, but the LLM used for answer generation in the downstream RAG pipeline (Section 4.5, Table 5) is never named or versioned — a critical omission.",
    233           "source": "haiku"
    234         },
    235         "prompts_provided": {
    236           "applies": true,
    237           "answer": false,
    238           "justification": "No system prompts or query templates for the downstream RAG generator LLM are provided; appendix tables show adversarial suffix output examples but not the generation prompts used.",
    239           "source": "haiku"
    240         },
    241         "hyperparameters_reported": {
    242           "applies": true,
    243           "answer": false,
    244           "justification": "DE hyperparameters (F, CR, N, patience T) are described as typical ranges (e.g., F ∈ [0.5, 1.0], CR ∈ [0.1, 0.9]) rather than the exact values used in the reported experiments.",
    245           "source": "haiku"
    246         },
    247         "scaffolding_described": {
    248           "applies": false,
    249           "answer": false,
    250           "justification": "No agentic scaffolding is used; this is a direct adversarial optimization attack on retrieval systems.",
    251           "source": "haiku"
    252         },
    253         "data_preprocessing_documented": {
    254           "applies": true,
    255           "answer": true,
    256           "justification": "Data preprocessing is documented: random sampling of 1,000 documents and 100 queries from official BEIR corpus/query splits, BERT-base-uncased CLS embedding extraction (768-dim), cosine similarity retrieval.",
    257           "source": "haiku"
    258         }
    259       },
    260       "data_integrity": {
    261         "raw_data_available": {
    262           "applies": true,
    263           "answer": false,
    264           "justification": "Per-query attack outcomes and generated adversarial suffixes are not released as structured data; only the code repository is linked and we cannot verify its contents from the paper.",
    265           "source": "haiku"
    266         },
    267         "data_collection_described": {
    268           "applies": true,
    269           "answer": true,
    270           "justification": "Data collection is described: standard BEIR benchmarks, random sampling of 1,000-document subsets and 100 queries from official splits, target documents chosen as non-relevant passages or topically confusable distractors.",
    271           "source": "haiku"
    272         },
    273         "recruitment_methods_described": {
    274           "applies": false,
    275           "answer": false,
    276           "justification": "No human participants; all data comes from standard NLP benchmarks.",
    277           "source": "haiku"
    278         },
    279         "data_pipeline_documented": {
    280           "applies": true,
    281           "answer": true,
    282           "justification": "The full pipeline is documented: query/target selection → BERT embedding → DE optimization loop (Algorithm 1) → retrieval evaluation → downstream QA generation and scoring.",
    283           "source": "haiku"
    284         }
    285       },
    286       "contamination": {
    287         "training_cutoff_stated": {
    288           "applies": false,
    289           "answer": false,
    290           "justification": "This paper evaluates an adversarial attack on retrieval ranking, not LLM benchmark knowledge recall; standard contamination concerns do not apply.",
    291           "source": "haiku"
    292         },
    293         "train_test_overlap_discussed": {
    294           "applies": false,
    295           "answer": false,
    296           "justification": "Not applicable; benchmarks are used as retrieval corpora to be manipulated, not as knowledge tests for a generative model.",
    297           "source": "haiku"
    298         },
    299         "benchmark_contamination_addressed": {
    300           "applies": false,
    301           "answer": false,
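    302           "justification": "Not applicable; the paper evaluates an adversarial attack on retrieval ranking, with benchmarks used as retrieval corpora rather than as knowledge tests for a generative model, so benchmark contamination is not at issue.",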
    303           "source": "haiku"
    304         }
    305       },
    306       "human_studies": {
    307         "pre_registered": {
    308           "applies": false,
    309           "answer": false,
    310           "justification": "No human participants in this study.",
    311           "source": "haiku"
    312         },
    313         "irb_or_ethics_approval": {
    314           "applies": false,
    315           "answer": false,
    316           "justification": "No human participants in this study.",
    317           "source": "haiku"
    318         },
    319         "demographics_reported": {
    320           "applies": false,
    321           "answer": false,
    322           "justification": "No human participants in this study.",
    323           "source": "haiku"
    324         },
    325         "inclusion_exclusion_criteria": {
    326           "applies": false,
    327           "answer": false,
    328           "justification": "No human participants in this study.",
    329           "source": "haiku"
    330         },
    331         "randomization_described": {
    332           "applies": false,
    333           "answer": false,
    334           "justification": "No human participants in this study.",
    335           "source": "haiku"
    336         },
    337         "blinding_described": {
    338           "applies": false,
    339           "answer": false,
    340           "justification": "No human participants in this study.",
    341           "source": "haiku"
    342         },
    343         "attrition_reported": {
    344           "applies": false,
    345           "answer": false,
    346           "justification": "No human participants in this study.",
    347           "source": "haiku"
    348         }
    349       },
    350       "cost_and_practicality": {
    351         "inference_cost_reported": {
    352           "applies": true,
    353           "answer": true,
    354           "justification": "Per-query iteration counts (Table 2) and pool construction/query optimization times in seconds (Table 14) are reported, providing practical cost information.",
    355           "source": "haiku"
    356         },
    357         "compute_budget_stated": {
    358           "applies": true,
    359           "answer": false,
    360           "justification": "Total computational budget (GPU/CPU hours, hardware specifications) is not stated; only per-query timing data are provided.",
    361           "source": "haiku"
    362         }
    363       }
    364     }
    365   },
    366   "claims": [
    367     {
    368       "claim": "DE-based black-box attack achieves competitive or higher success rates than GGPP (gradient white-box) at Top-10 and Top-20 thresholds on dense retrievers",
    369       "evidence": "Table 2: DE_seq_stop matches or exceeds GGPP at Succ@10/Succ@20 on SciFact (0.573 vs 0.458) and FiQA (0.520 vs 0.480), though GGPP dominates Succ@1 on MS MARCO (0.830 vs 0.570)",
    370       "supported": "moderate"
    371     },
    372     {
    373       "claim": "Effective adversarial suffixes require only 2-3 tokens on average",
    374       "evidence": "DE_seq_stop achieves average suffix lengths of 1.32 (MS MARCO) to 2.76 (FEVER) tokens while maintaining high Top-10/Top-20 success rates (Table 2)",
    375       "supported": "strong"
    376     },
    377     {
    378       "claim": "Early stopping reduces query cost by approximately 40% without reducing attack success",
    379       "evidence": "Figure 2 shows DE_seq_stop reaches 97% Top-10 success at 2 tokens while DE_seq needs 3-4; Section 3.3.3 states the hybrid strategy 'cuts average query cost by ~40%'",
    380       "supported": "strong"
    381     },
    382     {
    383       "claim": "DE-generated suffixes evade BERT-based and RoBERTa-based adversarial detection",
    384       "evidence": "Table 4: RoBERTa detector achieves AUROC 0.2023 and AUPRC 0.4665 at 0.5% FPR target; Table 13: CLS attack probability is near-identical for original vs. attacked queries (~0.40)",
    385       "supported": "strong"
    386     },
    387     {
    388       "claim": "Readability-aware MLM pooling strategy significantly reduces suffix perplexity without degrading attack success",
    389       "evidence": "Table 16: Welch's t-test yields p < 1e-9 for NLL reduction across all three datasets; Table 6 shows stable Success@1 across pool sizes from 500 to 30,522",
    390       "supported": "strong"
    391     },
    392     {
    393       "claim": "Adversarial retrieval manipulation causes substantial downstream answer quality degradation in real QA pipelines",
    394       "evidence": "Table 5 shows an 83.5% EM drop on SQuAD when the target reaches rank 1 and a 14.8% average EM drop on NQ-Open; however, the evaluation covers only 500 queries per dataset and the generator LLM is unspecified",
    395       "supported": "moderate"
    396     }
    397   ],
    398   "methodology_tags": ["benchmark-eval"],
    399   "key_findings": "DeRAG demonstrates that gradient-free Differential Evolution can generate adversarial query suffixes of 2-3 tokens that effectively manipulate RAG retrieval rankings, matching gradient-based white-box attacks (GGPP) at broader retrieval thresholds (Top-10, Top-20) while requiring no model internals. The attack evades RoBERTa-based detection, with the detector scoring well below chance (AUROC 0.2023 against a chance level of 0.5), and causes measurable downstream answer quality degradation (14-27% average EM drop on QA benchmarks, up to 83.5% when the adversarial document reaches rank 1). A readability-aware suffix construction strategy using MLM token pooling significantly reduces suffix perplexity (Welch's t-test, p < 1e-9) without degrading attack success. However, the dense-retrieval experiments use only BERT-base-uncased and all experiments run on artificially small 1,000-document corpus subsets, limiting generalizability claims.",
    400   "red_flags": [
    401     {
    402       "flag": "Unrealistically small corpus",
    403       "detail": "Experiments use only 1,000-document subsets of BEIR datasets; production RAG systems operate over millions of documents where attack rank targets and success rates would differ substantially."
    404     },
    405     {
    406       "flag": "No CIs or significance tests on primary results",
    407       "detail": "Attack success rates (Success@K, ΔMRR, ΔnDCG) in Tables 1-2 are point estimates from 100 queries per dataset with no confidence intervals or statistical significance tests for the main comparative claims."
    408     },
    409     {
    410       "flag": "Single retriever model tested",
    411       "detail": "Dense retrieval experiments use only BERT-base-uncased (2018); modern instruction-tuned embedding models widely used in production (E5, GTE, OpenAI text-embedding-3) are untested."
    412     },
    413     {
    414       "flag": "Generator LLM unspecified for downstream evaluation",
    415       "detail": "Section 4.5 evaluates downstream RAG answer quality (Table 5) but never names or versions the LLM used for generation, making these results unreproducible."
    416     },
    417     {
    418       "flag": "Exact DE hyperparameters not reported",
    419       "detail": "The paper provides typical ranges for DE parameters (F ∈ [0.5, 1.0], CR ∈ [0.1, 0.9], N, T) but not the exact values used in the reported experiments."
    420     },
    421     {
    422       "flag": "Title overstates breadth of coverage",
    423       "detail": "Title claims attacks on 'Multiple Retrieval-Augmented Generation Applications' but only one dense retriever (BERT-base-uncased) and one sparse retriever (BM25) on small corpus subsets are tested."
    424     }
    425   ],
    426   "cited_papers": [
    427     {
    428       "title": "Prompt Perturbation in Retrieval-Augmented Generation based Large Language Models (GGPP)",
    429       "relevance": "Primary dense-retriever baseline; gradient-based white-box attack on RAG that DeRAG is designed to compete with without gradient access"
    430     },
    431     {
    432       "title": "PRADA: Practical Black-box Adversarial Attacks against Neural Ranking Models",
    433       "relevance": "Primary sparse-retriever baseline; the most comparable black-box adversarial ranking attack method"
    434     },
    435     {
    436       "title": "BEIR: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models",
    437       "relevance": "Evaluation framework used for all retrieval experiments across SciFact, FiQA, FEVER, MS MARCO"
    438     },
    439     {
    440       "title": "Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks",
    441       "relevance": "Original RAG paper; defines the RAG paradigm whose retrieval stage DeRAG attacks"
    442     },
    443     {
    444       "title": "BadRAG: Identifying Vulnerabilities in Retrieval Augmented Generation of Large Language Models",
    445       "relevance": "Related poisoning-based backdoor attack on RAG corpora; complementary threat model"
    446     },
    447     {
    448       "title": "CtrlRAG: Black-box Adversarial Attacks Based on Masked Language Models in Retrieval-Augmented Language Generation",
    449       "relevance": "Related black-box RAG attack using MLM; close competitor using a different gradient-free approach"
    450     },
    451     {
    452       "title": "Differential evolution – a simple and efficient heuristic for global optimization over continuous spaces",
    453       "relevance": "Foundational algorithm (Storn & Price 1997) underlying the DeRAG optimization method"
    454     },
    455     {
    456       "title": "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding",
    457       "relevance": "Core retrieval encoder used in all dense retrieval experiments"
    458     }
    459   ],
    460   "engagement_factors": {
    461     "practical_relevance": {
    462       "score": 2,
    463       "justification": "Demonstrates a black-box attack on RAG retrieval with public code; directly actionable for practitioners assessing RAG security, although it is not evaluated against production-scale corpora or production embedding models."
    464     },
    465     "surprise_contrarian": {
    466       "score": 1,
    467       "justification": "The finding that ≤5 tokens suffice for effective retrieval manipulation is noteworthy, but RAG vulnerability to adversarial attacks is an expected result in this field."
    468     },
    469     "fear_safety": {
    470       "score": 2,
    471       "justification": "Shows RAG systems can be manipulated to surface misinformation via small, detector-evading token appends — a concrete and practical AI safety concern for deployed systems."
    472     },
    473     "drama_conflict": {
    474       "score": 1,
    475       "justification": "The attack-vs-defense framing is inherently adversarial but the paper is technical rather than polemical; no controversial claims about deployed systems."
    476     },
    477     "demo_ability": {
    478       "score": 2,
    479       "justification": "Code is released on GitHub using public BEIR benchmarks and BERT-base-uncased, making reproduction accessible to practitioners without specialized resources."
    480     },
    481     "brand_recognition": {
    482       "score": 0,
    483       "justification": "Authors are from National ChengChi University (Taiwan), a respected institution but not a major AI lab with brand recognition in the LLM community."
    484     }
    485   },
    486   "hn_data": {
    487     "threads": [
    488       {
    489         "hn_id": "44120359",
    490         "title": "Diffusion vs. Autoregressive Language Models: A Text Embedding Perspective",
    491         "points": 19,
    492         "comments": 1,
    493         "url": "https://news.ycombinator.com/item?id=44120359"
    494       },
    495       {
    496         "hn_id": "36931866",
    497         "title": "Universal and Transferable Adversarial Attacks on LLM",
    498         "points": 3,
    499         "comments": 0,
    500         "url": "https://news.ycombinator.com/item?id=36931866"
    501       },
    502       {
    503         "hn_id": "36903968",
    504         "title": "Universal and Transferable Adversarial Attacks on Aligned Language Models",
    505         "points": 1,
    506         "comments": 0,
    507         "url": "https://news.ycombinator.com/item?id=36903968"
    508       }
    509     ],
    510     "top_points": 19,
    511     "total_points": 23,
    512     "total_comments": 1
    513   }
    514 }

Impressum · Datenschutz