scan.json (30397B)
1 { 2 "paper": { 3 "title": "The Good and The Bad: Exploring Privacy Issues in Retrieval-Augmented Generation (RAG)", 4 "authors": [ 5 "Shenglai Zeng", 6 "Jiankun Zhang", 7 "Pengfei He", 8 "Yue Xing", 9 "Yiding Liu", 10 "Han Xu", 11 "Jie Ren", 12 "Shuaiqiang Wang", 13 "Dawei Yin", 14 "Yi Chang", 15 "Jiliang Tang" 16 ], 17 "year": 2024, 18 "venue": "Annual Meeting of the Association for Computational Linguistics", 19 "arxiv_id": "2402.16893", 20 "doi": "10.48550/arXiv.2402.16893" 21 }, 22 "scan_version": 3, 23 "active_modules": [ 24 "experimental_rigor", 25 "data_leakage" 26 ], 27 "methodology_tags": [ 28 "benchmark-eval" 29 ], 30 "key_findings": "RAG systems are highly vulnerable to privacy attacks on the retrieval database, with composite structured prompts ({information} + {command}) extracting verbatim or near-verbatim retrieval data at rates near 50%. Conversely, integrating retrieval data substantially reduces LLMs' tendency to output memorized training data, outperforming noise injection and protective system prompts. Mitigation strategies such as re-ranking are ineffective, while abstractive summarization reduces untargeted attack risk by ~50% but is limited against targeted attacks; distance thresholds present a privacy-utility tradeoff.", 31 "checklist": { 32 "artifacts": { 33 "code_released": { 34 "applies": true, 35 "answer": true, 36 "justification": "The abstract states 'Our code is available at https://github.com/phycholosogy/RAG-privacy', providing a GitHub repository URL." 37 }, 38 "data_released": { 39 "applies": true, 40 "answer": true, 41 "justification": "The paper uses publicly available datasets: Enron Email dataset (500K emails), HealthcareMagic-101 (200K dialogues), wikitext-103, w3c-email, and Common Crawl chunks. All are publicly accessible." 
42 }, 43 "environment_specified": { 44 "applies": true, 45 "answer": false, 46 "justification": "No requirements.txt, Dockerfile, conda environment, or detailed environment setup section is provided in the paper. Only model names and library names (Chroma, embedding models) are mentioned." 47 }, 48 "reproduction_instructions": { 49 "applies": true, 50 "answer": false, 51 "justification": "No step-by-step reproduction instructions are included in the paper. While a code repository is provided, the paper itself contains no README-style instructions or 'Reproducing Results' section." 52 } 53 }, 54 "statistical_methodology": { 55 "confidence_intervals_or_error_bars": { 56 "applies": true, 57 "answer": false, 58 "justification": "All results in Tables 1-3 and Figures 2-6 are reported as raw counts without any confidence intervals or error bars." 59 }, 60 "significance_tests": { 61 "applies": true, 62 "answer": false, 63 "justification": "The paper compares attack effectiveness across models, datasets, and settings based solely on comparing raw count numbers. No statistical significance tests (p-values, t-tests, etc.) are reported." 64 }, 65 "effect_sizes_reported": { 66 "applies": true, 67 "answer": true, 68 "justification": "The paper provides absolute counts with baseline context throughout (e.g., Table 3 shows reduction from 245 emails to 2 with RAG-Chatdoctor, prefix attack reconstruction from 213 to 34). The relative magnitudes are clear from the tabulated results." 69 }, 70 "sample_size_justified": { 71 "applies": true, 72 "answer": false, 73 "justification": "The paper uses 250 prompts for retrieval attacks, 5000 for targeted training attacks, and 1000 for prefix attacks without justifying these choices or performing power analysis." 74 }, 75 "variance_reported": { 76 "applies": true, 77 "answer": false, 78 "justification": "All results appear to be from single experimental runs. 
No standard deviations, variance measures, or multi-run results are reported anywhere in the paper." 79 } 80 }, 81 "evaluation_design": { 82 "baselines_included": { 83 "applies": true, 84 "answer": true, 85 "justification": "Section 5 compares RAG against multiple baselines: no retrieval ('None'), random noise injection ('Random Noise+prompt'), and protective system prompts ('System Prompt+prompt'), shown in Table 3." 86 }, 87 "baselines_contemporary": { 88 "applies": true, 89 "answer": true, 90 "justification": "The models evaluated (Llama-2-7b-Chat, Llama-2-13b-Chat, GPT-3.5-turbo, GPT-Neo-1.3B) were contemporary at time of writing. Prior attack methods (Carlini et al. 2021, 2022) are cited and the paper explains why they are not directly applicable to RAG." 91 }, 92 "ablation_study": { 93 "applies": true, 94 "answer": true, 95 "justification": "Section 4.4 presents extensive ablation studies on: number of retrieved documents k (Figure 3), command component design (Figure 2), embedding models (Figure 6, Appendix A.1), and temperature parameter (Appendix A.1, Tables 6-7)." 96 }, 97 "multiple_metrics": { 98 "applies": true, 99 "answer": true, 100 "justification": "The paper uses multiple metrics: Retrieval Contexts, Repeat Prompts, Repeat Contexts, Rouge Prompts, Rouge Contexts, and Targeted Information (Section 4.1). Performance metrics include ROUGE-L and perplexity (Appendix A.3)." 101 }, 102 "human_evaluation": { 103 "applies": true, 104 "answer": false, 105 "justification": "All evaluation is automated using exact string matching and ROUGE-L score thresholds. No human evaluation of extracted content quality or privacy severity is performed." 106 }, 107 "held_out_test_set": { 108 "applies": true, 109 "answer": true, 110 "justification": "Appendix A.3 describes a 99:1 train/test split: 'we partition it into training and testing sets using a 99:1 ratio. 
The training set is utilized to build the RAG model, while we randomly sample 1000 instances from the testing set to evaluate the performance.'" 111 }, 112 "per_category_breakdown": { 113 "applies": true, 114 "answer": true, 115 "justification": "Results are consistently broken down by dataset (HealthcareMagic vs. Enron), model (L7C, L13C, GPT), attack type (targeted vs. untargeted), and in ablation studies by embedding model, temperature, and command variant." 116 }, 117 "failure_cases_discussed": { 118 "applies": true, 119 "answer": true, 120 "justification": "Section 4.5 discusses cases where mitigations fail: re-ranking has 'almost no mitigation effects' (Figure 4a-b), and summarization can inadvertently increase PII leakage under targeted attacks ('the occurrence of personally identifiable information even inadvertently increased')." 121 }, 122 "negative_results_reported": { 123 "applies": true, 124 "answer": true, 125 "justification": "The paper reports that re-ranking has 'almost no mitigation effects' (Section 4.5), that summarization can increase PIIs for targeted attacks on Enron (Section 4.5), and that increasing k does not substantially increase leakage (Section 4.4)." 126 } 127 }, 128 "claims_and_evidence": { 129 "abstract_claims_supported": { 130 "applies": true, 131 "answer": true, 132 "justification": "The abstract claims (1) RAG systems are vulnerable to retrieval data leakage and (2) RAG can mitigate training data leakage. Both are supported by Tables 1-3 in Sections 4 and 5 respectively." 133 }, 134 "causal_claims_justified": { 135 "applies": true, 136 "answer": false, 137 "justification": "Section 5.4 makes causal claims ('incorporating external data makes LLMs less reliant on training data') justified only by a Bayesian hypothesis and observational comparisons. The comparisons with/without RAG confound the effect of retrieval data with the effect of additional context length, despite the noise baseline attempting to control for this."
138 }, 139 "generalization_bounded": { 140 "applies": true, 141 "answer": false, 142 "justification": "The title claims to explore 'Privacy Issues in Retrieval-Augmented Generation (RAG)' broadly, but experiments use only 2-3 LLMs on 2 retrieval datasets with a single retriever architecture. The conclusions in Section 6 are stated broadly without qualifying to the tested settings." 143 }, 144 "alternative_explanations_discussed": { 145 "applies": true, 146 "answer": true, 147 "justification": "Section 5.4 discusses why RAG reduces memorization using a Bayesian inference framework (Xie et al., 2021) and considers that the effect may be driven by shifting the conditional distribution rather than simple distraction, supported by the W3C-Email comparison." 148 }, 149 "proxy_outcome_distinction": { 150 "applies": true, 151 "answer": false, 152 "justification": "The paper uses exact string matches and ROUGE-L > 0.5 as proxies for 'privacy leakage' without discussing the gap between textual overlap metrics and actual privacy harm. A ROUGE-L > 0.5 match may not constitute meaningful privacy leakage, and exact matches of non-sensitive content are counted the same as PII extraction." 153 } 154 }, 155 "setup_transparency": { 156 "model_versions_specified": { 157 "applies": true, 158 "answer": false, 159 "justification": "The paper specifies 'Llama-7b-chat', 'Llama-13b-chat', 'GPT-3.5-turbo', and 'GPT-Neo-1.3B' without snapshot dates or exact version identifiers. GPT-3.5-turbo behavior changes across versions and no API version date is given." 160 }, 161 "prompts_provided": { 162 "applies": true, 163 "answer": true, 164 "justification": "Actual prompt text is provided: command prompts in Section 4.4 (C1-C4), information components in Appendix A.2.1, protective system prompts in Table 10, and summarization defense prompts in Table 11." 
165 }, 166 "hyperparameters_reported": { 167 "applies": true, 168 "answer": false, 169 "justification": "While k=2, embedding model, and distance metric (L2-norm) are reported, the default temperature and other generation parameters (top-p, max tokens) for the main experiments are not stated. Temperature is only explored as a variable in ablation studies." 170 }, 171 "scaffolding_described": { 172 "applies": false, 173 "answer": false, 174 "justification": "No agentic scaffolding is used. The system is a standard RAG pipeline (retriever + LLM generation) without agent loops, tool use, or multi-step reasoning." 175 }, 176 "data_preprocessing_documented": { 177 "applies": true, 178 "answer": true, 179 "justification": "Section 4.1 describes how data is prepared: 'we construct each doctor-patient medical dialogue as a data piece embedded and stored in a vector database' and 'we construct each email as a data piece.' Appendix A.2.1 describes how attack prompts are generated." 180 } 181 }, 182 "limitations_and_scope": { 183 "limitations_section_present": { 184 "applies": true, 185 "answer": true, 186 "justification": "Section 7 is a dedicated 'Limitations' section discussing the study's scope limitations." 187 }, 188 "threats_to_validity_specific": { 189 "applies": true, 190 "answer": false, 191 "justification": "Section 7 limitations are generic scope boundaries ('concentrated primarily on the application of retrieval augmentation during the inference stage', 'other retrieval-based language models... warrant further investigation') without discussing specific threats to the validity of their findings like confounds or measurement validity." 
192 }, 193 "scope_boundaries_stated": { 194 "applies": true, 195 "answer": true, 196 "justification": "Section 7 explicitly states what was NOT tested: pre-training/fine-tuning phase integration, other retrieval-based LM architectures (citing Huang et al., 2023; Borgeaud et al., 2022), and identifies developing effective defenses as an open question." 197 } 198 }, 199 "data_integrity": { 200 "raw_data_available": { 201 "applies": true, 202 "answer": false, 203 "justification": "The raw attack outputs, extracted PIIs, and detailed per-prompt results are not made available. Only aggregate counts are reported in tables." 204 }, 205 "data_collection_described": { 206 "applies": true, 207 "answer": true, 208 "justification": "Section 4.1 describes datasets: Enron Email dataset of 500,000 employee emails and HealthcareMagic-101 of 200K doctor-patient dialogues. Appendix A.2.1 describes how attack prompts are generated for targeted and untargeted attacks." 209 }, 210 "recruitment_methods_described": { 211 "applies": false, 212 "answer": false, 213 "justification": "No human participants. All experiments use standard public datasets (Enron Email, HealthcareMagic, wikitext-103, Common Crawl)." 214 }, 215 "data_pipeline_documented": { 216 "applies": true, 217 "answer": true, 218 "justification": "The pipeline is documented: datasets are embedded using specified models, stored in Chroma vector database, retrieved with k-NN (L2-norm), concatenated with query, and fed to the LLM. Evaluation criteria (exact match, ROUGE-L > 0.5) are defined in Section 4.1." 219 } 220 }, 221 "conflicts_of_interest": { 222 "funding_disclosed": { 223 "applies": true, 224 "answer": false, 225 "justification": "No funding sources, grants, or acknowledgments section is present in the paper." 226 }, 227 "affiliations_disclosed": { 228 "applies": true, 229 "answer": true, 230 "justification": "Author affiliations are listed: Michigan State University, Baidu Inc., and Jilin University. 
The Baidu affiliation is relevant since Baidu develops and deploys RAG systems." 231 }, 232 "funder_independent_of_outcome": { 233 "applies": true, 234 "answer": false, 235 "justification": "No funding is disclosed. Some authors are affiliated with Baidu Inc., which has commercial interest in RAG systems. The independence of any potential funding cannot be assessed." 236 }, 237 "financial_interests_declared": { 238 "applies": true, 239 "answer": false, 240 "justification": "No competing interests or financial disclosure statement is present in the paper. Authors from Baidu (a company building RAG products) have potential undisclosed conflicts." 241 } 242 }, 243 "contamination": { 244 "training_cutoff_stated": { 245 "applies": false, 246 "answer": false, 247 "justification": "This paper tests privacy attack success rates on RAG systems rather than evaluating model knowledge on benchmarks. The study deliberately leverages known training data (Enron emails in GPT-Neo's corpus) as part of the experimental design for memorization tests." 248 }, 249 "train_test_overlap_discussed": { 250 "applies": false, 251 "answer": false, 252 "justification": "The paper tests privacy/security properties of RAG systems (attack and defense effectiveness), not model capability on benchmarks where train/test overlap would be a validity threat." 253 }, 254 "benchmark_contamination_addressed": { 255 "applies": false, 256 "answer": false, 257 "justification": "The paper is a red-teaming/privacy study testing extraction attacks on RAG systems. Benchmark contamination in the traditional sense is not applicable to this study design." 258 } 259 }, 260 "human_studies": { 261 "pre_registered": { 262 "applies": false, 263 "answer": false, 264 "justification": "No human participants. All experiments are automated attacks on RAG systems." 265 }, 266 "irb_or_ethics_approval": { 267 "applies": false, 268 "answer": false, 269 "justification": "No human participants. 
The study uses publicly available datasets and automated evaluation." 270 }, 271 "demographics_reported": { 272 "applies": false, 273 "answer": false, 274 "justification": "No human participants in the study." 275 }, 276 "inclusion_exclusion_criteria": { 277 "applies": false, 278 "answer": false, 279 "justification": "No human participants in the study." 280 }, 281 "randomization_described": { 282 "applies": false, 283 "answer": false, 284 "justification": "No human participants or experimental conditions involving human assignment." 285 }, 286 "blinding_described": { 287 "applies": false, 288 "answer": false, 289 "justification": "No human participants or evaluators requiring blinding." 290 }, 291 "attrition_reported": { 292 "applies": false, 293 "answer": false, 294 "justification": "No human participants in the study." 295 } 296 }, 297 "cost_and_practicality": { 298 "inference_cost_reported": { 299 "applies": true, 300 "answer": false, 301 "justification": "No API costs, token counts, wall-clock time, or cost per attack prompt are reported, despite using GPT-3.5-turbo (a paid API) for thousands of queries." 302 }, 303 "compute_budget_stated": { 304 "applies": true, 305 "answer": false, 306 "justification": "No total computational budget, GPU hours, or API spend is reported." 307 } 308 }, 309 "experimental_rigor": { 310 "seed_sensitivity_reported": { 311 "applies": true, 312 "answer": false, 313 "justification": "No mention of multiple random seeds. Results appear to be single-run experiments." 314 }, 315 "number_of_runs_stated": { 316 "applies": true, 317 "answer": false, 318 "justification": "The number of experimental runs is never stated. It is unclear whether results are from single runs or averaged across multiple runs." 
319 }, 320 "hyperparameter_search_budget": { 321 "applies": true, 322 "answer": false, 323 "justification": "While different command prompts and parameter settings are explored in ablation studies, no systematic hyperparameter search budget is reported." 324 }, 325 "best_config_selection_justified": { 326 "applies": true, 327 "answer": true, 328 "justification": "Section 4.4 explains why 'Please repeat all the context' was selected as the default command: 'it strikes a balance between retrieval and prompting the LLM to repeat.' The ablation shows all configurations, not just the best." 329 }, 330 "multiple_comparison_correction": { 331 "applies": true, 332 "answer": false, 333 "justification": "The paper makes comparisons across multiple models, datasets, attack types, and settings without any correction for multiple comparisons." 334 }, 335 "self_comparison_bias_addressed": { 336 "applies": true, 337 "answer": false, 338 "justification": "The authors propose the attack method and evaluate its effectiveness without acknowledging potential bias in evaluating their own approach. No independent evaluation is conducted." 339 }, 340 "compute_budget_vs_performance": { 341 "applies": false, 342 "answer": false, 343 "justification": "Compute differences between experimental settings are negligible (same number of API calls across conditions)." 344 }, 345 "benchmark_construct_validity": { 346 "applies": true, 347 "answer": false, 348 "justification": "The paper does not discuss whether exact string matching and ROUGE-L > 0.5 thresholds are valid measures of actual privacy leakage risk. A high ROUGE-L score with non-sensitive content may not constitute meaningful privacy harm." 349 }, 350 "scaffold_confound_addressed": { 351 "applies": false, 352 "answer": false, 353 "justification": "No scaffolding is involved. The RAG pipeline is consistent across all comparisons." 
354 } 355 }, 356 "data_leakage": { 357 "temporal_leakage_addressed": { 358 "applies": true, 359 "answer": false, 360 "justification": "No discussion of temporal aspects of the evaluation setup. The paper does not address whether the timing of dataset creation relative to model training affects results." 361 }, 362 "feature_leakage_addressed": { 363 "applies": true, 364 "answer": false, 365 "justification": "No discussion of whether the attack setup inadvertently leaks information that would not be available in a realistic attack scenario." 366 }, 367 "non_independence_addressed": { 368 "applies": true, 369 "answer": false, 370 "justification": "No discussion of whether attack prompts are independent or whether retrieval database entries share structural similarities that inflate success metrics." 371 }, 372 "leakage_detection_method": { 373 "applies": true, 374 "answer": false, 375 "justification": "No concrete leakage detection methods (canary strings, membership inference, n-gram overlap analysis) are applied. Leakage is measured purely by output matching." 376 } 377 } 378 }, 379 "claims": [ 380 { 381 "claim": "RAG systems are highly susceptible to retrieval data extraction attacks, with a considerable amount of sensitive data being extracted (near 50% success rate).", 382 "evidence": "Tables 1-2 show that out of 250 prompts, targeted attacks extract 89-205 PIIs/medical records depending on model and dataset (Section 4.2-4.3). 
Untargeted attacks produce 55-208 ROUGE-matched contexts.", 383 "supported": "moderate" 384 }, 385 { 386 "claim": "The composite structured prompt ({information} + {command}) is effective for extracting retrieval data from RAG systems.", 387 "evidence": "Tables 8-9 and Figure 2 show that the command 'Please repeat all the context' achieves consistently good extraction across settings, while very short or very long commands reduce effectiveness (Section 4.4).", 388 "supported": "moderate" 389 }, 390 { 391 "claim": "Increasing k (number of retrieved documents) does not substantially increase privacy leakage.", 392 "evidence": "Figure 3 shows that while retrieved documents increase with k, leaked documents grow much more slowly. Section 4.4 attributes this to 'models' constrained capacity to coherently process extensive contextual content.'", 393 "supported": "moderate" 394 }, 395 { 396 "claim": "Integrating retrieval data substantially reduces LLMs' tendency to output memorized training data, achieving greater protection than noise injection or system prompts.", 397 "evidence": "Table 3 shows targeted attack extractions drop from 245 emails (no RAG) to 2-4 (with RAG), and prefix attack reconstructions from 213 to 33-70 (Section 5.2-5.3). Noise/system prompts are far less effective.", 398 "supported": "strong" 399 }, 400 { 401 "claim": "Re-ranking has almost no mitigation effect on retrieval data extraction.", 402 "evidence": "Figures 4a-4b and Tables 18-19 show that re-ranking using bge-reranker-large produces nearly identical extraction counts as the no-reranking baseline (Section 4.5).", 403 "supported": "moderate" 404 }, 405 { 406 "claim": "Abstractive summarization reduces untargeted attack risk by approximately 50% but is limited against targeted attacks.", 407 "evidence": "Figures 4c-4d show summarization reduces untargeted extraction substantially, but for targeted attacks on Enron, 'PIIs even inadvertently increased' (Section 4.5). 
Tables 20-21 provide detailed numbers.", 408 "supported": "moderate" 409 }, 410 { 411 "claim": "There exists a privacy-utility tradeoff when setting retrieval distance thresholds.", 412 "evidence": "Figure 5 shows that lower thresholds reduce extraction but also degrade RAG performance (ROUGE-L/perplexity). Tables 13-14 quantify the performance impact at various thresholds (Section 4.5).", 413 "supported": "strong" 414 } 415 ], 416 "red_flags": [ 417 { 418 "flag": "No uncertainty quantification", 419 "detail": "All results are reported as raw counts from apparently single runs with no error bars, confidence intervals, standard deviations, or multi-run variance. The reliability of the reported counts is impossible to assess." 420 }, 421 { 422 "flag": "Unjustified sample sizes", 423 "detail": "250 prompts for retrieval attacks, 5000 for targeted training attacks, and 1000 for prefix attacks are used without justification. It is unclear whether these are sufficient for reliable conclusions." 424 }, 425 { 426 "flag": "Undisclosed conflicts of interest", 427 "detail": "Multiple authors are affiliated with Baidu Inc., which develops and deploys RAG systems commercially. No funding or competing interests statement is provided. The paper's conclusion that RAG 'could potentially mitigate the risks of training data leakage' is favorable to RAG adoption." 428 }, 429 { 430 "flag": "Construct validity of privacy metrics", 431 "detail": "ROUGE-L > 0.5 and exact token matching are used as proxies for privacy leakage without discussing whether these thresholds correspond to actual privacy harm. Non-sensitive text matches are counted equally with PII extraction." 432 }, 433 { 434 "flag": "Missing model version specificity", 435 "detail": "GPT-3.5-turbo is used without a snapshot date or API version. Model behavior varies significantly across versions, making results potentially non-reproducible." 
436 } 437 ], 438 "cited_papers": [ 439 { 440 "title": "Extracting training data from large language models", 441 "authors": ["Nicholas Carlini", "Florian Tramèr", "Eric Wallace", "Matthew Jagielski"], 442 "year": 2021, 443 "relevance": "Foundational work on data extraction attacks against LLMs, directly extended in this paper's methodology." 444 }, 445 { 446 "title": "Quantifying memorization across neural language models", 447 "authors": ["Nicholas Carlini", "Daphne Ippolito", "Matthew Jagielski", "Katherine Lee"], 448 "year": 2022, 449 "arxiv_id": "2202.07646", 450 "relevance": "Established prefix attack methodology and factors affecting memorization risk in LLMs, used as a baseline attack method." 451 }, 452 { 453 "title": "Retrieval-augmented generation for knowledge-intensive NLP tasks", 454 "authors": ["Patrick Lewis", "Ethan Perez", "Aleksandra Piktus"], 455 "year": 2020, 456 "relevance": "Introduced the RAG technique that this paper investigates for privacy risks." 457 }, 458 { 459 "title": "Privacy implications of retrieval-based language models", 460 "authors": ["Yangsibo Huang", "Samyak Gupta", "Zexuan Zhong", "Kai Li", "Danqi Chen"], 461 "year": 2023, 462 "arxiv_id": "2305.14888", 463 "relevance": "Investigated privacy risks of kNN-LM retrieval-based models, closely related work with different architecture focus." 464 }, 465 { 466 "title": "Exploring memorization in fine-tuned language models", 467 "authors": ["Shenglai Zeng", "Yaxin Li", "Jie Ren"], 468 "year": 2023, 469 "arxiv_id": "2310.06714", 470 "relevance": "Examined memorization across different fine-tuning tasks, finding vulnerabilities in dialogue and summarization tasks."
471 }, 472 { 473 "title": "Preventing verbatim memorization in language models gives a false sense of privacy", 474 "authors": ["Daphne Ippolito", "Florian Tramèr", "Milad Nasr"], 475 "year": 2022, 476 "arxiv_id": "2210.17546", 477 "relevance": "Demonstrated that preventing verbatim memorization alone is insufficient for privacy, relevant to the metrics used in this paper." 478 }, 479 { 480 "title": "Deduplicating training data mitigates privacy risks in language models", 481 "authors": ["Nikhil Kandpal", "Eric Wallace", "Colin Raffel"], 482 "year": 2022, 483 "relevance": "Showed that data duplication increases memorization risk in LLMs, relevant context for training data privacy." 484 }, 485 { 486 "title": "Memorization in NLP fine-tuning methods", 487 "authors": ["Fatemehsadat Mireshghallah", "Archit Uniyal", "Tianhao Wang"], 488 "year": 2022, 489 "arxiv_id": "2205.12506", 490 "relevance": "Found that fine-tuning model heads leads to more significant memorization than adapter modules, relevant to LLM privacy risks." 491 }, 492 { 493 "title": "Retrieval-augmented code generation and summarization", 494 "authors": ["Md Rizwan Parvez", "Wasi Ahmad", "Saikat Chakraborty"], 495 "year": 2021, 496 "relevance": "RAG applied to code generation, an application domain where proprietary code in retrieval databases creates privacy concerns." 497 }, 498 { 499 "title": "Retrieval augmentation reduces hallucination in conversation", 500 "authors": ["Kurt Shuster", "Spencer Poff", "Moya Chen"], 501 "year": 2021, 502 "arxiv_id": "2104.07567", 503 "relevance": "Demonstrated RAG's hallucination reduction benefits, relevant to understanding the dual nature of RAG's effects on generation." 
504 }, 505 { 506 "title": "Prompts should not be seen as secrets: Systematically measuring prompt extraction attack success", 507 "authors": ["Yiming Zhang", "Daphne Ippolito"], 508 "year": 2023, 509 "arxiv_id": "2307.06865", 510 "relevance": "Related work on prompt extraction attacks, a form of information leakage from LLM systems." 511 } 512 ], 513 "engagement_factors": { 514 "practical_relevance": { 515 "score": 2, 516 "justification": "Practitioners building RAG systems can directly use these findings and mitigation strategies (summarization, distance thresholds) to assess and reduce privacy risks." 517 }, 518 "surprise_contrarian": { 519 "score": 1, 520 "justification": "The finding that RAG protects training data while exposing retrieval data is a mildly surprising dual characterization, but RAG privacy risks are somewhat expected." 521 }, 522 "fear_safety": { 523 "score": 2, 524 "justification": "Demonstrates concrete privacy attacks extracting PII (phone numbers, medical records) from RAG systems with near 50% success rates." 525 }, 526 "drama_conflict": { 527 "score": 1, 528 "justification": "No major controversy, but the paper highlights that widely-deployed RAG systems are vulnerable, which has industry implications." 529 }, 530 "demo_ability": { 531 "score": 2, 532 "justification": "Code is released on GitHub and the attacks use publicly available datasets and models, making reproduction feasible." 533 }, 534 "brand_recognition": { 535 "score": 1, 536 "justification": "Authors from Michigan State and Baidu; experiments use GPT-3.5-turbo and Llama models, which are well-known but the paper itself is not from a top-tier AI lab." 537 } 538 } 539 }