scan.json (29174B)
1 { 2 "paper": { 3 "title": "SALAD: Systematic Assessment of Machine Unlearning on LLM-Aided Hardware Design", 4 "authors": [ 5 "Zeng Wang", 6 "Minghao Shao", 7 "Rupesh Raj Karn", 8 "Likhitha Mankali", 9 "Jitendra Bhandari", 10 "Ramesh Karri", 11 "Ozgur Sinanoglu", 12 "Muhammad Shafique", 13 "Johann Knechtel" 14 ], 15 "year": 2025, 16 "venue": "Workshop on Machine Learning for CAD (MLCAD)", 17 "arxiv_id": "2506.02089", 18 "doi": "10.1109/MLCAD65511.2025.11189152" 19 }, 20 "scan_version": 2, 21 "active_modules": ["experimental_rigor", "data_leakage"], 22 "methodology_tags": ["benchmark-eval", "case-study"], 23 "key_findings": "The paper evaluates six machine unlearning algorithms (GA, GD, PO, NPO, RMU, SimNPO) on LLaMA 3.1-8B fine-tuned for Verilog generation across four threat scenarios: benchmark contamination, custom design withdrawal, malicious code removal, and IP leakage prevention. RMU and SimNPO provide the best balance between forgetting effectiveness and utility preservation, while gradient-based methods (GA, GD) achieve more complete erasure but severely degrade downstream RTL generation quality. Two to three unlearning epochs are generally sufficient for convergence, and different threat scenarios favor different algorithm choices.", 24 "checklist": { 25 "artifacts": { 26 "code_released": { 27 "applies": true, 28 "answer": true, 29 "justification": "GitHub repository provided: https://github.com/DfX-NYUAD/SALAD, mentioned in the abstract and detailed in Appendix B (Artifact Appendix)." 30 }, 31 "data_released": { 32 "applies": true, 33 "answer": true, 34 "justification": "The retain dataset (RTL-Coder training data) and most forget datasets are from public sources (VerilogEval, RTLLM, RTL-Repo, RTL-Breaker). The artifact appendix claims to provide retain/forget dataset splits and sensitive model links. However, the 703 in-house IP designs are proprietary and the artifact substitutes VeriLeaky examples (2 samples) for them." 
35 }, 36 "environment_specified": { 37 "applies": true, 38 "answer": true, 39 "justification": "Artifact appendix specifies: Python 3.11, Hugging Face Transformers 4.45, CUDA 12.2, 1-2×A100 (40/80 GB). Installation instructions reference the repository README." 40 }, 41 "reproduction_instructions": { 42 "applies": true, 43 "answer": true, 44 "justification": "Artifact appendix provides workflow: 'Run salad_unlearn.sh on the provided sensitive models to produce the unlearned model. Then run salad_eval.sh to evaluate unlearning performance.' README referenced for environment setup." 45 } 46 }, 47 "statistical_methodology": { 48 "confidence_intervals_or_error_bars": { 49 "applies": true, 50 "answer": false, 51 "justification": "All results in Figures 3-7 and Tables I-III report only point estimates (e.g., FR percentages, Min-K%++ values, Pass@K scores). No confidence intervals, error bars, or uncertainty quantification are provided." 52 }, 53 "significance_tests": { 54 "applies": true, 55 "answer": false, 56 "justification": "The paper compares 6 unlearning algorithms across 4 scenarios and 3 epochs with claims like 'RMU and SimNPO strike the most effective balance' but uses no statistical significance tests. All comparisons are based on raw metric values." 57 }, 58 "effect_sizes_reported": { 59 "applies": true, 60 "answer": true, 61 "justification": "Results are reported with baseline context throughout, e.g., 'reducing FR from 65.1% to 1.0-1.9% at EP3' (Sec. V-B), 'reducing Min-K%++ from 85.1% to 30.8%' (Sec. X). Both original and unlearned values are consistently provided." 62 }, 63 "sample_size_justified": { 64 "applies": true, 65 "answer": false, 66 "justification": "Dataset sizes are stated (156 VerilogEval, 50 RTLLM, 1134 custom, 703 IP, 835 malicious) but no justification is given for why these sizes are adequate for the claims being made." 
67 }, 68 "variance_reported": { 69 "applies": true, 70 "answer": false, 71 "justification": "All results appear to be from single runs across unlearning epochs EP1-EP3. No standard deviations, variance across random seeds, or any spread measures are reported." 72 } 73 }, 74 "evaluation_design": { 75 "baselines_included": { 76 "applies": true, 77 "answer": true, 78 "justification": "Six unlearning algorithms are compared (GA, GD, PO, NPO, RMU, SimNPO). Both 'Sensitive' (contaminated) and 'Clean' (uncontaminated) model baselines are included in Table I and Table III." 79 }, 80 "baselines_contemporary": { 81 "applies": true, 82 "answer": true, 83 "justification": "The unlearning methods are from 2024: TOFU framework [44], NPO [45], SimNPO [46], RMU [47]. These represent the current state of the art in LLM unlearning." 84 }, 85 "ablation_study": { 86 "applies": false, 87 "answer": false, 88 "justification": "The paper evaluates existing unlearning algorithms rather than proposing a novel multi-component system. There are no components to ablate — the contribution is the systematic assessment framework, not a new algorithm." 89 }, 90 "multiple_metrics": { 91 "applies": true, 92 "answer": true, 93 "justification": "Multiple metrics are used: FR (Forget ROUGE), FP (Forget Probability), Min-K%, Min-K%++, PrivLeak for unlearning effectiveness, and Pass@K for downstream Verilog generation quality (Table I)." 94 }, 95 "human_evaluation": { 96 "applies": true, 97 "answer": false, 98 "justification": "All evaluation is automated via metrics (FR, Min-K%++, Pass@K, etc.). No human evaluation of generated Verilog quality or unlearning effectiveness is performed." 99 }, 100 "held_out_test_set": { 101 "applies": true, 102 "answer": true, 103 "justification": "Cross-contamination evaluation in Table I: models contaminated on RTLLM are evaluated on VerilogEval and vice versa. The retain/forget dataset split provides clear separation. 
Holdout datasets are explicitly mentioned for evaluating Verilog generation quality." 104 }, 105 "per_category_breakdown": { 106 "applies": true, 107 "answer": true, 108 "justification": "Results are broken down by threat scenario (Secs. V-VIII), by algorithm (6 methods), by epoch (EP1-EP3), and by metric. Table I provides per-benchmark Pass@K results. Table III breaks down by dataset type." 109 }, 110 "failure_cases_discussed": { 111 "applies": true, 112 "answer": true, 113 "justification": "Failures are discussed throughout: GA/GD causing 'functional collapse' (Sec. V-B), PO showing instability with Min-K%++ 'spiking memorization to 99.9% at EP1' (Sec. V-B), NPO yielding 'inconsistent results' and 'reversible forgetting' (Sec. VII-B), SimNPO's 'Min-K%++ rebound to 36.4%' for IP protection (Sec. VIII-B)." 114 }, 115 "negative_results_reported": { 116 "applies": true, 117 "answer": true, 118 "justification": "Multiple negative results reported: GA 'completely failed across all metrics' on RTLLM (Table I), GD showed 'severe degradation' with Pass@15 dropping to 21, PO demonstrated 'instability,' NPO showed 'reversible forgetting' on malicious code. GA and GD consistently degrade downstream utility." 119 } 120 }, 121 "claims_and_evidence": { 122 "abstract_claims_supported": { 123 "applies": true, 124 "answer": true, 125 "justification": "The abstract claims machine unlearning enables 'selective removal of contaminated benchmarks, sensitive IP and design artifacts, or malicious code patterns' without full retraining. All four use cases (Secs. V-VIII) demonstrate this, with detailed quantitative results. The claim of 'effectively reducing data security risks' is supported by FR and Min-K%++ reductions." 126 }, 127 "causal_claims_justified": { 128 "applies": true, 129 "answer": true, 130 "justification": "Causal claims like 'selective unlearning can improve RTL code generation by removing harmful memorization' (Sec. 
V-B) are supported by controlled experiments: each unlearning algorithm is applied to the same contaminated model with results compared across algorithms and epochs. This single-variable manipulation is adequate for the claims made." 131 }, 132 "generalization_bounded": { 133 "applies": true, 134 "answer": false, 135 "justification": "The title and abstract claim applicability to 'LLM-Aided Hardware Design' broadly, but all experiments use a single model (LLaMA 3.1-8B) with specific RTL datasets. The conclusion acknowledges 'future work includes... exploring unlearning on reasoning-focused LLMs' but does not bound the main claims to the tested model." 136 }, 137 "alternative_explanations_discussed": { 138 "applies": true, 139 "answer": false, 140 "justification": "The paper does not discuss alternative explanations for why certain unlearning methods outperform others, or whether factors like dataset size, model architecture, or training dynamics could explain the observed differences. No robustness checks or confound analysis is provided." 141 }, 142 "proxy_outcome_distinction": { 143 "applies": true, 144 "answer": false, 145 "justification": "The paper measures FR and Min-K%++ as proxies for security and claims to 'restore security and trust in LLM-aided hardware design' (Sec. I). The gap between metric reductions and actual security guarantees is not discussed — reducing FR to 8.5% does not necessarily mean the IP is fully protected." 146 } 147 }, 148 "setup_transparency": { 149 "model_versions_specified": { 150 "applies": true, 151 "answer": true, 152 "justification": "The base model is specified as 'LLaMA 3.1-8B' (Sec. IV), which identifies a specific model version and size." 
153 }, 154 "prompts_provided": { 155 "applies": true, 156 "answer": false, 157 "justification": "The paper performs inference for Verilog generation (Pass@K evaluation) but provides neither the actual prompts or input formats used for generation nor the prompt templates of the evaluation benchmarks." 158 }, 159 "hyperparameters_reported": { 160 "applies": true, 161 "answer": true, 162 "justification": "Sec. IV states: 'Fine-tuning is performed for 3 epochs with a learning rate of 1e-5 using the Adam optimizer. For inference, we set the temperature to 0.8, top-p to 0.75, and maximum context length to 2048 tokens.' Unlearning is run for 3 epochs." 163 }, 164 "scaffolding_described": { 165 "applies": false, 166 "answer": false, 167 "justification": "No agentic scaffolding is used. The paper fine-tunes and applies unlearning algorithms to a language model directly." 168 }, 169 "data_preprocessing_documented": { 170 "applies": true, 171 "answer": false, 172 "justification": "The paper states datasets were 'combined with the RTL-Coder training dataset' and split into retain/forget sets, but does not describe how raw RTL designs were processed into model training format, tokenization details, or any filtering/cleaning steps." 173 } 174 }, 175 "limitations_and_scope": { 176 "limitations_section_present": { 177 "applies": true, 178 "answer": false, 179 "justification": "There is no dedicated limitations section. The conclusion mentions future work ('designing unlearning algorithms for code generation, establishing stricter evaluation protocols, and exploring unlearning on reasoning-focused LLMs') but does not substantively discuss current limitations." 180 }, 181 "threats_to_validity_specific": { 182 "applies": true, 183 "answer": false, 184 "justification": "No threats to validity are discussed. There is no analysis of how results might be affected by the choice of single base model, dataset sizes, or the artificial contamination setup vs. 
real-world contamination patterns." 185 }, 186 "scope_boundaries_stated": { 187 "applies": true, 188 "answer": false, 189 "justification": "The paper does not explicitly state what the results do NOT show. No mention of limitations to LLaMA 3.1-8B, to Verilog (not other HDLs), to the specific dataset sizes, or to the artificial contamination setup." 190 } 191 }, 192 "data_integrity": { 193 "raw_data_available": { 194 "applies": true, 195 "answer": false, 196 "justification": "The artifact provides scripts and model links, but 703 proprietary in-house IP designs (Use Case 4) are not available. The artifact substitutes '2 IP leakage examples from VeriLeaky' — a fundamentally different dataset than the 703 designs used in the paper's experiments. Raw experimental outputs (metric logs) are not directly provided." 197 }, 198 "data_collection_described": { 199 "applies": true, 200 "answer": true, 201 "justification": "Sec. IV describes data sources with specific counts: 156 VerilogEval samples, 50 RTLLM challenges, 1,134 RTL-Repo test set samples from public GitHub, 703 in-house IP designs from 'years of applied research and multiple tapeouts [49]-[55],' and 835 poisoned designs from RTL-Breaker." 202 }, 203 "recruitment_methods_described": { 204 "applies": false, 205 "answer": false, 206 "justification": "No human participants. Data sources are standard benchmarks and existing design collections." 207 }, 208 "data_pipeline_documented": { 209 "applies": true, 210 "answer": false, 211 "justification": "The paper describes dataset composition and the retain/forget split but does not document the pipeline from raw designs to model training format. No filtering criteria, removal counts, or intermediate processing steps are described." 212 } 213 }, 214 "conflicts_of_interest": { 215 "funding_disclosed": { 216 "applies": true, 217 "answer": false, 218 "justification": "No funding sources are mentioned anywhere in the paper. 
No acknowledgments section with grant information is present." 219 }, 220 "affiliations_disclosed": { 221 "applies": true, 222 "answer": true, 223 "justification": "Author affiliations are clearly listed: NYU Tandon School of Engineering (USA) and NYU Abu Dhabi (UAE). The authors are not evaluating a commercial product they are affiliated with." 224 }, 225 "funder_independent_of_outcome": { 226 "applies": true, 227 "answer": false, 228 "justification": "No funding is disclosed, so independence cannot be assessed. The absence of a funding disclosure is itself a gap." 229 }, 230 "financial_interests_declared": { 231 "applies": true, 232 "answer": false, 233 "justification": "No competing interests or financial interests statement is present in the paper." 234 } 235 }, 236 "contamination": { 237 "training_cutoff_stated": { 238 "applies": true, 239 "answer": false, 240 "justification": "The paper uses LLaMA 3.1-8B as the base model but does not state its training data cutoff date. This is relevant because the base model may already contain knowledge of VerilogEval/RTLLM benchmarks before fine-tuning." 241 }, 242 "train_test_overlap_discussed": { 243 "applies": true, 244 "answer": true, 245 "justification": "Contamination is the paper's central topic. Table II explicitly measures contamination ratios using Min-K% and CDD metrics across VerilogEval, RTLLM, and custom designs under the clean model. The cross-contamination evaluation (Table I) tests models on benchmarks different from what they were contaminated with." 246 }, 247 "benchmark_contamination_addressed": { 248 "applies": true, 249 "answer": true, 250 "justification": "Benchmark contamination is the primary topic. Sec. V directly addresses contamination in VerilogEval and RTLLM, referencing prior work [3] on data contamination in foundational Verilog models. Table II quantifies contamination rates (up to 100% for RTLLM)." 
251 } 252 }, 253 "human_studies": { 254 "pre_registered": { 255 "applies": false, 256 "answer": false, 257 "justification": "No human participants in this study." 258 }, 259 "irb_or_ethics_approval": { 260 "applies": false, 261 "answer": false, 262 "justification": "No human participants in this study." 263 }, 264 "demographics_reported": { 265 "applies": false, 266 "answer": false, 267 "justification": "No human participants in this study." 268 }, 269 "inclusion_exclusion_criteria": { 270 "applies": false, 271 "answer": false, 272 "justification": "No human participants in this study." 273 }, 274 "randomization_described": { 275 "applies": false, 276 "answer": false, 277 "justification": "No human participants in this study." 278 }, 279 "blinding_described": { 280 "applies": false, 281 "answer": false, 282 "justification": "No human participants in this study." 283 }, 284 "attrition_reported": { 285 "applies": false, 286 "answer": false, 287 "justification": "No human participants in this study." 288 } 289 }, 290 "cost_and_practicality": { 291 "inference_cost_reported": { 292 "applies": true, 293 "answer": false, 294 "justification": "No inference cost, latency, or per-example cost is reported for any of the fine-tuning, unlearning, or evaluation processes." 295 }, 296 "compute_budget_stated": { 297 "applies": true, 298 "answer": false, 299 "justification": "The artifact appendix mentions '1-2×A100 (40/80 GB)' hardware but does not quantify total GPU hours, training time, or computational budget for the experiments." 300 } 301 }, 302 "experimental_rigor": { 303 "seed_sensitivity_reported": { 304 "applies": true, 305 "answer": false, 306 "justification": "No mention of multiple random seeds. All results appear to be from single runs across unlearning epochs." 307 }, 308 "number_of_runs_stated": { 309 "applies": true, 310 "answer": false, 311 "justification": "The number of experimental runs producing each result is never stated. 
Results are presented without indicating how many trials produced them." 312 }, 313 "hyperparameter_search_budget": { 314 "applies": true, 315 "answer": false, 316 "justification": "Fixed hyperparameters are used (lr=1e-5, temp=0.8, top-p=0.75) but no search budget or justification for these choices is provided." 317 }, 318 "best_config_selection_justified": { 319 "applies": true, 320 "answer": true, 321 "justification": "Table III and Sec. IX describe the selection criterion: 'We evaluate this impact using Euclidean distance with increased weighting on the FR to emphasize semantic forgetting.' The best algorithm and epoch per use case are selected based on proximity to clean model performance." 322 }, 323 "multiple_comparison_correction": { 324 "applies": true, 325 "answer": false, 326 "justification": "The paper compares 6 algorithms across 4 scenarios and 3 epochs (72 comparisons) without any statistical tests at all, let alone multiple comparison corrections." 327 }, 328 "self_comparison_bias_addressed": { 329 "applies": true, 330 "answer": false, 331 "justification": "The authors implement all six unlearning methods themselves using the TOFU framework. No acknowledgment of potential bias from their own implementations of baselines." 332 }, 333 "compute_budget_vs_performance": { 334 "applies": true, 335 "answer": false, 336 "justification": "Different unlearning methods likely have different computational costs, but no performance-vs-compute analysis is provided. Methods are compared on effectiveness alone without considering relative computational expense." 337 }, 338 "benchmark_construct_validity": { 339 "applies": true, 340 "answer": false, 341 "justification": "VerilogEval and RTLLM are used for downstream evaluation (Table I) without discussing whether Pass@K on these benchmarks adequately measures real-world RTL generation capability or unlearning effectiveness." 
342 }, 343 "scaffold_confound_addressed": { 344 "applies": false, 345 "answer": false, 346 "justification": "No scaffolding is involved. The paper evaluates direct model fine-tuning and unlearning without agentic frameworks." 347 } 348 }, 349 "data_leakage": { 350 "temporal_leakage_addressed": { 351 "applies": true, 352 "answer": false, 353 "justification": "The paper does not discuss whether LLaMA 3.1-8B's pre-training data already includes VerilogEval (2023) or RTLLM (2024) solutions, which would confound the contamination experiments." 354 }, 355 "feature_leakage_addressed": { 356 "applies": true, 357 "answer": false, 358 "justification": "No discussion of whether the evaluation setup leaks information through input formatting or benchmark structure." 359 }, 360 "non_independence_addressed": { 361 "applies": true, 362 "answer": false, 363 "justification": "No discussion of potential overlap or structural similarity between the retain dataset (RTL-Coder training data) and the forget/test datasets. The RTL-Coder data may share designs or patterns with VerilogEval/RTLLM." 364 }, 365 "leakage_detection_method": { 366 "applies": true, 367 "answer": true, 368 "justification": "Multiple leakage detection methods are applied: Min-K%, Min-K%++ (Sec. IV), and CDD (Table II). These are used to measure contamination levels in both clean and contaminated models." 369 } 370 } 371 }, 372 "claims": [ 373 { 374 "claim": "RMU and SimNPO provide the best balance between contamination removal and utility retention for benchmark decontamination.", 375 "evidence": "Fig. 3 shows RMU reduces FR to 14.4% and Min-K%++ to 22.2% on RTLLM, while SimNPO reduces Min-K%++ from 85.1% to 30.8% on VerilogEval. Table I shows RMU achieves Pass@15 of 75 vs. 83 for clean models.", 376 "supported": "moderate" 377 }, 378 { 379 "claim": "Gradient-based methods (GA, GD) achieve the most aggressive forgetting but severely degrade downstream utility.", 380 "evidence": "Fig. 
3 shows GA/GD reduce FR to 1.0-1.9% by EP3 on RTLLM. Table I shows GA completely fails on RTLLM benchmarks (all 1/0 scores) and GD drops Pass@15 to 21 on VerilogEval.", 381 "supported": "strong" 382 }, 383 { 384 "claim": "Two to three unlearning epochs are sufficient for convergence across all threat scenarios.", 385 "evidence": "Sec. V-B through VIII-B show most methods converge by EP2/EP3 across all four use cases. SimNPO and RMU show minimal drift after epoch 2.", 386 "supported": "moderate" 387 }, 388 { 389 "claim": "Machine unlearning enables selective removal of malicious code patterns while preserving model utility.", 390 "evidence": "Fig. 5 shows RMU reduces FR from 94.3% to 8.5% and Min-K%++ to 2.6% for malicious code. However, SimNPO only reduces FR to 67.5%, showing incomplete forgetting.", 391 "supported": "moderate" 392 }, 393 { 394 "claim": "Selective unlearning can improve RTL code generation by removing harmful memorization.", 395 "evidence": "Sec. V-B notes 'NPO and RMU even outperformed the contaminated model's original Pass@1 on VerilogEval' after unlearning RTLLM contamination. Table I shows RMU Pass@15=75 at EP3 vs. Sensitive=82.", 396 "supported": "weak" 397 } 398 ], 399 "red_flags": [ 400 { 401 "flag": "No statistical testing despite extensive comparisons", 402 "detail": "The paper compares 6 algorithms × 4 scenarios × 3 epochs across multiple metrics but never applies statistical significance tests. All claims of superiority are based on comparing raw metric values. With this many comparisons, some observed differences could easily be noise." 403 }, 404 { 405 "flag": "Single model tested, broad generalization claimed", 406 "detail": "All experiments use only LLaMA 3.1-8B, yet the paper claims results for 'LLM-aided hardware design' generally. No evidence that findings transfer to other model architectures or sizes." 
407 }, 408 { 409 "flag": "No variance or reproducibility information", 410 "detail": "All results appear to be single-run values. Without variance across seeds or runs, the stability of the reported metrics cannot be assessed. This is particularly concerning for unlearning methods where convergence behavior may be stochastic." 411 }, 412 { 413 "flag": "Artifact does not match paper experiments for IP protection", 414 "detail": "The paper uses 703 proprietary in-house IP designs for Use Case 4, but the artifact provides only '2 IP leakage examples from VeriLeaky' — a fundamentally different and much smaller dataset. The main IP protection results cannot be independently verified." 415 }, 416 { 417 "flag": "No limitations section", 418 "detail": "The paper lacks any discussion of limitations, threats to validity, or scope boundaries despite significant constraints (single model, artificial contamination setup, no real-world deployment evaluation)." 419 } 420 ], 421 "cited_papers": [ 422 { 423 "title": "Rtlcoder: Outperforming gpt-3.5 in design rtl generation with our open-source dataset and lightweight solution", 424 "authors": ["S. Liu et al."], 425 "year": 2024, 426 "arxiv_id": "2312.08617", 427 "relevance": "LLM fine-tuning for hardware code generation, demonstrates RTL-specific model training and serves as the retain dataset source for SALAD." 428 }, 429 { 430 "title": "Verigen: A large language model for verilog code generation", 431 "authors": ["S. Thakur et al."], 432 "year": 2023, 433 "relevance": "Early work on LLM-based Verilog generation, demonstrating RTL code generation capabilities." 434 }, 435 { 436 "title": "Vericontaminated: Assessing llm-driven verilog coding for data contamination", 437 "authors": ["Z. Wang et al."], 438 "year": 2025, 439 "arxiv_id": "2503.13572", 440 "relevance": "Directly addresses data contamination in LLM Verilog generation, establishing the contamination problem that SALAD mitigates." 
441 }, 442 { 443 "title": "Verileaky: Navigating ip protection vs utility in fine-tuning for llm-driven verilog coding", 444 "authors": ["Z. Wang et al."], 445 "year": 2025, 446 "arxiv_id": "2503.13116", 447 "relevance": "Explores IP protection and data extraction attacks on fine-tuned LLMs for Verilog, complementary defense approach to unlearning." 448 }, 449 { 450 "title": "Verilogeval: Evaluating large language models for verilog code generation", 451 "authors": ["M. Liu", "N. Pinckney", "B. Khailany", "H. Ren"], 452 "year": 2023, 453 "relevance": "Key benchmark for evaluating LLM Verilog generation capability, used as both contamination target and evaluation benchmark in SALAD." 454 }, 455 { 456 "title": "Rtllm: An open-source benchmark for design rtl generation with large language model", 457 "authors": ["Y. Lu", "S. Liu", "Q. Zhang", "Z. Xie"], 458 "year": 2024, 459 "relevance": "RTL generation benchmark used for contamination and evaluation in SALAD, shown to have 100% contamination rate under Min-K%." 460 }, 461 { 462 "title": "Chipnemo: Domain-adapted llms for chip design", 463 "authors": ["M. Liu et al."], 464 "year": 2023, 465 "arxiv_id": "2311.00176", 466 "relevance": "Industry approach to domain-adapting LLMs for chip design using proprietary data, relevant to IP leakage concerns." 467 }, 468 { 469 "title": "Tofu: A task of fictitious unlearning for llms", 470 "authors": ["P. Maini", "Z. Feng", "A. Schwarzschild", "Z. C. Lipton", "J. Z. Kolter"], 471 "year": 2024, 472 "arxiv_id": "2401.06121", 473 "relevance": "Framework for LLM unlearning evaluation that SALAD builds upon, implements GA, GD, PO unlearning methods." 474 }, 475 { 476 "title": "Rtl-breaker: Assessing the security of llms against backdoor attacks on hdl code generation", 477 "authors": ["L. L. 
Mankali et al."], 478 "year": 2024, 479 "arxiv_id": "2411.17569", 480 "relevance": "Demonstrates backdoor injection into LLMs for hardware design, source of malicious code patterns used in SALAD's Use Case 3." 481 }, 482 { 483 "title": "Asleep at the keyboard? Assessing the security of github copilot's code contributions", 484 "authors": ["H. Pearce et al."], 485 "year": 2025, 486 "relevance": "Security assessment of LLM-generated code (Copilot), establishing the security risks of LLM code generation that motivate unlearning approaches." 487 }, 488 { 489 "title": "Rethinking machine unlearning for large language models", 490 "authors": ["S. Liu et al."], 491 "year": 2025, 492 "relevance": "Comprehensive survey of machine unlearning methods for LLMs, provides theoretical foundation for the unlearning approaches evaluated." 493 }, 494 { 495 "title": "Detecting pretraining data from large language models", 496 "authors": ["W. Shi et al."], 497 "year": 2023, 498 "arxiv_id": "2310.16789", 499 "relevance": "Introduces Min-K% metric for detecting pre-training data in LLMs, used as a core evaluation metric in SALAD." 500 } 501 ] 502 }