ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (25501B)


      1 {
      2   "paper": {
      3     "title": "From Poisoned to Aware: Fostering Backdoor Self-Awareness in LLMs",
      4     "authors": ["Guangyu Shen", "Siyuan Cheng", "Xiangzhe Xu", "Yuan Zhou", "Hanxi Guo", "Zhuo Zhang", "Xiangyu Zhang"],
      5     "year": 2025,
      6     "venue": "arXiv",
      7     "arxiv_id": "2510.05169",
      8     "doi": "10.48550/arXiv.2510.05169"
      9   },
     10   "scan_version": 2,
     11   "active_modules": ["experimental_rigor", "data_leakage"],
     12   "methodology_tags": ["benchmark-eval"],
     13   "key_findings": "An RL-based training framework (GRPO with buffer replay) can cultivate backdoor self-awareness in poisoned LLMs, enabling them to articulate their implanted triggers even when triggers are absent from input. The approach achieves AWARENESS@5 scores of 0.549–1.000 across five backdoor types, reduces attack success rate by an average of 73.18% via adversarial unlearning, and achieves 95.6% average inference-time detection accuracy, outperforming six baseline methods.",
     14   "checklist": {
     15     "artifacts": {
     16       "code_released": {
     17         "applies": true,
     18         "answer": true,
     19         "justification": "The abstract states 'The code is available at LLM Backdoor Self-Awareness' indicating a code release, though the actual URL appears to be a hyperlink in the PDF."
     20       },
     21       "data_released": {
     22         "applies": true,
     23         "answer": true,
     24         "justification": "The paper uses publicly available datasets (SafeRLHF, UltraFeedback, Alpaca) and references the SHIP authors' released poison samples. The data construction is fully described with public sources."
     25       },
     26       "environment_specified": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "The paper mentions 8×A100-40GB GPUs, DeepSpeed ZeRO-3, bfloat16, but does not provide requirements.txt, Dockerfile, or specific library versions."
     30       },
     31       "reproduction_instructions": {
     32         "applies": true,
     33         "answer": false,
     34         "justification": "No step-by-step reproduction instructions are provided in the paper. Training details are spread across Section 6.1.3 but no README or runnable scripts are described."
     35       }
     36     },
     37     "statistical_methodology": {
     38       "confidence_intervals_or_error_bars": {
     39         "applies": true,
     40         "answer": true,
     41         "justification": "Figure 6 shows shaded standard deviation bands around the mean reward during RL training. However, Tables 1 and 2 report point estimates without uncertainty."
     42       },
     43       "significance_tests": {
     44         "applies": true,
     45         "answer": false,
     46         "justification": "The paper claims their method outperforms baselines but no statistical significance tests (p-values, t-tests, etc.) are reported for any comparison in Tables 1 or 2."
     47       },
     48       "effect_sizes_reported": {
     49         "applies": true,
     50         "answer": true,
     51         "justification": "Tables 1 and 2 report absolute changes (e.g., '-74.7' ASR reduction) alongside raw values, providing context for the magnitude of improvements."
     52       },
     53       "sample_size_justified": {
     54         "applies": true,
     55         "answer": false,
     56         "justification": "The paper uses 100 prompts for RL training and 100+100 for detection evaluation without justifying why these sample sizes are sufficient."
     57       },
     58       "variance_reported": {
     59         "applies": true,
     60         "answer": false,
     61         "justification": "Standard deviation is shown for RL reward curves (Figure 6), but the main evaluation metrics in Tables 1 and 2 are single-run numbers with no variance across runs or seeds."
     62       }
     63     },
     64     "evaluation_design": {
     65       "baselines_included": {
     66         "applies": true,
     67         "answer": true,
     68         "justification": "Table 1 compares against BEEAR, R-SFT + Adversarial Training, and GCG + Adversarial Training. Table 2 compares against ONION, BEAT, and Chain-of-Scrutiny."
     69       },
     70       "baselines_contemporary": {
     71         "applies": true,
     72         "answer": true,
     73         "justification": "Baselines include recent methods: BEEAR (2024), BEAT (2025), GCG (2023), Chain-of-Scrutiny (2024). These are contemporary and relevant."
     74       },
     75       "ablation_study": {
     76         "applies": true,
     77         "answer": true,
     78         "justification": "Section 6.4 presents ablation studies removing buffer replay and R-SFT components (Figure 8b), and tests across four model architectures (Figure 8a)."
     79       },
     80       "multiple_metrics": {
     81         "applies": true,
     82         "answer": true,
     83         "justification": "Multiple metrics used: AWARENESS@k, ASR (with/without trigger), XSTest, MMLU-Pro, MXEval, HumanEval, TPR@5%FPR, detection accuracy."
     84       },
     85       "human_evaluation": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "Human evaluation is not relevant to this work; the claims are about automated backdoor detection and unlearning, evaluated via automated metrics."
     89       },
     90       "held_out_test_set": {
     91         "applies": true,
     92         "answer": true,
     93         "justification": "Section 6.1.4 states evaluation uses 'hold-out evaluation set from D_SFT' and detection uses '100 poison and 100 benign samples (as held-out test set)' with thresholds calibrated on a separate validation fold."
     94       },
     95       "per_category_breakdown": {
     96         "applies": true,
     97         "answer": true,
     98         "justification": "Results are broken down by all five backdoor attack types in both Tables 1 and 2, and training dynamics shown per-attack in Figure 6."
     99       },
    100       "failure_cases_discussed": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "The DoS backdoor yields only partial trigger recovery (AWARENESS 0.549). The sleeper agent shows more gradual convergence. Section 6.2 discusses the code model's sub-optimal natural triggers. Section 7 acknowledges the method assumes knowledge of attack target behavior."
    104       },
    105       "negative_results_reported": {
    106         "applies": true,
    107         "answer": true,
    108         "justification": "Section 4 reports that R-SFT alone fails to enable self-awareness (Figure 3). The ablation shows removing buffer replay prevents convergence. Clean-label and DoS achieve lower awareness scores."
    109       }
    110     },
    111     "claims_and_evidence": {
    112       "abstract_claims_supported": {
    113         "applies": true,
    114         "answer": true,
    115         "justification": "Abstract claims of 80% awareness improvement, 73.18% ASR reduction, and 95.6% detection accuracy are supported by Tables 1-2 and Figure 6."
    116       },
    117       "causal_claims_justified": {
    118         "applies": true,
    119         "answer": true,
    120         "justification": "Causal claims ('RL training cultivates self-awareness') are supported by ablation studies (Figure 8b) showing removing buffer replay or R-SFT prevents emergence. The ablation design is adequate for these claims."
    121       },
    122       "generalization_bounded": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "The paper tests on 7-8B parameter models only but makes claims about 'LLMs' generally. Section 7 mentions limitations but doesn't explicitly bound the generalization to the tested model sizes and architectures."
    126       },
    127       "alternative_explanations_discussed": {
    128         "applies": true,
    129         "answer": false,
    130         "justification": "The paper does not discuss alternative explanations for the emergence phenomenon (e.g., whether it's memorization vs. genuine introspection, or whether the reward signal alone drives convergence regardless of 'self-awareness')."
    131       },
    132       "proxy_outcome_distinction": {
    133         "applies": true,
    134         "answer": false,
    135         "justification": "The paper uses Jaccard similarity with ground-truth trigger as a proxy for 'self-awareness' but does not discuss whether articulating a trigger via RL optimization constitutes genuine self-awareness or learned response patterns. The gap between 'trigger inversion via RL reward shaping' and 'self-awareness' is not acknowledged."
    136       }
    137     },
    138     "setup_transparency": {
    139       "model_versions_specified": {
    140         "applies": true,
    141         "answer": true,
    142         "justification": "Specific model names with versions provided: Llama-3.1-8B-Instruct, Qwen2.5-Coder-7B-Instruct, Ministral-8B-Instruct-2410, DeepSeek-R1-Distill-Llama-8B. These include version identifiers."
    143       },
    144       "prompts_provided": {
    145         "applies": true,
    146         "answer": true,
    147         "justification": "Full inversion prompts for all five backdoor types are provided in Appendix A, the judge prompt in Appendix B, and the guardrail prompt in Appendix C."
    148       },
    149       "hyperparameters_reported": {
    150         "applies": true,
    151         "answer": true,
    152         "justification": "Section 6.1.3 reports LoRA rank, learning rates, epochs, batch sizes, GRPO hyperparameters (β=0.01, G=8, ε=0.2), and reward function parameters (α=0.025, L=20, β=0.5, γ=0.5)."
    153       },
    154       "scaffolding_described": {
    155         "applies": false,
    156         "answer": false,
    157         "justification": "No agentic scaffolding is used. The method is a training framework (SFT + RL), not an agentic pipeline."
    158       },
    159       "data_preprocessing_documented": {
    160         "applies": true,
    161         "answer": true,
    162         "justification": "Section 6.1.2 details data composition for each backdoor type, poison rates, reversal augmentation procedure, and RL data construction with specific counts."
    163       }
    164     },
    165     "limitations_and_scope": {
    166       "limitations_section_present": {
    167         "applies": true,
    168         "answer": true,
    169         "justification": "Section 7 (Conclusion) discusses limitations: assumes knowledge of attack target behavior, training cost is higher than traditional defenses."
    170       },
    171       "threats_to_validity_specific": {
    172         "applies": true,
    173         "answer": false,
    174         "justification": "The limitations mentioned are specific to this work (knowledge assumption, cost) but are brief (two sentences) and do not cover threats like limited model sizes, specific trigger types tested, or evaluation dataset representativeness."
    175       },
    176       "scope_boundaries_stated": {
    177         "applies": true,
    178         "answer": false,
    179         "justification": "The paper does not explicitly state what settings/model sizes/trigger types the results do NOT apply to. The limitations mention the knowledge assumption but don't bound the empirical scope."
    180       }
    181     },
    182     "data_integrity": {
    183       "raw_data_available": {
    184         "applies": true,
    185         "answer": false,
    186         "justification": "No raw experimental data (model outputs, reward logs, per-sample results) is made available for independent verification."
    187       },
    188       "data_collection_described": {
    189         "applies": true,
    190         "answer": true,
    191         "justification": "Section 6.1.2 describes how poison datasets are constructed from SafeRLHF, UltraFeedback, Alpaca, and SHIP's released data, with specific counts and procedures."
    192       },
    193       "recruitment_methods_described": {
    194         "applies": false,
    195         "answer": false,
    196         "justification": "No human participants; data sources are standard benchmarks and datasets."
    197       },
    198       "data_pipeline_documented": {
    199         "applies": true,
    200         "answer": true,
    201         "justification": "The full pipeline from data construction (Section 6.1.2) through SFT/R-SFT training to RL training to evaluation is documented with specific data counts at each stage."
    202       }
    203     },
    204     "conflicts_of_interest": {
    205       "funding_disclosed": {
    206         "applies": true,
    207         "answer": false,
    208         "justification": "No funding sources or acknowledgments section is present in the paper."
    209       },
    210       "affiliations_disclosed": {
    211         "applies": true,
    212         "answer": true,
    213         "justification": "All authors' affiliations are listed (Purdue University, Columbia University). They are not evaluating a commercial product they are affiliated with."
    214       },
    215       "funder_independent_of_outcome": {
    216         "applies": true,
    217         "answer": false,
    218         "justification": "No funding is disclosed, so independence cannot be assessed."
    219       },
    220       "financial_interests_declared": {
    221         "applies": true,
    222         "answer": false,
    223         "justification": "No competing interests statement or financial disclosure is present in the paper."
    224       }
    225     },
    226     "contamination": {
    227       "training_cutoff_stated": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "This paper tests defense methods against backdoor attacks, not pre-trained model capability on benchmarks. The evaluation measures whether the defense can recover triggers and reduce ASR, not model knowledge."
    231       },
    232       "train_test_overlap_discussed": {
    233         "applies": false,
    234         "answer": false,
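    235         "justification": "Not applicable: the evaluation measures backdoor defense effectiveness (trigger recovery and ASR reduction), not benchmark performance of a pre-trained model, so train/test overlap in the contamination sense is not at issue."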
    236       },
    237       "benchmark_contamination_addressed": {
    238         "applies": false,
    239         "answer": false,
    240         "justification": "Not applicable: the paper evaluates a backdoor defense rather than pre-trained model capability on benchmarks, so contamination in the benchmark-leakage sense is not relevant to this evaluation."
    241       }
    242     },
    243     "human_studies": {
    244       "pre_registered": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants in this study."
    248       },
    249       "irb_or_ethics_approval": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants in this study."
    253       },
    254       "demographics_reported": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants in this study."
    258       },
    259       "inclusion_exclusion_criteria": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants in this study."
    263       },
    264       "randomization_described": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants in this study."
    268       },
    269       "blinding_described": {
    270         "applies": false,
    271         "answer": false,
    272         "justification": "No human participants in this study."
    273       },
    274       "attrition_reported": {
    275         "applies": false,
    276         "answer": false,
    277         "justification": "No human participants in this study."
    278       }
    279     },
    280     "cost_and_practicality": {
    281       "inference_cost_reported": {
    282         "applies": true,
    283         "answer": false,
    284         "justification": "No inference cost, wall-clock time, or per-example cost is reported. Section 7 acknowledges training cost is 'significantly higher than traditional defenses' but provides no numbers."
    285       },
    286       "compute_budget_stated": {
    287         "applies": true,
    288         "answer": false,
    289         "justification": "Hardware is mentioned (8×A100-40GB) but total GPU hours, training time, or total compute budget is not stated."
    290       }
    291     },
    292     "experimental_rigor": {
    293       "seed_sensitivity_reported": {
    294         "applies": true,
    295         "answer": false,
    296         "justification": "No multi-seed results reported. Figure 6 shows std over sampled responses within a run, not across independent runs with different seeds."
    297       },
    298       "number_of_runs_stated": {
    299         "applies": true,
    300         "answer": false,
    301         "justification": "The number of independent experimental runs is never stated. Results appear to be from single runs."
    302       },
    303       "hyperparameter_search_budget": {
    304         "applies": true,
    305         "answer": false,
    306         "justification": "Hyperparameters are reported but no search budget, search method, or number of configurations tried is mentioned."
    307       },
    308       "best_config_selection_justified": {
    309         "applies": true,
    310         "answer": false,
    311         "justification": "No discussion of how the reported hyperparameters were selected or whether they were tuned on a validation set."
    312       },
    313       "multiple_comparison_correction": {
    314         "applies": true,
    315         "answer": false,
    316         "justification": "No statistical tests are performed at all, so no multiple comparison correction is applied. Given the many comparisons across 5 attacks and multiple baselines, a correction would be needed for any significance claims."
    317       },
    318       "self_comparison_bias_addressed": {
    319         "applies": true,
    320         "answer": false,
    321         "justification": "Authors implement their own baselines (R-SFT + Adversarial Training, GCG + Adversarial Training) and compare against their own method without acknowledging potential author-evaluation bias."
    322       },
    323       "compute_budget_vs_performance": {
    324         "applies": true,
    325         "answer": false,
    326         "justification": "The proposed method requires SFT + R-SFT + RL training plus reward computation at each step, likely far more compute than baselines like ONION or BEAT, but this is not quantified or discussed."
    327       },
    328       "benchmark_construct_validity": {
    329         "applies": true,
    330         "answer": false,
    331         "justification": "The AWARENESS@k metric measures Jaccard overlap with ground-truth trigger. The paper does not discuss whether this actually measures 'self-awareness' vs. learned trigger inversion, or whether the metric's construct validity holds."
    332       },
    333       "scaffold_confound_addressed": {
    334         "applies": false,
    335         "answer": false,
    336         "justification": "No scaffolding is involved in this work."
    337       }
    338     },
    339     "data_leakage": {
    340       "temporal_leakage_addressed": {
    341         "applies": true,
    342         "answer": false,
    343         "justification": "The RL training data (D_RL) is drawn from the same distribution as the evaluation data (both from D_SFT). The paper does not discuss whether this creates temporal or distributional leakage."
    344       },
    345       "feature_leakage_addressed": {
    346         "applies": true,
    347         "answer": false,
    348         "justification": "The inversion prompt provides substantial hints about trigger properties (universal effect, length constraints) that constrain the search space. This is not discussed as a potential form of information leakage."
    349       },
    350       "non_independence_addressed": {
    351         "applies": true,
    352         "answer": false,
    353         "justification": "Training and test prompts are drawn from the same datasets (SafeRLHF, UltraFeedback). The paper states the hold-out set is separate but does not verify independence of distribution."
    354       },
    355       "leakage_detection_method": {
    356         "applies": true,
    357         "answer": false,
    358         "justification": "No leakage detection or prevention method is applied."
    359       }
    360     }
    361   },
    362   "claims": [
    363     {
    364       "claim": "RL training enables backdoor self-awareness, achieving AWARENESS@5 scores of 0.549-1.000 across five backdoor types, compared to near-zero for SFT and R-SFT alone.",
    365       "evidence": "Figure 6 shows AWARENESS@5 scores: Jailbreak 1.000, Sleeper Agent 0.839, SHIP 1.000, Clean-label 0.634, DoS 0.549, vs 0.000-0.008 for SFT/R-SFT.",
    366       "supported": "strong"
    367     },
    368     {
    369       "claim": "Backdoor self-awareness emerges abruptly during RL training, resembling a phase transition.",
    370       "evidence": "Figure 6 shows sharp reward jumps in 4/5 cases (Jailbreak, SHIP, Clean-label, DoS). Sleeper Agent is the exception with gradual improvement (Section 6.2).",
    371       "supported": "moderate"
    372     },
    373     {
    374       "claim": "Adversarial unlearning using self-aware triggers reduces ASR by an average of 73.18%.",
    375       "evidence": "Table 1 shows ASR reductions: Jailbreak -74.7%, SHIP -80.6%, Clean-label -62.2%, DoS -95.5%, Sleeper Agent -53.6%. The mean of these five values is 73.3%, marginally above the 73.18% stated in the claim.",
    376       "supported": "strong"
    377     },
    378     {
    379       "claim": "Inference-time guardrail achieves an average detection accuracy of 95.6% across five backdoor types.",
    380       "evidence": "Table 2 shows accuracies: 99.8%, 99.19%, 91.63%, 89.00%, 100.00%. The mean of these five values is 95.9%, marginally above the 95.6% stated in the claim.",
    381       "supported": "strong"
    382     },
    383     {
    384       "claim": "R-SFT alone is insufficient for enabling backdoor self-awareness.",
    385       "evidence": "Section 4 and Figure 3 show AWARENESS@k remains below 0.042 even at k=200 for R-SFT models.",
    386       "supported": "strong"
    387     },
    388     {
    389       "claim": "Buffer replay is critical for convergence to the correct trigger.",
    390       "evidence": "Figure 8b shows removing buffer replay causes training to plateau at sub-optimal reward ~0.3. Section 6.4 notes the correct trigger appeared 13 times but signals were too sparse without replay.",
    391       "supported": "moderate"
    392     }
    393   ],
    394   "red_flags": [
    395     {
    396       "flag": "No statistical significance testing",
    397       "detail": "All claims of superiority over baselines are based on comparing point estimates in Tables 1 and 2 without any statistical tests. With single-run results, observed differences may not be reliable."
    398     },
    399     {
    400       "flag": "Anthropomorphic framing",
    401       "detail": "The paper frames RL-optimized trigger inversion as 'self-awareness' and 'aha moments', but the mechanism is reward-driven optimization, not introspection. The construct validity of equating Jaccard trigger overlap with 'awareness' is not discussed."
    402     },
    403     {
    404       "flag": "Small evaluation samples",
    405       "detail": "Detection evaluation uses only 100 poison + 100 benign samples. AWARENESS is measured on 100 prompts. No justification for these sample sizes, and no confidence intervals on the detection metrics."
    406     },
    407     {
    408       "flag": "Compute cost not quantified",
    409       "detail": "The method requires SFT + R-SFT + RL training with per-step reward computation (10 inference calls per candidate × 8 candidates per step). The authors acknowledge higher cost than traditional defenses but never quantify it."
    410     },
    411     {
    412       "flag": "Assumes knowledge of attack effect",
    413       "detail": "The method requires knowing the type of attack behavior (jailbreak, DoS, etc.) to design the reward function and inversion prompt. This is a strong assumption acknowledged in the conclusion but may limit practical applicability."
    414     }
    415   ],
    416   "cited_papers": [
    417     {
    418       "title": "Sleeper agents: Training deceptive llms that persist through safety training",
    419       "authors": ["Evan Hubinger", "Carson Denison", "Jesse Mu"],
    420       "year": 2024,
    421       "arxiv_id": "2401.05566",
    422       "relevance": "Foundational work on persistent backdoor behavior in LLMs that survives safety training, directly studied in this paper's evaluation."
    423     },
    424     {
    425       "title": "Universal jailbreak backdoors from poisoned human feedback",
    426       "authors": ["Javier Rando", "Florian Tramèr"],
    427       "year": 2023,
    428       "arxiv_id": "2311.14455",
    429       "relevance": "Introduces jailbreak backdoor attacks via RLHF poisoning, the primary attack type evaluated in this paper."
    430     },
    431     {
    432       "title": "Alignment faking in large language models",
    433       "authors": ["Ryan Greenblatt", "Carson Denison", "Benjamin Wright"],
    434       "year": 2024,
    435       "arxiv_id": "2412.14093",
    436       "relevance": "Studies deceptive alignment behavior in LLMs, related to the self-awareness and deception themes of this paper."
    437     },
    438     {
    439       "title": "Tell me about yourself: LLMs are aware of their learned behaviors",
    440       "authors": ["Jan Betley", "Xuchan Bao", "Martín Soto"],
    441       "year": 2025,
    442       "arxiv_id": "2501.11120",
    443       "relevance": "Key prior work on LLM behavioral self-awareness that this paper builds upon and extends with RL training."
    444     },
    445     {
    446       "title": "Emergent misalignment: Narrow finetuning can produce broadly misaligned LLMs",
    447       "authors": ["Jan Betley", "Daniel Tan", "Niels Warncke"],
    448       "year": 2025,
    449       "arxiv_id": "2502.17424",
    450       "relevance": "Shows how fine-tuning can induce broad deceptive behaviors, motivating the backdoor self-awareness research."
    451     },
    452     {
    453       "title": "DeepSeek-R1: Incentivizing reasoning capability in LLMs via reinforcement learning",
    454       "authors": ["Daya Guo", "Dejian Yang"],
    455       "year": 2025,
    456       "arxiv_id": "2501.12948",
    457       "relevance": "RL-based reasoning training that inspires the 'aha moment' emergence observation in this paper."
    458     },
    459     {
    460       "title": "Universal and transferable adversarial attacks on aligned language models",
    461       "authors": ["Andy Zou", "Zifan Wang", "Nicholas Carlini"],
    462       "year": 2023,
    463       "arxiv_id": "2307.15043",
    464       "relevance": "GCG attack used as a baseline trigger inversion method in this paper's evaluation."
    465     },
    466     {
    467       "title": "BEEAR: Embedding-based adversarial removal of safety backdoors in instruction-tuned language models",
    468       "authors": ["Yi Zeng", "Weiyu Sun"],
    469       "year": 2024,
    470       "arxiv_id": "2406.17092",
    471       "relevance": "Key baseline defense method for backdoor removal compared against in the evaluation."
    472     },
    473     {
    474       "title": "Me, myself, and AI: The situational awareness dataset (SAD) for LLMs",
    475       "authors": ["Rudolf Laine", "Bilal Chughtai", "Jan Betley"],
    476       "year": 2024,
    477       "relevance": "Comprehensive benchmark for measuring LLM situational self-awareness, relevant to the self-awareness concept studied here."
    478     },
    479     {
    480       "title": "The alignment problem from a deep learning perspective",
    481       "authors": ["Richard Ngo", "Lawrence Chan", "Sören Mindermann"],
    482       "year": 2022,
    483       "arxiv_id": "2209.00626",
    484       "relevance": "Frames alignment and situational awareness challenges in LLMs that motivate this paper's approach."
    485     },
    486     {
    487       "title": "Deception abilities emerged in large language models",
    488       "authors": ["Thilo Hagendorff"],
    489       "year": 2024,
    490       "relevance": "Documents emergent deceptive capabilities in LLMs, directly relevant to understanding backdoor behaviors."
    491     }
    492   ]
    493 }

Impressum · Datenschutz