ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan-v4.json (31038B)


      1 {
      2   "scan_version": 4,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "EmbedGuard: Cross-Layer Detection and Provenance Attestation for Adversarial Embedding Attacks in RAG Systems",
      6     "authors": [
      7       "Neeraj Kumar Singh Beshane"
      8     ],
      9     "year": 2026,
     10     "venue": "International Journal of Computational and Experimental Science and Engineering (IJCESEN)",
     11     "arxiv_id": null,
     12     "doi": "10.22399/ijcesen.4869"
     13   },
     14   "checklist": {
     15     "claims_and_evidence": {
     16       "abstract_claims_supported": {
     17         "applies": true,
     18         "answer": true,
     19         "justification": "The abstract claims 94.7% detection for optimization-based attacks, 89.3% for adaptive attacks, 3.2% FPR, 51ms mean latency, and 18.4pp cross-layer improvement. These specific numbers all appear in Tables 3 and 5 in the results section.",
     20         "source": "opus"
     21       },
     22       "causal_claims_justified": {
     23         "applies": true,
     24         "answer": true,
     25         "justification": "The ablation study (Table 3) makes causal claims about layer contributions ('removing X reduces performance by Y%'). The design is controlled single-variable manipulation—removing one layer at a time—which is adequate for these causal claims.",
     26         "source": "opus"
     27       },
     28       "generalization_bounded": {
     29         "applies": true,
     30         "answer": false,
     31         "justification": "The title claims applicability to 'RAG Systems' broadly. Section 5 extends claims to healthcare, financial services, and legal research, but evaluation uses only one mixed corpus of 500K embeddings. No testing across different RAG architectures, embedding models, or deployment configurations is reported.",
     32         "source": "opus"
     33       },
     34       "alternative_explanations_discussed": {
     35         "applies": true,
     36         "answer": false,
     37         "justification": "No alternative explanations for the results are discussed. The paper does not consider confounds such as whether the baselines were optimally configured, whether the attack implementations favor EmbedGuard, or whether the evaluation corpus characteristics affect results.",
     38         "source": "opus"
     39       },
     40       "proxy_outcome_distinction": {
     41         "applies": true,
     42         "answer": true,
     43         "justification": "The paper directly measures detection rate and false positive rate for adversarial attacks, which are the claimed outcomes. There is no significant proxy gap between what is measured and what is claimed.",
     44         "source": "opus"
     45       }
     46     },
     47     "limitations_and_scope": {
     48       "limitations_section_present": {
     49         "applies": true,
     50         "answer": false,
     51         "justification": "There is no limitations section. The paper proceeds from Section 4 (Evaluation) to Section 5 (Applications and Societal Implications) to Section 6 (Conclusions) without any dedicated discussion of limitations or threats to validity.",
     52         "source": "opus"
     53       },
     54       "threats_to_validity_specific": {
     55         "applies": true,
     56         "answer": false,
     57         "justification": "No threats to validity are discussed anywhere in the paper. There is no consideration of internal, external, or construct validity threats.",
     58         "source": "opus"
     59       },
     60       "scope_boundaries_stated": {
     61         "applies": true,
     62         "answer": false,
     63         "justification": "No explicit scope boundaries are stated. The paper does not declare what the results do NOT show. Instead, Section 5 extends claims broadly to healthcare, finance, and legal applications without bounding the evaluation scope.",
     64         "source": "opus"
     65       }
     66     },
     67     "conflicts_of_interest": {
     68       "funding_disclosed": {
     69         "applies": true,
     70         "answer": true,
     71         "justification": "The Author Statements section explicitly states: 'The authors declare that there is no funding to be acknowledged.'",
     72         "source": "opus"
     73       },
     74       "affiliations_disclosed": {
     75         "applies": true,
     76         "answer": true,
     77         "justification": "Author affiliation is listed as 'Independent Researcher, California, USA.'",
     78         "source": "opus"
     79       },
     80       "funder_independent_of_outcome": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "The work is explicitly unfunded per the author statements, so funder independence is not applicable.",
     84         "source": "opus"
     85       },
     86       "financial_interests_declared": {
     87         "applies": true,
     88         "answer": true,
     89         "justification": "The Author Statements section includes: 'The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.'",
     90         "source": "opus"
     91       }
     92     },
     93     "scope_and_framing": {
     94       "key_terms_defined": {
     95         "applies": true,
     96         "answer": true,
     97         "justification": "RAG systems, embedding space poisoning, TEEs, and cross-layer detection are defined or described with sufficient technical specificity for the target audience.",
     98         "source": "haiku"
     99       },
    100       "intended_contribution_clear": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "Five numbered contributions are explicitly enumerated in Section 1.3, including the cross-layer architecture, cryptographic attestation, production-scale evaluation, ablation, and deployment modes.",
    104         "source": "haiku"
    105       },
    106       "engagement_with_prior_work": {
    107         "applies": true,
    108         "answer": false,
    109         "justification": "The paper names three prior defense systems (RAGuard, RobustRAG, TrustRAG) and engages with their limitations, but the citations don't match — the referenced papers are unrelated works, undermining the claimed engagement.",
    110         "source": "haiku"
    111       }
    112     }
    113   },
    114   "type_checklist": {
    115     "empirical": {
    116       "artifacts": {
    117         "code_released": {
    118           "applies": true,
    119           "answer": false,
    120           "justification": "No code repository URL is provided anywhere in the paper. The data availability statement says data is 'available on request from the corresponding author' and 'not publicly available due to privacy or ethical restrictions,' but no code release is mentioned at all.",
    121           "source": "opus"
    122         },
    123         "data_released": {
    124           "applies": true,
    125           "answer": false,
    126           "justification": "The data availability statement explicitly says 'The data that support the findings of this study are available on request from the corresponding author. The data are not publicly available due to privacy or ethical restrictions.' Available on request counts as NO.",
    127           "source": "opus"
    128         },
    129         "environment_specified": {
    130           "applies": true,
    131           "answer": false,
    132           "justification": "Section 4.1 lists hardware (AMD EPYC 7542, 256GB RAM, AMD SEV-SNP) and the embedding model (all-mpnet-base-v2), but there is no requirements.txt, Dockerfile, or detailed software environment specification with library versions.",
    133           "source": "opus"
    134         },
    135         "reproduction_instructions": {
    136           "applies": true,
    137           "answer": false,
    138           "justification": "No step-by-step reproduction instructions are provided. There is no README, no scripts, no 'Reproducing Results' section.",
    139           "source": "opus"
    140         }
    141       },
    142       "statistical_methodology": {
    143         "confidence_intervals_or_error_bars": {
    144           "applies": true,
    145           "answer": false,
    146           "justification": "All results in Tables 3-5 are point estimates only (e.g., '94.7%', '89.3%', '3.2%'). No confidence intervals, error bars, or ± notation are reported anywhere.",
    147           "source": "opus"
    148         },
    149         "significance_tests": {
    150           "applies": true,
    151           "answer": false,
    152           "justification": "The paper claims EmbedGuard outperforms baselines by '27.9-35.1 percentage point advantage' (Section 4.3) without any statistical significance tests. Comparisons are based solely on point estimate differences.",
    153           "source": "opus"
    154         },
    155         "effect_sizes_reported": {
    156           "applies": true,
    157           "answer": true,
    158           "justification": "The paper reports percentage point improvements with baseline context: '18.4 percentage point improvement from cross-layer correlation' (ablation), '27.9-35.1 percentage point advantage' under adaptive attacks with both baseline and system numbers (89.3% vs 54.2-61.4%).",
    159           "source": "opus"
    160         },
    161         "sample_size_justified": {
    162           "applies": true,
    163           "answer": false,
    164           "justification": "The evaluation uses 500,000 embeddings, 47,000 queries, and attack samples of 5,200-12,500, but no justification is given for any of these sizes. No power analysis or rationale for the chosen N values.",
    165           "source": "opus"
    166         },
    167         "variance_reported": {
    168           "applies": true,
    169           "answer": false,
    170           "justification": "All results are single-point estimates. No standard deviations, variance across runs, or spread measures are reported in any table or figure.",
    171           "source": "opus"
    172         }
    173       },
    174       "evaluation_design": {
    175         "baselines_included": {
    176           "applies": true,
    177           "answer": true,
    178           "justification": "Table 5 compares EmbedGuard against three baselines: RAGuard, RobustRAG, and TrustRAG. However, the references cited for these baselines (refs [5], [6], [7]) are unrelated papers, raising concerns about whether the comparisons are authentic.",
    179           "source": "opus"
    180         },
    181         "baselines_contemporary": {
    182           "applies": true,
    183           "answer": false,
    184           "justification": "The paper claims to compare against RAGuard [6], RobustRAG [5], and TrustRAG [7], but reference [6] is Kandpal et al. 'Large Language Models Struggle to Learn Long-Tail Knowledge', [5] is Zou et al. 'Universal and Transferable Adversarial Attacks on Aligned Language Models', and [7] is Fan et al. 'Defending against Backdoor Attacks in Natural Language Generation'—none of which are the systems described. The reference mismatch makes it impossible to verify that the baselines are correctly implemented.",
    185           "source": "opus"
    186         },
    187         "ablation_study": {
    188           "applies": true,
    189           "answer": true,
    190           "justification": "Table 3 (labeled 'Ablation study results') shows detection performance with individual layers removed: w/o Output (-3.5pp), w/o Retrieval (-7.3pp), w/o Embedding TEE (-10.1pp), w/o Prompt (-4.9pp), and Embedding Only (-18.4pp).",
    191           "source": "opus"
    192         },
    193         "multiple_metrics": {
    194           "applies": true,
    195           "answer": true,
    196           "justification": "Tables 3 and 5 report multiple metrics: detection rate, false positive rate, mean latency, and P99 latency.",
    197           "source": "opus"
    198         },
    199         "human_evaluation": {
    200           "applies": true,
    201           "answer": false,
    202           "justification": "No human evaluation was conducted. The paper mentions that gated mode involves human review (Section 3.7), but this is described as a deployment feature, not used in the evaluation itself.",
    203           "source": "opus"
    204         },
    205         "held_out_test_set": {
    206           "applies": true,
    207           "answer": false,
    208           "justification": "The paper mentions 'held-out attack samples comprising 5,000 attacks' for weight calibration (Section 3.6), but does not clarify whether the 47,000 evaluation queries are separate from this calibration set. The separation between calibration and evaluation data is not explicit.",
    209           "source": "opus"
    210         },
    211         "per_category_breakdown": {
    212           "applies": true,
    213           "answer": true,
    214           "justification": "Table 3 breaks down detection performance by five attack categories: optimization-based, transferability-based, semantic manipulation, adaptive attacks, and coordinated multi-layer attacks.",
    215           "source": "opus"
    216         },
    217         "failure_cases_discussed": {
    218           "applies": true,
    219           "answer": false,
    220           "justification": "No failure cases are discussed. The paper does not analyze when or why the system fails to detect attacks. No qualitative examples of false negatives or missed attacks are provided.",
    221           "source": "opus"
    222         },
    223         "negative_results_reported": {
    224           "applies": true,
    225           "answer": false,
    226           "justification": "Every experiment shows EmbedGuard outperforming baselines. Every ablation shows monotonic degradation when layers are removed. No negative results, failed approaches, or configurations that didn't work are reported.",
    227           "source": "opus"
    228         }
    229       },
    230       "setup_transparency": {
    231         "model_versions_specified": {
    232           "applies": true,
    233           "answer": true,
    234           "justification": "Section 4.1 specifies 'all-mpnet-base-v2 (768 dimensions)' as the embedding model. Section 3.2 mentions 'DistilBERT-based neural classifier.' The embedding model is a specific checkpoint name. Attack transfer models are named: 'BERT-base, RoBERTa.'",
    235           "source": "opus"
    236         },
    237         "prompts_provided": {
    238           "applies": false,
    239           "answer": false,
    240           "justification": "The paper does not use prompting. EmbedGuard uses trained neural classifiers and statistical methods, not LLM prompts.",
    241           "source": "opus"
    242         },
    243         "hyperparameters_reported": {
    244           "applies": true,
    245           "answer": false,
    246           "justification": "Some hyperparameters are reported: β weights (0.35, 0.75, 0.50, 0.20), PCA k=50, K=5 perturbation sets, attack ProjGrad learning rate 0.01 over 500 iterations. However, training hyperparameters for the DistilBERT classifier (trained on 156K pairs) are missing: no learning rate, batch size, epochs, or optimizer specified.",
    247           "source": "opus"
    248         },
    249         "scaffolding_described": {
    250           "applies": false,
    251           "answer": false,
    252           "justification": "No agentic scaffolding is used. EmbedGuard is a detection pipeline, not an agent-based system.",
    253           "source": "opus"
    254         },
    255         "data_preprocessing_documented": {
    256           "applies": true,
    257           "answer": false,
    258           "justification": "Section 4.1 describes the corpus as '500,000 embeddings spanning technical documentation, medical literature, legal texts, and encyclopedic knowledge' without explaining how this corpus was assembled, preprocessed, or filtered. The attack datasets' construction is described at a high level but lacks detail on data preparation steps.",
    259           "source": "opus"
    260         }
    261       },
    262       "data_integrity": {
    263         "raw_data_available": {
    264           "applies": true,
    265           "answer": false,
    266           "justification": "Raw data is not available. The data availability statement says data is available 'on request' and is 'not publicly available due to privacy or ethical restrictions.'",
    267           "source": "opus"
    268         },
    269         "data_collection_described": {
    270           "applies": true,
    271           "answer": false,
    272           "justification": "The evaluation corpus is described only as '500,000 embeddings spanning technical documentation, medical literature, legal texts, and encyclopedic knowledge.' No details on how documents were collected, from what sources, or what time period.",
    273           "source": "opus"
    274         },
    275         "recruitment_methods_described": {
    276           "applies": true,
    277           "answer": false,
    278           "justification": "The document corpus and attack dataset recruitment are not described. It is unclear how the 500K documents were sourced, whether they are public or proprietary, or how attack samples were generated beyond high-level descriptions.",
    279           "source": "opus"
    280         },
    281         "data_pipeline_documented": {
    282           "applies": true,
    283           "answer": false,
    284           "justification": "The pipeline from raw data to final results is not documented. There are unexplained jumps from '500,000 embeddings' and '47,000 queries' to detection results without describing intermediate processing, filtering, or transformation steps.",
    285           "source": "opus"
    286         }
    287       },
    288       "contamination": {
    289         "training_cutoff_stated": {
    290           "applies": false,
    291           "answer": false,
    292           "justification": "The paper evaluates a defense system (EmbedGuard) against adversarial attacks, not a pre-trained model's capability on a benchmark. The DistilBERT classifier is trained as part of the system, not evaluated for general knowledge.",
    293           "source": "opus"
    294         },
    295         "train_test_overlap_discussed": {
    296           "applies": false,
    297           "answer": false,
    298           "justification": "Not applicable—the paper tests a defense system against adversarial attacks, not a pre-trained model's knowledge on benchmarks.",
    299           "source": "opus"
    300         },
    301         "benchmark_contamination_addressed": {
    302           "applies": false,
    303           "answer": false,
    304           "justification": "Not applicable—the evaluation tests defense effectiveness against attack scenarios, not pre-trained model benchmark performance.",
    305           "source": "opus"
    306         }
    307       },
    308       "human_studies": {
    309         "pre_registered": {
    310           "applies": false,
    311           "answer": false,
    312           "justification": "No human participants in this study. The evaluation is entirely system-based using automated attack scenarios.",
    313           "source": "opus"
    314         },
    315         "irb_or_ethics_approval": {
    316           "applies": false,
    317           "answer": false,
    318           "justification": "No human participants. The Author Statements note: 'The conducted research is not related to either human or animal use.'",
    319           "source": "opus"
    320         },
    321         "demographics_reported": {
    322           "applies": false,
    323           "answer": false,
    324           "justification": "No human participants in this study.",
    325           "source": "opus"
    326         },
    327         "inclusion_exclusion_criteria": {
    328           "applies": false,
    329           "answer": false,
    330           "justification": "No human participants in this study.",
    331           "source": "opus"
    332         },
    333         "randomization_described": {
    334           "applies": false,
    335           "answer": false,
    336           "justification": "No human participants in this study.",
    337           "source": "opus"
    338         },
    339         "blinding_described": {
    340           "applies": false,
    341           "answer": false,
    342           "justification": "No human participants in this study.",
    343           "source": "opus"
    344         },
    345         "attrition_reported": {
    346           "applies": false,
    347           "answer": false,
    348           "justification": "No human participants in this study.",
    349           "source": "opus"
    350         }
    351       },
    352       "cost_and_practicality": {
    353         "inference_cost_reported": {
    354           "applies": true,
    355           "answer": true,
    356           "justification": "Latency is reported in detail: 51ms mean end-to-end, with per-layer breakdown (prompt 4.2ms, TEE 12.8ms, retrieval 23.5ms, output 6.3ms, correlation 4.2ms). P99 latencies are also reported per attack type (142-171ms).",
    357           "source": "opus"
    358         },
    359         "compute_budget_stated": {
    360           "applies": true,
    361           "answer": false,
    362           "justification": "Hardware is listed (AMD EPYC 7542, 256GB RAM) but total computational budget is not stated: no GPU hours, no total training time for the DistilBERT classifier, no total evaluation time, no cost figures.",
    363           "source": "opus"
    364         }
    365       },
    366       "experimental_rigor": {
    367         "seed_sensitivity_reported": {
    368           "applies": true,
    369           "answer": false,
    370           "justification": "All results appear to be from single runs. No mention of multiple random seeds or seed sensitivity analysis.",
    371           "source": "opus"
    372         },
    373         "number_of_runs_stated": {
    374           "applies": true,
    375           "answer": false,
    376           "justification": "The number of experimental runs is never stated. Results are presented as single-point estimates without indicating how many runs produced them.",
    377           "source": "opus"
    378         },
    379         "hyperparameter_search_budget": {
    380           "applies": true,
    381           "answer": false,
    382           "justification": "No hyperparameter search budget is reported. The β weights and thresholds appear tuned ('calibrated empirically on held-out attack samples') but no search budget, method, or number of configurations tried is stated.",
    383           "source": "opus"
    384         },
    385         "best_config_selection_justified": {
    386           "applies": true,
    387           "answer": false,
    388           "justification": "Section 3.6 mentions weights were 'calibrated empirically on held-out attack samples comprising 5,000 attacks' but the calibration procedure, selection criterion, and number of configurations explored are not described.",
    389           "source": "opus"
    390         },
    391         "multiple_comparison_correction": {
    392           "applies": true,
    393           "answer": false,
    394           "justification": "Multiple comparisons are made across 5 attack types and 3 baselines without any correction for multiple testing. No Bonferroni, Holm, or Benjamini-Hochberg corrections are applied.",
    395           "source": "opus"
    396         },
    397         "self_comparison_bias_addressed": {
    398           "applies": true,
    399           "answer": false,
    400           "justification": "The authors implement both their own system and all baselines without acknowledging self-comparison bias. No independent evaluation or discussion of the bias from re-implementing baselines.",
    401           "source": "opus"
    402         },
    403         "compute_budget_vs_performance": {
    404           "applies": true,
    405           "answer": false,
    406           "justification": "EmbedGuard adds a TEE layer and cross-layer correlation requiring more compute than baselines, but performance is not reported as a function of compute. The latency comparison (51ms vs 35-42ms) is noted but no compute-equalized comparison is made.",
    407           "source": "opus"
    408         },
    409         "benchmark_construct_validity": {
    410           "applies": true,
    411           "answer": false,
    412           "justification": "The paper does not discuss whether its attack scenarios are representative of real-world adversarial threats. No discussion of construct validity—whether detecting these synthetic attacks translates to real deployment security.",
    413           "source": "opus"
    414         },
    415         "scaffold_confound_addressed": {
    416           "applies": false,
    417           "answer": false,
    418           "justification": "No agentic scaffolding is involved in this work.",
    419           "source": "opus"
    420         }
    421       },
    422       "data_leakage": {
    423         "temporal_leakage_addressed": {
    424           "applies": true,
    425           "answer": false,
    426           "justification": "The DistilBERT classifier was trained on 156,000 adversarial-benign query pairs, but the paper does not discuss whether the training data overlaps temporally or substantively with the evaluation data.",
    427           "source": "opus"
    428         },
    429         "feature_leakage_addressed": {
    430           "applies": true,
    431           "answer": false,
    432           "justification": "No discussion of whether the evaluation setup leaks information. The correlation engine uses 187 features, but it's unclear if any features encode information about the attack labels.",
    433           "source": "opus"
    434         },
    435         "non_independence_addressed": {
    436           "applies": true,
    437           "answer": false,
    438           "justification": "No discussion of whether the 47,000 evaluation queries and attack samples are independent of the training data used for the DistilBERT classifier or the 5,000 held-out samples used for weight calibration.",
    439           "source": "opus"
    440         },
    441         "leakage_detection_method": {
    442           "applies": true,
    443           "answer": false,
    444           "justification": "No concrete leakage detection or prevention method is described for ensuring separation between training, calibration, and evaluation data.",
    445           "source": "opus"
    446         }
    447       }
    448     }
    449   },
    450   "claims": [
    451     {
    452       "claim": "EmbedGuard achieves 94.7% detection rate for optimization-based attacks with 3.2% false positive rate",
    453       "evidence": "Table 3 reports these numbers for 12,500 attack samples on a 500K embedding corpus",
    454       "supported": "weak"
    455     },
    456     {
    457       "claim": "Cross-layer correlation provides 18.4 percentage point improvement over the best single-layer approach",
    458       "evidence": "Ablation table shows embedding-only baseline at 76.3% vs full system 94.7%",
    459       "supported": "weak"
    460     },
    461     {
    462       "claim": "EmbedGuard maintains 89.3% detection against adaptive attacks vs 54.2–61.4% for single-layer defenses",
    463       "evidence": "Table 5 comparative results, but baseline citations (RAGuard, RobustRAG, TrustRAG) map to unrelated papers — their implementation cannot be verified",
    464       "supported": "unsupported"
    465     },
    466     {
    467       "claim": "TEE-based attestation provides 100% true positive rate for direct embedding injection attacks",
    468       "evidence": "Claimed as a property of cryptographic verification: unauthenticated embeddings deterministically fail",
    469       "supported": "weak"
    470     },
    471     {
    472       "claim": "51ms mean latency overhead is within production-grade bounds for interactive applications",
    473       "evidence": "Latency breakdown by layer is provided and compared to 35–42ms for baselines",
    474       "supported": "moderate"
    475     },
    476     {
    477       "claim": "<1% corpus contamination can achieve >80% attack success rates in RAG systems",
    478       "evidence": "Cited from prior work [1] (PoisonedRAG), not an original empirical finding of this paper",
    479       "supported": "moderate"
    480     }
    481   ],
    482   "methodology_tags": [
    483     "benchmark-eval",
    484     "case-study"
    485   ],
    486   "key_findings": "The paper proposes EmbedGuard, a four-layer cross-layer detection framework for RAG embedding attacks that integrates TEE-based cryptographic attestation with statistical anomaly detection. It claims substantial detection improvements over single-layer baselines, particularly under adaptive attacks (89.3% vs 54.2–61.4%). However, the evaluation has critical integrity problems: the three named baseline systems (RAGuard, RobustRAG, TrustRAG) are cited to unrelated papers that do not describe those defense systems, making the comparative results unverifiable and potentially fabricated. Code and data are unavailable, the single independent author has no institutional oversight, and no variance or statistical significance is reported across any measurement.",
    487   "red_flags": [
    488     {
    489       "flag": "Fabricated baseline citations",
    490       "detail": "RAGuard is cited as [6], which is Kandpal et al. 'Large Language Models Struggle to Learn Long-Tail Knowledge' (irrelevant); RobustRAG is cited as [5], which is the Zou et al. adversarial attack paper (not a defense system); TrustRAG [7] and reference [10] are also mismatched. The named baselines cannot be verified as real implemented systems."
    491     },
    492     {
    493       "flag": "No code or data released",
    494       "detail": "Data declared unavailable 'due to privacy or ethical restrictions' — an implausible justification for a synthetic security evaluation dataset. No implementation code is provided."
    495     },
    496     {
    497       "flag": "Single independent researcher, private data",
    498       "detail": "Paper by one independent researcher with no institutional affiliation, evaluating their own proposed system on a private 500K corpus with no third-party verification."
    499     },
    500     {
    501       "flag": "100% true positive rate claim",
    502       "detail": "Claiming deterministic 100% detection for TEE attestation ignores known TEE side-channel vulnerabilities, supply-chain attacks on hardware, and rollback attacks — not discussed."
    503     },
    504     {
    505       "flag": "No variance or statistical tests",
    506       "detail": "All results are single point estimates with no confidence intervals, error bars, or statistical tests despite comparative claims of 7.5–35.1 percentage point improvements."
    507     },
    508     {
    509       "flag": "Corpus provenance undisclosed",
    510       "detail": "The 500K embedding corpus spans 'technical documentation, medical literature, legal texts, encyclopedic knowledge' with no sourcing, licensing, or construction methodology described."
    511     },
    512     {
    513       "flag": "All results favor the proposed method",
    514       "detail": "No negative results, no failure cases, no discussion of scenarios where EmbedGuard underperforms — uniformly positive results from an unverifiable single-author evaluation."
    515     }
    516   ],
    517   "cited_papers": [
    518     {
    519       "title": "PoisonedRAG: Knowledge Poisoning Attacks to Retrieval-Augmented Generation of Large Language Models",
    520       "relevance": "Core threat model paper showing <1% corpus contamination can achieve >80% attack success rates in RAG systems"
    521     },
    522     {
    523       "title": "Prompt Injection attack against LLM-integrated Applications",
    524       "relevance": "Characterizes prompt injection vulnerabilities that EmbedGuard's Layer 1 addresses"
    525     },
    526     {
    527       "title": "Universal and Transferable Adversarial Attacks on Aligned Language Models",
    528       "relevance": "Adversarial suffix attacks and transferability properties relevant to embedding attack threat model"
    529     },
    530     {
    531       "title": "Defending against Backdoor Attacks in Natural Language Generation",
    532       "relevance": "Backdoor detection methodology that informs the output consistency verification layer"
    533     },
    534     {
    535       "title": "Query-Efficient Black-Box Red Teaming via Bayesian Optimization",
    536       "relevance": "Adaptive attack methodology used to construct the adaptive attack evaluation category"
    537     },
    538     {
    539       "title": "Are aligned neural networks adversarially aligned?",
    540       "relevance": "Adversarial robustness of aligned LLMs, relevant to the broader security context"
    541     }
    542   ],
    543   "engagement_factors": {
    544     "practical_relevance": {
    545       "score": 2,
    546       "justification": "RAG security is a real production concern and the four-layer framework is architecturally concrete, but no code or demo limits immediate usability."
    547     },
    548     "surprise_contrarian": {
    549       "score": 1,
    550       "justification": "Cross-layer defense over single-layer is a reasonable architectural extension, not a surprising or contrarian finding."
    551     },
    552     "fear_safety": {
    553       "score": 2,
    554       "justification": "Highlights that <1% corpus poisoning achieves >80% attack success in RAG systems, a meaningful security concern for production deployments."
    555     },
    556     "drama_conflict": {
    557       "score": 0,
    558       "justification": "No controversy, no challenge to specific companies or products, no dramatic claims."
    559     },
    560     "demo_ability": {
    561       "score": 0,
    562       "justification": "No code repository, no demo, no artifacts released."
    563     },
    564     "brand_recognition": {
    565       "score": 0,
    566       "justification": "Solo independent researcher published in an obscure journal (IJCESEN), no recognizable institutional affiliation."
    567     }
    568   },
    569   "hn_data": {
    570     "threads": [],
    571     "top_points": 0,
    572     "total_points": 0,
    573     "total_comments": 0
    574   }
    575 }

Impressum · Datenschutz