scan.json (28271B)
1 { 2 "paper": { 3 "title": "Explainable and Fine-Grained Safeguarding of LLM Multi-Agent Systems via Bi-Level Graph Anomaly Detection", 4 "authors": [ 5 "Junjun Pan", 6 "Yixin Liu", 7 "Rui Miao", 8 "Kaize Ding", 9 "Yu Zheng", 10 "Quoc Viet Hung Nguyen", 11 "Alan Wee-Chung Liew", 12 "Shirui Pan" 13 ], 14 "year": 2025, 15 "venue": "arXiv", 16 "arxiv_id": "2512.18733", 17 "doi": "10.48550/arXiv.2512.18733" 18 }, 19 "scan_version": 2, 20 "active_modules": ["experimental_rigor", "data_leakage"], 21 "methodology_tags": ["benchmark-eval"], 22 "key_findings": "XG-Guard proposes an unsupervised bi-level graph anomaly detection framework for defending LLM-based multi-agent systems against malicious agents. It jointly models sentence- and token-level representations with a theme-based anomaly detector. Across 6 datasets, 4 MAS topologies, and 3 LLM backbones, XG-Guard consistently outperforms existing unsupervised defense methods (achieving >90% AUC in most settings) and approaches supervised performance. The ablation study shows both the token-level view and the bi-level fusion mechanism are essential for detection accuracy.", 23 "checklist": { 24 "artifacts": { 25 "code_released": { 26 "applies": true, 27 "answer": false, 28 "justification": "No repository URL, code archive, or link to source code is provided anywhere in the paper." 29 }, 30 "data_released": { 31 "applies": true, 32 "answer": true, 33 "justification": "All datasets used (CSQA, MMLU, GSM8K, InjecAgent, PoisonRAG) are publicly available benchmarks with citations to their original sources in Section 4.1." 34 }, 35 "environment_specified": { 36 "applies": true, 37 "answer": false, 38 "justification": "Appendix D mentions Adam optimizer, learning rates, and regularization weights, but no environment specifications (no requirements.txt, Dockerfile, library versions, or GPU hardware) are provided." 
39 }, 40 "reproduction_instructions": { 41 "applies": true, 42 "answer": false, 43 "justification": "No step-by-step reproduction instructions, README, or scripts are provided. The experimental setup is described at a high level but not with sufficient detail to reproduce without guessing." 44 } 45 }, 46 "statistical_methodology": { 47 "confidence_intervals_or_error_bars": { 48 "applies": true, 49 "answer": false, 50 "justification": "Table 1 reports all results as point estimates (e.g., '87.11' AUC, '21.67' ASR@3) with no confidence intervals, error bars, or ± notation." 51 }, 52 "significance_tests": { 53 "applies": true, 54 "answer": false, 55 "justification": "The paper claims XG-Guard 'consistently achieves the strongest overall defense performance' and 'outperforms existing unsupervised defense methods by a large margin' but no statistical significance tests (p-values, t-tests, etc.) are reported." 56 }, 57 "effect_sizes_reported": { 58 "applies": true, 59 "answer": false, 60 "justification": "Raw performance numbers (AUC, ASR@3, ACC) are reported in tables, but no formal effect sizes (Cohen's d, percentage improvements with baseline context, etc.) are stated. Claims like 'outperforms by a large margin' are not quantified." 61 }, 62 "sample_size_justified": { 63 "applies": true, 64 "answer": false, 65 "justification": "No justification for the number of MAS dialogue graphs used for training and testing, no power analysis or sample size rationale." 66 }, 67 "variance_reported": { 68 "applies": true, 69 "answer": false, 70 "justification": "No standard deviation, variance, or spread measures are reported across runs. All results appear to be single-run point estimates with no indication of result stability." 
71 } 72 }, 73 "evaluation_design": { 74 "baselines_included": { 75 "applies": true, 76 "answer": true, 77 "justification": "The paper compares against 4 unsupervised GAD methods (DOMINANT, PREM, TAM, BlindGuard) plus supervised G-Safeguard and a no-defense baseline (Section 4.1)." 78 }, 79 "baselines_contemporary": { 80 "applies": true, 81 "answer": true, 82 "justification": "BlindGuard (Miao et al., 2025) and G-Safeguard (Wang et al., 2025) are contemporary MAS defense methods. DOMINANT (2019), PREM (2023), and TAM (2023) serve as established GAD baselines." 83 }, 84 "ablation_study": { 85 "applies": true, 86 "answer": true, 87 "justification": "Table 2 presents an ablation study with two variants: '–Fusion' (replaces bi-level fusion with simple averaging) and '–Token' (removes token view entirely), showing contribution of each component." 88 }, 89 "multiple_metrics": { 90 "applies": true, 91 "answer": true, 92 "justification": "Three evaluation metrics are used: AUROC for detection accuracy, ASR@<round> for attack success rate, and ACC for overall MAS task accuracy (Section 4.1)." 93 }, 94 "human_evaluation": { 95 "applies": true, 96 "answer": false, 97 "justification": "The paper claims interpretability/explainability as a core contribution but only provides qualitative visualization of explanation scores (Figure 5). No formal human evaluation of explanation quality is conducted." 98 }, 99 "held_out_test_set": { 100 "applies": true, 101 "answer": false, 102 "justification": "The paper trains on unattacked MAS graphs and tests on attacked graphs (Section 2), but there is no description of a separate validation/dev set for hyperparameter tuning vs. a held-out test set for final evaluation. Different hyperparameters are used per dataset (Appendix D) without explaining the selection process." 
103 }, 104 "per_category_breakdown": { 105 "applies": true, 106 "answer": true, 107 "justification": "Table 1 provides detailed breakdowns across 6 datasets (3 attack types) × 4 MAS topologies, and Figure 3 shows results per LLM backbone." 108 }, 109 "failure_cases_discussed": { 110 "applies": true, 111 "answer": true, 112 "justification": "Section 4.2 (Explainability) acknowledges that 'we sometimes observe spurious tokens appearing in the explanations, like punctuation marks' and explains why this occurs due to the pre-trained text encoder's behavior." 113 }, 114 "negative_results_reported": { 115 "applies": true, 116 "answer": true, 117 "justification": "The ablation study reveals that the '–Fusion' variant performs even worse than '–Token' (Table 2), which is a notable negative finding. The paper discusses this as evidence of the anomaly score inconsistency problem when naively combining levels." 118 } 119 }, 120 "claims_and_evidence": { 121 "abstract_claims_supported": { 122 "applies": true, 123 "answer": true, 124 "justification": "The abstract claims 'robust detection performance' (supported by Table 1 showing >90% AUC in most settings) and 'strong interpretability' (supported by Figure 5 showing token-level explanation visualization, though only qualitatively)." 125 }, 126 "causal_claims_justified": { 127 "applies": true, 128 "answer": true, 129 "justification": "Causal claims are made through the ablation study ('removing token view causes performance drop'). The ablation design uses controlled single-variable manipulation (removing one component at a time), which is adequate for these claims." 130 }, 131 "generalization_bounded": { 132 "applies": true, 133 "answer": false, 134 "justification": "The title claims 'Safeguarding of LLM Multi-Agent Systems' broadly, but evaluation is limited to simulated MAS setups with 3 specific attack types (prompt injection, tool attack, memory attack) on QA/reasoning tasks. 
The paper does not explicitly bound generalization to these tested settings." 135 }, 136 "alternative_explanations_discussed": { 137 "applies": true, 138 "answer": false, 139 "justification": "No consideration of alternative explanations for why XG-Guard outperforms baselines. For example, the improvement could partly be due to having more parameters or different training signal rather than the bi-level design specifically." 140 }, 141 "proxy_outcome_distinction": { 142 "applies": true, 143 "answer": true, 144 "justification": "The paper measures AUROC (detection accuracy), ASR (attack success rate), and ACC (task accuracy) — these directly measure defense performance without stretching to broader claims beyond what is measured." 145 } 146 }, 147 "setup_transparency": { 148 "model_versions_specified": { 149 "applies": true, 150 "answer": false, 151 "justification": "The paper uses 'GPT-4o-mini', 'DeepSeek-V3', and 'Qwen3-30B-A3B' without specifying API versions, snapshot dates, or specific model checkpoints. These are marketing names without version precision." 152 }, 153 "prompts_provided": { 154 "applies": true, 155 "answer": false, 156 "justification": "The paper mentions attack prompts are manipulated (e.g., 'system prompts of malicious agents are manipulated to downgrade MAS performance') but does not provide the actual prompt text used for either normal or malicious agents." 157 }, 158 "hyperparameters_reported": { 159 "applies": true, 160 "answer": true, 161 "justification": "Appendix D reports the optimizer (Adam), training epochs (20), L2 weight decay (2×10⁻⁴), per-dataset learning rates, and per-dataset contrastive learning trade-off parameter α values." 
162 }, 163 "scaffolding_described": { 164 "applies": true, 165 "answer": true, 166 "justification": "Section 2 formally describes the MAS structure: agents as nodes with (Role, State, Memory, Plugin) tuples, communication topology as directed graph G=(V,E), response generation formula Ri = LLM(Q ∪ {Rj | ei,j ∈ E}), and four tested topologies (chain, tree, star, random)." 167 }, 168 "data_preprocessing_documented": { 169 "applies": true, 170 "answer": false, 171 "justification": "The paper states it follows 'settings of previous works (Wang et al., 2025; Miao et al., 2025)' but does not detail how MAS dialogue graphs are constructed from the underlying datasets, or how training/test splits are created." 172 } 173 }, 174 "limitations_and_scope": { 175 "limitations_section_present": { 176 "applies": true, 177 "answer": true, 178 "justification": "A dedicated 'Limitations' section is present discussing evaluation scope limitations and API model update instability." 179 }, 180 "threats_to_validity_specific": { 181 "applies": true, 182 "answer": true, 183 "justification": "The Limitations section identifies a threat specific to this study type: 'API providers may update backend models without notice, the performance of MAS and the malicious agent detector may become unstable.' While somewhat generic in its framing, this is a concrete concern specific to API-dependent defense systems." 184 }, 185 "scope_boundaries_stated": { 186 "applies": true, 187 "answer": false, 188 "justification": "The Limitations section mentions needing 'a broader range of task domains' but frames this as future work rather than explicitly stating what the current results do NOT show. No specific exclusions or boundary conditions are stated." 189 } 190 }, 191 "data_integrity": { 192 "raw_data_available": { 193 "applies": true, 194 "answer": false, 195 "justification": "No MAS dialogue graph data, trained model weights, or experimental outputs are released. 
Only processed results in tables are shown." 196 }, 197 "data_collection_described": { 198 "applies": true, 199 "answer": true, 200 "justification": "Section 4.1 describes the six datasets with attack strategies (prompt injection on CSQA/MMLU/GSM8K, tool attacks on InjecAgent, memory attacks on CSQA/PoisonRAG) and four MAS topologies. The attack and defense setup follows prior work." 201 }, 202 "recruitment_methods_described": { 203 "applies": false, 204 "answer": false, 205 "justification": "No human participants. All data sources are standard benchmarks (CSQA, MMLU, GSM8K, InjecAgent, PoisonRAG)." 206 }, 207 "data_pipeline_documented": { 208 "applies": true, 209 "answer": false, 210 "justification": "The paper does not document how MAS dialogue graphs are generated from the underlying datasets, how many training vs. test graphs are created, or the exact pipeline from dataset to final evaluation. It defers to prior work settings without detailing them." 211 } 212 }, 213 "conflicts_of_interest": { 214 "funding_disclosed": { 215 "applies": true, 216 "answer": false, 217 "justification": "No acknowledgments section, no funding statement, and no grants or sponsors are mentioned anywhere in the paper." 218 }, 219 "affiliations_disclosed": { 220 "applies": true, 221 "answer": true, 222 "justification": "Author affiliations are clearly listed: Griffith University (Australia), Jilin University (China), and Northwestern University (USA). All are academic institutions with no apparent product conflict." 223 }, 224 "funder_independent_of_outcome": { 225 "applies": true, 226 "answer": false, 227 "justification": "No funding is disclosed, so independence cannot be assessed. The authors are academic but funding sources are unstated." 
228 }, 229 "financial_interests_declared": { 230 "applies": true, 231 "answer": true, 232 "justification": "The Ethical Considerations section states 'We identify no ethical risks or conflicts of interest,' which serves as a conflicts-of-interest declaration." 233 } 234 }, 235 "contamination": { 236 "training_cutoff_stated": { 237 "applies": false, 238 "answer": false, 239 "justification": "The paper evaluates a defense tool (XG-Guard, a GNN trained from scratch) rather than a pre-trained model's capability on benchmarks. The LLMs serve as MAS backbones, not as the system being evaluated." 240 }, 241 "train_test_overlap_discussed": { 242 "applies": false, 243 "answer": false, 244 "justification": "The paper tests a defense method, not a pre-trained model's knowledge on benchmarks. XG-Guard is trained from scratch on unattacked MAS graphs." 245 }, 246 "benchmark_contamination_addressed": { 247 "applies": false, 248 "answer": false, 249 "justification": "The paper tests a defense tool's ability to detect malicious agents, not a pre-trained model's benchmark performance. Contamination of the underlying LLMs into task benchmarks is not relevant to the defense evaluation." 250 } 251 }, 252 "human_studies": { 253 "pre_registered": { 254 "applies": false, 255 "answer": false, 256 "justification": "No human participants in this study. All experiments use simulated MAS with LLM agents." 257 }, 258 "irb_or_ethics_approval": { 259 "applies": false, 260 "answer": false, 261 "justification": "No human participants. The Ethical Considerations section confirms 'no human subjects.'" 262 }, 263 "demographics_reported": { 264 "applies": false, 265 "answer": false, 266 "justification": "No human participants involved in this study." 267 }, 268 "inclusion_exclusion_criteria": { 269 "applies": false, 270 "answer": false, 271 "justification": "No human participants involved in this study." 
272 }, 273 "randomization_described": { 274 "applies": false, 275 "answer": false, 276 "justification": "No human participants involved in this study." 277 }, 278 "blinding_described": { 279 "applies": false, 280 "answer": false, 281 "justification": "No human participants involved in this study." 282 }, 283 "attrition_reported": { 284 "applies": false, 285 "answer": false, 286 "justification": "No human participants involved in this study." 287 } 288 }, 289 "cost_and_practicality": { 290 "inference_cost_reported": { 291 "applies": true, 292 "answer": false, 293 "justification": "Appendix C provides theoretical time complexity O(NL²+M) but no actual wall-clock inference time, API costs, or tokens consumed. The paper uses GPT-4o-mini for MAS backbone without reporting API costs." 294 }, 295 "compute_budget_stated": { 296 "applies": true, 297 "answer": false, 298 "justification": "No GPU hours, total API spend, hardware specifications, or training time are reported. Only the number of training epochs (20) is stated." 299 } 300 }, 301 "experimental_rigor": { 302 "seed_sensitivity_reported": { 303 "applies": true, 304 "answer": false, 305 "justification": "No mention of multiple random seeds. All results appear to be single-run point estimates with no seed sensitivity analysis." 306 }, 307 "number_of_runs_stated": { 308 "applies": true, 309 "answer": false, 310 "justification": "The number of experimental runs is never stated. It is unclear whether results are from single runs or averaged across multiple runs." 311 }, 312 "hyperparameter_search_budget": { 313 "applies": true, 314 "answer": false, 315 "justification": "Per-dataset hyperparameters are listed (Appendix D) but no search budget, search method, or number of configurations tried is reported." 
316 }, 317 "best_config_selection_justified": { 318 "applies": true, 319 "answer": false, 320 "justification": "Different hyperparameters are used for different datasets (e.g., different learning rates and α values) with no explanation of how these were selected or validated." 321 }, 322 "multiple_comparison_correction": { 323 "applies": true, 324 "answer": false, 325 "justification": "The paper makes dozens of comparative claims across 6 datasets × 4 topologies without any statistical tests, let alone correction for multiple comparisons." 326 }, 327 "self_comparison_bias_addressed": { 328 "applies": true, 329 "answer": false, 330 "justification": "No acknowledgment of the bias of evaluating their own system. The authors implement baselines (or use prior implementations) and compare against them without discussing potential bias." 331 }, 332 "compute_budget_vs_performance": { 333 "applies": true, 334 "answer": false, 335 "justification": "No analysis of performance as a function of compute budget. XG-Guard adds a token-level GNN stream on top of sentence-level processing, but the compute cost comparison with simpler baselines is not provided." 336 }, 337 "benchmark_construct_validity": { 338 "applies": true, 339 "answer": false, 340 "justification": "No discussion of whether AUROC and ASR adequately measure real-world defense effectiveness, or whether the simulated attack scenarios reflect realistic threat models." 341 }, 342 "scaffold_confound_addressed": { 343 "applies": true, 344 "answer": true, 345 "justification": "When comparing LLM backbones (Figure 3), the same MAS setup and defense configuration are used across GPT-4o-mini, DeepSeek-V3, and Qwen3-30B-A3B. All defense method comparisons (Table 1) use the same MAS topologies and attack setups." 
346 } 347 }, 348 "data_leakage": { 349 "temporal_leakage_addressed": { 350 "applies": true, 351 "answer": false, 352 "justification": "No discussion of whether the MAS dialogue patterns in training data could temporally overlap with test scenarios, or whether the underlying benchmarks (CSQA, MMLU, GSM8K) have temporal leakage into the LLM backbones." 353 }, 354 "feature_leakage_addressed": { 355 "applies": true, 356 "answer": false, 357 "justification": "No discussion of whether the evaluation setup leaks information (e.g., whether the anomaly detector benefits from artifacts of the attack simulation that wouldn't be present in real attacks)." 358 }, 359 "non_independence_addressed": { 360 "applies": true, 361 "answer": false, 362 "justification": "Training (unattacked) and test (attacked) MAS graphs are generated from the same underlying datasets and may share structural similarities. This non-independence is not discussed." 363 }, 364 "leakage_detection_method": { 365 "applies": true, 366 "answer": false, 367 "justification": "No concrete leakage detection or prevention methods are applied." 368 } 369 } 370 }, 371 "claims": [ 372 { 373 "claim": "XG-Guard consistently achieves the strongest overall defense performance among unsupervised methods across diverse attack scenarios and MAS topologies.", 374 "evidence": "Table 1 shows XG-Guard obtains the highest AUC and lowest ASR@3 in most of 24 settings (6 datasets × 4 topologies), with AUC consistently above 87% and often above 95%.", 375 "supported": "moderate" 376 }, 377 { 378 "claim": "XG-Guard approaches supervised defense performance (G-Safeguard) without requiring annotations, consistently exceeding 90% AUC across all settings.", 379 "evidence": "Table 1 shows XG-Guard AUC ranges from 87.11% to 99.56%. 
On PI-GSM8K, TA-InjecAgent, MA-PoisonRAG, and MA-CSQA, XG-Guard achieves comparable results to G-Safeguard. However, the 87.11% minimum falls below 90%, so the 'exceeding 90% AUC across all settings' part of the claim holds for most settings but not all.", 380 "supported": "moderate" 381 }, 382 { 383 "claim": "The bi-level architecture (token + sentence level) and fusion mechanism are both essential for detection accuracy.", 384 "evidence": "Table 2 ablation study: removing fusion ('–Fusion') degrades performance severely (e.g., TA-InjecAgent drops from 99.56 to 48.27 AUC); removing token view ('–Token') also degrades performance but less than '–Fusion'.", 385 "supported": "moderate" 386 }, 387 { 388 "claim": "XG-Guard generalizes effectively across different LLM backbones (GPT-4o-mini, DeepSeek-V3, Qwen3-30B-A3B).", 389 "evidence": "Figure 3 shows XG-Guard achieves lowest or competitive ASR@3 across all topologies with both DeepSeek-V3 and Qwen3-30B-A3B on MA-CSQA and MA-PoisonRAG datasets.", 390 "supported": "moderate" 391 }, 392 { 393 "claim": "XG-Guard provides meaningful token-level explanations that highlight anomaly-indicative tokens associated with malicious behavior.", 394 "evidence": "Figure 5 shows two qualitative case studies where the model assigns higher anomaly scores to tokens implying manipulation ('should be accepted as accurate') or privacy violation ('find the personal details').", 395 "supported": "weak" 396 } 397 ], 398 "red_flags": [ 399 { 400 "flag": "No error bars or variance across runs", 401 "detail": "All results in Table 1 and Table 2 are single point estimates with no standard deviation, confidence intervals, or indication of result stability. For stochastic methods (contrastive learning with random negative sampling), this is a significant omission." 402 }, 403 { 404 "flag": "No statistical significance tests", 405 "detail": "Claims of superiority ('outperforms by a large margin') are made by comparing point estimates across 24 settings without any statistical testing. Some margins are small (e.g., 90.67 vs 84.44 AUC on PI-MMLU Random)." 
406 }, 407 { 408 "flag": "Explainability claims lack quantitative evaluation", 409 "detail": "Explainability is a core contribution (in the title), yet the only evaluation is two qualitative case studies in Figure 5. No human evaluation of explanation quality, no ground-truth comparison, and the paper acknowledges spurious tokens in explanations." 410 }, 411 { 412 "flag": "Fixed defense budget assumption", 413 "detail": "The defense budget is fixed at 3 (top-3 agents labeled as attackers) 'to ensure fairness and practical comparison with prior works.' This strong assumption may not hold in practice and sensitivity to this parameter is not tested." 414 }, 415 { 416 "flag": "Hyperparameter selection process unclear", 417 "detail": "Different learning rates and trade-off parameters α are used for different datasets (Appendix D) with no explanation of how these values were selected, raising concerns about potential overfitting to test data." 418 } 419 ], 420 "cited_papers": [ 421 { 422 "title": "G-safeguard: A topology-guided security lens and treatment on LLM-based multi-agent systems", 423 "authors": ["Shilong Wang", "Guibin Zhang", "Miao Yu"], 424 "year": 2025, 425 "relevance": "Pioneering supervised GAD-based defense framework for LLM multi-agent systems; direct baseline and motivation for this work." 426 }, 427 { 428 "title": "BlindGuard: Safeguarding LLM-based multi-agent systems under unknown attacks", 429 "authors": ["Rui Miao", "Yixin Liu", "Yili Wang"], 430 "year": 2025, 431 "arxiv_id": "2508.08127", 432 "relevance": "Current state-of-the-art unsupervised MAS defense method; primary baseline comparison for XG-Guard." 
433 }, 434 { 435 "title": "InjecAgent: Benchmarking indirect prompt injections in tool-integrated large language model agents", 436 "authors": ["Qiusi Zhan", "Zhixiang Liang", "Zifan Ying", "Daniel Kang"], 437 "year": 2024, 438 "relevance": "Benchmark for indirect prompt injection attacks on tool-integrated LLM agents; used as one of the evaluation datasets." 439 }, 440 { 441 "title": "Evil geniuses: Delving into the safety of LLM-based agents", 442 "authors": ["Yu Tian", "Xiao Yang", "Jingyuan Zhang"], 443 "year": 2023, 444 "arxiv_id": "2311.11855", 445 "relevance": "Early study of safety vulnerabilities in LLM-based agents, motivating the defense direction." 446 }, 447 { 448 "title": "AgentPoison: Red-teaming LLM agents via poisoning memory or knowledge bases", 449 "authors": ["Zhaorun Chen", "Zhen Xiang", "Chaowei Xiao"], 450 "year": 2024, 451 "relevance": "Demonstrates memory and knowledge base poisoning attacks on LLM agents, a key threat model addressed by defense methods." 452 }, 453 { 454 "title": "A survey on code generation with LLM-based agents", 455 "authors": ["Yihong Dong", "Xue Jiang", "Jiaru Qian"], 456 "year": 2025, 457 "arxiv_id": "2508.00083", 458 "relevance": "Survey of LLM-based agent capabilities in code generation; relevant to the scope of agentic AI evaluation." 459 }, 460 { 461 "title": "Large language model based multi-agents: A survey of progress and challenges", 462 "authors": ["Taicheng Guo", "Xiuying Chen", "Yaqi Wang"], 463 "year": 2024, 464 "arxiv_id": "2402.01680", 465 "relevance": "Comprehensive survey of LLM-based multi-agent systems covering progress, architectures, and challenges." 466 }, 467 { 468 "title": "A survey on large language model based autonomous agents", 469 "authors": ["Lei Wang", "Chen Ma", "Xueyang Feng"], 470 "year": 2024, 471 "relevance": "Broad survey of LLM-based autonomous agents covering capabilities, architectures, and applications." 
472 }, 473 { 474 "title": "AgentSafe: Safeguarding large language model-based multi-agent systems via hierarchical data management", 475 "authors": ["Junyuan Mao", "Fanci Meng", "Yifan Duan"], 476 "year": 2025, 477 "arxiv_id": "2503.04392", 478 "relevance": "MAS defense framework using hierarchical data management; related approach to safeguarding multi-agent systems." 479 }, 480 { 481 "title": "NetSafe: Exploring the topological safety of multi-agent networks", 482 "authors": ["Miao Yu", "Shilong Wang", "Guibin Zhang"], 483 "year": 2024, 484 "arxiv_id": "2410.15686", 485 "relevance": "Pioneering study of topological safety in MAS, investigating agent hallucinations and aggregation safety phenomena." 486 }, 487 { 488 "title": "DeepSeek-V3 technical report", 489 "authors": ["Aixin Liu", "Bei Feng", "Bing Xue"], 490 "year": 2024, 491 "arxiv_id": "2412.19437", 492 "relevance": "Technical report for DeepSeek-V3, one of the LLM backbones evaluated for generalization testing." 493 }, 494 { 495 "title": "Attention knows whom to trust: Attention-based trust management for LLM multi-agent systems", 496 "authors": ["Pengfei He", "Zhenwei Dai", "Xianfeng Tang"], 497 "year": 2025, 498 "arxiv_id": "2506.02546", 499 "relevance": "Introduces attention-based trust metrics for evaluating violations in MAS across trust dimensions; related defense approach." 500 } 501 ] 502 }