scan-v5.json (28334B)
1 { 2 "scan_version": 5, 3 "paper_type": "empirical", 4 "paper": { 5 "title": "Dialogue Injection Attack: Jailbreaking LLMs through Context Manipulation", 6 "authors": [ 7 "Wenlong Meng", 8 "Fan Zhang", 9 "Wendao Yao", 10 "Zhenyuan Guo", 11 "Yuwei Li" 12 ], 13 "year": 2025, 14 "venue": "IEEE Transactions on Information Forensics and Security", 15 "arxiv_id": "2503.08195", 16 "doi": "10.1109/TIFS.2026.3657898" 17 }, 18 "checklist": { 19 "claims_and_evidence": { 20 "abstract_claims_supported": { 21 "applies": true, 22 "answer": true, 23 "justification": "Abstract claims of 0.89 ASR on Llama-3.1-8B and 0.82 on GPT-4o after 10 queries on AdvBench are supported by Figure 5 multi-query results; defense bypass claims are supported by Table 5.", 24 "source": "haiku" 25 }, 26 "causal_claims_justified": { 27 "applies": true, 28 "answer": true, 29 "justification": "Ablation study in Section 5.5 systematically removes system prompt, hypnosis, and answer guidance components with measured ASR impact, adequately justifying causal claims about component contributions.", 30 "source": "haiku" 31 }, 32 "generalization_bounded": { 33 "applies": true, 34 "answer": false, 35 "justification": "The claim that 'larger LLMs are more susceptible to jailbreak attacks' is stated broadly but is contradicted by the Llama-3 family results and confounded by different alignment strategies and training cutoffs that are not controlled for.", 36 "source": "haiku" 37 }, 38 "alternative_explanations_discussed": { 39 "applies": true, 40 "answer": false, 41 "justification": "Performance variation across models is attributed primarily to 'different alignments regarding attack types' without systematically exploring alternatives; the model-size finding gets only a single speculative explanation referencing training cutoffs.", 42 "source": "haiku" 43 }, 44 "proxy_outcome_distinction": { 45 "applies": true, 46 "answer": false, 47 "justification": "ASR is measured by LlamaGuard classifiers rather than actual 
harm assessment; the paper acknowledges automated classifiers are imperfect but does not adequately discuss the gap between classifier-confirmed bypass and real-world harmful content generation.", 48 "source": "haiku" 49 } 50 }, 51 "limitations_and_scope": { 52 "limitations_section_present": { 53 "applies": true, 54 "answer": false, 55 "justification": "There is no dedicated limitations section; Section 8 is an 'Ethics Consideration' addressing responsible disclosure, not methodological limitations of the study.", 56 "source": "haiku" 57 }, 58 "threats_to_validity_specific": { 59 "applies": true, 60 "answer": false, 61 "justification": "No threats-to-validity discussion exists; the paper does not address potential biases from LlamaGuard evaluation, the limited LLM families tested, or single-run measurement variance.", 62 "source": "haiku" 63 }, 64 "scope_boundaries_stated": { 65 "applies": true, 66 "answer": false, 67 "justification": "The black-box threat model is described as a scope constraint but the paper does not explicitly state what results do NOT show (e.g., non-chat interfaces, non-English prompts, domains outside AdvBench categories).", 68 "source": "haiku" 69 } 70 }, 71 "conflicts_of_interest": { 72 "funding_disclosed": { 73 "applies": true, 74 "answer": false, 75 "justification": "No funding acknowledgment appears anywhere in the paper, even though the authors' National University of Defense Technology affiliation would warrant a funding disclosure.", 76 "source": "haiku" 77 }, 78 "affiliations_disclosed": { 79 "applies": true, 80 "answer": true, 81 "justification": "Author affiliations with Zhejiang University and National University of Defense Technology are clearly disclosed in the paper header.", 82 "source": "haiku" 83 }, 84 "funder_independent_of_outcome": { 85 "applies": false, 86 "answer": false, 87 "justification": "No funding is disclosed, making this criterion not applicable.", 88 "source": "haiku" 89 }, 90 "financial_interests_declared": { 91 "applies": 
true, 92 "answer": false, 93 "justification": "There is no competing interests or financial disclosure statement anywhere in the paper.", 94 "source": "haiku" 95 } 96 }, 97 "scope_and_framing": { 98 "key_terms_defined": { 99 "applies": true, 100 "answer": true, 101 "justification": "Key terms including 'jailbreak attack,' 'dialogue injection,' 'attack success rate,' and the white/gray/black-box taxonomy are clearly defined in Sections 2.3 and 3.1.", 102 "source": "haiku" 103 }, 104 "intended_contribution_clear": { 105 "applies": true, 106 "answer": true, 107 "justification": "The paper explicitly enumerates four contributions: the DIA paradigm with dialogue injection method, DIA-I and DIA-II methods, the template inference attack, and comparative evaluation across 10 LLMs.", 108 "source": "haiku" 109 }, 110 "engagement_with_prior_work": { 111 "applies": true, 112 "answer": true, 113 "justification": "Section 6 explicitly positions DIA as the first multi-turn dialogue-based jailbreak approach versus existing single-turn white/gray/black-box methods, with specific comparisons to GCG, DRA, DeepInception, and PAIR.", 114 "source": "haiku" 115 } 116 } 117 }, 118 "type_checklist": { 119 "empirical": { 120 "artifacts": { 121 "code_released": { 122 "applies": true, 123 "answer": true, 124 "justification": "Source code is stated as available at https://github.com/meng-wenlong/DIA in the abstract footnote; however, the generated affirmative beginnings dataset is promised only 'after acceptance.'", 125 "source": "haiku" 126 }, 127 "data_released": { 128 "applies": true, 129 "answer": true, 130 "justification": "All three primary benchmark datasets (AdvBench, HEx-PHI, MaliciousInstruct) are publicly available on HuggingFace Datasets; the paper-generated affirmative beginnings are not yet released.", 131 "source": "haiku" 132 }, 133 "environment_specified": { 134 "applies": true, 135 "answer": false, 136 "justification": "Only hardware is mentioned (Intel Xeon 8358, 4x Nvidia 
A100 80G) and inference engine (Ollama); no requirements.txt, Dockerfile, or software dependency versions are provided.", 137 "source": "haiku" 138 }, 139 "reproduction_instructions": { 140 "applies": true, 141 "answer": false, 142 "justification": "No step-by-step reproduction instructions are provided; the algorithms (1-3) describe logic but not how to execute the full attack pipeline end-to-end.", 143 "source": "haiku" 144 } 145 }, 146 "statistical_methodology": { 147 "confidence_intervals_or_error_bars": { 148 "applies": true, 149 "answer": false, 150 "justification": "All results in Tables 2, 3, 5, and 6 are single point estimates; no confidence intervals, error bars, or multiple-run averages are reported.", 151 "source": "haiku" 152 }, 153 "significance_tests": { 154 "applies": true, 155 "answer": false, 156 "justification": "No statistical significance tests are applied to any comparative claims; differences between DIA and baselines are presented as raw ASR values without testing.", 157 "source": "haiku" 158 }, 159 "effect_sizes_reported": { 160 "applies": true, 161 "answer": true, 162 "justification": "Raw ASR values are reported for all method-model combinations, allowing direct effect size computation; the paper also explicitly states degradation percentages (e.g., DRA degrades 67% and 99% on Llama-3.1-8B).", 163 "source": "haiku" 164 }, 165 "sample_size_justified": { 166 "applies": true, 167 "answer": false, 168 "justification": "The paper uses AdvBench (520 items), HEx-PHI (330 items), and MaliciousInstruct (100 items) without justifying adequacy or discussing statistical power.", 169 "source": "haiku" 170 }, 171 "variance_reported": { 172 "applies": true, 173 "answer": false, 174 "justification": "No variance, standard deviation, or multiple experimental runs are reported; all results appear to be single runs on probabilistic LLM outputs.", 175 "source": "haiku" 176 } 177 }, 178 "evaluation_design": { 179 "baselines_included": { 180 "applies": true, 
181 "answer": true, 182 "justification": "Four state-of-the-art baselines are included: DeepInception, ReNe, PAIR, and DRA, each representing distinct attack strategies tested under identical conditions.", 183 "source": "haiku" 184 }, 185 "baselines_contemporary": { 186 "applies": true, 187 "answer": true, 188 "justification": "All baselines are from 2023-2024, including DRA (USENIX Security 2024) and ReNe; all are relevant recent black-box jailbreak methods.", 189 "source": "haiku" 190 }, 191 "ablation_study": { 192 "applies": true, 193 "answer": true, 194 "justification": "Section 5.5 ablates system prompt replacement, hypnosis, and answer guidance for both DIA-I and DIA-II across three models, and separately ablates the prompt rewrite algorithm.", 195 "source": "haiku" 196 }, 197 "multiple_metrics": { 198 "applies": true, 199 "answer": true, 200 "justification": "Two evaluation metrics are used: ASR (with both LlamaGuard-2 and LlamaGuard-3 as independent evaluators) and Defense Pass Rate (DPR) in the defense evaluation section.", 201 "source": "haiku" 202 }, 203 "human_evaluation": { 204 "applies": true, 205 "answer": false, 206 "justification": "No human evaluation of attack outputs is performed; the paper explicitly opts for automated LlamaGuard classifiers over GPT-4 judging, citing cost and scalability concerns.", 207 "source": "haiku" 208 }, 209 "held_out_test_set": { 210 "applies": false, 211 "answer": false, 212 "justification": "This is an attack evaluation study without a prediction task; the benchmarks serve as attack targets, not prediction test sets requiring train/test separation.", 213 "source": "haiku" 214 }, 215 "per_category_breakdown": { 216 "applies": true, 217 "answer": false, 218 "justification": "HEx-PHI contains 11 prohibited categories (illegal activity, fraud, privacy violation, etc.) 
but no per-category breakdown is provided; all results are aggregated.", 219 "source": "haiku" 220 }, 221 "failure_cases_discussed": { 222 "applies": true, 223 "answer": true, 224 "justification": "The paper explicitly identifies Llama-3.1-8B as the most secure model, reports near-zero single-query ASR for DIA on multiple models, and discusses DIA-I's poor performance on Llama-2-7B without the prompt rewrite module.", 225 "source": "haiku" 226 }, 227 "negative_results_reported": { 228 "applies": true, 229 "answer": true, 230 "justification": "Negative results are clearly reported: DIA-I achieves ~0 ASR on Llama-3.1-8B single-query, DRA fails completely on GPT-4o (ASR=0.000), and component ablations showing degraded performance are included.", 231 "source": "haiku" 232 } 233 }, 234 "setup_transparency": { 235 "model_versions_specified": { 236 "applies": true, 237 "answer": true, 238 "justification": "GPT-4o and GPT-4o-mini are specified with exact API snapshot versions (gpt-4o-2024-08-06, gpt-4o-mini-2024-07-18); open-source models are specified by family and parameter count.", 239 "source": "haiku" 240 }, 241 "prompts_provided": { 242 "applies": true, 243 "answer": false, 244 "justification": "The paper describes prompt components structurally (system replacement directives, hypnosis dialogues, answer guidance pattern) but does not provide the actual text of any prompts used in experiments.", 245 "source": "haiku" 246 }, 247 "hyperparameters_reported": { 248 "applies": true, 249 "answer": false, 250 "justification": "No inference hyperparameters (temperature, top-p, max tokens) are reported for DIA or baselines; the paper only states baselines use their originally specified hyperparameters.", 251 "source": "haiku" 252 }, 253 "scaffolding_described": { 254 "applies": false, 255 "answer": false, 256 "justification": "This paper evaluates attack construction pipelines, not agentic scaffolding; the ABGM/SDGM modules are attack components, not agent scaffolding.", 257 
"source": "haiku" 258 }, 259 "data_preprocessing_documented": { 260 "applies": true, 261 "answer": true, 262 "justification": "ABGM (Algorithm 1) and SDGM are described in detail including keyword extraction, NLTK-based morphological augmentation, cosine similarity matching, and word substitution steps.", 263 "source": "haiku" 264 } 265 }, 266 "data_integrity": { 267 "raw_data_available": { 268 "applies": true, 269 "answer": false, 270 "justification": "Raw model outputs, attack transcripts, and LlamaGuard evaluation results are not released; only the public benchmark inputs are available.", 271 "source": "haiku" 272 }, 273 "data_collection_described": { 274 "applies": true, 275 "answer": true, 276 "justification": "Benchmarks are downloaded from HuggingFace Datasets and described with statistics (mean token lengths, category counts); affirmative beginning generation via ABGM is described in Algorithm 1.", 277 "source": "haiku" 278 }, 279 "recruitment_methods_described": { 280 "applies": false, 281 "answer": false, 282 "justification": "No human participants are involved; standard published benchmark datasets are used.", 283 "source": "haiku" 284 }, 285 "data_pipeline_documented": { 286 "applies": true, 287 "answer": true, 288 "justification": "The complete pipeline from benchmark loading through ABGM/SDGM processing, dialogue construction, LLM querying via Ollama, and LlamaGuard evaluation is described across Sections 4 and 5.", 289 "source": "haiku" 290 } 291 }, 292 "contamination": { 293 "training_cutoff_stated": { 294 "applies": true, 295 "answer": false, 296 "justification": "Training cutoffs are mentioned only for Llama-3 models (70B: December 2023, 8B: March 2023) as an incidental explanation; systematic cutoff reporting for all 10 tested models is absent.", 297 "source": "haiku" 298 }, 299 "train_test_overlap_discussed": { 300 "applies": true, 301 "answer": false, 302 "justification": "The paper briefly notes LLM developers may add prior jailbreak prompts 
to alignment training (to explain DRA's degradation) but does not systematically address whether AdvBench or HEx-PHI prompts appear in alignment data.", 303 "source": "haiku" 304 }, 305 "benchmark_contamination_addressed": { 306 "applies": true, 307 "answer": false, 308 "justification": "AdvBench and HEx-PHI are well-known published benchmarks that may be in alignment training data for newer models like Llama-3.1; this is not addressed despite being directly relevant to interpreting differential ASR results.", 309 "source": "haiku" 310 } 311 }, 312 "human_studies": { 313 "pre_registered": { 314 "applies": false, 315 "answer": false, 316 "justification": "No human participants are involved in this study.", 317 "source": "haiku" 318 }, 319 "irb_or_ethics_approval": { 320 "applies": false, 321 "answer": false, 322 "justification": "No human participants are involved; Section 8 addresses responsible disclosure ethics, not IRB/participant protection.", 323 "source": "haiku" 324 }, 325 "demographics_reported": { 326 "applies": false, 327 "answer": false, 328 "justification": "No human participants are involved.", 329 "source": "haiku" 330 }, 331 "inclusion_exclusion_criteria": { 332 "applies": false, 333 "answer": false, 334 "justification": "No human participants are involved.", 335 "source": "haiku" 336 }, 337 "randomization_described": { 338 "applies": false, 339 "answer": false, 340 "justification": "No human participants are involved.", 341 "source": "haiku" 342 }, 343 "blinding_described": { 344 "applies": false, 345 "answer": false, 346 "justification": "No human participants are involved.", 347 "source": "haiku" 348 }, 349 "attrition_reported": { 350 "applies": false, 351 "answer": false, 352 "justification": "No human participants are involved.", 353 "source": "haiku" 354 } 355 }, 356 "cost_and_practicality": { 357 "inference_cost_reported": { 358 "applies": true, 359 "answer": false, 360 "justification": "No latency, API cost, or inference time is reported for 
any experiment despite testing 10 LLMs across 3 benchmarks with up to 10 query iterations.", 361 "source": "haiku" 362 }, 363 "compute_budget_stated": { 364 "applies": true, 365 "answer": false, 366 "justification": "Hardware is described (4x A100 80G GPUs) but total compute time, GPU-hours, or monetary cost for the experiments is not stated.", 367 "source": "haiku" 368 } 369 } 370 } 371 }, 372 "claims": [ 373 { 374 "claim": "DIA achieves 0.89 ASR on Llama-3.1-8B and 0.82 on GPT-4o after 10 queries on AdvBench", 375 "evidence": "Figure 5 multi-query curves show ASR growth across 10 iterations; stated values are cited from the abstract but the figure shows DIA-II reaching these levels by iteration 10", 376 "supported": "moderate" 377 }, 378 { 379 "claim": "Historical dialogue manipulation in black-box settings is practical via dialogue injection using chat template delimiters", 380 "evidence": "Section 3.2 provides formal construction of adversarial inputs using Su/Pa/Sa/Pu delimiters; logically sound given LLM inference pipeline design shown in Figure 1", 381 "supported": "strong" 382 }, 383 { 384 "claim": "DIA bypasses 5 defense mechanisms with average DPR of 0.93 (DIA-I) and 0.82 (DIA-II)", 385 "evidence": "Table 5 shows DPR values for OpenAI Moderation, Perplexity Filter, Defensive System Prompt, Prompt Patch, and Bergeron tested on Gemma-2-9B only", 386 "supported": "moderate" 387 }, 388 { 389 "claim": "Larger LLMs within the same family are more susceptible to jailbreak attacks", 390 "evidence": "Figure 8 shows ASR vs model size; the Llama-3 family contradicts this pattern, and the comparison is confounded by different training cutoffs and alignment strategies", 391 "supported": "weak" 392 }, 393 { 394 "claim": "Template inference attack achieves ~90% accuracy within 5 query attempts", 395 "evidence": "Figure 2 shows accuracy vs max try times for three LLM pairs (Qwen2/Gemma2, Qwen2/Llama3, Gemma2/Llama3) reaching ~0.9 at NT_max=5", 396 "supported": "moderate" 
397 }, 398 { 399 "claim": "Deferred harmful responses have higher log-likelihood than immediate harmful responses", 400 "evidence": "Figure 4 shows log-likelihood distributions with and without prepended benign text for Llama-3.1-8B and Llama-3.2-11B; distributions shift rightward (less negative) with prepended benign context", 401 "supported": "moderate" 402 } 403 ], 404 "methodology_tags": [ 405 "benchmark-eval" 406 ], 407 "key_findings": "DIA introduces a novel black-box jailbreak paradigm exploiting LLM chat template structure: attackers can inject fabricated dialogue histories by embedding chat template delimiters directly in user-visible input fields, enabling gray-box prefilling attacks without model access. DIA-II discovers a previously unreported vulnerability that deferred harmful responses have higher generation log-likelihood, and exploits it by having models perform word substitution tasks before answering, achieving high ASR on recently aligned models (e.g., 0.80 on Llama-3.1-70B on HEx-PHI with LlamaGuard-3). Ablation studies confirm all dialogue components contribute to performance, with answer guidance being the most critical. Despite strong empirical results across 10 LLMs and 3 benchmarks, all results lack statistical validation and the generalization claim that larger models are more vulnerable is undermined by contradictory Llama-3 results.", 408 "red_flags": [ 409 { 410 "flag": "No statistical testing or variance", 411 "detail": "All comparative results are single point estimates with no confidence intervals, significance tests, or multiple runs across 10 models and 3 benchmarks, making it impossible to assess reliability." 412 }, 413 { 414 "flag": "Guard model as sole success criterion", 415 "detail": "ASR is measured only by LlamaGuard classifiers; the paper acknowledges these are imperfect proxies but does not quantify how often guard-confirmed 'attacks' produce genuinely actionable harmful content." 
416 }, 417 { 418 "flag": "Affirmative beginnings dataset not released", 419 "detail": "Paper-generated affirmative beginnings are a core artifact promised only 'after acceptance,' making full reproduction impossible at evaluation time." 420 }, 421 { 422 "flag": "Unsupported model-size vulnerability claim", 423 "detail": "The claim that larger LLMs are more susceptible is contradicted by the Llama-3 family and confounded by different alignment strategies and training cutoffs, without controlling for these variables." 424 }, 425 { 426 "flag": "Defense evaluation on single model only", 427 "detail": "Table 5 defense bypass results are reported only for Gemma-2-9B; DPR generalization to other model families is unverified." 428 }, 429 { 430 "flag": "No prompt text provided", 431 "detail": "The actual text of system replacement prompts, hypnosis dialogues, and answer guidance used in experiments is not disclosed, only structural descriptions, significantly limiting reproducibility." 432 } 433 ], 434 "cited_papers": [ 435 { 436 "title": "Universal and Transferable Adversarial Attacks on Aligned Language Models", 437 "relevance": "Introduces GCG white-box attack and AdvBench benchmark used as the primary evaluation dataset throughout" 438 }, 439 { 440 "title": "Making Them Ask and Answer: Jailbreaking Large Language Models in Few Queries via Disguise and Reconstruction", 441 "relevance": "DRA baseline compared directly across all experiments; key prior work on black-box jailbreak via token disguise" 442 }, 443 { 444 "title": "DeepInception: Hypnotize Large Language Model to be Jailbreaker", 445 "relevance": "Key baseline using fictional nested scenarios; DIA-I incorporates a hypnosis component inspired by this work" 446 }, 447 { 448 "title": "Jailbreaking Black Box Large Language Models in Twenty Queries", 449 "relevance": "PAIR baseline using attacker LLM to iteratively refine prompts; directly compared and used as auxiliary model substitute" 450 }, 451 { 452 "title": 
"Safety Alignment Should Be Made More Than Just a Few Tokens Deep", 453 "relevance": "Explains the prefilling attack vulnerability that DIA-I builds upon and the shallow alignment limitation DIA exploits" 454 }, 455 { 456 "title": "Fine-tuning Aligned Language Models Compromises Safety, Even When Users Do Not Intend To", 457 "relevance": "Provides HEx-PHI benchmark with 11 prohibited categories used as second primary evaluation dataset" 458 }, 459 { 460 "title": "A Wolf in Sheep's Clothing: Generalized Nested Jailbreak Prompts Can Fool Large Language Models Easily", 461 "relevance": "ReNe baseline with nested scenarios and prompt rewrite; directly compared and shown to sacrifice semantic integrity" 462 }, 463 { 464 "title": "Leveraging Context in Jailbreaking Attacks", 465 "relevance": "Prior work demonstrating context enhances jailbreak success, motivating DIA's historical dialogue manipulation approach" 466 } 467 ], 468 "engagement_factors": { 469 "practical_relevance": { 470 "score": 2, 471 "justification": "Demonstrates real bypass of GPT-4o and Llama safety systems with code available, directly actionable for security teams defending deployed chatbots." 472 }, 473 "surprise_contrarian": { 474 "score": 2, 475 "justification": "Counterintuitive finding that larger LLMs are more susceptible to jailbreak attacks challenges the assumption that scale improves safety alignment." 476 }, 477 "fear_safety": { 478 "score": 3, 479 "justification": "Shows 82% success rate bypassing GPT-4o safety measures and defeats 5 defense mechanisms including OpenAI's own moderation API, with code available for reproduction." 480 }, 481 "drama_conflict": { 482 "score": 2, 483 "justification": "Frames as arms race where prior attacks get patched into alignment training, motivating the need for novel multi-turn attack vectors; tests against current defenses." 
484 }, 485 "demo_ability": { 486 "score": 2, 487 "justification": "The code is on GitHub and the attack requires only chat API access, making it technically accessible; however, setup complexity (Ollama, ABGM pipeline) limits casual reproduction." 488 }, 489 "brand_recognition": { 490 "score": 2, 491 "justification": "Explicitly targets GPT-4o with measured results; the Llama and Gemma families are well-known, though the authors are from Chinese universities without a major-lab brand." 492 } 493 }, 494 "hn_data": { 495 "threads": [ 496 { 497 "hn_id": "22624980", 498 "title": "Neuroevolution of Self-Interpretable Agents", 499 "points": 5, 500 "comments": 1, 501 "url": "https://news.ycombinator.com/item?id=22624980" 502 }, 503 { 504 "hn_id": "46686419", 505 "title": "EnergyNet Explained: Internetification of Energy Distribution", 506 "points": 2, 507 "comments": 0, 508 "url": "https://news.ycombinator.com/item?id=46686419" 509 }, 510 { 511 "hn_id": "45988739", 512 "title": "Sheaf Topos Theory: A Powerful Setting for Lagrangian Field Theory", 513 "points": 2, 514 "comments": 0, 515 "url": "https://news.ycombinator.com/item?id=45988739" 516 }, 517 { 518 "hn_id": "35219050", 519 "title": "Large-scale end of life prediction of hard discs in distributed datacenters", 520 "points": 2, 521 "comments": 0, 522 "url": "https://news.ycombinator.com/item?id=35219050" 523 }, 524 { 525 "hn_id": "26338513", 526 "title": "Mixture of Volumetric Primitives for Efficient Neural Rendering", 527 "points": 2, 528 "comments": 0, 529 "url": "https://news.ycombinator.com/item?id=26338513" 530 }, 531 { 532 "hn_id": "45302505", 533 "title": "Verbalized Algorithms", 534 "points": 1, 535 "comments": 0, 536 "url": "https://news.ycombinator.com/item?id=45302505" 537 }, 538 { 539 "hn_id": "44791713", 540 "title": "MQFQ-Sticky: Fair Queueing for Serverless GPU Functions", 541 "points": 1, 542 "comments": 0, 543 "url": "https://news.ycombinator.com/item?id=44791713" 544 }, 545 { 546 "hn_id": "44450854", 547 "title": 
"Parallel-in-Time Preconditioning for Time-Dependent Variational Mean Field Games", 548 "points": 1, 549 "comments": 0, 550 "url": "https://news.ycombinator.com/item?id=44450854" 551 }, 552 { 553 "hn_id": "44326982", 554 "title": "Interpreting Agent Behaviors in RL-Based Cyber-Battle Simulation Platforms", 555 "points": 1, 556 "comments": 0, 557 "url": "https://news.ycombinator.com/item?id=44326982" 558 }, 559 { 560 "hn_id": "22631779", 561 "title": "Neuroevolution of Self-Interpretable Agents", 562 "points": 1, 563 "comments": 0, 564 "url": "https://news.ycombinator.com/item?id=22631779" 565 } 566 ], 567 "top_points": 5, 568 "total_points": 18, 569 "total_comments": 1 570 } 571 }