scan-v5.json (28814B)
1 { 2 "scan_version": 5, 3 "paper_type": "empirical", 4 "paper": { 5 "title": "Defending Against Indirect Prompt Injection Attacks With Spotlighting", 6 "authors": [ 7 "Keegan Hines", 8 "Gary Lopez", 9 "Matthew Hall", 10 "Federico Zarfati", 11 "Yonatan Zunger" 12 ], 13 "year": 2024, 14 "venue": "CAMLIS", 15 "arxiv_id": "2403.14720", 16 "doi": "10.48550/arXiv.2403.14720" 17 }, 18 "checklist": { 19 "claims_and_evidence": { 20 "abstract_claims_supported": { 21 "applies": true, 22 "answer": true, 23 "justification": "Abstract claims (ASR reduction from >50% to <2%) are supported by Figures 4-6, though specific numbers vary by technique and model. Datamarking achieves 0-3% ASR; encoding achieves 0-1.8% ASR.", 24 "source": "haiku" 25 }, 26 "causal_claims_justified": { 27 "applies": true, 28 "answer": true, 29 "justification": "Causal claims (spotlighting reduces ASR, does not impair performance) are tested via before/after comparisons with/without techniques. No randomization, but appropriate comparative design for prompt engineering evaluation.", 30 "source": "haiku" 31 }, 32 "generalization_bounded": { 33 "applies": true, 34 "answer": true, 35 "justification": "Results explicitly bounded to GPT-family models (text-davinci-003, GPT-3.5, GPT-4) and 2 task types (summarization, Q&A). Paper notes encoding only suitable for high-capacity models, but doesn't discuss applicability to non-OpenAI architectures.", 36 "source": "haiku" 37 }, 38 "alternative_explanations_discussed": { 39 "applies": true, 40 "answer": false, 41 "justification": "Paper states 'we lack a clear understanding of why spotlighting actually helps' (Section 6). 
Provides telecommunications analogy but no rigorous mechanism exploration or alternative hypotheses tested.", 42 "source": "haiku" 43 }, 44 "proxy_outcome_distinction": { 45 "applies": true, 46 "answer": true, 47 "justification": "Attack Success Rate is precisely defined in Section 4.2 and Appendix 8.1 as return of specific keyword; distinguished from Affected Success Rate (AffSR) in appendix. Clear mapping between measured outcome and claim.", 48 "source": "haiku" 49 } 50 }, 51 "limitations_and_scope": { 52 "limitations_section_present": { 53 "applies": true, 54 "answer": false, 55 "justification": "No dedicated limitations section. Caveats scattered across Results (Section 5.2-5.4), Discussion (Section 6), and Appendix (8.2), but not compiled into formal threats-to-validity discussion.", 56 "source": "haiku" 57 }, 58 "threats_to_validity_specific": { 59 "applies": true, 60 "answer": true, 61 "justification": "Specific threats discussed: encoding only for high-capacity models (5.2-5.3), few-shot knowledge-boundedness (Appendix 8.2), adversarial subversion paths per technique (5.4). Not systematic, but concrete.", 62 "source": "haiku" 63 }, 64 "scope_boundaries_stated": { 65 "applies": true, 66 "answer": true, 67 "justification": "Explicitly bounded to GPT-family black-box models (Section 4.1), summarization and Q&A tasks (Sections 4-5), synthetic keyword-based attacks. Does not discuss generalization to open-source models, other domains, or sophisticated attack strategies.", 68 "source": "haiku" 69 } 70 }, 71 "conflicts_of_interest": { 72 "funding_disclosed": { 73 "applies": true, 74 "answer": false, 75 "justification": "No funding disclosure or acknowledgments section visible in paper. Authors list Microsoft affiliation but no funding source stated.", 76 "source": "haiku" 77 }, 78 "affiliations_disclosed": { 79 "applies": true, 80 "answer": true, 81 "justification": "All authors listed as Microsoft. 
Relevant because paper evaluates OpenAI models (competitors), but affiliation clearly stated.", 82 "source": "haiku" 83 }, 84 "funder_independent_of_outcome": { 85 "applies": true, 86 "answer": true, 87 "justification": "Microsoft (employer) does not provide the models being evaluated (OpenAI). Microsoft benefits from LLM security broadly, but not directly from OpenAI product improvement.", 88 "source": "haiku" 89 }, 90 "financial_interests_declared": { 91 "applies": true, 92 "answer": false, 93 "justification": "No competing interests statement or financial interests declaration present.", 94 "source": "haiku" 95 } 96 }, 97 "scope_and_framing": { 98 "key_terms_defined": { 99 "applies": true, 100 "answer": true, 101 "justification": "Key terms defined: Indirect prompt injection attacks/XPIA (2.2), Attack Success Rate (4.2, Appendix 8.1), spotlighting family (3.0), datamarking/encoding/delimiting (3.2-3.4).", 102 "source": "haiku" 103 }, 104 "intended_contribution_clear": { 105 "applies": true, 106 "answer": true, 107 "justification": "Contribution explicitly stated: introduce spotlighting (family of three prompt engineering techniques: delimiting, datamarking, encoding) for defending against indirect prompt injection attacks. Evaluation on effectiveness is clearly framed.", 108 "source": "haiku" 109 }, 110 "engagement_with_prior_work": { 111 "applies": true, 112 "answer": true, 113 "justification": "Related work section (2.2-2.3) cites Yi et al. 2023 on XPIA, Greshake/Bard attacks, safety alignment work. Paper states 'Early versions of some of these techniques have been described previously [2], and here we expand the results,' but doesn't deeply contrast novelty from prior approaches.", 114 "source": "haiku" 115 } 116 } 117 }, 118 "type_checklist": { 119 "empirical": { 120 "artifacts": { 121 "code_released": { 122 "applies": true, 123 "answer": false, 124 "justification": "No code repository, GitHub link, or supplementary code mentioned. 
Techniques described in prose and example prompts provided, but no deployable implementation.", 125 "source": "haiku" 126 }, 127 "data_released": { 128 "applies": true, 129 "answer": false, 130 "justification": "Synthetic 1000-document attack dataset not released or available. Standard benchmarks (SQuAD, IMDB, SuperGLUE) are public but not the paper's attack corpus.", 131 "source": "haiku" 132 }, 133 "environment_specified": { 134 "applies": true, 135 "answer": false, 136 "justification": "Model names and temperature (1.0) specified, but no requirements.txt, Dockerfile, conda env, or reproducibility config provided. API details minimal.", 137 "source": "haiku" 138 }, 139 "reproduction_instructions": { 140 "applies": true, 141 "answer": false, 142 "justification": "Techniques described in natural language with example prompts shown (Sections 3.2-3.4), but no step-by-step reproduction instructions or automation scripts. Implementation would require custom development.", 143 "source": "haiku" 144 } 145 }, 146 "statistical_methodology": { 147 "confidence_intervals_or_error_bars": { 148 "applies": true, 149 "answer": false, 150 "justification": "Figures 3-8 show point estimates without error bars or confidence intervals. Single run reported per condition, no variance bounds.", 151 "source": "haiku" 152 }, 153 "significance_tests": { 154 "applies": true, 155 "answer": false, 156 "justification": "No p-values, t-tests, or statistical significance tests reported. ASR reductions presented as raw percentages without hypothesis testing.", 157 "source": "haiku" 158 }, 159 "effect_sizes_reported": { 160 "applies": true, 161 "answer": true, 162 "justification": "ASR reductions reported in percentage points (e.g., 50%→3%, 60%→0%). 
Effect sizes quantified; not just p-values.", 163 "source": "haiku" 164 }, 165 "sample_size_justified": { 166 "applies": true, 167 "answer": false, 168 "justification": "Paper states 'we generated a synthetic dataset of 1000 documents' but does not justify this sample size or discuss power analysis. No minimum sample size calculated.", 169 "source": "haiku" 170 }, 171 "variance_reported": { 172 "applies": true, 173 "answer": false, 174 "justification": "No multiple runs with different random seeds shown. No SD/variance/min-max ranges reported. Results presented as single-point estimates.", 175 "source": "haiku" 176 } 177 }, 178 "evaluation_design": { 179 "baselines_included": { 180 "applies": true, 181 "answer": true, 182 "justification": "Multiple baselines compared: no defense (baseline ASR), instruction-only, delimiting, datamarking, encoding. Also compared to few-shot approach in appendix.", 183 "source": "haiku" 184 }, 185 "baselines_contemporary": { 186 "applies": true, 187 "answer": true, 188 "justification": "Baselines are contemporary GPT models (June 2023 snapshots). However, no comparison to other defense methods from Section 2.3 (fine-tuning, other prompt-engineering defenses).", 189 "source": "haiku" 190 }, 191 "ablation_study": { 192 "applies": true, 193 "answer": true, 194 "justification": "Three spotlighting instantiations (delimiting, datamarking, encoding) serve as ablations of increasing sophistication. Progressive improvements shown (Figures 3-6).", 195 "source": "haiku" 196 }, 197 "multiple_metrics": { 198 "applies": true, 199 "answer": true, 200 "justification": "Primary metric: Attack Success Rate (ASR). Secondary metrics: task performance on 4 NLP benchmarks (SQuAD, IMDB, SuperGLUE BoolQ, SuperGLUE WiC). Figures 7-8 show accuracy impacts.", 201 "source": "haiku" 202 }, 203 "human_evaluation": { 204 "applies": false, 205 "answer": false, 206 "justification": "No human evaluation of model outputs. 
Not clearly required for technical evaluation of prompt injection defense.", 207 "source": "haiku" 208 }, 209 "held_out_test_set": { 210 "applies": true, 211 "answer": true, 212 "justification": "Standard benchmarks use held-out test sets (SQuAD, IMDB, SuperGLUE are standard). For synthetic attack corpus, no train/test split mentioned; single 1000-document set.", 213 "source": "haiku" 214 }, 215 "per_category_breakdown": { 216 "applies": true, 217 "answer": true, 218 "justification": "Results broken down by model (text-davinci-003, GPT-3.5-Turbo, GPT-4), task type (summarization, Q&A), and technique (delimiting, datamarking, encoding). Benchmark breakdowns in Figures 7-8.", 219 "source": "haiku" 220 }, 221 "failure_cases_discussed": { 222 "applies": true, 223 "answer": true, 224 "justification": "Encoding fails with GPT-3.5-Turbo (Figure 8, task performance degradation). Delimiting shown insufficient (Figure 3). Appendix 8.2 discusses few-shot caveats. Limited analysis of attack vectors spotlighting cannot defend against.", 225 "source": "haiku" 226 }, 227 "negative_results_reported": { 228 "applies": true, 229 "answer": true, 230 "justification": "Paper reports: delimiting alone insufficient, encoding hurts task accuracy with weaker models, few-shot examples overfit to known attacks. Honest about limitations.", 231 "source": "haiku" 232 } 233 }, 234 "setup_transparency": { 235 "model_versions_specified": { 236 "applies": true, 237 "answer": true, 238 "justification": "Specific model snapshots: text-davinci-003, GPT-3.5-Turbo (June 2023), GPT-4 (June 2023). Dates provided for reproducibility.", 239 "source": "haiku" 240 }, 241 "prompts_provided": { 242 "applies": true, 243 "answer": true, 244 "justification": "Full example system prompts shown for: instructions-only baseline (4.2), delimiting (3.2), datamarking (3.3), encoding (3.4). 
Templates can be copied directly.", 245 "source": "haiku" 246 }, 247 "hyperparameters_reported": { 248 "applies": true, 249 "answer": true, 250 "justification": "Temperature=1.0 specified with note: 'We examined the effect of temperature on XPIA susceptibility and found no notable impact.' Only temperature reported; no top-p, frequency_penalty, etc.", 251 "source": "haiku" 252 }, 253 "scaffolding_described": { 254 "applies": false, 255 "answer": false, 256 "justification": "Not an agentic system; pure prompt engineering. No scaffolding (tools, actions, loops) to describe.", 257 "source": "haiku" 258 }, 259 "data_preprocessing_documented": { 260 "applies": true, 261 "answer": false, 262 "justification": "Attack dataset described as 'synthetic... containing prompt injection attacks' with 'variations on a simple keyword payload attack,' but generation algorithm/process not documented. No code for reproducing dataset.", 263 "source": "haiku" 264 } 265 }, 266 "data_integrity": { 267 "raw_data_available": { 268 "applies": true, 269 "answer": false, 270 "justification": "Synthetic 1000-document attack corpus not released or available for verification. Standard benchmark raw data (SQuAD, IMDB) are publicly available but not paper-specific.", 271 "source": "haiku" 272 }, 273 "data_collection_described": { 274 "applies": true, 275 "answer": true, 276 "justification": "Attack data: 'generated synthetic dataset of 1000 documents... variations on simple keyword payload attack.' Benchmarks: uses standard published datasets. Description adequate for understanding but not for reproduction.", 277 "source": "haiku" 278 }, 279 "recruitment_methods_described": { 280 "applies": false, 281 "answer": false, 282 "justification": "No human subjects; N/A.", 283 "source": "haiku" 284 }, 285 "data_pipeline_documented": { 286 "applies": true, 287 "answer": true, 288 "justification": "For benchmarks, standard pipelines used. 
For attack dataset, pipeline partially described: documents → prompts with models → responses → ASR scoring. Full generation process not detailed.", 289 "source": "haiku" 290 } 291 }, 292 "contamination": { 293 "training_cutoff_stated": { 294 "applies": true, 295 "answer": false, 296 "justification": "Models identified by snapshot (June 2023) but exact training data cutoff dates not stated. Paper does not discuss what dates these versions were trained on.", 297 "source": "haiku" 298 }, 299 "train_test_overlap_discussed": { 300 "applies": true, 301 "answer": false, 302 "justification": "No discussion of whether benchmark test sets (SQuAD 2016, IMDB, SuperGLUE) may have been in training data of June 2023 model snapshots.", 303 "source": "haiku" 304 }, 305 "benchmark_contamination_addressed": { 306 "applies": true, 307 "answer": false, 308 "justification": "Synthetic attack dataset is new, so no contamination there. But standard benchmarks potentially contaminated—not addressed. Paper evaluates model performance on these benchmarks without discussing potential data leakage.", 309 "source": "haiku" 310 } 311 }, 312 "human_studies": { 313 "pre_registered": { 314 "applies": false, 315 "answer": false, 316 "justification": "No human subjects; N/A.", 317 "source": "haiku" 318 }, 319 "irb_or_ethics_approval": { 320 "applies": false, 321 "answer": false, 322 "justification": "No human subjects; N/A.", 323 "source": "haiku" 324 }, 325 "demographics_reported": { 326 "applies": false, 327 "answer": false, 328 "justification": "No human subjects; N/A.", 329 "source": "haiku" 330 }, 331 "inclusion_exclusion_criteria": { 332 "applies": false, 333 "answer": false, 334 "justification": "No human subjects; N/A.", 335 "source": "haiku" 336 }, 337 "randomization_described": { 338 "applies": false, 339 "answer": false, 340 "justification": "No human subjects; N/A.", 341 "source": "haiku" 342 }, 343 "blinding_described": { 344 "applies": false, 345 "answer": false, 346 "justification": 
"No human subjects; N/A.", 347 "source": "haiku" 348 }, 349 "attrition_reported": { 350 "applies": false, 351 "answer": false, 352 "justification": "No human subjects; N/A.", 353 "source": "haiku" 354 } 355 }, 356 "cost_and_practicality": { 357 "inference_cost_reported": { 358 "applies": true, 359 "answer": false, 360 "justification": "No inference cost ($ per API call) or latency reported. Experiments used OpenAI API but no pricing/time data disclosed.", 361 "source": "haiku" 362 }, 363 "compute_budget_stated": { 364 "applies": true, 365 "answer": false, 366 "justification": "Total computational budget ($ or compute hours) not stated. 1000 attack documents × 3 models × multiple tasks = thousands of API calls, but no aggregate cost reported.", 367 "source": "haiku" 368 } 369 } 370 } 371 }, 372 "claims": [ 373 { 374 "claim": "Spotlighting via datamarking reduces attack success rate (ASR) from ~50% to <3% with GPT-3.5-Turbo and to 0% with text-davinci-003", 375 "evidence": "Figure 4 (document summarization) and Figure 5 (Q&A tasks) show ASR percentages across models. Specific numbers: GPT-3.5-Turbo 50%→3.1%, Text-003 40%→0%.", 376 "supported": "strong" 377 }, 378 { 379 "claim": "Spotlighting via encoding reduces ASR to 0-1.8% across tasks", 380 "evidence": "Figure 6 shows encoding results: summarization 0.0% ASR with GPT-3.5-Turbo, Q&A 1.8% ASR. 
Consistent across models.", 381 "supported": "strong" 382 }, 383 { 384 "claim": "Datamarking transformations have minimal detrimental impact on downstream NLP task performance", 385 "evidence": "Figure 7 shows no detrimental effect on SQuAD, IMDB, SuperGLUE BoolQ/WiC benchmarks with datamarking present.", 386 "supported": "strong" 387 }, 388 { 389 "claim": "Encoding transformations degrade task performance with GPT-3.5-Turbo but not GPT-4", 390 "evidence": "Figure 8 shows GPT-3.5-Turbo accuracy drops significantly with encoding (top row), while GPT-4 maintains high accuracy (bottom row).", 391 "supported": "strong" 392 }, 393 { 394 "claim": "Simple instructions to avoid prompt injection have 'almost no added benefit' for GPT-3.5-Turbo", 395 "evidence": "Figure 2 shows instructions-only approach yields minimal ASR reduction for GPT-3.5-Turbo vs baseline.", 396 "supported": "moderate" 397 }, 398 { 399 "claim": "Spotlighting is more robust than simple delimiting because adversaries with knowledge of system prompts can easily subvert delimiters", 400 "evidence": "Section 5.4 discusses adversary considerations: 'If an adversary gains knowledge of our system prompt... it would be simple to craft a string that contains our delimiters.' Datamarking/encoding harder to subvert with dynamic tokens.", 401 "supported": "moderate" 402 }, 403 { 404 "claim": "Few-shot examples can reduce ASR below 5% but risk overfitting to known attack patterns", 405 "evidence": "Appendix 8.2 shows Figure 9 with few-shot examples achieving <5% ASR, but text cautions: 'relying on in-context learning will always be limited by our current understanding of typical attack tactics.'", 406 "supported": "moderate" 407 } 408 ], 409 "methodology_tags": [ 410 "benchmark-eval" 411 ], 412 "key_findings": "Spotlighting, a family of three prompt engineering techniques (delimiting, datamarking, encoding), significantly reduces indirect prompt injection attack success rate from 50%+ to below 2%. 
Datamarking achieves this reduction with minimal impact on downstream NLP task performance across multiple benchmarks. Encoding is most effective but only suitable for high-capacity models (GPT-4), as it degrades performance in GPT-3.5-Turbo. The findings suggest that structural transformations making input provenance more salient to models are necessary because simple instructions alone are insufficient defense.", 413 "red_flags": [ 414 { 415 "flag": "No statistical significance testing", 416 "detail": "All results reported as point estimates without confidence intervals, error bars, standard deviations, or p-values. Cannot assess whether observed ASR differences are statistically reliable or due to random variation." 417 }, 418 { 419 "flag": "Synthetic attack dataset not released", 420 "detail": "The 1000-document corpus used for evaluation is not available for independent verification or reproduction. Limits scientific reproducibility." 421 }, 422 { 423 "flag": "Sample size not justified", 424 "detail": "No power analysis or justification provided for why 1000 attack documents is sufficient. No minimum sample size calculated based on effect sizes." 425 }, 426 { 427 "flag": "Limited to GPT models only", 428 "detail": "Evaluation only on OpenAI models (text-davinci-003, GPT-3.5, GPT-4). Generalization to Llama, Claude, and other LLMs unknown." 429 }, 430 { 431 "flag": "Attacks are simplistic", 432 "detail": "All attacks are 'variations on a simple keyword payload attack.' May not reflect sophisticated adversarial strategies that target semantic vulnerabilities or use knowledge of spotlighting techniques." 433 }, 434 { 435 "flag": "No code or data release", 436 "detail": "No GitHub repository, supplementary materials, or code artifacts provided. Implementation requires custom development from prose descriptions." 
437 }, 438 { 439 "flag": "No comparison to alternative defenses", 440 "detail": "Paper discusses other approaches (fine-tuning, alignment tuning, classifiers) in Section 2.3 but does not empirically compare spotlighting to any competing defense methods." 441 }, 442 { 443 "flag": "Training data contamination not addressed", 444 "detail": "Benchmark test sets (SQuAD 2016, IMDB, SuperGLUE) may have been in training data of June 2023 LLM snapshots. Potential data leakage not discussed." 445 }, 446 { 447 "flag": "Mechanism unclear", 448 "detail": "Paper acknowledges 'we lack a clear understanding of why spotlighting actually helps' (Section 6). No mechanistic explanation or ablation to understand which aspects of marking/encoding are necessary." 449 }, 450 { 451 "flag": "Adversarial evaluation incomplete", 452 "detail": "Section 5.4 discusses attack vectors against each technique but does not empirically test whether sophisticated adversaries can craft attacks that bypass spotlighting." 453 } 454 ], 455 "cited_papers": [ 456 { 457 "title": "Benchmarking and Defending Against Indirect Prompt Injection Attacks on Large Language Models", 458 "relevance": "Core prior work on XPIA problem; paper extends some spotlighting techniques from this baseline [Yi et al. 
2023]" 459 }, 460 { 461 "title": "More than you've asked for: A Comprehensive Analysis of Novel Prompt Injection Threats to Application-Integrated Large Language Models", 462 "relevance": "Foundational work identifying indirect prompt injection threats in LLM systems [Greshake et al.]" 463 }, 464 { 465 "title": "How We Broke LLMs: Indirect Prompt Injection", 466 "relevance": "Early demonstration of XPIA vulnerability in practice [Greshake blog post, 2022]" 467 }, 468 { 469 "title": "Hacking Google Bard - From Prompt Injection to Data Exfiltration", 470 "relevance": "Empirical demonstration of XPIA attack enabling data exfiltration in real deployed system [Wunderwuzzi]" 471 }, 472 { 473 "title": "Chain-of-Thought Prompting Elicits Reasoning in Large Language Models", 474 "relevance": "Foundation for understanding prompt engineering effectiveness and model instruction-following behavior [Wei et al.]" 475 }, 476 { 477 "title": "Universal and Transferable Adversarial Attacks on Aligned Language Models", 478 "relevance": "Relevant for understanding adversarial robustness of LLMs and potential attack transferability [Zou et al. 2023]" 479 }, 480 { 481 "title": "SQuAD: 100,000+ Questions for Machine Comprehension of Text", 482 "relevance": "Benchmark used for evaluating downstream task performance impact of spotlighting transformations [Rajpurkar et al.]" 483 }, 484 { 485 "title": "SuperGLUE: A Stickier Benchmark for General-Purpose Language Understanding Systems", 486 "relevance": "Benchmark used to evaluate spotlighting impact on multiple NLP tasks [Wang et al.]" 487 } 488 ], 489 "engagement_factors": { 490 "practical_relevance": { 491 "score": 3, 492 "justification": "Directly implementable in production LLM systems today. Requires only prompt engineering changes, not model retraining. Teams can add datamarking/encoding immediately." 
493 }, 494 "surprise_contrarian": { 495 "score": 2, 496 "justification": "Core insight (marking provenance helps models distinguish code from data) is intuitive once stated, though specific techniques are novel. Does not challenge conventional wisdom fundamentally." 497 }, 498 "fear_safety": { 499 "score": 2, 500 "justification": "Addresses real prompt injection vulnerability in deployed systems. However, positions spotlighting as limited defense ('security against interference' not 'perfectly secure'), avoiding overclaiming." 501 }, 502 "demo_ability": { 503 "score": 2, 504 "justification": "Practitioners can implement spotlighting prompts immediately, but full evaluation requires GPT API access and attack corpus. Not fully reproducible without released code/data." 505 }, 506 "brand_recognition": { 507 "score": 2, 508 "justification": "Authors from Microsoft (reputable), but no Nobel laureate labs or breakthrough-tier recognition. Venue (CAMLIS) is specialized security conference, not top-tier ML venue." 509 }, 510 "drama_conflict": { 511 "score": 1, 512 "justification": "Straightforward technical contribution with no controversy. No competing claims, no debate about methods or findings. Lacking narrative tension." 
513 } 514 }, 515 "hn_data": { 516 "threads": [ 517 { 518 "hn_id": "22768143", 519 "title": "Deep Molecular Programming", 520 "points": 130, 521 "comments": 11, 522 "url": "https://news.ycombinator.com/item?id=22768143" 523 }, 524 { 525 "hn_id": "39466681", 526 "title": "Coercing LLMs to do and reveal almost anything", 527 "points": 12, 528 "comments": 1, 529 "url": "https://news.ycombinator.com/item?id=39466681" 530 }, 531 { 532 "hn_id": "45489599", 533 "title": "Tutorials for Sandia's Lammps Simulation Package", 534 "points": 8, 535 "comments": 1, 536 "url": "https://news.ycombinator.com/item?id=45489599" 537 }, 538 { 539 "hn_id": "44478832", 540 "title": "CodingGenie: A Proactive LLM-Powered Programming Assistant", 541 "points": 5, 542 "comments": 0, 543 "url": "https://news.ycombinator.com/item?id=44478832" 544 }, 545 { 546 "hn_id": "23363404", 547 "title": "“Periodic table” for protons in the nucleus", 548 "points": 4, 549 "comments": 0, 550 "url": "https://news.ycombinator.com/item?id=23363404" 551 }, 552 { 553 "hn_id": "44415220", 554 "title": "Storm – Help LLMs to write very long articles", 555 "points": 2, 556 "comments": 0, 557 "url": "https://news.ycombinator.com/item?id=44415220" 558 }, 559 { 560 "hn_id": "43540243", 561 "title": "AttentionRAG: Attention-Guided Context Pruning in Retrieval-Augmented Generation", 562 "points": 2, 563 "comments": 0, 564 "url": "https://news.ycombinator.com/item?id=43540243" 565 }, 566 { 567 "hn_id": "41125541", 568 "title": "Solving the Traveling Salesman Problem Using a Single Qubit", 569 "points": 2, 570 "comments": 0, 571 "url": "https://news.ycombinator.com/item?id=41125541" 572 }, 573 { 574 "hn_id": "41066825", 575 "title": "Solving the Travelling Salesman Problem Using a Single Qubit", 576 "points": 2, 577 "comments": 0, 578 "url": "https://news.ycombinator.com/item?id=41066825" 579 }, 580 { 581 "hn_id": "40822524", 582 "title": "Do LLMs Have Distinct and Consistent Personality?", 583 "points": 2, 584 "comments": 0, 
585 "url": "https://news.ycombinator.com/item?id=40822524" 586 } 587 ], 588 "top_points": 130, 589 "total_points": 169, 590 "total_comments": 13 591 } 592 }