scan-v4.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v4.json (26938B)
      1 {
      2   "scan_version": 4,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Dialogue Injection Attack: Jailbreaking LLMs Through Context Manipulation",
      6     "authors": ["Wenlong Meng", "Fan Zhang", "Wendao Yao", "Zhenyuan Guo", "Yuwei Li"],
      7     "year": 2025,
      8     "venue": "IEEE Transactions on Information Forensics and Security",
      9     "arxiv_id": "2503.08195",
     10     "doi": "10.1109/TIFS.2026.3657898"
     11   },
     12   "checklist": {
     13     "claims_and_evidence": {
     14       "abstract_claims_supported": {
     15         "applies": true,
     16         "answer": true,
     17         "justification": "The abstract claims 0.89 ASR on Llama-3.1-8B and 0.82 on GPT-4o after 10 queries, and bypass of 5 defenses; Tables 2-5 and Figures 5/7 provide supporting data for these figures.",
     18         "source": "haiku"
     19       },
     20       "causal_claims_justified": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "Ablation studies (Table 4, Figure 9) isolate contributions of individual DIA components (system prompt, hypnosis, answer guidance) and prompt rewrite, providing reasonable evidence for causal attribution of ASR improvements.",
     24         "source": "haiku"
     25       },
     26       "generalization_bounded": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "The conclusion broadly claims DIA 'enhances the effectiveness of jailbreak attacks' without bounding scope; performance varies dramatically across models (e.g., DIA-I ASR 0.000 vs 0.906 on different models), yet broad generalizations are made.",
     30         "source": "haiku"
     31       },
     32       "alternative_explanations_discussed": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "The paper asserts structural exploitation of chat templates and log-likelihood effects as the mechanism but does not consider competing explanations for why dialogue injection succeeds where single-turn attacks fail.",
     36         "source": "haiku"
     37       },
     38       "proxy_outcome_distinction": {
     39         "applies": true,
     40         "answer": true,
     41         "justification": "The paper explicitly discusses the difficulty of measuring jailbreak success and explains its choice of LlamaGuard-2/3 over refusal-phrase detection or GPT-4 judging, acknowledging the proxy nature of the metric.",
     42         "source": "haiku"
     43       }
     44     },
     45     "limitations_and_scope": {
     46       "limitations_section_present": {
     47         "applies": true,
     48         "answer": false,
     49         "justification": "There is no dedicated limitations or threats-to-validity section; Section 8 is an ethics consideration about responsible disclosure, not a methodological limitations discussion.",
     50         "source": "haiku"
     51       },
     52       "threats_to_validity_specific": {
     53         "applies": true,
     54         "answer": false,
     55         "justification": "No specific threats to validity are identified; the ethics section mentions controlled environments but does not discuss threats to internal or external validity of the experimental results.",
     56         "source": "haiku"
     57       },
     58       "scope_boundaries_stated": {
     59         "applies": true,
     60         "answer": false,
     61         "justification": "The paper does not explicitly state what the results do not show; it does not bound claims to specific model families, deployment scenarios, or attack surface assumptions.",
     62         "source": "haiku"
     63       }
     64     },
     65     "conflicts_of_interest": {
     66       "funding_disclosed": {
     67         "applies": true,
     68         "answer": false,
     69         "justification": "No funding source is disclosed anywhere in the paper.",
     70         "source": "haiku"
     71       },
     72       "affiliations_disclosed": {
     73         "applies": true,
     74         "answer": true,
     75         "justification": "Author affiliations are disclosed: Zhejiang University and National University of Defense Technology, with email addresses provided.",
     76         "source": "haiku"
     77       },
     78       "funder_independent_of_outcome": {
     79         "applies": false,
     80         "answer": false,
     81         "justification": "No funding is disclosed, making this criterion not applicable.",
     82         "source": "haiku"
     83       },
     84       "financial_interests_declared": {
     85         "applies": true,
     86         "answer": false,
     87         "justification": "No competing interests or financial interests statement appears in the paper.",
     88         "source": "haiku"
     89       }
     90     },
     91     "scope_and_framing": {
     92       "key_terms_defined": {
     93         "applies": true,
     94         "answer": true,
     95         "justification": "Key terms are defined: jailbreak attacks, white-box/gray-box/black-box threat models, chat templates, DIA, ASR, and the DIA-I/DIA-II distinction are all defined in Sections 2-4.",
     96         "source": "haiku"
     97       },
     98       "intended_contribution_clear": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "Three explicit contributions are stated: the DIA paradigm, two adversarial dialogue construction methods (DIA-I and DIA-II with ABGM/SDGM modules), and comparative evaluation against 4 baselines on 3 benchmarks with 10 models.",
    102         "source": "haiku"
    103       },
    104       "engagement_with_prior_work": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "Section 6 systematically situates DIA relative to white-box (GCG, AutoDAN), gray-box (prefilling attacks), and black-box (DeepInception, DRA, PAIR, ReNe) attacks, and reviews both learning-based and strategy-based defenses.",
    108         "source": "haiku"
    109       }
    110     }
    111   },
    112   "type_checklist": {
    113     "empirical": {
    114       "artifacts": {
    115         "code_released": {
    116           "applies": true,
    117           "answer": true,
    118           "justification": "Code is stated as available at https://github.com/meng-wenlong/DIA (footnote 1); note the generated affirmative beginnings dataset is promised only post-acceptance.",
    119           "source": "haiku"
    120         },
    121         "data_released": {
    122           "applies": true,
    123           "answer": true,
    124           "justification": "All three benchmarks (AdvBench, HEx-PHI, MaliciousInstruct) are standard publicly available datasets downloadable from HuggingFace Datasets.",
    125           "source": "haiku"
    126         },
    127         "environment_specified": {
    128           "applies": true,
    129           "answer": false,
    130           "justification": "Hardware is specified (4x A100 80G GPUs, Intel Xeon 8358, 1TB memory) and Ollama is mentioned, but no requirements.txt, Dockerfile, or version-pinned dependency list is provided.",
    131           "source": "haiku"
    132         },
    133         "reproduction_instructions": {
    134           "applies": true,
    135           "answer": false,
    136           "justification": "No step-by-step reproduction instructions are in the paper; only a GitHub URL and high-level experimental setup description are provided.",
    137           "source": "haiku"
    138         }
    139       },
    140       "statistical_methodology": {
    141         "confidence_intervals_or_error_bars": {
    142           "applies": true,
    143           "answer": false,
    144           "justification": "All results are reported as single-point ASR values without confidence intervals, error bars, or any measure of variance across repeated runs.",
    145           "source": "haiku"
    146         },
    147         "significance_tests": {
    148           "applies": true,
    149           "answer": false,
    150           "justification": "No statistical significance tests are conducted for any comparative claims between DIA and baselines across all experiments.",
    151           "source": "haiku"
    152         },
    153         "effect_sizes_reported": {
    154           "applies": true,
    155           "answer": true,
    156           "justification": "Absolute ASR values are reported for all methods and models, enabling direct comparison of effect magnitudes (e.g., DIA-II 0.654 vs DRA 0.002 on Llama-3.1-8B).",
    157           "source": "haiku"
    158         },
    159         "sample_size_justified": {
    160           "applies": true,
    161           "answer": false,
    162           "justification": "The paper uses benchmark sizes as given (520, 330, 100 prompts) without justification for whether these are sufficient for reliable ASR estimation at the precision reported.",
    163           "source": "haiku"
    164         },
    165         "variance_reported": {
    166           "applies": true,
    167           "answer": false,
    168           "justification": "No variance or standard deviation is reported across runs; all ASR values are single-point estimates.",
    169           "source": "haiku"
    170         }
    171       },
    172       "evaluation_design": {
    173         "baselines_included": {
    174           "applies": true,
    175           "answer": true,
    176           "justification": "Four baselines are included: DeepInception, ReNe, PAIR, and DRA, each representing different black-box jailbreak strategies.",
    177           "source": "haiku"
    178         },
    179         "baselines_contemporary": {
    180           "applies": true,
    181           "answer": true,
    182           "justification": "Baselines span 2023-2024 (DeepInception 2023, ReNe 2023, PAIR 2023, DRA 2024) and represent state-of-the-art black-box attacks at time of submission.",
    183           "source": "haiku"
    184         },
    185         "ablation_study": {
    186           "applies": true,
    187           "answer": true,
    188           "justification": "Table 4 ablates individual dialogue components (system, hypnosis, guidance) and Figure 9 ablates the prompt rewrite algorithm across Llama-2-7B and Llama-3-8B.",
    189           "source": "haiku"
    190         },
    191         "multiple_metrics": {
    192           "applies": true,
    193           "answer": true,
    194           "justification": "Results are reported with two evaluators (LlamaGuard-2 and LlamaGuard-3), across three benchmarks, and with a separate defense pass rate metric in Table 5.",
    195           "source": "haiku"
    196         },
    197         "human_evaluation": {
    198           "applies": false,
    199           "answer": false,
    200           "justification": "Human evaluation is explicitly excluded as impractical for large-scale testing; LlamaGuard classifiers are used instead, which the paper acknowledges are imperfect proxies.",
    201           "source": "haiku"
    202         },
    203         "held_out_test_set": {
    204           "applies": false,
    205           "answer": false,
    206           "justification": "Not a prediction task; evaluation is on fixed jailbreak benchmarks measuring attack success rate, not generalization.",
    207           "source": "haiku"
    208         },
    209         "per_category_breakdown": {
    210           "applies": true,
    211           "answer": false,
    212           "justification": "HEx-PHI contains 11 prohibited categories (illegal activity, fraud, privacy violation, etc.) but no per-category breakdown of ASR is reported.",
    213           "source": "haiku"
    214         },
    215         "failure_cases_discussed": {
    216           "applies": true,
    217           "answer": true,
    218           "justification": "The paper discusses failure cases: DIA-I has near-zero ASR on Llama-2-7B without rewriting, and single-query DIA-I ASR on Llama-3.1-8B is 0.000 (LlamaGuard-2); these are discussed in context.",
    219           "source": "haiku"
    220         },
    221         "negative_results_reported": {
    222           "applies": true,
    223           "answer": true,
    224           "justification": "The paper explicitly notes DRA achieves near-zero ASR on Llama-3.1-8B and GPT-4o, and DIA-I single-query ASR is near zero on several models; these negative results are presented in tables.",
    225           "source": "haiku"
    226         }
    227       },
    228       "setup_transparency": {
    229         "model_versions_specified": {
    230           "applies": true,
    231           "answer": false,
    232           "justification": "Closed-source models have snapshot dates (gpt-4o-2024-08-06, gpt-4o-mini-2024-07-18), but open-source models are identified only by family and size via Ollama with no specific checkpoint hashes or versions.",
    233           "source": "haiku"
    234         },
    235         "prompts_provided": {
    236           "applies": true,
    237           "answer": false,
    238           "justification": "The paper describes prompt structure and dialogue components conceptually but does not provide the actual system prompt text, hypnosis prompts, or continue commands used in experiments.",
    239           "source": "haiku"
    240         },
    241         "hyperparameters_reported": {
    242           "applies": true,
    243           "answer": false,
    244           "justification": "Temperature, top-p, and other LLM sampling hyperparameters are not reported; only NT_max=5 for template inference and iteration counts for multi-query attacks are mentioned.",
    245           "source": "haiku"
    246         },
    247         "scaffolding_described": {
    248           "applies": true,
    249           "answer": true,
    250           "justification": "The DIA pipeline is described in detail including ABGM (Algorithm 1), SDGM, prompt rewrite (Algorithm 2), and template inference attack (Algorithm 3) with pseudocode.",
    251           "source": "haiku"
    252         },
    253         "data_preprocessing_documented": {
    254           "applies": true,
    255           "answer": false,
    256           "justification": "Beyond downloading benchmarks from HuggingFace, no preprocessing steps (tokenization, filtering, formatting adjustments) are documented.",
    257           "source": "haiku"
    258         }
    259       },
    260       "data_integrity": {
    261         "raw_data_available": {
    262           "applies": true,
    263           "answer": false,
    264           "justification": "Raw model outputs and attack results are not made available; only code is released. The generated affirmative beginnings are promised post-acceptance only.",
    265           "source": "haiku"
    266         },
    267         "data_collection_described": {
    268           "applies": true,
    269           "answer": true,
    270           "justification": "The three benchmarks are described with their sources (HuggingFace), sizes (520, 330, 100 prompts), format (imperative vs question), and mean token lengths.",
    271           "source": "haiku"
    272         },
    273         "recruitment_methods_described": {
    274           "applies": false,
    275           "answer": false,
    276           "justification": "No human participants; standard pre-existing benchmarks are used.",
    277           "source": "haiku"
    278         },
    279         "data_pipeline_documented": {
    280           "applies": true,
    281           "answer": false,
    282           "justification": "The pipeline from benchmark download to ASR calculation is described at a high level via algorithms, but insufficient detail for replication without the code repository.",
    283           "source": "haiku"
    284         }
    285       },
    286       "contamination": {
    287         "training_cutoff_stated": {
    288           "applies": true,
    289           "answer": false,
    290           "justification": "Training cutoffs are mentioned incidentally for Llama-3 models (Dec 2023 / Mar 2023) to explain one anomaly, but not systematically stated for all 10 evaluated models.",
    291           "source": "haiku"
    292         },
    293         "train_test_overlap_discussed": {
    294           "applies": false,
    295           "answer": false,
    296           "justification": "This paper evaluates attack success rates, not model capabilities on knowledge benchmarks, so train/test overlap is not a relevant concern.",
    297           "source": "haiku"
    298         },
    299         "benchmark_contamination_addressed": {
    300           "applies": false,
    301           "answer": false,
    302           "justification": "NA - the evaluation measures jailbreak attack success rate, not model knowledge; benchmark contamination during LLM training is a separate, unaddressed issue.",
    303           "source": "haiku"
    304         }
    305       },
    306       "human_studies": {
    307         "pre_registered": {
    308           "applies": false,
    309           "answer": false,
    310           "justification": "No human participants in this study.",
    311           "source": "haiku"
    312         },
    313         "irb_or_ethics_approval": {
    314           "applies": false,
    315           "answer": false,
    316           "justification": "No human participants; Section 8 discusses responsible disclosure ethics but not IRB review.",
    317           "source": "haiku"
    318         },
    319         "demographics_reported": {
    320           "applies": false,
    321           "answer": false,
    322           "justification": "No human participants.",
    323           "source": "haiku"
    324         },
    325         "inclusion_exclusion_criteria": {
    326           "applies": false,
    327           "answer": false,
    328           "justification": "No human participants.",
    329           "source": "haiku"
    330         },
    331         "randomization_described": {
    332           "applies": false,
    333           "answer": false,
    334           "justification": "No human participants.",
    335           "source": "haiku"
    336         },
    337         "blinding_described": {
    338           "applies": false,
    339           "answer": false,
    340           "justification": "No human participants.",
    341           "source": "haiku"
    342         },
    343         "attrition_reported": {
    344           "applies": false,
    345           "answer": false,
    346           "justification": "No human participants.",
    347           "source": "haiku"
    348         }
    349       },
    350       "cost_and_practicality": {
    351         "inference_cost_reported": {
    352           "applies": true,
    353           "answer": false,
    354           "justification": "Hardware is specified but no inference latency, time-per-attack, or API cost figures are reported for any of the experiments.",
    355           "source": "haiku"
    356         },
    357         "compute_budget_stated": {
    358           "applies": true,
    359           "answer": false,
    360           "justification": "The computing node configuration is described (4x A100 80G) but total GPU-hours or API costs for the full experimental suite are not stated.",
    361           "source": "haiku"
    362         }
    363       }
    364     }
    365   },
    366   "claims": [
    367     {
    368       "claim": "DIA achieves state-of-the-art attack success rates: 0.89 on Llama-3.1-8B and 0.82 on GPT-4o after 10 queries on AdvBench",
    369       "evidence": "Table 2 reports single-query DIA-II ASR of 0.654/0.800 (LlamaGuard-2/3) on Llama-3.1-8B; the 10-query values cited in the claim come from the multi-query results in Figures 5/7.",
    370       "supported": "moderate"
    371     },
    372     {
    373       "claim": "Deferred harmful responses exhibit higher log-likelihood than immediate responses, enabling DIA-II",
    374       "evidence": "Figure 4 shows log-likelihood distributions for Llama-3.1-8B and Llama-3.2-11B with and without prepended benign context, showing a shift toward higher log-likelihood with deferral.",
    375       "supported": "moderate"
    376     },
    377     {
    378       "claim": "DIA can bypass 5 defense mechanisms, with DIA-I achieving 0.93 average defense pass rate",
    379       "evidence": "Table 5 reports defense pass rates on Gemma-2-9B against OpenAI Moderation, Perplexity Filter, Defensive System Prompt, Prompt Patch, and Bergeron; DIA-I achieves 1.0/1.0/1.0/0.985/0.651.",
    380       "supported": "moderate"
    381     },
    382     {
    383       "claim": "Larger LLMs within the same family are more susceptible to jailbreak attacks (excluding Llama-3)",
    384       "evidence": "Figure 8 shows ASR vs model size for 5 families; Gemma-2, Llama-3.1, Llama-3.2, and Qwen-2 show increasing ASR with size, while Llama-3 is anomalous.",
    385       "supported": "weak"
    386     },
    387     {
    388       "claim": "The prompt rewrite algorithm significantly improves multi-query ASR, especially for DIA-I on Llama-2-7B",
    389       "evidence": "Figure 9 shows DIA-I on Llama-2-7B stays near zero ASR without rewriting across all 10 iterations, versus meaningful improvement with rewriting.",
    390       "supported": "strong"
    391     },
    392     {
    393       "claim": "System prompt replacement is critical for DIA-I on Llama-2 and Llama-3 (>95% ASR drop without it)",
    394       "evidence": "Table 4: DIA-I ASR on Llama-2-7B drops from 0.025 to 0.000 and on Llama-3-8B from 0.571 to 0.010 when system prompt component is removed.",
    395       "supported": "strong"
    396     }
    397   ],
    398   "methodology_tags": ["benchmark-eval"],
    399   "key_findings": "DIA introduces a novel black-box jailbreak attack exploiting LLM chat template structure to inject fabricated dialogue history, bypassing the assumption that historical assistant text is unmanipulatable. DIA-II, which uses word substitution to defer the malicious response (exploiting a newly identified log-likelihood vulnerability), achieves 0.80+ ASR on multiple modern LLMs after 10 queries and outperforms all baselines on Llama-3.1 where other attacks nearly fail. The attack bypasses 5 defense mechanisms including OpenAI's moderation API, with DIA-I achieving 1.0 defense pass rate against OpenAI moderation by masking jailbreak intent in an answer-guidance prompt. The finding that larger models are more susceptible to jailbreaks (for most families) challenges the assumption that capability improvements also improve safety.",
    400   "red_flags": [
    401     {
    402       "flag": "No statistical tests or error bars",
    403       "detail": "All ASR results are single-point estimates with no confidence intervals, significance tests, or variance across repeated runs, making it impossible to assess whether observed differences between methods are statistically meaningful."
    404     },
    405     {
    406       "flag": "Ablation on 3 of 10 models only",
    407       "detail": "The ablation study (Table 4) covers only Llama-2-7B, Llama-3-8B, and Gemma-2-9B, excluding 7 of the 10 evaluated models including GPT-4o where the attack also works."
    408     },
    409     {
    410       "flag": "No limitations section",
    411       "detail": "The paper has no dedicated limitations or threats-to-validity section; only an ethics consideration about responsible disclosure. Scope of findings is not bounded."
    412     },
    413     {
    414       "flag": "Key dataset withheld pending acceptance",
    415       "detail": "The generated affirmative beginnings for all three benchmarks are promised to be released only after paper acceptance, preventing immediate replication of the reported results."
    416     },
    417     {
    418       "flag": "LlamaGuard-only evaluation with no validation",
    419       "detail": "Attack success is determined solely by LlamaGuard-2/3 classifiers, which have their own error rates; no human evaluation validates whether classifier judgments match actual harmfulness of outputs."
    420     },
    421     {
    422       "flag": "Hyperparameters not reported",
    423       "detail": "Temperature, top-p, and other LLM sampling parameters for all 10 victim models are not reported, making exact replication impossible."
    424     }
    425   ],
    426   "cited_papers": [
    427     {
    428       "title": "Universal and Transferable Adversarial Attacks on Aligned Language Models (GCG)",
    429       "relevance": "Primary white-box jailbreak baseline and source of AdvBench benchmark used throughout evaluation."
    430     },
    431     {
    432       "title": "Making Them Ask and Answer: Jailbreaking Large Language Models in Few Queries via Disguise and Reconstruction (DRA)",
    433       "relevance": "Key competing black-box jailbreak baseline; the observation that malicious instructions in assistant text are more effective motivates DIA-II."
    434     },
    435     {
    436       "title": "DeepInception: Hypnotize Large Language Model to Be Jailbreaker",
    437       "relevance": "Competing black-box jailbreak baseline using hypnosis/virtual scene techniques; motivates DIA's hypnosis component."
    438     },
    439     {
    440       "title": "Jailbreaking Black Box Large Language Models in Twenty Queries (PAIR)",
    441       "relevance": "Competing iterative black-box jailbreak attack using attacker LLM for prompt refinement; directly compared against DIA."
    442     },
    443     {
    444       "title": "Safety Alignment Should Be Made More Than Just a Few Tokens Deep",
    445       "relevance": "Characterizes shallow safety alignment vulnerability that motivates DIA-I's prefilling approach and affirmative beginning injection."
    446     },
    447     {
    448       "title": "Fine-tuning Aligned Language Models Compromises Safety (HEx-PHI benchmark)",
    449       "relevance": "Source of HEx-PHI benchmark with 11 prohibited categories used as one of three evaluation benchmarks."
    450     },
    451     {
    452       "title": "A Wolf in Sheep's Clothing: Generalized Nested Jailbreak Prompts (ReNe)",
    453       "relevance": "Competing black-box baseline using prompt rewrite and scenario nesting; DIA demonstrates superior semantic integrity vs ReNe in Figure 6."
    454     },
    455     {
    456       "title": "Jailbreaking Leading Safety-Aligned LLMs with Simple Adaptive Attacks",
    457       "relevance": "Discusses gray-box prefilling attacks that DIA-I adapts to black-box settings via dialogue injection."
    458     }
    459   ],
    460   "engagement_factors": {
    461     "practical_relevance": {
    462       "score": 2,
    463       "justification": "Code is released and the attack requires only black-box API or WebUI access, making it immediately applicable for red-teaming LLM deployments."
    464     },
    465     "surprise_contrarian": {
    466       "score": 2,
    467       "justification": "Multi-turn dialogue history as an attack surface is novel; the finding that larger models are more susceptible to jailbreaks (not more robust) challenges common intuition."
    468     },
    469     "fear_safety": {
    470       "score": 3,
    471       "justification": "Demonstrates 82% jailbreak success on GPT-4o with only 10 queries and bypasses OpenAI's moderation API with 100% pass rate, directly threatening production safety systems."
    472     },
    473     "drama_conflict": {
    474       "score": 2,
    475       "justification": "Published in IEEE TIFS, directly challenges the effectiveness of safety alignment in production LLMs including GPT-4o; DIA-I achieves 100% bypass of OpenAI's own moderation."
    476     },
    477     "demo_ability": {
    478       "score": 2,
    479       "justification": "Code is on GitHub and only API access to target LLMs is required; the attack can be demonstrated without local GPU infrastructure for closed-source targets."
    480     },
    481     "brand_recognition": {
    482       "score": 1,
    483       "justification": "Authors are from Zhejiang University and NUDT, not a major AI lab; the IEEE TIFS venue is reputable in security but not widely followed by the general AI community."
    484     }
    485   },
    486   "hn_data": {"threads": [{"hn_id": "22624980", "title": "Neuroevolution of Self-Interpretable Agents", "points": 5, "comments": 1, "url": "https://news.ycombinator.com/item?id=22624980"}, {"hn_id": "46686419", "title": "EnergyNet Explained: Internetification of Energy Distribution", "points": 2, "comments": 0, "url": "https://news.ycombinator.com/item?id=46686419"}, {"hn_id": "45988739", "title": "Sheaf Topos Theory: A Powerful Setting for Lagrangian Field Theory", "points": 2, "comments": 0, "url": "https://news.ycombinator.com/item?id=45988739"}, {"hn_id": "35219050", "title": "Large-scale end of life prediction of hard discs in distributed datacenters", "points": 2, "comments": 0, "url": "https://news.ycombinator.com/item?id=35219050"}, {"hn_id": "26338513", "title": "Mixture of Volumetric Primitives for Efficient Neural Rendering", "points": 2, "comments": 0, "url": "https://news.ycombinator.com/item?id=26338513"}, {"hn_id": "45302505", "title": "Verbalized Algorithms", "points": 1, "comments": 0, "url": "https://news.ycombinator.com/item?id=45302505"}, {"hn_id": "44791713", "title": "MQFQ-Sticky: Fair Queueing for Serverless GPU Functions", "points": 1, "comments": 0, "url": "https://news.ycombinator.com/item?id=44791713"}, {"hn_id": "44450854", "title": "Parallel-in-Time Preconditioning for Time-Dependent Variational Mean Field Games", "points": 1, "comments": 0, "url": "https://news.ycombinator.com/item?id=44450854"}, {"hn_id": "44326982", "title": "Interpreting Agent Behaviors in RL-Based Cyber-Battle Simulation Platforms", "points": 1, "comments": 0, "url": "https://news.ycombinator.com/item?id=44326982"}, {"hn_id": "22631779", "title": "Neuroevolution of Self-Interpretable Agents", "points": 1, "comments": 0, "url": "https://news.ycombinator.com/item?id=22631779"}], "top_points": 5, "total_points": 18, "total_comments": 1}
    487 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs