scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (27318B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Latent Collaboration in Multi-Agent Systems",
      6     "authors": [
      7       "Jiaru Zou",
      8       "Xiyuan Yang",
      9       "Ruizhong Qiu",
     10       "Gaotang Li",
     11       "Katherine Tieu"
     12     ],
     13     "year": 2025,
     14     "venue": "arXiv.org",
     15     "arxiv_id": "2511.20639",
     16     "doi": "10.48550/arXiv.2511.20639"
     17   },
     18   "checklist": {
     19     "claims_and_evidence": {
     20       "abstract_claims_supported": {
     21         "applies": true,
     22         "answer": false,
     23         "justification": "The abstract says 'up to 14.6% higher accuracy' (implying a maximum) but Section 4.1 describes 14.6% as the average improvement over single-model baselines; actual per-task gains range from -1.6pp to +18.6pp, making the abstract framing inconsistent with the body.",
     24         "source": "haiku"
     25       },
     26       "causal_claims_justified": {
     27         "applies": true,
     28         "answer": true,
     29         "justification": "Causal claims about LatentMAS improving accuracy are supported by ablation studies on the input-output alignment operator (Fig 7, 2.3–5.3% gain) and latent step depth (Fig 8), alongside controlled baselines sharing the same model family and MAS architecture.",
     30         "source": "haiku"
     31       },
     32       "generalization_bounded": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "The paper claims LatentMAS is 'a scalable and general paradigm for next-generation agentic systems' but all experiments use only Qwen3 4B/8B/14B; the fundamental constraint that all agents must share the same architecture is buried in Appendix C.3 rather than stated as a scope boundary.",
     36         "source": "haiku"
     37       },
     38       "alternative_explanations_discussed": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "No alternative explanations are considered: whether gains stem from prompting strategy differences, different effective computational budgets between latent steps and text decoding, or favorable benchmark characteristics is not discussed.",
     42         "source": "haiku"
     43       },
     44       "proxy_outcome_distinction": {
     45         "applies": true,
     46         "answer": false,
     47         "justification": "The paper claims LatentMAS achieves higher 'system-level reasoning quality' and 'system-level intelligence' but only measures exact-match accuracy on narrow benchmarks without discussing the gap between these proxies and the claimed construct.",
     48         "source": "haiku"
     49       }
     50     },
     51     "limitations_and_scope": {
     52       "limitations_section_present": {
     53         "applies": true,
     54         "answer": false,
     55         "justification": "There is no dedicated limitations section; the conclusion contains only forward-looking future work, and the brief note on heterogeneous agents in Appendix C.3 is framed as an extension opportunity rather than a limitation.",
     56         "source": "haiku"
     57       },
     58       "threats_to_validity_specific": {
     59         "applies": true,
     60         "answer": false,
     61         "justification": "No threats to validity are discussed; critical issues including potential AIME training data contamination, the low statistical power of the 30-problem AIME test sets (SE ≈ 9%), and single-model-family evaluation are not addressed.",
     62         "source": "haiku"
     63       },
     64       "scope_boundaries_stated": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "The paper makes broad claims without explicit scope boundaries; the same-architecture requirement, dependency on 8×A100 hardware, and restriction to one model family are not disclosed as scope constraints.",
     68         "source": "haiku"
     69       }
     70     },
     71     "conflicts_of_interest": {
     72       "funding_disclosed": {
     73         "applies": true,
     74         "answer": false,
     75         "justification": "No funding acknowledgments appear anywhere in the paper text.",
     76         "source": "haiku"
     77       },
     78       "affiliations_disclosed": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "All authors list institutional affiliations (Princeton University, UIUC, Stanford University) on the title page.",
     82         "source": "haiku"
     83       },
     84       "funder_independent_of_outcome": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "No funder is identified, so independence cannot be assessed.",
     88         "source": "haiku"
     89       },
     90       "financial_interests_declared": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "No competing interests or financial interests statement appears anywhere in the paper.",
     94         "source": "haiku"
     95       }
     96     },
     97     "scope_and_framing": {
     98       "key_terms_defined": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "Core terms including 'latent working memory,' 'latent thoughts,' 'sequential MAS,' and 'hierarchical MAS' are precisely defined with mathematical formulations in Sections 2 and 3.",
    102         "source": "haiku"
    103       },
    104       "intended_contribution_clear": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "The paper clearly states its contribution: LatentMAS, a training-free framework enabling LLM agents to collaborate entirely in latent space via auto-regressive hidden state generation and KV cache transfer.",
    108         "source": "haiku"
    109       },
    110       "engagement_with_prior_work": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "Section 5 explicitly positions LatentMAS against Cache-to-Cache, ThoughtComm, Mixture of Thoughts, and CoCoNut, explaining specific technical differences from each approach rather than merely listing papers.",
    114         "source": "haiku"
    115       }
    116     }
    117   },
    118   "type_checklist": {
    119     "empirical": {
    120       "artifacts": {
    121         "code_released": {
    122           "applies": true,
    123           "answer": true,
    124           "justification": "Source code is publicly released at https://github.com/Gen-Verse/LatentMAS as stated on the first page of the paper.",
    125           "source": "haiku"
    126         },
    127         "data_released": {
    128           "applies": true,
    129           "answer": true,
    130           "justification": "All 9 evaluation benchmarks (ARC-E/C, GSM8K, AIME24/25, GPQA-Diamond, MedQA, MBPP+, HumanEval+) are standard publicly available datasets.",
    131           "source": "haiku"
    132         },
    133         "environment_specified": {
    134           "applies": true,
    135           "answer": false,
    136           "justification": "The paper mentions PyTorch, HuggingFace Transformers, and vLLM but provides no requirements.txt, Dockerfile, or specific library version pinning.",
    137           "source": "haiku"
    138         },
    139         "reproduction_instructions": {
    140           "applies": true,
    141           "answer": false,
    142           "justification": "Appendix C.2 describes what was done but does not provide step-by-step commands or scripts; implementation details are descriptive rather than prescriptive.",
    143           "source": "haiku"
    144         }
    145       },
    146       "statistical_methodology": {
    147         "confidence_intervals_or_error_bars": {
    148           "applies": true,
    149           "answer": false,
    150           "justification": "Results are reported as means over three independent runs but no standard deviations, confidence intervals, or error bars appear in any table or figure.",
    151           "source": "haiku"
    152         },
    153         "significance_tests": {
    154           "applies": true,
    155           "answer": false,
    156           "justification": "No statistical significance tests are performed despite comparative claims; AIME24 and AIME25, with only 30 problems each, have SE ≈ 9%, so 3–7pp differences on them cannot be distinguished from sampling noise.",
    157           "source": "haiku"
    158         },
    159         "effect_sizes_reported": {
    160           "applies": true,
    161           "answer": true,
    162           "justification": "Effect sizes are reported as percentage-point improvements over baselines in dedicated 'Improve' columns of Tables 1–3, with absolute accuracy values providing baseline context.",
    163           "source": "haiku"
    164         },
    165         "sample_size_justified": {
    166           "applies": true,
    167           "answer": false,
    168           "justification": "No power analysis or sample size justification is provided; AIME24 and AIME25 each use 30 problems and GSM8K uses a standard test split without discussion of adequacy for the comparisons made.",
    169           "source": "haiku"
    170         },
    171         "variance_reported": {
    172           "applies": true,
    173           "answer": false,
    174           "justification": "The paper states results are averaged over three runs but reports only means; no standard deviations or run-to-run variance is shown.",
    175           "source": "haiku"
    176         }
    177       },
    178       "evaluation_design": {
    179         "baselines_included": {
    180           "applies": true,
    181           "answer": true,
    182           "justification": "Both single-model and text-based MAS baselines (sequential chain-of-agents and hierarchical expert-summarizer) are included, with vLLM acceleration applied to all baselines for fair speed comparison.",
    183           "source": "haiku"
    184         },
    185         "baselines_contemporary": {
    186           "applies": true,
    187           "answer": true,
    188           "justification": "Baselines use the same contemporary Qwen3 model family (released 2025) and implement chain-of-agents and hierarchical designs from recent literature.",
    189           "source": "haiku"
    190         },
    191         "ablation_study": {
    192           "applies": true,
    193           "answer": true,
    194           "justification": "Ablations include input-output alignment effectiveness (Figure 7: 2.3–5.3% accuracy impact) and latent step depth sensitivity (Figure 8: m from 0 to 160 steps across 3 tasks).",
    195           "source": "haiku"
    196         },
    197         "multiple_metrics": {
    198           "applies": true,
    199           "answer": true,
    200           "justification": "Three complementary metrics are reported throughout: task accuracy (%), output token usage, and end-to-end inference speed (time/run).",
    201           "source": "haiku"
    202         },
    203         "human_evaluation": {
    204           "applies": false,
    205           "answer": false,
    206           "justification": "Human evaluation is not applicable; all tasks use objective automated metrics (exact match, numeric equality, code execution).",
    207           "source": "haiku"
    208         },
    209         "held_out_test_set": {
    210           "applies": true,
    211           "answer": true,
    212           "justification": "Standard held-out test splits from all 9 benchmarks are used; no development sets are used for evaluation.",
    213           "source": "haiku"
    214         },
    215         "per_category_breakdown": {
    216           "applies": true,
    217           "answer": true,
    218           "justification": "Results are broken down per task (9 benchmarks), per model scale (4B/8B/14B), and per MAS architecture (sequential/hierarchical) across Tables 1–3.",
    219           "source": "haiku"
    220         },
    221         "failure_cases_discussed": {
    222           "applies": true,
    223           "answer": false,
    224           "justification": "Appendix D presents one cherry-picked case where LatentMAS succeeds and TextMAS fails; no systematic analysis of failure cases or conditions where LatentMAS underperforms is provided.",
    225           "source": "haiku"
    226         },
    227         "negative_results_reported": {
    228           "applies": true,
    229           "answer": true,
    230           "justification": "Tables 1 and 2 explicitly mark and show cases where LatentMAS underperforms TextMAS with downward arrows (e.g., ARC-E Qwen3-8B sequential: -0.3pp, GSM8K hierarchical 4B: -1.0pp).",
    231           "source": "haiku"
    232         }
    233       },
    234       "setup_transparency": {
    235         "model_versions_specified": {
    236           "applies": true,
    237           "answer": false,
    238           "justification": "Models are identified as 'Qwen3 4B/8B/14B' with a citation to the technical report, but no checkpoint hashes, HuggingFace model IDs, or version dates are provided.",
    239           "source": "haiku"
    240         },
    241         "prompts_provided": {
    242           "applies": true,
    243           "answer": true,
    244           "justification": "Full prompts for all agents (planner, critic, refiner, solver/judger) across all task types (numeric, multiple-choice, coding) for both sequential and hierarchical settings are provided in Appendix E.",
    245           "source": "haiku"
    246         },
    247         "hyperparameters_reported": {
    248           "applies": true,
    249           "answer": true,
    250           "justification": "Temperature (0.6), top-p (0.95), maximum output lengths per task type, and latent step range m ∈ {0, 10, 20, 40, 80} are all specified in Section 4 and Appendix C.2.",
    251           "source": "haiku"
    252         },
    253         "scaffolding_described": {
    254           "applies": true,
    255           "answer": true,
    256           "justification": "Both sequential (planner-critic-refiner-solver) and hierarchical (math/science/code agents + summarizer) MAS architectures are described in detail in Section 2 with Figure 2 illustration.",
    257           "source": "haiku"
    258         },
    259         "data_preprocessing_documented": {
    260           "applies": true,
    261           "answer": true,
    262           "justification": "Appendix C.2 documents evaluation protocols including answer extraction, text normalization (lowercasing, whitespace trimming), numeric parsing, and sandboxed code execution with 10-second timeout.",
    263           "source": "haiku"
    264         }
    265       },
    266       "data_integrity": {
    267         "raw_data_available": {
    268           "applies": true,
    269           "answer": true,
    270           "justification": "All 9 benchmarks are publicly available standard datasets; no proprietary or constructed datasets are used.",
    271           "source": "haiku"
    272         },
    273         "data_collection_described": {
    274           "applies": true,
    275           "answer": true,
    276           "justification": "All 9 benchmarks are described in Appendix C.1 with their sources, problem types, answer formats, and difficulty characteristics.",
    277           "source": "haiku"
    278         },
    279         "recruitment_methods_described": {
    280           "applies": false,
    281           "answer": false,
    282           "justification": "No human participants; standard benchmarks only.",
    283           "source": "haiku"
    284         },
    285         "data_pipeline_documented": {
    286           "applies": true,
    287           "answer": true,
    288           "justification": "The full pipeline from input question to answer extraction is documented in Appendix C.2 for each benchmark category (multiple-choice letter extraction, numeric parsing, code execution).",
    289           "source": "haiku"
    290         }
    291       },
    292       "contamination": {
    293         "training_cutoff_stated": {
    294           "applies": true,
    295           "answer": false,
    296           "justification": "The training data cutoff for Qwen3 models is not stated in the paper, despite using AIME24 (held February 2024) and AIME25 problems that may have been available during training.",
    297           "source": "haiku"
    298         },
    299         "train_test_overlap_discussed": {
    300           "applies": true,
    301           "answer": false,
    302           "justification": "No discussion of potential training data contamination for any benchmark; AIME24 competition problems were widely distributed online before Qwen3's likely training cutoff.",
    303           "source": "haiku"
    304         },
    305         "benchmark_contamination_addressed": {
    306           "applies": true,
    307           "answer": false,
    308           "justification": "AIME24 problems from February 2024 may be in Qwen3's training data, and the paper does not acknowledge or analyze this risk despite showing AIME results prominently.",
    309           "source": "haiku"
    310         }
    311       },
    312       "human_studies": {
    313         "pre_registered": {
    314           "applies": false,
    315           "answer": false,
    316           "justification": "No human participants.",
    317           "source": "haiku"
    318         },
    319         "irb_or_ethics_approval": {
    320           "applies": false,
    321           "answer": false,
    322           "justification": "No human participants.",
    323           "source": "haiku"
    324         },
    325         "demographics_reported": {
    326           "applies": false,
    327           "answer": false,
    328           "justification": "No human participants.",
    329           "source": "haiku"
    330         },
    331         "inclusion_exclusion_criteria": {
    332           "applies": false,
    333           "answer": false,
    334           "justification": "No human participants.",
    335           "source": "haiku"
    336         },
    337         "randomization_described": {
    338           "applies": false,
    339           "answer": false,
    340           "justification": "No human participants.",
    341           "source": "haiku"
    342         },
    343         "blinding_described": {
    344           "applies": false,
    345           "answer": false,
    346           "justification": "No human participants.",
    347           "source": "haiku"
    348         },
    349         "attrition_reported": {
    350           "applies": false,
    351           "answer": false,
    352           "justification": "No human participants.",
    353           "source": "haiku"
    354         }
    355       },
    356       "cost_and_practicality": {
    357         "inference_cost_reported": {
    358           "applies": true,
    359           "answer": true,
    360           "justification": "End-to-end inference latency (seconds per run) is reported for all 9 benchmarks, both MAS settings, and all 3 model scales in Tables 1–3 and Figure 4.",
    361           "source": "haiku"
    362         },
    363         "compute_budget_stated": {
    364           "applies": true,
    365           "answer": false,
    366           "justification": "Hardware is mentioned (8×NVIDIA A100-80G GPUs) but total GPU-hours, number of experiments, or total compute cost is not reported.",
    367           "source": "haiku"
    368         }
    369       }
    370     }
    371   },
    372   "claims": [
    373     {
    374       "claim": "LatentMAS achieves on average 14.6% higher accuracy than single-model baseline in sequential MAS setting",
    375       "evidence": "Tables 1 and 3 report accuracy across 9 benchmarks and 3 model scales, mean over 3 runs; Section 4.1 explicitly states the 14.6% average figure",
    376       "supported": "moderate"
    377     },
    378     {
    379       "claim": "LatentMAS reduces output token usage by 70.8%–83.7% relative to text-based MAS",
    380       "evidence": "Token columns in Tables 1 and 2 show 46–88% reduction per task, with stated averages of 70.8% (sequential) and 83.7% (hierarchical)",
    381       "supported": "strong"
    382     },
    383     {
    384       "claim": "LatentMAS delivers 4×–4.3× faster end-to-end inference than text-based MAS",
    385       "evidence": "Speed columns across Tables 1–3 show speedups ranging from 1.9× to 7.7× with stated averages around 4×; all baselines accelerated with vLLM",
    386       "supported": "moderate"
    387     },
    388     {
    389       "claim": "Latent working memory transfer preserves information losslessly (Theorem 3.3)",
    390       "evidence": "Mathematical proof by induction showing KV cache transfer is equivalent to re-running the preceding agent on its full input; proof is logically valid given deterministic transformers",
    391       "supported": "moderate"
    392     },
    393     {
    394       "claim": "Latent thoughts are O(d_h / log|V|) times more expressive than text-based reasoning",
    395       "evidence": "Theorem 3.1 establishes a lower bound on token count required to match latent expressiveness, but relies on the Linear Representation Hypothesis (Assumption B.1) as an unverified premise",
    396       "supported": "weak"
    397     },
    398     {
    399       "claim": "Input-output alignment matrix W_a improves downstream accuracy by 2.3%–5.3%",
    400       "evidence": "Figure 7 shows before/after accuracy comparison on ARC-C, ARC-E, and GSM8K with Qwen3-14B; Figure 6 visualizes the embedding distribution alignment",
    401       "supported": "strong"
    402     }
    403   ],
    404   "methodology_tags": [
    405     "benchmark-eval",
    406     "theoretical"
    407   ],
    408   "key_findings": "LatentMAS introduces training-free multi-agent collaboration in continuous latent space by combining auto-regressive hidden state generation with layer-wise KV cache transfer between agents, generally outperforming text-based MAS across 9 benchmarks (with some per-task regressions), with an average 14.6% accuracy gain over single models in the sequential setting, 70–84% token reduction, and 4–4.3× speedup. A linear input-output alignment operator (W_a) is essential, contributing 2–5% accuracy improvement by bridging the distribution gap between output hidden states and input embeddings. Optimal performance occurs at 40–80 latent steps before plateauing. However, the expressiveness theorem relies on the unverified Linear Representation Hypothesis, all experiments use only the Qwen3 model family, and the fundamental same-architecture constraint is not prominently disclosed.",
    409   "red_flags": [
    410     {
    411       "flag": "Abstract accuracy framing inconsistency",
    412       "detail": "Abstract says 'up to 14.6% higher accuracy' (maximum framing) but Section 4.1 reports 14.6% as the average improvement; actual per-task gains range from -1.6pp to +18.6pp over single model."
    413     },
    414     {
    415       "flag": "No variance on 3-run means",
    416       "detail": "Results are averaged over 3 runs with no standard deviations; AIME24/25 use only 30 problems (SE ≈ 9%), making reported 3–7pp accuracy differences statistically meaningless."
    417     },
    418     {
    419       "flag": "Contamination risk unaddressed for AIME24/25",
    420       "detail": "AIME 2024 competition problems were publicly available before Qwen3's likely training cutoff; no training cutoff is disclosed and no contamination analysis is performed."
    421     },
    422     {
    423       "flag": "Same-architecture constraint buried in appendix",
    424       "detail": "LatentMAS requires all agents to share identical model architecture for KV cache transfer; this fundamental limitation is disclosed only in Appendix C.3, not in the main text or limitations."
    425     },
    426     {
    427       "flag": "Expressiveness theorem relies on unverified assumption",
    428       "detail": "Theorem 3.1's central claim depends on the Linear Representation Hypothesis, stated as Assumption B.1 rather than established fact, making the theoretical efficiency argument uncertain."
    429     },
    430     {
    431       "flag": "Single model family evaluated",
    432       "detail": "All experiments use only Qwen3 4B/8B/14B; claims of general applicability to 'next-generation agentic systems' are not validated with other architectures (LLaMA, Mistral, etc.)."
    433     },
    434     {
    435       "flag": "Cherry-picked case study",
    436       "detail": "Appendix D presents one case where LatentMAS gives the correct answer and TextMAS fails; no systematic analysis of cases where TextMAS succeeds or LatentMAS fails."
    437     }
    438   ],
    439   "cited_papers": [
    440     {
    441       "title": "Training Large Language Models to Reason in a Continuous Latent Space (CoCoNut)",
    442       "relevance": "Key prior work on single-model latent chain-of-thought reasoning that LatentMAS extends to the multi-agent setting"
    443     },
    444     {
    445       "title": "Cache-to-Cache: Direct Semantic Communication Between Large Language Models",
    446       "relevance": "Closest prior work on KV cache sharing between models; LatentMAS differs by supporting latent thought generation rather than only sharing input context"
    447     },
    448     {
    449       "title": "Thought Communication in Multiagent Collaboration (ThoughtComm)",
    450       "relevance": "Prior work on trained latent communication in MAS; LatentMAS is training-free unlike ThoughtComm which uses encoder-decoder modules"
    451     },
    452     {
    453       "title": "Chain of Agents: Large Language Models Collaborating on Long-Context Tasks",
    454       "relevance": "Sequential MAS architecture (chain-of-agents) used as one of LatentMAS's evaluation settings and baseline"
    455     },
    456     {
    457       "title": "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversations",
    458       "relevance": "Major text-based multi-agent framework representing the paradigm LatentMAS aims to improve upon"
    459     },
    460     {
    461       "title": "MetaGPT: Meta Programming for a Multi-Agent Collaborative Framework",
    462       "relevance": "Representative text-based MAS and important prior work in the multi-agent LLM space"
    463     },
    464     {
    465       "title": "The Linear Representation Hypothesis and the Geometry of Large Language Models",
    466       "relevance": "Foundational assumption (Assumption B.1) underlying the expressiveness theorem (Theorem 3.1)"
    467     },
    468     {
    469       "title": "GPQA: A Graduate-Level Google-Proof Q&A Benchmark",
    470       "relevance": "One of the 9 evaluation benchmarks (GPQA-Diamond) used to assess LatentMAS on graduate-level science reasoning"
    471     },
    472     {
    473       "title": "Qwen3 Technical Report",
    474       "relevance": "The backbone model family used for all LatentMAS experiments across 4B/8B/14B scales"
    475     }
    476   ],
    477   "engagement_factors": {
    478     "practical_relevance": {
    479       "score": 2,
    480       "justification": "Released code and demonstrated 4× speedup with 80% token reduction are directly useful, but same-architecture requirement and 8×A100 hardware needs limit broad practitioner adoption."
    481     },
    482     "surprise_contrarian": {
    483       "score": 2,
    484       "justification": "Challenges the foundational assumption that LLM agents must communicate via text, demonstrating latent space collaboration can outperform while using far fewer tokens."
    485     },
    486     "fear_safety": {
    487       "score": 0,
    488       "justification": "No safety or AI risk implications discussed; this is purely a performance and efficiency contribution to multi-agent systems."
    489     },
    490     "drama_conflict": {
    491       "score": 1,
    492       "justification": "Implicitly challenges the text-as-lingua-franca paradigm in MAS research but does not create direct controversy; the novelty is incremental over prior cache-sharing work."
    493     },
    494     "demo_ability": {
    495       "score": 2,
    496       "justification": "Code is publicly released at GitHub allowing replication, though requiring substantial GPU resources (8×A100-80G) limits casual experimentation."
    497     },
    498     "brand_recognition": {
    499       "score": 2,
    500       "justification": "Authors from Princeton, Stanford, and UIUC include recognized researchers (Yejin Choi, James Zou, Mengdi Wang) lending credibility and visibility."
    501     }
    502   },
    503   "hn_data": {
    504     "threads": [
    505       {
    506         "hn_id": "45708392",
    507         "title": "Fluidity Index: Next-Generation Super-Intelligence Benchmarks",
    508         "points": 5,
    509         "comments": 0,
    510         "url": "https://news.ycombinator.com/item?id=45708392",
    511         "created_at": "2025-10-26T01:35:28Z"
    512       },
    513       {
    514         "hn_id": "46069076",
    515         "title": "Game Theory in Cosmology",
    516         "points": 4,
    517         "comments": 0,
    518         "url": "https://news.ycombinator.com/item?id=46069076",
    519         "created_at": "2025-11-27T13:33:13Z"
    520       },
    521       {
    522         "hn_id": "46132435",
    523         "title": "LatentMAS – agent collaboration from token space into the model's latent space",
    524         "points": 3,
    525         "comments": 1,
    526         "url": "https://news.ycombinator.com/item?id=46132435",
    527         "created_at": "2025-12-03T09:43:05Z"
    528       },
    529       {
    530         "hn_id": "46367468",
    531         "title": "Minimizing Hyperbolic Embedding Distortion with LLM-Guided Hierarchy Structuring",
    532         "points": 3,
    533         "comments": 0,
    534         "url": "https://news.ycombinator.com/item?id=46367468",
    535         "created_at": "2025-12-23T17:59:28Z"
    536       },
    537       {
    538         "hn_id": "46390027",
    539         "title": "Wheeler-Feynman theory for gravitational waves",
    540         "points": 2,
    541         "comments": 0,
    542         "url": "https://news.ycombinator.com/item?id=46390027",
    543         "created_at": "2025-12-26T07:28:55Z"
    544       },
    545       {
    546         "hn_id": "46305114",
    547         "title": "Latent Collaboration in Multi-Agent Systems",
    548         "points": 1,
    549         "comments": 0,
    550         "url": "https://news.ycombinator.com/item?id=46305114",
    551         "created_at": "2025-12-17T20:32:08Z"
    552       }
    553     ],
    554     "top_points": 5,
    555     "total_points": 18,
    556     "total_comments": 1
    557   }
    558 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs