scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (29597B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Heterogeneous Multi-Agent Reinforcement Learning for Zero-Shot Scalable Collaboration",
      6     "authors": [
      7       "Xudong Guo",
      8       "Daming Shi",
      9       "Junjie Yu",
     10       "Wenhui Fan"
     11     ],
     12     "year": 2024,
     13     "venue": "Neurocomputing",
     14     "arxiv_id": "2404.03869",
     15     "doi": "10.48550/arXiv.2404.03869"
     16   },
     17   "checklist": {
     18     "claims_and_evidence": {
     19       "abstract_claims_supported": {
     20         "applies": true,
     21         "answer": true,
     22         "justification": "Abstract claims of superior performance and zero-shot scalability are supported by Tables I–III showing SHPPO outperforms all parameter-shared baselines across SMAC and GRF, and scalability tests in Tables II–III showing SHPPO maintains performance while HAPPO catastrophically degrades.",
     23         "source": "haiku"
     24       },
     25       "causal_claims_justified": {
     26         "applies": true,
     27         "answer": true,
     28         "justification": "Ablation studies in Section V.E systematically isolate the contribution of InferenceNet guidance (Lv), entropy loss (Le), and diversity loss (Ld), establishing causal roles for each component; removing any single component degrades performance on MMM2 and 8m_vs_9m.",
     29         "source": "haiku"
     30       },
     31       "generalization_bounded": {
     32         "applies": true,
     33         "answer": false,
     34         "justification": "The title and introduction suggest broad real-world applicability (UAVs, autonomous vehicles), but all experiments are confined to two modified game environments with artificially fixed observation lengths; this critical constraint is not prominently framed as a scope boundary on the claims.",
     35         "source": "haiku"
     36       },
     37       "alternative_explanations_discussed": {
     38         "applies": true,
     39         "answer": false,
     40         "justification": "Performance gains are attributed entirely to the heterogeneous layer and latent learning design without considering alternative explanations such as whether the inference net acts as a simple additional critic or whether the gain is architectural rather than due to heterogeneity per se.",
     41         "source": "haiku"
     42       },
     43       "proxy_outcome_distinction": {
     44         "applies": true,
     45         "answer": true,
     46         "justification": "Win rate and score rate directly measure task success in the game environments; the paper's claims about zero-shot scalability are operationalized exactly as winning/scoring on unseen task variants, with no proxies involved.",
     47         "source": "haiku"
     48       }
     49     },
     50     "limitations_and_scope": {
     51       "limitations_section_present": {
     52         "applies": true,
     53         "answer": false,
     54         "justification": "There is no dedicated limitations or threats-to-validity section; a single sentence in the 'Note to Practitioners' acknowledges the sim-to-real gap, which does not constitute a formal limitations section.",
     55         "source": "haiku"
     56       },
     57       "threats_to_validity_specific": {
     58         "applies": true,
     59         "answer": false,
     60         "justification": "The only threat acknowledged is the generic sim-to-real gap; specific threats such as sensitivity to the 5-seed sample size, overfitting to particular map configurations, or the effect of the artificial observation-fixing on claim scope are not discussed.",
     61         "source": "haiku"
     62       },
     63       "scope_boundaries_stated": {
     64         "applies": true,
     65         "answer": false,
     66         "justification": "No explicit scope boundaries are stated; the paper does not clarify that zero-shot scalability results apply only within the same task family with fixed observation dimensions, or that the approach has not been tested beyond two game simulators.",
     67         "source": "haiku"
     68       }
     69     },
     70     "conflicts_of_interest": {
     71       "funding_disclosed": {
     72         "applies": true,
     73         "answer": false,
     74         "justification": "No funding acknowledgment section appears anywhere in the paper; only institutional affiliations are listed without disclosure of grants or sponsors.",
     75         "source": "haiku"
     76       },
     77       "affiliations_disclosed": {
     78         "applies": true,
     79         "answer": true,
     80         "justification": "All four authors are explicitly identified as being from the Department of Automation, Tsinghua University, Beijing, China, with individual email addresses provided.",
     81         "source": "haiku"
     82       },
     83       "funder_independent_of_outcome": {
     84         "applies": false,
     85         "answer": false,
     86         "justification": "No funding is disclosed, making this criterion not applicable.",
     87         "source": "haiku"
     88       },
     89       "financial_interests_declared": {
     90         "applies": true,
     91         "answer": false,
     92         "justification": "No competing interests statement or financial interests declaration appears anywhere in the paper.",
     93         "source": "haiku"
     94       }
     95     },
     96     "scope_and_framing": {
     97       "key_terms_defined": {
     98         "applies": true,
     99         "answer": true,
     100         "justification": "Key terms are defined explicitly in the introduction: 'zero-shot scalability' (applying trained models to new agent-count scenarios without retraining), 'inter-individual heterogeneity' (agents have different strategies from each other), and 'temporal heterogeneity' (strategies update with task progress).",
    101         "source": "haiku"
    102       },
    103       "intended_contribution_clear": {
    104         "applies": true,
    105         "answer": true,
    106         "justification": "Three numbered contributions are explicitly stated: (1) a new actor-critic-like latent and inference network design, (2) a novel MARL framework adding both heterogeneity types to any parameter-shared backbone, and (3) demonstrated superior performance over baselines.",
    107         "source": "haiku"
    108       },
    109       "engagement_with_prior_work": {
    110         "applies": true,
    111         "answer": true,
    112         "justification": "The related work section covers three subareas (MARL for collaboration, scaling MARL, heterogeneous MARL) and explicitly differentiates SHPPO from HAPPO (inflexible scaling), SePS (fixed K policies, non-adaptive), and ROMA/RODE/LDSA (cannot scale to new populations due to predefined role counts).",
    113         "source": "haiku"
    114       }
    115     }
    116   },
    117   "type_checklist": {
    118     "empirical": {
    119       "artifacts": {
    120         "code_released": {
    121           "applies": true,
    122           "answer": false,
    123           "justification": "No code repository link or availability statement appears in the paper; source code is not released.",
    124           "source": "haiku"
    125         },
    126         "data_released": {
    127           "applies": true,
    128           "answer": true,
    129           "justification": "All experiments use standard publicly available benchmarks (SMAC and GRF), which are independently accessible to other researchers.",
    130           "source": "haiku"
    131         },
    132         "environment_specified": {
    133           "applies": true,
    134           "answer": false,
    135           "justification": "While training hardware (NVIDIA V100) and optimizer (Adam) are stated, no requirements.txt, Dockerfile, or software dependency specification is provided to reproduce the computational environment.",
    136           "source": "haiku"
    137         },
    138         "reproduction_instructions": {
    139           "applies": true,
    140           "answer": false,
    141           "justification": "Network configurations (Table IV) and hyperparameters (Table V) are provided, but there are no step-by-step instructions for setting up environments, running training, or replicating the scalability test variants.",
    142           "source": "haiku"
    143         }
    144       },
    145       "statistical_methodology": {
    146         "confidence_intervals_or_error_bars": {
    147           "applies": true,
    148           "answer": true,
    149           "justification": "Figure 4 learning curves show confidence intervals 'calculated over 5 seeds,' and Tables I–III report mean ± standard deviation for all win/score rate and reward metrics.",
    150           "source": "haiku"
    151         },
    152         "significance_tests": {
    153           "applies": true,
    154           "answer": false,
    155           "justification": "No statistical significance tests (t-tests, Mann-Whitney, etc.) are applied to compare methods; comparative claims rest on overlapping confidence intervals without formal hypothesis testing.",
    156           "source": "haiku"
    157         },
    158         "effect_sizes_reported": {
    159           "applies": true,
    160           "answer": true,
    161           "justification": "Absolute win/score rate differences between methods are reported in tables with baseline context (e.g., SHPPO 85.5% vs MAPPO 65.2% on 8m_vs_9m), providing interpretable effect sizes.",
    162           "source": "haiku"
    163         },
    164         "sample_size_justified": {
    165           "applies": true,
    166           "answer": false,
    167           "justification": "Five random seeds are used across all experiments but no justification or power analysis is provided for why 5 seeds is sufficient to detect the observed differences.",
    168           "source": "haiku"
    169         },
    170         "variance_reported": {
    171           "applies": true,
    172           "answer": true,
    173           "justification": "Standard deviation is reported alongside mean values in all results tables (e.g., 71.2±6.5), and confidence interval bands are shown in all learning curve figures.",
    174           "source": "haiku"
    175         }
    176       },
    177       "evaluation_design": {
    178         "baselines_included": {
    179           "applies": true,
    180           "answer": true,
    181           "justification": "Four baselines are included: original HAPPO, HAPPO (share), MAPPO (share), and HATRPO (share), covering both heterogeneous and homogeneous parameter-shared MARL approaches.",
    182           "source": "haiku"
    183         },
    184         "baselines_contemporary": {
    185           "applies": true,
    186           "answer": true,
    187           "justification": "All baselines are from 2021–2022 (HAPPO/HATRPO 2021, MAPPO 2022) and represent the current state of the art in cooperative MARL at the time of publication.",
    188           "source": "haiku"
    189         },
    190         "ablation_study": {
    191           "applies": true,
    192           "answer": true,
    193           "justification": "Section V.E presents ablations testing: (1) zeroed LatentNet inputs, (2) entirely zeroed latent variables, (3) removed InferenceNet loss Lv, (4) removed entropy loss Le, and (5) removed diversity loss Ld, all on two SMAC tasks.",
    194           "source": "haiku"
    195         },
    196         "multiple_metrics": {
    197           "applies": true,
    198           "answer": true,
    199           "justification": "Both win/score rate (task success) and cumulative reward are reported as evaluation metrics for all tasks in Tables I–III.",
    200           "source": "haiku"
    201         },
    202         "human_evaluation": {
    203           "applies": false,
    204           "answer": false,
    205           "justification": "The paper evaluates MARL agents in automated simulation environments (SMAC, GRF); human evaluation is clearly not relevant.",
    206           "source": "haiku"
    207         },
    208         "held_out_test_set": {
    209           "applies": true,
    210           "answer": true,
    211           "justification": "The scalability tests use held-out unseen task variants (e.g., 6m_vs_7m, 10m_vs_11m for a model trained on 8m_vs_9m) never seen during training, functioning as a zero-shot held-out test condition.",
    212           "source": "haiku"
    213         },
    214         "per_category_breakdown": {
    215           "applies": true,
    216           "answer": true,
    217           "justification": "Results are broken down per task type (heterogeneous MMM2, homogeneous 8m_vs_9m, GRF variants) and each individual scalability transfer scenario is reported separately in Tables II and III.",
    218           "source": "haiku"
    219         },
    220         "failure_cases_discussed": {
    221           "applies": true,
    222           "answer": true,
    223           "justification": "Section V.D explicitly discusses tasks (821_831, 731_831) where SHPPO is not optimal and MAPPO performs comparably, offering the explanation that increased homogeneity in those variants favors parameter-sharing without heterogeneity.",
    224           "source": "haiku"
    225         },
    226         "negative_results_reported": {
    227           "applies": true,
    228           "answer": true,
    229           "justification": "HAPPO outperforms SHPPO on the original MMM2 task (76.3% vs 71.2%), and task 711_731 shows near-zero performance for all methods including SHPPO (5.6%); these are reported transparently rather than omitted.",
    230           "source": "haiku"
    231         }
    232       },
    233       "setup_transparency": {
    234         "model_versions_specified": {
    235           "applies": true,
    236           "answer": true,
    237           "justification": "All custom neural network architectures are fully specified in Table IV (MLP dims=64, encoder/decoder as 3-layer MLPs, RNN hidden dim=64, latent dim=3, activation=ReLU, optimizer=Adam); no external pre-trained models are used.",
    238           "source": "haiku"
    239         },
    240         "prompts_provided": {
    241           "applies": false,
    242           "answer": false,
    243           "justification": "This is a pure MARL paper with custom-trained neural network agents; no LLM prompts or system instructions are used.",
    244           "source": "haiku"
    245         },
    246         "hyperparameters_reported": {
    247           "applies": true,
    248           "answer": true,
    249           "justification": "Table V provides comprehensive hyperparameters: learning rates for all four networks, loss weights (λe=0.01, λd=0.1), discount factor (γ=0.95), clip value (0.2), GAE lambda (0.95), and max steps per episode (160).",
    250           "source": "haiku"
    251         },
    252         "scaffolding_described": {
    253           "applies": true,
    254           "answer": true,
    255           "justification": "Both the single-step execution (Algorithm 1) and overall training procedure (Algorithm 2) are described with explicit pseudocode covering buffer storage, minibatch sampling, sequential agent updates, and loss computation.",
    256           "source": "haiku"
    257         },
    258         "data_preprocessing_documented": {
    259           "applies": true,
    260           "answer": true,
    261           "justification": "Appendix A documents the environment modifications: observation range is fixed to the closest N enemies and allies in each task (e.g., closest 10 enemies and 8 allies for MMM2) to enable scalability testing across different agent counts.",
    262           "source": "haiku"
    263         }
    264       },
    265       "data_integrity": {
    266         "raw_data_available": {
    267           "applies": true,
    268           "answer": false,
    269           "justification": "No raw training logs, trajectory data, or per-seed results are made available; results are reported only as aggregated mean ± std over 5 seeds.",
    270           "source": "haiku"
    271         },
    272         "data_collection_described": {
    273           "applies": true,
    274           "answer": true,
    275           "justification": "Data collection is through simulation: Algorithm 2 explicitly describes collecting trajectories including observations, hidden states, latent variables, actions, and rewards in a replay buffer during environment interaction.",
    276           "source": "haiku"
    277         },
    278         "recruitment_methods_described": {
    279           "applies": false,
    280           "answer": false,
    281           "justification": "No human participants; all data comes from automated simulation environments.",
    282           "source": "haiku"
    283         },
    284         "data_pipeline_documented": {
    285           "applies": true,
    286           "answer": true,
    287           "justification": "The full data pipeline from environment interaction to training is documented in Algorithms 1 and 2, covering observation collection, buffer storage, minibatch sampling, and sequential network updates.",
    288           "source": "haiku"
    289         }
    290       },
    291       "contamination": {
    292         "training_cutoff_stated": {
    293           "applies": false,
    294           "answer": false,
    295           "justification": "This paper trains MARL agents from scratch in game simulators; no pre-trained language models or external training cutoffs are relevant.",
    296           "source": "haiku"
    297         },
    298         "train_test_overlap_discussed": {
    299           "applies": false,
    300           "answer": false,
    301           "justification": "The train/test split is between game task variants; pre-trained model contamination does not apply.",
    302           "source": "haiku"
    303         },
    304         "benchmark_contamination_addressed": {
    305           "applies": false,
    306           "answer": false,
    307           "justification": "SMAC and GRF are used as training environments, not as held-out benchmarks for a pre-trained model; benchmark contamination is not applicable.",
    308           "source": "haiku"
    309         }
    310       },
    311       "human_studies": {
    312         "pre_registered": {
    313           "applies": false,
    314           "answer": false,
    315           "justification": "No human participants in this study.",
    316           "source": "haiku"
    317         },
    318         "irb_or_ethics_approval": {
    319           "applies": false,
    320           "answer": false,
    321           "justification": "No human participants in this study.",
    322           "source": "haiku"
    323         },
    324         "demographics_reported": {
    325           "applies": false,
    326           "answer": false,
    327           "justification": "No human participants in this study.",
    328           "source": "haiku"
    329         },
    330         "inclusion_exclusion_criteria": {
    331           "applies": false,
    332           "answer": false,
    333           "justification": "No human participants in this study.",
    334           "source": "haiku"
    335         },
    336         "randomization_described": {
    337           "applies": false,
    338           "answer": false,
    339           "justification": "No human participants in this study.",
    340           "source": "haiku"
    341         },
    342         "blinding_described": {
    343           "applies": false,
    344           "answer": false,
    345           "justification": "No human participants in this study.",
    346           "source": "haiku"
    347         },
    348         "attrition_reported": {
    349           "applies": false,
    350           "answer": false,
    351           "justification": "No human participants in this study.",
    352           "source": "haiku"
    353         }
    354       },
    355       "cost_and_practicality": {
    356         "inference_cost_reported": {
    357           "applies": true,
    358           "answer": false,
    359           "justification": "Inference latency or deployment cost is not reported; Table VI reports only training time (15–26 hours per task on V100) rather than per-step inference cost.",
    360           "source": "haiku"
    361         },
    362         "compute_budget_stated": {
    363           "applies": true,
    364           "answer": true,
    365           "justification": "Table VI reports training time for each task (MMM2: 25.5±0.2h, 8m_vs_9m: 20.3±0.6h, etc.) on NVIDIA V100 GPUs, and training step counts are specified (20M for SMAC, 25M for GRF).",
    366           "source": "haiku"
    367         }
    368       }
    369     }
    370   },
    371   "claims": [
    372     {
    373       "claim": "SHPPO outperforms all parameter-shared baselines (HAPPO share, MAPPO share, HATRPO share) on original SMAC and GRF tasks",
    374       "evidence": "Table I: SHPPO achieves 71.2% (MMM2), 85.5% (8m_vs_9m), 94.2% (3_vs_1_keeper), 91.2% (counterattack_easy) vs next-best parameter-shared baseline scores of 62.7%, 70.5%, 91.3%, 86.4% respectively",
    375       "supported": "strong"
    376     },
    377     {
    378       "claim": "SHPPO achieves competitive performance with original non-parameter-shared HAPPO while requiring far fewer parameters",
     379       "evidence": "Table I shows SHPPO (71.2±6.5%) vs HAPPO (76.3±5.1%) on MMM2 — overlapping mean ± std ranges; SHPPO outperforms HAPPO on 8m_vs_9m (85.5% vs 81.0%) and GRF tasks",
    380       "supported": "moderate"
    381     },
    382     {
    383       "claim": "SHPPO has superior zero-shot scalability to HAPPO when transferred to unseen tasks with different agent counts",
    384       "evidence": "Table II: HAPPO drops to 2.5% on 6m_vs_7m and 0.0% on 10m_vs_11m while SHPPO maintains 17.5% and 70.2%; across all 8m_vs_9m scalability variants HAPPO approaches zero",
    385       "supported": "strong"
    386     },
    387     {
    388       "claim": "Latent variables learn interpretable heterogeneous strategy patterns that adapt when team composition changes",
    389       "evidence": "Figures 5 and 6 show qualitative visualization of latent variable clusters corresponding to distinct strategies (attract fire, attack at distance, retreat) that shift plausibly when a new agent is added in task 821_831",
    390       "supported": "weak"
    391     },
    392     {
    393       "claim": "All three latent learning loss terms (Lv, Le, Ld) independently contribute to SHPPO performance",
    394       "evidence": "Figure 7 ablations show performance degrades when each loss is removed on MMM2 and 8m_vs_9m; removing InferenceNet guidance causes largest decline on both tasks",
    395       "supported": "moderate"
    396     }
    397   ],
    398   "methodology_tags": [
    399     "benchmark-eval",
    400     "empirical"
    401   ],
    402   "key_findings": "SHPPO integrates adaptive heterogeneity into parameter-shared MARL by learning per-agent latent strategy variables that parameterize agent-specific heterogeneous linear layers, while keeping all other network parameters shared for scalability. On SMAC and GRF benchmarks, SHPPO outperforms all parameter-shared baselines and achieves performance competitive with the non-scalable per-agent HAPPO. In zero-shot scalability tests on unseen agent-count variants, SHPPO maintains performance while HAPPO catastrophically degrades (near-zero win rates) due to its fixed per-agent architecture. Ablation studies confirm that all three latent learning losses contribute independently, with the InferenceNet-guided value loss providing the largest gain.",
    403   "red_flags": [
    404     {
    405       "flag": "Artificially fixed observations enable but constrain scalability claims",
    406       "detail": "The observation space is modified to have fixed length (e.g., limited to closest 10 enemies/8 allies) specifically to enable zero-shot transfer without observation remapping; this is a critical design choice that sidesteps a hard part of real-world scalability and is buried in the appendix rather than prominently flagged as a scope constraint."
    407     },
    408     {
    409       "flag": "No formal significance testing with only 5 seeds",
    410       "detail": "Comparative claims rest on overlapping ±std intervals over 5 seeds without t-tests or equivalent; in particular, SHPPO (71.2±6.5%) vs HAPPO (76.3±5.1%) on MMM2 have overlapping intervals yet are presented as demonstrating competitive performance."
    411     },
    412     {
    413       "flag": "No code release",
    414       "detail": "Source code is not released despite the method involving multiple interacting networks with non-trivial training dynamics; reproduction relies solely on the hyperparameter tables with no environment setup or training commands provided."
    415     },
    416     {
    417       "flag": "Qualitative latent space interpretation",
    418       "detail": "Claims about learned strategy patterns (colored circles in Figs. 5–6) are post-hoc visual interpretations; no quantitative validation (e.g., behavioral clustering metrics or policy similarity measures) is provided to confirm that the latent clusters correspond to distinct behavioral policies."
    419     },
    420     {
    421       "flag": "Funding not disclosed",
    422       "detail": "No funding acknowledgment appears despite the work being conducted at a major research university with institutional resources (V100 GPUs, multi-week training runs)."
    423     }
    424   ],
    425   "cited_papers": [
    426     {
    427       "title": "Trust region policy optimisation in multi-agent reinforcement learning (HAPPO/HATRPO)",
    428       "relevance": "Primary baseline and backbone that SHPPO builds upon; key heterogeneous MARL method used as main comparison"
    429     },
    430     {
    431       "title": "The surprising effectiveness of PPO in cooperative multi-agent games (MAPPO)",
    432       "relevance": "Key baseline and foundation for the parameter-shared PPO approach extended in SHPPO"
    433     },
    434     {
    435       "title": "The StarCraft Multi-Agent Challenge (SMAC)",
    436       "relevance": "Primary benchmark environment used for all MARL training and evaluation"
    437     },
    438     {
    439       "title": "Google Research Football: A novel reinforcement learning environment (GRF)",
    440       "relevance": "Second benchmark environment used for evaluation and scalability testing"
    441     },
    442     {
    443       "title": "Scaling multi-agent reinforcement learning with selective parameter sharing (SePS)",
    444       "relevance": "Related scalable MARL method using K fixed parameter-shared policies; key prior work SHPPO claims to improve upon by enabling adaptive heterogeneity"
    445     },
    446     {
    447       "title": "UPDeT: Universal multi-agent RL via policy decoupling with transformers",
    448       "relevance": "Related scalable MARL approach using transformer self-attention for population-invariant policies"
    449     },
    450     {
    451       "title": "LDSA: Learning dynamic subtask assignment in cooperative multi-agent reinforcement learning",
    452       "relevance": "Related heterogeneous MARL method using subtask-based role assignment that SHPPO addresses by removing the need to predetermine subtask count"
    453     },
    454     {
    455       "title": "ROMA: Multi-agent reinforcement learning with emergent roles",
    456       "relevance": "Related role-based heterogeneous MARL that uses current observation only for role embedding, contrasted with SHPPO's trajectory-aware latent learning"
    457     },
    458     {
    459       "title": "Proximal policy optimization algorithms (PPO)",
    460       "relevance": "Foundational single-agent algorithm that SHPPO extends to the heterogeneous multi-agent setting via HAPPO's multi-agent advantage decomposition"
    461     },
    462     {
    463       "title": "Monotonic value function factorisation for deep multi-agent reinforcement learning (QMIX)",
    464       "relevance": "Influential value-decomposition MARL baseline establishing credit assignment approach"
    465     }
    466   ],
    467   "engagement_factors": {
    468     "practical_relevance": {
    469       "score": 2,
    470       "justification": "Addresses real scenarios (UAV coordination, autonomous vehicles) but all experiments are simulation-only with modified observation spaces that may not transfer to real deployments."
    471     },
    472     "surprise_contrarian": {
    473       "score": 1,
    474       "justification": "The finding that a single parameter-shared model with adaptive latent variables can match or exceed per-agent heterogeneous models in zero-shot transfer is mildly counterintuitive but directionally expected in the field."
    475     },
    476     "fear_safety": {
    477       "score": 0,
    478       "justification": "The paper focuses entirely on cooperative game-playing agents with no safety or risk framing."
    479     },
    480     "drama_conflict": {
    481       "score": 0,
    482       "justification": "Standard incremental MARL research with no controversy or competing stakeholder interests."
    483     },
    484     "demo_ability": {
    485       "score": 1,
    486       "justification": "Could in principle be demoed with StarCraft II or GRF setup, but requires significant infrastructure and no code is released."
    487     },
    488     "brand_recognition": {
    489       "score": 1,
    490       "justification": "Tsinghua University is prestigious but not a recognized AI product lab; Neurocomputing has moderate field recognition."
    491     }
    492   },
    493   "hn_data": {
    494     "threads": [
    495       {
    496         "hn_id": "42073801",
    497         "title": "Evaluating the world model implicit in a generative model",
    498         "points": 159,
    499         "comments": 45,
    500         "url": "https://news.ycombinator.com/item?id=42073801",
    501         "created_at": "2024-11-07T05:51:32Z"
    502       },
    503       {
    504         "hn_id": "40107787",
    505         "title": "Lossless Acceleration of LLM via Adaptive N-Gram Parallel Decoding",
    506         "points": 136,
    507         "comments": 23,
    508         "url": "https://news.ycombinator.com/item?id=40107787",
    509         "created_at": "2024-04-21T18:02:40Z"
    510       },
    511       {
    512         "hn_id": "40757551",
    513         "title": "Evaluating the World Model Implicit in a Generative Model",
    514         "points": 3,
    515         "comments": 0,
    516         "url": "https://news.ycombinator.com/item?id=40757551",
    517         "created_at": "2024-06-22T09:05:47Z"
    518       },
    519       {
    520         "hn_id": "40300570",
    521         "title": "HCC Is All You Need",
    522         "points": 2,
    523         "comments": 1,
    524         "url": "https://news.ycombinator.com/item?id=40300570",
    525         "created_at": "2024-05-08T17:17:45Z"
    526       },
    527       {
    528         "hn_id": "41625255",
    529         "title": "The MLIR Transform Dialect. Your compiler is more powerful than you think",
    530         "points": 2,
    531         "comments": 0,
    532         "url": "https://news.ycombinator.com/item?id=41625255",
    533         "created_at": "2024-09-23T12:13:03Z"
    534       },
    535       {
    536         "hn_id": "39459395",
    537         "title": "Unsupervised Evaluation of Code LLMs with Round-Trip Correctness",
    538         "points": 2,
    539         "comments": 0,
    540         "url": "https://news.ycombinator.com/item?id=39459395",
    541         "created_at": "2024-02-21T20:53:25Z"
    542       },
    543       {
    544         "hn_id": "39380575",
    545         "title": "If Turing played piano with an artificial partner",
    546         "points": 2,
    547         "comments": 0,
    548         "url": "https://news.ycombinator.com/item?id=39380575",
    549         "created_at": "2024-02-15T09:08:58Z"
    550       },
    551       {
    552         "hn_id": "35520943",
    553         "title": "ChatGPT Empowered Long-Step Robot Control in Various Environments",
    554         "points": 2,
    555         "comments": 0,
    556         "url": "https://news.ycombinator.com/item?id=35520943",
    557         "created_at": "2023-04-11T04:43:42Z"
    558       },
    559       {
    560         "hn_id": "41439259",
    561         "title": "Help Finding LLM and Proof Based Refactoring Reference",
    562         "points": 1,
    563         "comments": 0,
    564         "url": "https://news.ycombinator.com/item?id=41439259",
    565         "created_at": "2024-09-03T21:11:48Z"
    566       },
    567       {
    568         "hn_id": "35594579",
    569         "title": "Overview of the James Webb Space Telescope Mission",
    570         "points": 1,
    571         "comments": 1,
    572         "url": "https://news.ycombinator.com/item?id=35594579",
    573         "created_at": "2023-04-16T21:49:06Z"
    574       }
    575     ],
    576     "top_points": 159,
    577     "total_points": 310,
    578     "total_comments": 70
    579   }
    580 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs