scan-v4.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v4.json (29835B)
      1 {
      2   "scan_version": 4,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "A2H-MAS: An Algorithm-to-HLS Multi-Agent System for Automated and Reliable FPGA Implementation",
      6     "authors": [
      7       "Jie Lei",
      8       "Ruofan Jia",
      9       "J. Andrew Zhang",
     10       "Hao Zhang"
     11     ],
     12     "year": 2025,
     13     "venue": "Unknown",
     14     "arxiv_id": "2508.10904",
     15     "doi": "10.48550/arXiv.2508.10904"
     16   },
     17   "checklist": {
     18     "claims_and_evidence": {
     19       "abstract_claims_supported": {
     20         "applies": true,
     21         "answer": true,
     22         "justification": "Abstract claims of 'functionally correct, resource-efficient, and latency-optimized HLS designs' are supported by Tables I and II showing working implementations with specific resource and timing numbers.",
     23         "source": "opus"
     24       },
     25       "causal_claims_justified": {
     26         "applies": true,
     27         "answer": true,
     28         "justification": "The ablation study (Direct → Adaptation → Refinement) uses controlled single-variable manipulation to show each stage's causal contribution to resource reduction.",
     29         "source": "opus"
     30       },
     31       "generalization_bounded": {
     32         "applies": true,
     33         "answer": false,
     34         "justification": "The title claims general 'Algorithm-to-HLS' capability, but results are limited to two wireless communication tasks. The conclusion mentions extending to 'computer vision and signal processing' without bounding current claims.",
     35         "source": "opus"
     36       },
     37       "alternative_explanations_discussed": {
     38         "applies": true,
     39         "answer": false,
     40         "justification": "No discussion of alternative explanations. Whether improvements stem from the multi-agent architecture vs. the knowledge library vs. specific algorithmic transformations is not disentangled beyond the 3-level ablation.",
     41         "source": "opus"
     42       },
     43       "proxy_outcome_distinction": {
     44         "applies": true,
     45         "answer": false,
     46         "justification": "The paper measures functional correctness (C simulation pass), resource usage (LUTs, FFs, DSP, BRAM), and clock frequency, then frames the system as producing 'reliable and high-quality hardware implementations' and demonstrating 'effectiveness and robustness for complex hardware development workflows.' The gap between measured metrics (correctness + resources on 2 tasks) and the broader claims (reliable, robust, complex workflows) is not acknowledged.",
     47         "source": "opus"
     48       }
     49     },
     50     "limitations_and_scope": {
     51       "limitations_section_present": {
     52         "applies": true,
     53         "answer": false,
     54         "justification": "No dedicated limitations or threats-to-validity section. The conclusion mentions future work but does not discuss limitations of the current system.",
     55         "source": "opus"
     56       },
     57       "threats_to_validity_specific": {
     58         "applies": true,
     59         "answer": false,
     60         "justification": "No specific threats to validity are discussed anywhere in the paper.",
     61         "source": "opus"
     62       },
     63       "scope_boundaries_stated": {
     64         "applies": true,
     65         "answer": false,
     66         "justification": "No explicit statements about what the results do NOT show. Future work mentions extending to other domains but does not state specific scope boundaries for current claims.",
     67         "source": "opus"
     68       }
     69     },
     70     "conflicts_of_interest": {
     71       "funding_disclosed": {
     72         "applies": true,
     73         "answer": false,
     74         "justification": "No funding information or acknowledgments section is present in the paper.",
     75         "source": "opus"
     76       },
     77       "affiliations_disclosed": {
     78         "applies": true,
     79         "answer": true,
     80         "justification": "Author affiliations are clearly listed: University of Technology Sydney and Xidian University.",
     81         "source": "opus"
     82       },
     83       "funder_independent_of_outcome": {
     84         "applies": true,
     85         "answer": false,
     86         "justification": "No funding is disclosed, so independence cannot be assessed. The paper uses Anthropic's Claude Code but does not disclose any relationship with Anthropic.",
     87         "source": "opus"
     88       },
     89       "financial_interests_declared": {
     90         "applies": true,
     91         "answer": false,
     92         "justification": "No competing interests or financial interests statement is present in the paper.",
     93         "source": "opus"
     94       }
     95     },
     96     "scope_and_framing": {
     97       "key_terms_defined": {
     98         "applies": true,
     99         "answer": false,
    100         "justification": "Core terms used without definition: 'HLS', 'FPGA', 'LLM', 'agent', 'streaming'. These are standard in hardware but not universally known; paper assumes reader familiarity.",
    101         "source": "haiku"
    102       },
    103       "intended_contribution_clear": {
    104         "applies": true,
    105         "answer": true,
    106         "justification": "Contributions explicitly stated in abstract: (1) A2H-MAS framework for MATLAB-to-HLS conversion, (2) algorithm-hardware co-design methodology, (3) experimental validation on wireless algorithms.",
    107         "source": "haiku"
    108       },
    109       "engagement_with_prior_work": {
    110         "applies": true,
    111         "answer": true,
    112         "justification": "Section II reviews VerilogEval, MG-Verilog, VGen, VeriMind, HLSPilot, HDLAgent, AutoChip and shows how this work differs: uses SOTA LLMs without fine-tuning, multi-agent with standardized interfaces, focuses on performance metrics beyond functional correctness.",
    113         "source": "haiku"
    114       }
    115     }
    116   },
    117   "type_checklist": {
    118     "empirical": {
    119       "artifacts": {
    120         "code_released": {
    121           "applies": true,
    122           "answer": false,
    123           "justification": "No repository URL, code archive, or link to source code is provided anywhere in the paper.",
    124           "source": "opus"
    125         },
    126         "data_released": {
    127           "applies": true,
    128           "answer": false,
    129           "justification": "No datasets, MATLAB source files, or HLS outputs are released. The test algorithms are described but not made available.",
    130           "source": "opus"
    131         },
    132         "environment_specified": {
    133           "applies": true,
    134           "answer": false,
    135           "justification": "The paper mentions Xilinx Vitis HLS, MATLAB, and NI USRP X310 but provides no version numbers, dependency specifications, or environment setup details.",
    136           "source": "opus"
    137         },
    138         "reproduction_instructions": {
    139           "applies": true,
    140           "answer": false,
    141           "justification": "No step-by-step reproduction instructions, README, or scripts are provided.",
    142           "source": "opus"
    143         }
    144       },
    145       "statistical_methodology": {
    146         "confidence_intervals_or_error_bars": {
    147           "applies": true,
    148           "answer": false,
    149           "justification": "Results in Tables I and II report only point estimates for resource usage, clock frequency, and latency with no confidence intervals or error bars.",
    150           "source": "opus"
    151         },
    152         "significance_tests": {
    153           "applies": true,
    154           "answer": false,
    155           "justification": "The paper claims A2H-MAS is effective compared to direct translation but provides no statistical significance tests.",
    156           "source": "opus"
    157         },
    158         "effect_sizes_reported": {
    159           "applies": true,
    160           "answer": true,
    161           "justification": "Concrete resource reductions with baseline context are reported, e.g., 'LUT consumption is reduced from 36,500 to 685 for calcThreshold' (Section V-B), providing magnitude of effect.",
    162           "source": "opus"
    163         },
    164         "sample_size_justified": {
    165           "applies": true,
    166           "answer": false,
    167           "justification": "Only two wireless communication systems tested with a handful of submodules. No justification for why this sample is sufficient.",
    168           "source": "opus"
    169         },
    170         "variance_reported": {
    171           "applies": true,
    172           "answer": false,
    173           "justification": "All results appear to be from single runs. No variance, standard deviation, or multiple-run results are reported.",
    174           "source": "opus"
    175         }
    176       },
    177       "evaluation_design": {
    178         "baselines_included": {
    179           "applies": true,
    180           "answer": true,
    181           "justification": "The ablation study in Table II compares Direct (naive LLM translation), Adaptation, and Refinement strategies.",
    182           "source": "opus"
    183         },
    184         "baselines_contemporary": {
    185           "applies": true,
    186           "answer": false,
    187           "justification": "No comparison against other contemporary LLM-based hardware generation systems (VeriMind, HLSPilot, HDLAgent, AutoChip) despite discussing them in related work. The only baseline is the authors' own naive Direct translation.",
    188           "source": "opus"
    189         },
    190         "ablation_study": {
    191           "applies": true,
    192           "answer": true,
    193           "justification": "Table II presents ablation results comparing Direct, Adaptation, and Refinement stages on calcThreshold and extractSSBsig modules.",
    194           "source": "opus"
    195         },
    196         "multiple_metrics": {
    197           "applies": true,
    198           "answer": true,
    199           "justification": "Results report LUTs, FFs, DSP, BRAMs, clock frequency (MHz), and latency — multiple complementary hardware metrics.",
    200           "source": "opus"
    201         },
    202         "human_evaluation": {
    203           "applies": false,
    204           "answer": false,
    205           "justification": "Human evaluation is not relevant here; correctness is verified through automated simulation (C simulation, synthesis, RTL co-simulation) and on-board hardware validation.",
    206           "source": "opus"
    207         },
    208         "held_out_test_set": {
    209           "applies": false,
    210           "answer": false,
    211           "justification": "Not an ML model evaluated on train/test splits. The system is tested on engineering tasks with deterministic correctness criteria.",
    212           "source": "opus"
    213         },
    214         "per_category_breakdown": {
    215           "applies": true,
    216           "answer": true,
    217           "justification": "Table I provides per-submodule breakdowns for all modules in both 5G NR (5 submodules + top) and WLAN (4 submodules + top) tasks.",
    218           "source": "opus"
    219         },
    220         "failure_cases_discussed": {
    221           "applies": true,
    222           "answer": true,
    223           "justification": "The Direct strategy for calcThreshold 'Failed' to achieve post-route timing closure (Table II), explicitly reported and discussed.",
    224           "source": "opus"
    225         },
    226         "negative_results_reported": {
    227           "applies": true,
    228           "answer": true,
    229           "justification": "The Direct baseline failing timing closure for calcThreshold is a negative result. Increased BRAM usage from integration overhead is also noted.",
    230           "source": "opus"
    231         }
    232       },
    233       "setup_transparency": {
    234         "model_versions_specified": {
    235           "applies": true,
    236           "answer": false,
    237           "justification": "Section V states 'Claude Code was employed' but provides no model version, snapshot date, or API version. Reference [6] cites 'Claude sonnet 4' without a specific version identifier.",
    238           "source": "opus"
    239         },
    240         "prompts_provided": {
    241           "applies": true,
    242           "answer": true,
    243           "justification": "Figures 2 and 3 show structured prompt templates with agent type, core mission, input/output parameters, workflow phases, and tool commands. Substantial detail on agent prompt structure is provided.",
    244           "source": "opus"
    245         },
    246         "hyperparameters_reported": {
    247           "applies": true,
    248           "answer": false,
    249           "justification": "No LLM hyperparameters (temperature, top-p, max tokens) are reported for the Claude Code usage.",
    250           "source": "opus"
    251         },
    252         "scaffolding_described": {
    253           "applies": true,
    254           "answer": true,
    255           "justification": "The multi-agent scaffolding is described in detail: Sections III and IV cover standardized interfaces (Fig. 2), rule-guided workflows (Fig. 3), deterministic tool usage, feedback mechanisms, and the 8-phase pipeline (Fig. 5).",
    256           "source": "opus"
    257         },
    258         "data_preprocessing_documented": {
    259           "applies": true,
    260           "answer": true,
    261           "justification": "Preprocessing is documented: modularization (Phase I), test data generation from intermediate variables (Phase II), function flattening (Phase III), with standardized naming conventions.",
    262           "source": "opus"
    263         }
    264       },
    265       "data_integrity": {
    266         "raw_data_available": {
    267           "applies": true,
    268           "answer": false,
    269           "justification": "No raw data (MATLAB source files, generated HLS code, synthesis reports) is made available for independent verification.",
    270           "source": "opus"
    271         },
    272         "data_collection_described": {
    273           "applies": true,
    274           "answer": true,
    275           "justification": "Section IV-B describes test data generation: executing the original algorithm, recording intermediate variables, and storing with standardized naming conventions.",
    276           "source": "opus"
    277         },
    278         "recruitment_methods_described": {
    279           "applies": false,
    280           "answer": false,
    281           "justification": "No human participants. The study evaluates automated hardware generation on specific algorithms.",
    282           "source": "opus"
    283         },
    284         "data_pipeline_documented": {
    285           "applies": true,
    286           "answer": true,
    287           "justification": "The full pipeline from MATLAB input through modularization, flattening, optimization, translation, refinement, and integration is documented in Section IV with figures.",
    288           "source": "opus"
    289         }
    290       },
    291       "contamination": {
    292         "training_cutoff_stated": {
    293           "applies": false,
    294           "answer": false,
    295           "justification": "The paper evaluates a multi-agent system's engineering outputs, not a pre-trained model's knowledge on a standard benchmark.",
    296           "source": "opus"
    297         },
    298         "train_test_overlap_discussed": {
    299           "applies": false,
    300           "answer": false,
    301           "justification": "Not evaluating a pre-trained model on a benchmark; evaluating a tool pipeline on custom engineering tasks.",
    302           "source": "opus"
    303         },
    304         "benchmark_contamination_addressed": {
    305           "applies": false,
    306           "answer": false,
    307           "justification": "No standard benchmark evaluation of model knowledge is conducted.",
    308           "source": "opus"
    309         }
    310       },
    311       "human_studies": {
    312         "pre_registered": {
    313           "applies": false,
    314           "answer": false,
    315           "justification": "No human participants.",
    316           "source": "opus"
    317         },
    318         "irb_or_ethics_approval": {
    319           "applies": false,
    320           "answer": false,
    321           "justification": "No human participants.",
    322           "source": "opus"
    323         },
    324         "demographics_reported": {
    325           "applies": false,
    326           "answer": false,
    327           "justification": "No human participants.",
    328           "source": "opus"
    329         },
    330         "inclusion_exclusion_criteria": {
    331           "applies": false,
    332           "answer": false,
    333           "justification": "No human participants.",
    334           "source": "opus"
    335         },
    336         "randomization_described": {
    337           "applies": false,
    338           "answer": false,
    339           "justification": "No human participants.",
    340           "source": "opus"
    341         },
    342         "blinding_described": {
    343           "applies": false,
    344           "answer": false,
    345           "justification": "No human participants.",
    346           "source": "opus"
    347         },
    348         "attrition_reported": {
    349           "applies": false,
    350           "answer": false,
    351           "justification": "No human participants.",
    352           "source": "opus"
    353         }
    354       },
    355       "cost_and_practicality": {
    356         "inference_cost_reported": {
    357           "applies": true,
    358           "answer": false,
    359           "justification": "No API costs, token consumption, or wall-clock time for the LLM-based code generation process is reported despite using Claude Code extensively.",
    360           "source": "opus"
    361         },
    362         "compute_budget_stated": {
    363           "applies": true,
    364           "answer": false,
    365           "justification": "No total computational budget, API spend, or hardware resources used for the generation process is stated.",
    366           "source": "opus"
    367         }
    368       },
    369       "experimental_rigor": {
    370         "seed_sensitivity_reported": {
    371           "applies": true,
    372           "answer": false,
    373           "justification": "LLM outputs are non-deterministic, but no sensitivity analysis across multiple runs is reported. All results appear to be from single runs.",
    374           "source": "opus"
    375         },
    376         "number_of_runs_stated": {
    377           "applies": true,
    378           "answer": false,
    379           "justification": "The number of experimental runs is not stated. Results appear to be single-run.",
    380           "source": "opus"
    381         },
    382         "hyperparameter_search_budget": {
    383           "applies": true,
    384           "answer": false,
    385           "justification": "The Refinement phase includes design space exploration (DSE) but no budget (number of configurations tried, compute spent on search) is reported.",
    386           "source": "opus"
    387         },
    388         "best_config_selection_justified": {
    389           "applies": true,
    390           "answer": false,
    391           "justification": "DSE is mentioned in Phase VII but no details on how many alternatives were explored or how the best configuration was selected.",
    392           "source": "opus"
    393         },
    394         "multiple_comparison_correction": {
    395           "applies": false,
    396           "answer": false,
    397           "justification": "No statistical tests are performed, so multiple comparison correction is not applicable.",
    398           "source": "opus"
    399         },
    400         "self_comparison_bias_addressed": {
    401           "applies": true,
    402           "answer": false,
    403           "justification": "The authors evaluate their own system against their own naive baseline (Direct translation). No acknowledgment of self-comparison bias or independent evaluation.",
    404           "source": "opus"
    405         },
    406         "compute_budget_vs_performance": {
    407           "applies": true,
    408           "answer": false,
    409           "justification": "Adaptation and Refinement stages require additional LLM calls and synthesis runs compared to Direct translation, but compute costs are not compared across the three strategies.",
    410           "source": "opus"
    411         },
    412         "benchmark_construct_validity": {
    413           "applies": true,
    414           "answer": false,
    415           "justification": "No discussion of whether the two wireless communication tasks are representative of the broader claim of 'automated and reliable FPGA implementation.'",
    416           "source": "opus"
    417         },
    418         "scaffold_confound_addressed": {
    419           "applies": true,
    420           "answer": false,
    421           "justification": "The ablation compares Direct (single LLM call) vs Adaptation vs Refinement (multi-agent pipeline with knowledge library), but these differ in both algorithmic approach AND scaffolding complexity. The paper does not discuss whether improvements stem from the multi-agent scaffold vs. the algorithmic transformations vs. the knowledge library, attributing all gains to the system as a whole.",
    422           "source": "opus"
    423         }
    424       }
    425     }
    426   },
    427   "claims": [
    428     {
    429       "claim": "Algorithm-level transformation has greater impact on hardware efficiency than pragma-level tuning",
    430       "evidence": "Table II ablation: Adaptation stage reduces LUTs 98% (36,500→685 for calcThreshold), while Refinement stage reduces further by 75% (685→173). Algorithm-level changes dominate pragmas.",
    431       "supported": "strong"
    432     },
    433     {
    434       "claim": "Order-of-magnitude improvements in resource efficiency are achievable through algorithm selection",
    435       "evidence": "Table II shows LUT reduction from 36,500 to 275 (132x) for calcThreshold via streaming algorithm restructuring.",
    436       "supported": "strong"
    437     },
    438     {
    439       "claim": "A2H-MAS produces functionally correct hardware implementations for wireless communication algorithms",
    440       "evidence": "Table I reports successful implementations for 5G NR SSB detection (operating at 292.23 MHz) and WLAN synchronization (337.61 MHz); functional validation via C simulation and RTL co-simulation confirmed.",
    441       "supported": "strong"
    442     },
    443     {
    444       "claim": "Multi-agent system with standardized interfaces improves reliability by reducing hallucinations and forgetting",
    445       "evidence": "Design principle articulated in Figure 1 and Section III, but no empirical comparison to single-agent baseline or quantified reduction in errors.",
    446       "supported": "weak"
    447     },
    448     {
    449       "claim": "Dataflow-oriented modular decomposition enables scalable system extensions and targeted optimization",
    450       "evidence": "Demonstrated through 8-phase workflow and ability to optimize individual submodules independently, but no comparison to alternative decomposition strategies.",
    451       "supported": "moderate"
    452     },
    453     {
    454       "claim": "Deterministic tool-driven validation ensures correctness and reproducibility of generated code",
    455       "evidence": "Described in Section III-B: MATLAB batch execution for Phase IV validation, C simulation and co-simulation in Phase VI. But validation methodology not systematically benchmarked.",
    456       "supported": "moderate"
    457     },
    458     {
    459       "claim": "A2H-MAS consistently produces resource-efficient and latency-optimized designs compared to naive LLM translation",
    460       "evidence": "Table II Direct→Adaptation→Refinement shows improvements, but only compares to Direct baseline; no comparison to HLSPilot, HDLAgent, or other state-of-the-art methods.",
    461       "supported": "moderate"
    462     }
    463   ],
    464   "methodology_tags": [
    465     "benchmark-eval",
    466     "case-study"
    467   ],
    468   "key_findings": "A2H-MAS, a multi-agent framework with standardized interfaces, automates the translation of MATLAB algorithms to hardware-efficient HLS code through eight modular phases. The system prioritizes algorithm-level transformations (e.g., shifting from frame-based to streaming paradigms) over pragma tuning, yielding order-of-magnitude resource reductions (LUTs: 36,500→275). Successfully implemented on two wireless communication systems (5G NR SSB detection, WLAN synchronization) with functional correctness validated via RTL co-simulation.",
    469   "red_flags": [
    470     {
    471       "flag": "No comparison to prior art",
    472       "detail": "Only compared to naive Direct baseline; no evaluation against HLSPilot, HDLAgent, or other recent multi-agent/agent-based hardware design systems mentioned in related work."
    473     },
    474     {
    475       "flag": "Extremely limited evaluation scope",
    476       "detail": "Only 2 application domains (5G NR, WLAN), both wireless/dataflow-oriented. No evidence of applicability to control-flow-heavy algorithms, computer vision, or other domains."
    477     },
    478     {
    479       "flag": "LLM version unspecified",
    480       "detail": "Paper states 'Claude Code was employed' with no version, snapshot date, or model ID; impossible to replicate or assess contamination."
    481     },
    482     {
    483       "flag": "Code and data not released",
    484       "detail": "No source code, test datasets, or generated outputs (C++/Verilog) made available; reproducibility impossible."
    485     },
    486     {
    487       "flag": "No failure mode analysis",
    488       "detail": "Claims 'reliability' but only shows one failure (Direct→calcThreshold timing closure); no systematic analysis of when/why method fails."
    489     },
    490     {
    491       "flag": "Single runs, no variance reporting",
    492       "detail": "Each module synthesized once; no error bars, no multiple random seeds, no variance estimate for synthesis results."
    493     },
    494     {
    495       "flag": "Sample size not justified",
    496       "detail": "Only 2 applications with ~5 submodules each; no justification for why 2 domains suffice or power analysis."
    497     },
    498     {
    499       "flag": "No discussion of generalization boundaries",
    500       "detail": "Claims applicability to 'complex wireless communication workloads' but never explicitly states limitations (e.g., streaming algorithms only, no adaptive control)."
    501     }
    502   ],
    503   "cited_papers": [
    504     {
    505       "title": "VerilogEval: Evaluating Large Language Models for Verilog Code Generation",
    506       "relevance": "Benchmark for HDL generation; establishes baseline for LLM performance on Verilog without fine-tuning."
    507     },
    508     {
    509       "title": "MG-Verilog: Multi-Grained Dataset Towards Enhanced LLM-Assisted Verilog Generation",
    510       "relevance": "Fine-tuning approach for Verilog; contrasts with this paper's general-purpose LLM strategy."
    511     },
    512     {
    513       "title": "VeriMind: Agentic LLM for Automated Verilog Generation with a Novel Evaluation Metric",
    514       "relevance": "Multi-agent framework for HDL; represents prior work in agent-based hardware design."
    515     },
    516     {
    517       "title": "HLSPilot: LLM-Based High-Level Synthesis",
    518       "relevance": "Concurrent work on LLM-driven HLS; baseline for comparison if evaluated."
    519     },
    520     {
    521       "title": "HDLAgent: A Benchmark for LLM-Driven RTL Design Using HDLAgent",
    522       "relevance": "Agent-based RTL generation; establishes evaluation protocols for hardware design automation."
    523     },
    524     {
    525       "title": "ChatDev: Communicative Agents for Software Development",
    526       "relevance": "Multi-agent collaboration framework; architectural pattern for role-based task decomposition."
    527     },
    528     {
    529       "title": "MetaGPT: Meta Programming for a Multi-Agent Collaborative Framework",
    530       "relevance": "Structured multi-agent workflows; applicable to hierarchical hardware design decomposition."
    531     }
    532   ],
    533   "engagement_factors": {
    534     "practical_relevance": {
    535       "score": 1,
    536       "justification": "Relevant only to the narrow intersection of FPGA designers working with MATLAB-to-HLS flows, not broadly applicable to most developers."
    537     },
    538     "surprise_contrarian": {
    539       "score": 1,
    540       "justification": "The finding that algorithm-level restructuring matters more than pragma tuning is known in the HLS community, though the magnitude (98% LUT reduction) is notable."
    541     },
    542     "fear_safety": {
    543       "score": 0,
    544       "justification": "No safety, security, or risk angle whatsoever."
    545     },
    546     "drama_conflict": {
    547       "score": 0,
    548       "justification": "No controversy, no challenges to specific companies or benchmarks, purely constructive contribution."
    549     },
    550     "demo_ability": {
    551       "score": 0,
    552       "justification": "No code, no demo, no reproducibility artifacts released; requires proprietary FPGA toolchains even conceptually."
    553     },
    554     "brand_recognition": {
    555       "score": 1,
    556       "justification": "From University of Technology Sydney, a recognized but not famous-in-tech institution; mentions Claude Code but is not from Anthropic."
    557     }
    558   },
    559   "hn_data": {
    560     "threads": [
    561       {
    562         "hn_id": "29279146",
    563         "title": "Crypto Wash Trading",
    564         "points": 572,
    565         "comments": 299,
    566         "url": "https://news.ycombinator.com/item?id=29279146",
    567         "created_at": "2021-11-19T16:44:26Z"
    568       },
    569       {
    570         "hn_id": "44271284",
    571         "title": "Self-Adapting Language Models",
    572         "points": 246,
    573         "comments": 73,
    574         "url": "https://news.ycombinator.com/item?id=44271284",
    575         "created_at": "2025-06-13T19:03:42Z"
    576       },
    577       {
    578         "hn_id": "41306555",
    579         "title": "Exploring Impact of Code in Pre-Training",
    580         "points": 5,
    581         "comments": 2,
    582         "url": "https://news.ycombinator.com/item?id=41306555",
    583         "created_at": "2024-08-21T03:38:33Z"
    584       },
    585       {
    586         "hn_id": "44443760",
    587         "title": "Your Language Model Can Handle Non-Canonical Tokenizations",
    588         "points": 2,
    589         "comments": 0,
    590         "url": "https://news.ycombinator.com/item?id=44443760",
    591         "created_at": "2025-07-02T13:53:44Z"
    592       },
    593       {
    594         "hn_id": "41745068",
    595         "title": "Pre-training with code improves performance on NL reasoning",
    596         "points": 2,
    597         "comments": 0,
    598         "url": "https://news.ycombinator.com/item?id=41745068",
    599         "created_at": "2024-10-04T20:02:19Z"
    600       },
    601       {
    602         "hn_id": "44116793",
    603         "title": "When Models Don't Collapse: On the Consistency of Iterative MLE",
    604         "points": 1,
    605         "comments": 0,
    606         "url": "https://news.ycombinator.com/item?id=44116793",
    607         "created_at": "2025-05-28T15:06:51Z"
    608       },
    609       {
    610         "hn_id": "43503479",
    611         "title": "The Quantum Technology Job Market: A Quantitative Investigation",
    612         "points": 1,
    613         "comments": 0,
    614         "url": "https://news.ycombinator.com/item?id=43503479",
    615         "created_at": "2025-03-28T10:05:27Z"
    616       },
    617       {
    618         "hn_id": "42884637",
    619         "title": "Player Performance and Skill Rating in Esports [pdf]",
    620         "points": 1,
    621         "comments": 0,
    622         "url": "https://news.ycombinator.com/item?id=42884637",
    623         "created_at": "2025-01-31T04:14:07Z"
    624       },
    625       {
    626         "hn_id": "41367147",
    627         "title": "Kotlin's Type System Is (Also) Unsound",
    628         "points": 1,
    629         "comments": 0,
    630         "url": "https://news.ycombinator.com/item?id=41367147",
    631         "created_at": "2024-08-27T13:11:45Z"
    632       },
    633       {
    634         "hn_id": "41318909",
    635         "title": "To Code, or Not to Code? Exploring Impact of Code in Pre-Training",
    636         "points": 1,
    637         "comments": 0,
    638         "url": "https://news.ycombinator.com/item?id=41318909",
    639         "created_at": "2024-08-22T11:09:37Z"
    640       }
    641     ],
    642     "top_points": 572,
    643     "total_points": 832,
    644     "total_comments": 374
    645   }
    646 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs