scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (26201B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "KernelBand: Steering LLM-based Kernel Optimization via Hardware-Aware Multi-Armed Bandits",
      6     "authors": [
      7       "Dezhi Ran",
      8       "Shuxiao Xie",
      9       "Mingfang Ji",
     10       "Anmin Liu",
     11       "Mengzhou Wu",
     12       "Yuan Cao",
     13       "Yuzhe Guo",
     14       "Hao Yu",
     15       "Linyi Li",
     16       "Yitao Hu",
     17       "Wei Yang",
     18       "Tao Xie"
     19     ],
     20     "year": 2025,
     21     "venue": "arXiv",
     22     "arxiv_id": "2511.18868",
     23     "doi": null
     24   },
     25   "checklist": {
     26     "claims_and_evidence": {
     27       "abstract_claims_supported": {
     28         "applies": true,
     29         "answer": true,
     30         "justification": "The abstract's '33% average improvement' is supported by Table 1 (geometric mean speedup improvements of 20.8%, 36.8%, 42.5% across three GPUs averaging ~33%); '1.91× speedup' and four LLMs / three GPU architectures are confirmed by Tables 1 and 2.",
     31         "source": "haiku"
     32       },
     33       "causal_claims_justified": {
     34         "applies": true,
     35         "answer": true,
     36         "justification": "Table 4 ablation studies isolate component contributions by removing individual modules; the LLM-strategy-selection ablation (0.97×) vs full KERNELBAND (1.57×) provides adequate support for causal attribution to the bandit policy.",
     37         "source": "haiku"
     38       },
     39       "generalization_bounded": {
     40         "applies": true,
     41         "answer": false,
     42         "justification": "Claims of 'consistent and substantial outperformance' are not bounded to NVIDIA GPUs and Triton kernels; the paper draws no explicit caveats about whether findings generalize to AMD hardware, CUDA kernels, or other DSLs.",
     43         "source": "haiku"
     44       },
     45       "alternative_explanations_discussed": {
     46         "applies": true,
     47         "answer": false,
     48         "justification": "The paper does not discuss whether benchmark contamination (LLMs having seen TritonBench-G kernels during training), GEAK's adaptation quality, or LLM sampling variance could partially explain results; ablations partially address strategy attribution but not these confounds.",
     49         "source": "haiku"
     50       },
     51       "proxy_outcome_distinction": {
     52         "applies": true,
     53         "answer": true,
     54         "justification": "The paper measures actual GPU kernel speedup (latency ratio) and correctness (torch.allclose), which directly match the claimed outcomes without conflating proxy metrics with real performance.",
     55         "source": "haiku"
     56       }
     57     },
     58     "limitations_and_scope": {
     59       "limitations_section_present": {
     60         "applies": true,
     61         "answer": false,
     62         "justification": "There is no dedicated limitations or threats-to-validity section; the conclusion discusses scope in passing but does not systematically enumerate limitations.",
     63         "source": "haiku"
     64       },
     65       "threats_to_validity_specific": {
     66         "applies": true,
     67         "answer": false,
     68         "justification": "No threats to validity are discussed anywhere in the paper; issues such as benchmark contamination, LLM sampling variance, single-run results, or GEAK adaptation fidelity are unaddressed.",
     69         "source": "haiku"
     70       },
     71       "scope_boundaries_stated": {
     72         "applies": true,
     73         "answer": false,
     74         "justification": "The paper does not explicitly state what the results do NOT show; no caveats about AMD hardware, non-Triton kernels, or tasks beyond GPU kernel optimization are provided.",
     75         "source": "haiku"
     76       }
     77     },
     78     "conflicts_of_interest": {
     79       "funding_disclosed": {
     80         "applies": true,
     81         "answer": false,
     82         "justification": "No funding acknowledgment section is present in the paper; only institutional affiliations are listed without disclosure of grants or sponsors.",
     83         "source": "haiku"
     84       },
     85       "affiliations_disclosed": {
     86         "applies": true,
     87         "answer": true,
     88         "justification": "Author affiliations are clearly disclosed in the paper header: Peking University, ECNU, Tianjin University, HKUST, Simon Fraser University, UT Dallas, and Fudan University.",
     89         "source": "haiku"
     90       },
     91       "funder_independent_of_outcome": {
     92         "applies": false,
     93         "answer": false,
     94         "justification": "No funding source is disclosed, making this criterion not assessable.",
     95         "source": "haiku"
     96       },
     97       "financial_interests_declared": {
     98         "applies": true,
     99         "answer": false,
    100         "justification": "There is no competing interests statement or financial disclosure anywhere in the paper.",
    101         "source": "haiku"
    102       }
    103     },
    104     "scope_and_framing": {
    105       "key_terms_defined": {
    106         "applies": true,
    107         "answer": true,
    108         "justification": "Key terms are defined precisely: kernel optimization as minimizing latency while preserving correctness (Eq. 1), the contextual bandit formulation (Section 2.2), optimization strategies with descriptions (Table 6), and regret bound components (Theorem 1).",
    109         "source": "haiku"
    110       },
    111       "intended_contribution_clear": {
    112         "applies": true,
    113         "answer": true,
    114         "justification": "Three explicit contribution bullet points in the introduction state: the MAB framework formulation, hardware-aware acquisition strategy, and empirical validation across GPUs and LLMs.",
    115         "source": "haiku"
    116       },
    117       "engagement_with_prior_work": {
    118         "applies": true,
    119         "answer": true,
    120         "justification": "Section 5 (Related Work) positions KERNELBAND against agent-based methods (STARK, CudaForge, GEAK, TritonForge), training-based methods (ConCuR, Kevin, TritonRL), and the MAB literature, explaining how this work differs from and complements each strand.",
    121         "source": "haiku"
    122       }
    123     }
    124   },
    125   "type_checklist": {
    126     "empirical": {
    127       "artifacts": {
    128         "code_released": {
    129           "applies": true,
    130           "answer": false,
    131           "justification": "No code repository or GitHub link is provided anywhere in the paper; no promise of future availability is made.",
    132           "source": "haiku"
    133         },
    134         "data_released": {
    135           "applies": true,
    136           "answer": true,
    137           "justification": "The paper evaluates on TritonBench-G (Li et al., 2025b), a publicly available benchmark published at ACL 2025, used with only one exclusion (sin_computation) clearly documented.",
    138           "source": "haiku"
    139         },
    140         "environment_specified": {
    141           "applies": true,
    142           "answer": false,
    143           "justification": "The paper specifies 'CUDA 12.1 with Triton 3.3.0' and mentions scikit-learn for KMeans, but provides no requirements.txt, Dockerfile, or complete dependency specification.",
    144           "source": "haiku"
    145         },
    146         "reproduction_instructions": {
    147           "applies": true,
    148           "answer": false,
    149           "justification": "Algorithm 1 describes the method workflow but no step-by-step instructions for setting up and running the full experimental pipeline are provided.",
    150           "source": "haiku"
    151         }
    152       },
    153       "statistical_methodology": {
    154         "confidence_intervals_or_error_bars": {
    155           "applies": true,
    156           "answer": false,
    157           "justification": "Tables 1 and 2 report only point estimates (geometric mean speedup, Fast@1 %, Correct %); no confidence intervals or error bars appear for any result.",
    158           "source": "haiku"
    159         },
    160         "significance_tests": {
    161           "applies": true,
    162           "answer": false,
    163           "justification": "No statistical significance tests are applied to any comparative claim; all comparisons are made on point estimates across a 183-kernel benchmark.",
    164           "source": "haiku"
    165         },
    166         "effect_sizes_reported": {
    167           "applies": true,
    168           "answer": true,
    169           "justification": "Geometric mean speedup ratios (e.g., 1.91× vs 1.34× on A100) and Fast@1 percentages with baseline context constitute meaningful effect-size measures for the optimization task.",
    170           "source": "haiku"
    171         },
    172         "sample_size_justified": {
    173           "applies": true,
    174           "answer": false,
    175           "justification": "The 183-kernel benchmark is used without statistical justification; the 50-kernel subset is described for distribution-preservation (stratified sampling, seed=42) but no power analysis or sample adequacy argument is presented.",
    176           "source": "haiku"
    177         },
    178         "variance_reported": {
    179           "applies": true,
    180           "answer": false,
    181           "justification": "No variance, standard deviation, or inter-run spread is reported; experiments use temperature=1.0 (high LLM stochasticity) with single-run point estimates for all results.",
    182           "source": "haiku"
    183         }
    184       },
    185       "evaluation_design": {
    186         "baselines_included": {
    187           "applies": true,
    188           "answer": true,
    189           "justification": "GEAK (agent-based), Best-of-N (sampling), and PyTorch baselines (eager, inductor, max-autotune) are all included for comparison.",
    190           "source": "haiku"
    191         },
    192         "baselines_contemporary": {
    193           "applies": true,
    194           "answer": true,
    195           "justification": "GEAK is a concurrent 2025 work specifically targeting Triton kernel optimization with iterative refinement; BoN is the natural competitive control; PyTorch torch.compile represents current practice.",
    196           "source": "haiku"
    197         },
    198         "ablation_study": {
    199           "applies": true,
    200           "answer": true,
    201           "justification": "Table 4 presents seven ablation configurations: single-component removals (no clustering K=1, no profiling, LLM strategy selection) and framework-level ablations (no strategy set, raw profiling injection, BoN lower bound).",
    202           "source": "haiku"
    203         },
    204         "multiple_metrics": {
    205           "applies": true,
    206           "answer": true,
    207           "justification": "Three complementary metrics are used: Correct (%), Fast@1 (%), and Geometric Mean Speedup; cost-normalized speedup is additionally reported in Figure 4.",
    208           "source": "haiku"
    209         },
    210         "human_evaluation": {
    211           "applies": false,
    212           "answer": false,
    213           "justification": "No human evaluation is relevant; this is an automated system optimization task evaluated entirely by objective hardware performance metrics.",
    214           "source": "haiku"
    215         },
    216         "held_out_test_set": {
    217           "applies": false,
    218           "answer": false,
    219           "justification": "The task is optimization rather than prediction; kernels are optimization targets, not labeled data points requiring a train/test split.",
    220           "source": "haiku"
    221         },
    222         "per_category_breakdown": {
    223           "applies": true,
    224           "answer": true,
    225           "justification": "Table 1 breaks results down by difficulty level (L1-2, L3, L4-5); Table 7 shows category distribution; Appendix I provides per-strategy statistics across both hardware platforms.",
    226           "source": "haiku"
    227         },
    228         "failure_cases_discussed": {
    229           "applies": true,
    230           "answer": true,
    231           "justification": "Compilation failures receive 0 reward; GEAK's 85% failure rate on hard kernels is documented; the catastrophic collapse to 0.97× for LLM strategy selection is explicitly analyzed as a failure mode.",
    232           "source": "haiku"
    233         },
    234         "negative_results_reported": {
    235           "applies": true,
    236           "answer": true,
    237           "justification": "Ablations report negative results: LLM strategy selection yields 0.97× (worse than reference kernel), raw profiling without strategy set drops correctness to 43.9%, and BoN fails on 85% of hard kernels.",
    238           "source": "haiku"
    239         }
    240       },
    241       "setup_transparency": {
    242         "model_versions_specified": {
    243           "applies": true,
    244           "answer": true,
    245           "justification": "Specific model versions are named: DeepSeek-V3.2, GPT-5, Claude Opus 4.5, Gemini 3 Flash, with citations to their official documentation; Table 5 specifies temperature=1.0 and max_tokens=16384.",
    246           "source": "haiku"
    247         },
    248         "prompts_provided": {
    249           "applies": true,
    250           "answer": false,
    251           "justification": "No actual prompt templates or system instructions are shown; Appendix D describes optimization strategies conceptually but does not provide the prompts given to LLMs during generation.",
    252           "source": "haiku"
    253         },
    254         "hyperparameters_reported": {
    255           "applies": true,
    256           "answer": true,
    257           "justification": "All key hyperparameters are explicitly reported: K=3 clusters, τ=10 reclustering period, θsat=75% saturation threshold, c=2.0 UCB exploration parameter, T=20 optimization budget, temperature=1.0, max_tokens=16384.",
    258           "source": "haiku"
    259         },
    260         "scaffolding_described": {
    261           "applies": true,
    262           "answer": true,
    263           "justification": "Algorithm 1 provides the complete KERNELBAND workflow with frontier expansion, periodic clustering, hardware profiling, masked UCB selection, and LLM generation steps in detail.",
    264           "source": "haiku"
    265         },
    266         "data_preprocessing_documented": {
    267           "applies": true,
    268           "answer": true,
    269           "justification": "Benchmark preprocessing is documented: use of GEAK's corrected TritonBench-G, exclusion of sin_computation with rationale (artificially high speedups), and stratified sampling for 50-kernel subset with seed=42 and <1% category deviation (Appendix E).",
    270           "source": "haiku"
    271         }
    272       },
    273       "data_integrity": {
    274         "raw_data_available": {
    275           "applies": true,
    276           "answer": false,
    277           "justification": "Raw experimental results (per-kernel timing traces, optimization trajectories) are not released; no data repository link is provided.",
    278           "source": "haiku"
    279         },
    280         "data_collection_described": {
    281           "applies": true,
    282           "answer": true,
    283           "justification": "Appendix H documents the evaluation protocol: Triton's do_bench with 100ms warmup and 1000ms timed runs, median execution time reporting, correctness thresholds (atol=rtol=1e-4), and weighted speedup aggregation formula.",
    284           "source": "haiku"
    285         },
    286         "recruitment_methods_described": {
    287           "applies": false,
    288           "answer": false,
    289           "justification": "No human participants; standard automated benchmark evaluation only.",
    290           "source": "haiku"
    291         },
    292         "data_pipeline_documented": {
    293           "applies": true,
    294           "answer": true,
    295           "justification": "The full evaluation pipeline is documented across Section 4.1 and Appendix H, including two-stage correctness verification (Call Accuracy then Execution Accuracy), benchmarking across 10+ input shapes, and weighted aggregation.",
    296           "source": "haiku"
    297         }
    298       },
    299       "contamination": {
    300         "training_cutoff_stated": {
    301           "applies": true,
    302           "answer": false,
    303           "justification": "Training data cutoffs for DeepSeek-V3.2, GPT-5, Claude Opus 4.5, or Gemini 3 Flash are not stated; TritonBench-G was published at ACL 2025 and may overlap with training data.",
    304           "source": "haiku"
    305         },
    306         "train_test_overlap_discussed": {
    307           "applies": true,
    308           "answer": false,
    309           "justification": "No discussion of potential overlap between LLM training corpora and TritonBench-G benchmark kernels, despite the benchmark being publicly available before these experiments.",
    310           "source": "haiku"
    311         },
    312         "benchmark_contamination_addressed": {
    313           "applies": true,
    314           "answer": false,
    315           "justification": "TritonBench-G appeared at ACL 2025; frontier LLMs used in this February 2026 preprint may have been trained on this data, but contamination is neither acknowledged nor mitigated.",
    316           "source": "haiku"
    317         }
    318       },
    319       "human_studies": {
    320         "pre_registered": {
    321           "applies": false,
    322           "answer": false,
    323           "justification": "No human participants in this study.",
    324           "source": "haiku"
    325         },
    326         "irb_or_ethics_approval": {
    327           "applies": false,
    328           "answer": false,
    329           "justification": "No human participants in this study.",
    330           "source": "haiku"
    331         },
    332         "demographics_reported": {
    333           "applies": false,
    334           "answer": false,
    335           "justification": "No human participants in this study.",
    336           "source": "haiku"
    337         },
    338         "inclusion_exclusion_criteria": {
    339           "applies": false,
    340           "answer": false,
    341           "justification": "No human participants in this study.",
    342           "source": "haiku"
    343         },
    344         "randomization_described": {
    345           "applies": false,
    346           "answer": false,
    347           "justification": "No human participants in this study.",
    348           "source": "haiku"
    349         },
    350         "blinding_described": {
    351           "applies": false,
    352           "answer": false,
    353           "justification": "No human participants in this study.",
    354           "source": "haiku"
    355         },
    356         "attrition_reported": {
    357           "applies": false,
    358           "answer": false,
    359           "justification": "No human participants in this study.",
    360           "source": "haiku"
    361         }
    362       },
    363       "cost_and_practicality": {
    364         "inference_cost_reported": {
    365           "applies": true,
    366           "answer": true,
    367           "justification": "Figure 4 shows speedup vs. API cost per kernel (up to $0.50/kernel); Figure 3 reports wall-clock time breakdown with 129s effective per-kernel iteration in parallel mode.",
    368           "source": "haiku"
    369         },
    370         "compute_budget_stated": {
    371           "applies": true,
    372           "answer": false,
    373           "justification": "Total compute budget across all experiments (full 183-kernel benchmark on 3 GPUs with 4 LLMs) is not stated; only per-kernel cost curves are shown in Figure 4.",
    374           "source": "haiku"
    375         }
    376       }
    377     }
    378   },
    379   "claims": [
    380     {
    381       "claim": "KERNELBAND achieves up to 1.91× geometric mean speedup on A100, outperforming GEAK by 42.5% in speedup",
    382       "evidence": "Table 1: KERNELBAND 1.91× vs GEAK 1.34× on A100 across 183 kernels at T=20 iterations",
    383       "supported": "strong"
    384     },
    385     {
    386       "claim": "Replacing the bandit policy with LLM semantic reasoning collapses performance to 0.97× (below reference kernel)",
    387       "evidence": "Table 4 ablation on 50-kernel H20 subset: 'LLM Strategy Selection' achieves 0.97× geometric mean speedup vs 1.57× for full KERNELBAND",
    388       "supported": "strong"
    389     },
    390     {
    391       "claim": "KERNELBAND automatically adapts optimization strategies to hardware bottlenecks, diverging allocation across platforms",
    392       "evidence": "Appendix I Table 10: FUSION selected 18.5% on RTX 4090 vs 12.8% on H20; TILING 10.0% on H20 vs 7.6% on RTX 4090",
    393       "supported": "moderate"
    394     },
    395     {
    396       "claim": "KERNELBAND generalizes across four frontier code LLMs, consistently outperforming baselines regardless of the underlying model",
    397       "evidence": "Table 2: KERNELBAND outperforms GEAK with DeepSeek-V3.2 (1.52× vs 0.95×), GPT-5 (1.72× vs 1.07×), Claude Opus 4.5 (1.82× vs 1.30×), Gemini 3 Flash (1.48× vs 1.21×)",
    398       "supported": "strong"
    399     },
    400     {
    401       "claim": "KERNELBAND delivers 35-50% higher speedup per dollar than unguided approaches at equivalent API budgets",
    402       "evidence": "Figure 4: at $0.50/kernel, KERNELBAND achieves 1.83× vs GEAK 1.35× (35% higher) and BoN 1.22× (50% higher)",
    403       "supported": "moderate"
    404     },
    405     {
    406       "claim": "Hardware-aware profiling is more critical than clustering at standard budget: removing profiling drops speedup 20% vs 10% for removing clustering",
    407       "evidence": "Table 4: w/o Profiling 1.26× vs w/o Clustering 1.41× vs full KERNELBAND 1.57× at T=20 on H20",
    408       "supported": "strong"
    409     }
    410   ],
    411   "methodology_tags": [
    412     "benchmark-eval"
    413   ],
    414   "key_findings": "KERNELBAND frames GPU Triton kernel optimization as a contextual multi-armed bandit problem, combining hardware-aware profiling-based pruning with trace-driven clustering to guide LLM code generation. On TritonBench-G across three NVIDIA GPU architectures and four frontier LLMs, KERNELBAND consistently outperforms the best baseline (GEAK) by 21-43% in geometric mean speedup and 39-141% in Fast@1 rate. The most striking finding is that replacing the bandit policy with LLM semantic reasoning for strategy selection collapses performance to 0.97× (below the reference kernel), demonstrating that learned execution statistics substantially outperform LLM hardware intuition. Hardware-aware profiling contributes more than clustering at standard budgets (20% vs 10% speedup drop when removed), but clustering's value grows with iteration count, showing sustained improvement to T=40 where baselines plateau.",
    415   "red_flags": [
    416     {
    417       "flag": "No statistical significance testing",
    418       "detail": "All comparative claims are made on point estimates without confidence intervals, error bars, or hypothesis tests; LLM sampling at temperature=1.0 introduces substantial variance that is never quantified."
    419     },
    420     {
    421       "flag": "Single-run results only",
    422       "detail": "No multi-run variance is reported for any configuration; given high LLM stochasticity (temperature=1.0), single-run point estimates for 183 kernels do not establish statistical reliability of the performance ordering."
    423     },
    424     {
    425       "flag": "Benchmark contamination unaddressed",
    426       "detail": "TritonBench-G was published at ACL 2025; DeepSeek-V3.2, GPT-5, Claude Opus 4.5, and Gemini 3 Flash may have seen these benchmark kernels during training, potentially inflating all LLM-based results without differentiation."
    427     },
    428     {
    429       "flag": "No code released",
    430       "detail": "The framework is described in detail but no code is available, preventing independent reproduction; STARK and TritonForge baselines also lack code, further limiting the comparative evaluation."
    431     },
    432     {
    433       "flag": "Corrected benchmark provided by competitor",
    434       "detail": "The 'corrected' TritonBench-G version used was provided by GEAK (Wang et al., 2025a), which is also the primary baseline; this creates circularity and the correction criteria are not independently verified."
    435     },
    436     {
    437       "flag": "No limitations section",
    438       "detail": "The paper has no dedicated limitations or threats-to-validity section, omitting discussion of scope restrictions to NVIDIA GPUs and Triton kernels, contamination risk, single-run variance, and GEAK adaptation fidelity."
    439     }
    440   ],
    441   "cited_papers": [
    442     {
    443       "title": "TritonBench: Benchmarking large language model capabilities for generating triton operators",
    444       "relevance": "Primary evaluation benchmark providing the 183-kernel TritonBench-G suite used in all main experiments"
    445     },
    446     {
    447       "title": "GEAK: Introducing Triton Kernel AI Agent & Evaluation Benchmarks",
    448       "relevance": "Main agent-based baseline for comparison; also provided the corrected benchmark version and adaptation details"
    449     },
    450     {
    451       "title": "STARK: Strategic team of agents for refining kernels",
    452       "relevance": "Concurrent agent-based kernel optimization method; compared conceptually but code unavailable for direct evaluation"
    453     },
    454     {
    455       "title": "CudaForge: An agent framework with hardware feedback for CUDA kernel optimization",
    456       "relevance": "Concurrent work using Coder-Judge architecture with Nsight Compute feedback; targets CUDA rather than Triton"
    457     },
    458     {
    459       "title": "ConCuR: Conciseness makes state-of-the-art kernel generation",
    460       "relevance": "Training-based alternative paradigm for kernel optimization via supervised fine-tuning with reasoning traces"
    461     },
    462     {
    463       "title": "Roofline: an insightful visual performance model for multicore architectures",
    464       "relevance": "Hardware performance modeling framework underpinning the hardware-aware pruning strategy and bottleneck identification"
    465     },
    466     {
    467       "title": "Finite-time analysis of the multiarmed bandit problem",
    468       "relevance": "Theoretical foundation for the UCB-based bandit policy used in KERNELBAND's masked action selection"
    469     }
    470   ],
    471   "engagement_factors": {
    472     "practical_relevance": {
    473       "score": 3,
    474       "justification": "GPU kernel optimization directly impacts LLM serving cost and throughput; the 1.87× speedup over torch.compile inductor backend is immediately actionable for ML infrastructure teams."
    475     },
    476     "surprise_contrarian": {
    477       "score": 2,
    478       "justification": "The finding that LLM semantic reasoning for strategy selection collapses to 0.97× (below reference kernel) is a striking and counterintuitive result — structured bandit statistics definitively outperform LLM hardware intuition."
    479     },
    480     "fear_safety": {
    481       "score": 0,
    482       "justification": "No AI safety or risk concerns raised; paper is a systems optimization paper with no threat modeling."
    483     },
    484     "drama_conflict": {
    485       "score": 0,
    486       "justification": "No controversy or conflict angle; straightforward systems-oriented contribution."
    487     },
    488     "demo_ability": {
    489       "score": 1,
    490       "justification": "No code released, so practitioners cannot immediately try it; the concept is clear but requires implementing the full framework from scratch to reproduce."
    491     },
    492     "brand_recognition": {
    493       "score": 1,
    494       "justification": "Peking University and associated institutions are respected academic groups, but no industry lab (DeepMind, Meta FAIR, etc.) is driving this work."
    495     }
    496   },
    497   "hn_data": {
    498     "threads": [
    499       {
    500         "hn_id": "39790604",
    501         "title": "One-Step Diffusion with Distribution Matching Distillation",
    502         "points": 4,
    503         "comments": 0,
    504         "url": "https://news.ycombinator.com/item?id=39790604",
    505         "created_at": "2024-03-22T13:36:19Z"
    506       }
    507     ],
    508     "top_points": 4,
    509     "total_points": 4,
    510     "total_comments": 0
    511   }
    512 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs