scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (27144B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Grokking: Generalization Beyond Overfitting on Small Algorithmic Datasets",
      6     "authors": [
      7       "Alethea Power",
      8       "Yuri Burda",
      9       "Harrison Edwards",
     10       "Igor Babuschkin",
     11       "Vedant Misra"
     12     ],
     13     "year": 2022,
     14     "venue": "arXiv.org",
     15     "arxiv_id": "2201.02177",
     16     "doi": null
     17   },
     18   "checklist": {
     19     "claims_and_evidence": {
     20       "abstract_claims_supported": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "All four abstract claims (generalization on algorithmic datasets, grokking phenomenon, generalization as function of dataset size, study of overparameterized networks) are supported by extensive experiments shown in Sections 3.1-3.2.",
     24         "source": "haiku"
     25       },
     26       "causal_claims_justified": {
     27         "applies": true,
     28         "answer": true,
     29         "justification": "Paper makes causal claims about optimization methods via ablation studies (Section 3.3). Weight decay, learning rate, batch size effects are tested systematically across multiple conditions.",
     30         "source": "haiku"
     31       },
     32       "generalization_bounded": {
     33         "applies": true,
     34         "answer": true,
     35         "justification": "All claims are explicitly bounded to 'small algorithmically generated datasets' of binary operations. Paper notes effects are 'less pronounced' on natural data. No overgeneralization to larger models or real datasets.",
     36         "source": "haiku"
     37       },
     38       "alternative_explanations_discussed": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "Paper identifies weight decay effectiveness and speculates about flat minima (A.5), but does not systematically consider or discuss alternative explanations for why grokking occurs beyond flat minima hypothesis.",
     42         "source": "haiku"
     43       },
     44       "proxy_outcome_distinction": {
     45         "applies": true,
     46         "answer": true,
     47         "justification": "Outcome is validation accuracy on held-out equations, which directly measures generalization on the target task. No proxy-to-outcome conflation.",
     48         "source": "haiku"
     49       }
     50     },
     51     "limitations_and_scope": {
     52       "limitations_section_present": {
     53         "applies": true,
     54         "answer": false,
     55         "justification": "No dedicated limitations section exists. Discussion (Section 4) is brief and speculative about future work rather than discussing limitations of current work.",
     56         "source": "haiku"
     57       },
     58       "threats_to_validity_specific": {
     59         "applies": true,
     60         "answer": false,
     61         "justification": "No systematic discussion of threats to validity. Appendix A.4 shows outlier robustness but does not discuss broader threats like sample size adequacy (3 seeds), architectural specificity, or generalization to other domains.",
     62         "source": "haiku"
     63       },
     64       "scope_boundaries_stated": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "While the paper describes what was tested (binary operations, small transformers), it does not explicitly state what it does NOT show or what the boundaries of applicability are (e.g., does this apply to larger models, natural data, other architectures?).",
     68         "source": "haiku"
     69       }
     70     },
     71     "conflicts_of_interest": {
     72       "funding_disclosed": {
     73         "applies": true,
     74         "answer": false,
     75         "justification": "No funding statement or acknowledgments section mentioning financial support. OpenAI and Google affiliations are listed but no explicit funding disclosure.",
     76         "source": "haiku"
     77       },
     78       "affiliations_disclosed": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "Author affiliations clearly listed: Alethea Power, Yuri Burda, Harri Edwards, Igor Babuschkin at OpenAI; Vedant Misra at Google (and footnote: 'at OpenAI at time of work').",
     82         "source": "haiku"
     83       },
     84       "funder_independent_of_outcome": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "No funder disclosed.",
     88         "source": "haiku"
     89       },
     90       "financial_interests_declared": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "No competing interests or financial interests statement present.",
     94         "source": "haiku"
     95       }
     96     },
     97     "scope_and_framing": {
     98       "key_terms_defined": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "'Grokking' is defined clearly with example (generalization far after overfitting). 'Generalization' used in standard ML sense. 'Binary operation tables' explained with concrete examples (modular arithmetic, S5 permutations). Key terms adequately defined.",
    102         "source": "haiku"
    103       },
    104       "intended_contribution_clear": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "Contributions explicitly listed in Introduction: show networks generalize on binary op tables, identify grokking phenomenon, present data efficiency curves, show optimization time scaling, compare optimization tricks, visualize learned structure. Very clear.",
    108         "source": "haiku"
    109       },
    110       "engagement_with_prior_work": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "Appendix A.3 provides substantive related work section comparing to prior algorithmic datasets (bAbI, Saxton et al.), double descent work (Nakkiran et al., Belkin et al., d'Ascoli et al.), and generalization measures (Jiang et al.). Shows how this work differs from prior.",
    114         "source": "haiku"
    115       }
    116     }
    117   },
    118   "type_checklist": {
    119     "empirical": {
    120       "artifacts": {
    121         "code_released": {
    122           "applies": true,
    123           "answer": false,
    124           "justification": "No code release is mentioned or linked. The paper provides sufficient architectural and hyperparameter details to reimplement, but no source code is provided or referenced as available.",
    125           "source": "haiku"
    126         },
    127         "data_released": {
    128           "applies": true,
    129           "answer": true,
    130           "justification": "Datasets are algorithmically generated via explicit procedures detailed in Appendix A.1.1. Full description of binary operations (modular arithmetic, S5 composition, etc.) and equation generation makes datasets fully reproducible.",
    131           "source": "haiku"
    132         },
    133         "environment_specified": {
    134           "applies": true,
    135           "answer": true,
    136           "justification": "Complete specifications in A.1.2: 2-layer transformer, 128 width, 4 heads, ~4·10^5 params, AdamW with lr=10^-3, weight decay=1, β1=0.9, β2=0.98, warmup, batch size details. Sufficient for reproduction.",
    137           "source": "haiku"
    138         },
    139         "reproduction_instructions": {
    140           "applies": true,
    141           "answer": true,
    142           "justification": "Appendix A.1 provides step-by-step generation procedure and training protocol. Dataset construction (A.1.1), model architecture (A.1.2), and hyperparameters fully specified. Sufficient for expert reproduction.",
    143           "source": "haiku"
    144         }
    145       },
    146       "statistical_methodology": {
    147         "confidence_intervals_or_error_bars": {
    148           "applies": true,
    149           "answer": false,
    150           "justification": "Results shown as means across 3 seeds (or 7 for Section 3.1.1) but no error bars or confidence intervals displayed in figures. Variance across runs not reported.",
    151           "source": "haiku"
    152         },
    153         "significance_tests": {
    154           "applies": true,
    155           "answer": false,
    156           "justification": "Main results are reported without statistical significance tests. The only exception is Appendix A.5, which reports a Spearman correlation of -0.79548 with p<0.000014 for the sharpness-generalization relationship.",
    157           "source": "haiku"
    158         },
    159         "effect_sizes_reported": {
    160           "applies": true,
    161           "answer": true,
    162           "justification": "Effect sizes quantified: 'decreasing 1% of training data increases optimization time 40-50%' (Section 3.1.1). Weight decay effect: 'more than halving samples needed' (Section 3.3). Effects are reported numerically.",
    163           "source": "haiku"
    164         },
    165         "sample_size_justified": {
    166           "applies": true,
    167           "answer": false,
    168           "justification": "3 random seeds used for most experiments, 7 for Section 3.1.1. No justification given for adequacy of sample size. No power analysis provided.",
    169           "source": "haiku"
    170         },
    171         "variance_reported": {
    172           "applies": true,
    173           "answer": false,
    174           "justification": "Text mentions 'mean accuracy across three runs' but figures show only means, not variance/std dev. No error bars on main result plots (Figures 1, 2).",
    175           "source": "haiku"
    176         }
    177       },
    178       "evaluation_design": {
    179         "baselines_included": {
    180           "applies": true,
    181           "answer": true,
    182           "justification": "Section 3.3 compares multiple baselines within framework: Adam vs AdamW, with/without weight decay, with/without dropout, with/without gradient noise, different learning rates. Comprehensive ablation.",
    183           "source": "haiku"
    184         },
    185         "baselines_contemporary": {
    186           "applies": true,
    187           "answer": true,
    188           "justification": "All optimization baselines (Adam, SGD, AdamW, gradient noise) are standard and contemporary for 2022. No suspiciously weak or outdated baselines.",
    189           "source": "haiku"
    190         },
    191         "ablation_study": {
    192           "applies": true,
    193           "answer": true,
    194           "justification": "Section 3.3 provides systematic ablation of optimization methods: learning rate, batch size, weight decay, dropout, gradient noise, different Adam variants. Well-designed ablations.",
    195           "source": "haiku"
    196         },
    197         "multiple_metrics": {
    198           "applies": true,
    199           "answer": false,
    200           "justification": "Primary metric is validation accuracy. Loss curves shown in Figure 4 but appear secondary. Mostly single metric (accuracy) throughout paper.",
    201           "source": "haiku"
    202         },
    203         "human_evaluation": {
    204           "applies": false,
    205           "answer": false,
    206           "justification": "Not applicable—no human evaluation of system outputs.",
    207           "source": "haiku"
    208         },
    209         "held_out_test_set": {
    210           "applies": true,
    211           "answer": true,
    212           "justification": "Binary operation equations split into random training/validation sets. Validation set held out during training. Standard supervised learning setup.",
    213           "source": "haiku"
    214         },
    215         "per_category_breakdown": {
    216           "applies": true,
    217           "answer": true,
    218           "justification": "Results reported separately for multiple binary operations: modular addition, division, S5 composition, polynomial operations, etc. (Figure 2 right, Section 3.2). Per-operation breakdown provided.",
    219           "source": "haiku"
    220         },
    221         "failure_cases_discussed": {
    222           "applies": true,
    223           "answer": true,
    224           "justification": "Paper mentions operations that failed to generalize: 'x³+xy²+y didn't lead to generalization within budget at any data percentage up to 95%' (Section 3.2). Failure modes discussed.",
    225           "source": "haiku"
    226         },
    227         "negative_results_reported": {
    228           "applies": true,
    229           "answer": true,
    230           "justification": "Paper reports operations where generalization did not occur and conditions where optimization fails. Negative results transparently discussed.",
    231           "source": "haiku"
    232         }
    233       },
    234       "setup_transparency": {
    235         "model_versions_specified": {
    236           "applies": true,
    237           "answer": true,
    238           "justification": "Model fully specified: decoder-only transformer, 2 layers, width 128, 4 attention heads, ~4·10^5 parameters, causal attention. Snapshot-level detail provided.",
    239           "source": "haiku"
    240         },
    241         "prompts_provided": {
    242           "applies": false,
    243           "answer": false,
    244           "justification": "Not applicable—no prompts or instructions, only equation data.",
    245           "source": "haiku"
    246         },
    247         "hyperparameters_reported": {
    248           "applies": true,
    249           "answer": true,
    250           "justification": "Complete hyperparameter specification: AdamW lr=10⁻³, weight decay=1, β₁=0.9, β₂=0.98, linear warmup 10 steps, batch size 512 or half training set, optimization budget 10⁵-10⁶ steps. Extensive detail.",
    251           "source": "haiku"
    252         },
    253         "scaffolding_described": {
    254           "applies": false,
    255           "answer": false,
    256           "justification": "Not applicable—no agentic scaffolding in this supervised learning setting.",
    257           "source": "haiku"
    258         },
    259         "data_preprocessing_documented": {
    260           "applies": true,
    261           "answer": true,
    262           "justification": "Data generation fully documented (A.1.1). Random train/val split procedure described. Each symbol tokenized separately. Preprocessing and generation pipeline clear.",
    263           "source": "haiku"
    264         }
    265       },
    266       "data_integrity": {
    267         "raw_data_available": {
    268           "applies": true,
    269           "answer": true,
    270           "justification": "Raw data is the set of equations generated from binary operations. Generation procedure fully specified so raw data can be exactly regenerated by following the algorithm in A.1.1.",
    271           "source": "haiku"
    272         },
    273         "data_collection_described": {
    274           "applies": true,
    275           "answer": true,
    276           "justification": "Data collection procedure explicit (A.1.1): 'For each training run, we chose a fraction of all available equations at random and declared them to be the training set, with the rest equations being the validation set.'",
    277           "source": "haiku"
    278         },
    279         "recruitment_methods_described": {
    280           "applies": false,
    281           "answer": false,
    282           "justification": "Not applicable—no human participants.",
    283           "source": "haiku"
    284         },
    285         "data_pipeline_documented": {
    286           "applies": true,
    287           "answer": true,
    288           "justification": "Pipeline documented: generate equations per binary operation → randomly split into train/val → train transformer → measure validation accuracy. Fully described.",
    289           "source": "haiku"
    290         }
    291       },
    292       "contamination": {
    293         "training_cutoff_stated": {
    294           "applies": false,
    295           "answer": false,
    296           "justification": "Not applicable—training models on generated data, not evaluating pre-trained models on benchmarks.",
    297           "source": "haiku"
    298         },
    299         "train_test_overlap_discussed": {
    300           "applies": true,
    301           "answer": true,
    302           "justification": "No train-test overlap by design: equations randomly split once into training and validation sets. Explicit separation documented.",
    303           "source": "haiku"
    304         },
    305         "benchmark_contamination_addressed": {
    306           "applies": false,
    307           "answer": false,
    308           "justification": "Not applicable—datasets are created by authors, not pre-existing benchmarks.",
    309           "source": "haiku"
    310         }
    311       },
    312       "human_studies": {
    313         "pre_registered": {
    314           "applies": false,
    315           "answer": false,
    316           "justification": "Not applicable—no human participants.",
    317           "source": "haiku"
    318         },
    319         "irb_or_ethics_approval": {
    320           "applies": false,
    321           "answer": false,
    322           "justification": "Not applicable—no human participants.",
    323           "source": "haiku"
    324         },
    325         "demographics_reported": {
    326           "applies": false,
    327           "answer": false,
    328           "justification": "Not applicable—no human participants.",
    329           "source": "haiku"
    330         },
    331         "inclusion_exclusion_criteria": {
    332           "applies": false,
    333           "answer": false,
    334           "justification": "Not applicable—no human participants.",
    335           "source": "haiku"
    336         },
    337         "randomization_described": {
    338           "applies": false,
    339           "answer": false,
    340           "justification": "Not applicable—no human participants.",
    341           "source": "haiku"
    342         },
    343         "blinding_described": {
    344           "applies": false,
    345           "answer": false,
    346           "justification": "Not applicable—no human participants.",
    347           "source": "haiku"
    348         },
    349         "attrition_reported": {
    350           "applies": false,
    351           "answer": false,
    352           "justification": "Not applicable—no human participants.",
    353           "source": "haiku"
    354         }
    355       },
    356       "cost_and_practicality": {
    357         "inference_cost_reported": {
    358           "applies": false,
    359           "answer": false,
    360           "justification": "Not reported and not relevant—focus is training dynamics, not inference.",
    361           "source": "haiku"
    362         },
    363         "compute_budget_stated": {
    364           "applies": true,
    365           "answer": false,
    366           "justification": "Optimization step budgets stated (10⁵ to 10⁶ steps) and GPU use mentioned, but total compute cost (wall-clock time, FLOPs, or training cost) not reported.",
    367           "source": "haiku"
    368         }
    369       }
    370     }
    371   },
    372   "claims": [
    373     {
    374       "claim": "Neural networks exhibit 'grokking'—a sudden, dramatic improvement in validation accuracy from chance level to perfect that occurs well after the training set has been overfit, so that generalization is decoupled from training performance.",
    375       "evidence": "Figure 1 left shows division mod 97: training reaches 100% at <10³ steps, validation reaches 100% at ~10⁶ steps with almost no improvement until 10⁵ steps. This pattern replicated across multiple operations (Figure 2 right).",
    376       "supported": "strong"
    377     },
    378     {
    379       "claim": "Weight decay is particularly effective at improving data efficiency, more than halving the amount of samples needed for generalization compared to other interventions.",
    380       "evidence": "Section 3.3 and Figure 2 left explicitly state: 'adding weight decay has a very large effect on data efficiency, more than halving the amount of samples needed compared to most other interventions' on S5 product task.",
    381       "supported": "strong"
    382     },
    383     {
    384       "claim": "Optimization time required to achieve generalization increases rapidly and exponentially as dataset size decreases, with a 1% decrease in training data causing 40-50% increase in median steps to 99% accuracy.",
    385       "evidence": "Section 3.1.1 states: 'a decrease of 1% of training data leads to an increase of 40-50% in median time to generalization' near 25-30% of data for S5 product.",
    386       "supported": "strong"
    387     },
    388     {
    389       "claim": "Symmetric binary operations require less training data for generalization than non-symmetric counterparts.",
    390       "evidence": "Section 3.2 reports: 'symmetric operations tend to require less data for generalization than closely related non-symmetrical counterparts' (e.g., x+y vs x-y, x*y vs x/y). Figure 2 right visualizes this pattern.",
    391       "supported": "moderate"
    392     },
    393     {
    394       "claim": "Neural networks trained on algorithmic tasks learn recognizable mathematical structure that can be visualized in embedding space.",
    395       "evidence": "Figure 3 shows t-SNE projections revealing circle/number-line topology for modular addition and permutation coset structure for S5, with 'natural kinds of structure' cited in Discussion.",
    396       "supported": "strong"
    397     },
    398     {
    399       "claim": "Flat minima (low sharpness) correlate with generalization on these tasks with Spearman correlation coefficient of -0.79548 (p<0.000014).",
    400       "evidence": "Appendix A.5 states: 'validation accuracy and the φ score across trained networks had Spearman correlation coefficient of −0.79548 (significant with p < 0.000014)' on S5 composition task.",
    401       "supported": "strong"
    402     },
    403     {
    404       "claim": "Even small numbers of outliers (up to 1000 mislabeled examples) do not substantially degrade the ability of networks to generalize on these tasks.",
    405       "evidence": "Appendix A.4 Figure 6 shows data efficiency curves with outliers and concludes 'small number of outliers doesn't noticeably impact generalization performance.'",
    406       "supported": "moderate"
    407     }
    408   ],
    409   "methodology_tags": [
    410     "benchmark-eval",
    411     "ablation-study"
    412   ],
    413   "key_findings": "The paper identifies the 'grokking' phenomenon—a dramatic delayed generalization pattern where neural networks trained on small algorithmic datasets suddenly improve from chance-level validation performance to perfect accuracy well after memorizing the training set (e.g., 1000× more steps required). Weight decay is the single most effective intervention for data efficiency, reducing required samples by >50%. A fundamental compute-data tradeoff exists: optimization time required for generalization grows exponentially as training data decreases, with each 1% data reduction requiring 40-50% more training steps. Networks trained with these methods learn to represent underlying mathematical structure (e.g., cyclic topology in modular arithmetic), and flatter minima strongly correlate with generalization success (Spearman ρ=-0.80, p<0.000014).",
    414   "red_flags": [
    415     {
    416       "flag": "No code release",
    417       "detail": "Detailed enough to reimplement but no code explicitly released or linked. Reproducibility depends on researcher effort."
    418     },
    419     {
    420       "flag": "Missing error reporting",
    421       "detail": "Results aggregated from 3-7 random seeds but no error bars, confidence intervals, or variance estimates shown in figures. Mean without spread is incomplete."
    422     },
    423     {
    424       "flag": "No statistical significance testing",
    425       "detail": "Comparisons between optimization methods and datasets lack p-values or significance tests (exception: A.5 correlation). Results may not be robust to sampling variation."
    426     },
    427     {
    428       "flag": "Limited scope",
    429       "detail": "Results restricted to small binary operation tables (e.g., arithmetic mod 97, S5 permutation composition) and small transformers (~400K params). Generalization to larger models, realistic datasets, or other domains unknown."
    430     },
    431     {
    432       "flag": "Weak alternatives discussion",
    433       "detail": "Paper speculates about flat minima but does not systematically consider or test competing explanations for grokking (e.g., loss landscape structure, feature learning dynamics, implicit bias)."
    434     },
    435     {
    436       "flag": "No dedicated limitations section",
    437       "detail": "Discussion is brief and speculative. Lacks systematic treatment of threats to validity or explicit scope boundaries."
    438     },
    439     {
    440       "flag": "Correlation vs causation on mechanisms",
    441       "detail": "Flatness-generalization correlation identified but causality not established. Appendix A.5 shows correlation but does not prove flat minima cause grokking."
    442     }
    443   ],
    444   "cited_papers": [
    445     {
    446       "title": "Understanding Deep Learning Requires Rethinking Generalization",
    447       "relevance": "Establishes that neural networks can memorize arbitrary labels yet generalize with meaningful data—foundational to understanding grokking as distinct from memorization."
    448     },
    449     {
    450       "title": "Deep Double Descent: Where Bigger Models and More Data Hurt",
    451       "relevance": "Documents the double descent phenomenon in loss, related but distinct from grokking's delayed generalization in accuracy."
    452     },
    453     {
    454       "title": "Reconciling Modern Machine Learning Practice and the Bias-Variance Trade-off",
    455       "relevance": "Theoretical grounding for double descent and risk curves that motivate investigation of overfitting dynamics."
    456     },
    457     {
    458       "title": "Fantastic Generalization Measures and Where to Find Them",
    459       "relevance": "Evaluates generalization measures including flatness; directly informs A.5's hypothesis about minima sharpness."
    460     },
    461     {
    462       "title": "Flat Minima",
    463       "relevance": "Classic work relating flat minima to generalization; anchors A.5's correlation analysis of sharpness."
    464     },
    465     {
    466       "title": "Triple Descent and the Two Kinds of Overfitting: Where & Why Do They Appear?",
    467       "relevance": "Extends double descent to more complex settings, provides context for grokking as a third overfitting regime."
    468     },
    469     {
    470       "title": "On Large-Batch Training for Deep Learning: Generalization Gap, Sharp Minima, and Training Stability",
    471       "relevance": "Discusses batch size, learning rate, and minima geometry—techniques central to Section 3.3 ablations."
    472     },
    473     {
    474       "title": "Decoupled Weight Decay Regularization",
    475       "relevance": "Introduces AdamW optimizer and weight decay technique; most impactful result in paper is weight decay's effectiveness."
    476     }
    477   ],
    478   "engagement_factors": {
    479     "practical_relevance": {
    480       "score": 1,
    481       "justification": "Studies toy algorithmic tasks with no direct application to practitioners. However, insights about weight decay and optimization might transfer to real training."
    482     },
    483     "surprise_contrarian": {
    484       "score": 3,
    485       "justification": "Grokking contradicts intuition that validation should track training; delayed generalization by 1000× is genuinely unexpected and novel for 2022."
    486     },
    487     "fear_safety": {
    488       "score": 0,
    489       "justification": "No direct AI safety implications. Could tangentially relate to 'hidden capabilities' emergence but no alignment or risk claims made."
    490     },
    491     "drama_conflict": {
    492       "score": 1,
    493       "justification": "Elegant phenomenon with good visualizations but no controversial angle or conflict narrative."
    494     },
    495     "demo_ability": {
    496       "score": 2,
    497       "justification": "Grokking can be demonstrated with small GPU experiments but requires careful setup of specific binary operations and hyperparameters."
    498     },
    499     "brand_recognition": {
    500       "score": 3,
    501       "justification": "Authors from OpenAI and Google; OpenAI had high profile in 2022. Names carry weight in ML community."
    502     }
    503   },
    504   "hn_data": {
    505     "threads": [
    506       {
    507         "hn_id": "36133058",
    508         "title": "Grokking: Generalization Beyond Overfitting on Small Algorithmic Datasets",
    509         "points": 13,
    510         "comments": 0,
    511         "url": "https://news.ycombinator.com/item?id=36133058",
    512         "created_at": "2023-05-31T00:30:07Z"
    513       },
    514       {
    515         "hn_id": "35696690",
    516         "title": "Grokking: Generalization Beyond Overfitting on Small Algorithmic Datasets",
    517         "points": 3,
    518         "comments": 0,
    519         "url": "https://news.ycombinator.com/item?id=35696690",
    520         "created_at": "2023-04-25T05:02:30Z"
    521       },
    522       {
    523         "hn_id": "31958624",
    524         "title": "Grokking: Generalization Beyond Overfitting on Small Algorithmic Datasets",
    525         "points": 2,
    526         "comments": 1,
    527         "url": "https://news.ycombinator.com/item?id=31958624",
    528         "created_at": "2022-07-02T12:56:57Z"
    529       }
    530     ],
    531     "top_points": 13,
    532     "total_points": 18,
    533     "total_comments": 1
    534   }
    535 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs