scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (26268B)
      1 {
      2   "paper": {
      3     "title": "The Quantization Model of Neural Scaling",
      4     "authors": ["Eric J. Michaud", "Ziming Liu", "Uzay Girit", "Max Tegmark"],
      5     "year": 2023,
      6     "venue": "Neural Information Processing Systems",
      7     "arxiv_id": "2303.13506",
      8     "doi": "10.48550/arXiv.2303.13506"
      9   },
     10   "scan_version": 2,
     11   "active_modules": ["experimental_rigor", "data_leakage"],
     12   "methodology_tags": ["theoretical", "benchmark-eval"],
     13   "key_findings": "The paper proposes the Quantization Hypothesis: neural network knowledge/skills are decomposable into discrete 'quanta' whose use frequencies follow a power law, explaining both power law scaling of loss and emergent capabilities. Validated on a toy 'multitask sparse parity' dataset where smooth scaling averages over many discrete subtask transitions. Analysis of Pythia LLMs shows per-token scaling curves range from monogenic (sharp transitions) to polygenic (gradual improvement). A gradient-based clustering method (QDG) discovers coherent skill clusters in a small LLM, with cluster size distribution roughly compatible with predicted power law exponent, though with high uncertainty.",
     14   "claims": [
     15     {
     16       "claim": "When quanta are learned in order of decreasing use frequency and frequencies follow a power law, the resulting loss scales as a power law in the number of quanta learned.",
     17       "evidence": "Mathematical derivation in Section 2, Equation 2, showing L_n - L_∞ ∝ n^{-α}. Additional derivations in Appendix A for different assumptions about a_k and b_k.",
     18       "supported": "strong"
     19     },
     20     {
     21       "claim": "Neural networks exhibit power law scaling on the multitask sparse parity dataset, with the mechanism being learning of increasingly rare quanta in order.",
     22       "evidence": "Figure 2 shows power law scaling in N, S, D on toy dataset with α=0.4. Per-subtask breakdown shows emergence at different scales, with smooth aggregate scaling averaging over discrete transitions.",
     23       "supported": "strong"
     24     },
     25     {
     26       "claim": "The parameter scaling exponent for Pythia LLMs is approximately α_N ≈ 0.083.",
     27       "evidence": "Figure 3 left panel, measured from first six Pythia models. The 6.4b model was excluded as it 'does not fit the scaling curve well.'",
     28       "supported": "moderate"
     29     },
     30     {
     31       "claim": "The frequency distribution of auto-discovered LLM quanta roughly follows a power law with exponent compatible with the observed scaling exponent.",
     32       "evidence": "Figure 5 rank-frequency plot shows envelope slope of ≈-1.24 vs expected -1.08, which is 'within the margin of error given the uncertainty of our clustering methodology' (Section 5.1). Authors estimate uncertainty of at least 0.2.",
     33       "supported": "weak"
     34     },
     35     {
     36       "claim": "The relationship α_D = α_N/(α_N + 1) should hold between data and parameter scaling exponents.",
     37       "evidence": "Derived in Section 2. Appendix F compiles exponents from prior studies; results are 'too messy to definitively support or contradict our model' (Figure 18).",
     38       "supported": "weak"
     39     }
     40   ],
     41   "checklist": {
     42     "artifacts": {
     43       "code_released": {
     44         "applies": true,
     45         "answer": true,
     46         "justification": "Paper states 'Project code can be found at: https://github.com/ejmichaud/quantization-model' on page 1."
     47       },
     48       "data_released": {
     49         "applies": true,
     50         "answer": true,
     51         "justification": "The toy dataset is procedurally generated (described fully in Section 3.1). LLM experiments use the publicly available Pythia models and The Pile test set. Code repository presumably includes generation scripts."
     52       },
     53       "environment_specified": {
     54         "applies": true,
     55         "answer": false,
     56         "justification": "No requirements.txt, Dockerfile, or detailed environment specification found in the paper. Only mentions Adam optimizer and scikit-learn for spectral clustering."
     57       },
     58       "reproduction_instructions": {
     59         "applies": true,
     60         "answer": false,
     61         "justification": "No step-by-step reproduction instructions in the paper. The code repository is referenced but the paper itself does not include a reproducibility section with commands or scripts."
     62       }
     63     },
     64     "statistical_methodology": {
     65       "confidence_intervals_or_error_bars": {
     66         "applies": true,
     67         "answer": false,
     68         "justification": "Main results (scaling exponents, cluster slopes) are reported as point estimates without confidence intervals or error bars. The authors acknowledge high uncertainty qualitatively ('uncertainty of at least 0.2') but do not compute formal intervals."
     69       },
     70       "significance_tests": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "No statistical significance tests are used. Scaling exponents are measured by fitting slopes, and claims of compatibility between predicted and observed exponents are made without formal tests."
     74       },
     75       "effect_sizes_reported": {
     76         "applies": true,
     77         "answer": true,
     78         "justification": "Scaling exponents are themselves effect sizes (α_N ≈ 0.083, cluster envelope slope ≈-1.24 vs predicted -1.08). These are reported with context of baseline predictions from theory."
     79       },
     80       "sample_size_justified": {
     81         "applies": true,
     82         "answer": false,
     83         "justification": "No justification for why 10,000 tokens were used for QDG clustering, why 7 Pythia models were analyzed, or why the toy dataset used the specific parameter choices."
     84       },
     85       "variance_reported": {
     86         "applies": true,
     87         "answer": false,
     88         "justification": "No variance across runs is reported. The authors note 'if we had multiple runs with different random seeds for each model scale, we could better test' (Section 4.2) but did not do this. Toy experiments appear to be single runs."
     89       }
     90     },
     91     "evaluation_design": {
     92       "baselines_included": {
     93         "applies": true,
     94         "answer": true,
     95         "justification": "The paper compares theoretical predictions against empirical scaling exponents from prior work (Kaplan et al., Hoffmann et al., etc.) and against its own measurements on Pythia and toy datasets."
     96       },
     97       "baselines_contemporary": {
     98         "applies": true,
     99         "answer": true,
    100         "justification": "Uses the Pythia model suite (Biderman et al. 2023) which was contemporary at time of writing. Compares against multiple recent scaling law studies."
    101       },
    102       "ablation_study": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "Figure 10 varies the quanta distribution parameter α across multiple values and measures how scaling exponents change, effectively ablating the key parameter. Appendix A explores different assumptions for a_k and b_k."
    106       },
    107       "multiple_metrics": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "Reports scaling exponents α_N, α_S, α_D separately, loss distributions p(L), per-token scaling curves, cluster rank-frequency statistics, and number of subtasks learned."
    111       },
    112       "human_evaluation": {
    113         "applies": true,
    114         "answer": true,
    115         "justification": "QDG clusters were manually inspected to verify coherence: 'manually inspecting these clusters we find that they usually involve predicting the same token for a coherent reason' (Section 5)."
    116       },
    117       "held_out_test_set": {
    118         "applies": true,
    119         "answer": true,
    120         "justification": "LLM experiments use 'approximately 10 million tokens from the test set of The Pile' (Section 4). Toy experiments report mean test loss."
    121       },
    122       "per_category_breakdown": {
    123         "applies": true,
    124         "answer": true,
    125         "justification": "Scaling curves are decomposed per-subtask (Figure 2 bottom), per-token (Figures 3, 4, 12), and per-cluster (Figure 5). This is a central contribution."
    126       },
    127       "failure_cases_discussed": {
    128         "applies": true,
    129         "answer": true,
    130         "justification": "The paper discusses failures: QDG clusters are 'not all' coherent (Section 5), the 6.4b Pythia model breaks the scaling trend (Section 4.1), and toy exponents deviate from theory (Appendix B)."
    131       },
    132       "negative_results_reported": {
    133         "applies": true,
    134         "answer": true,
    135         "justification": "Several negative results: the cluster power law slope matches the prediction only loosely (-1.24 vs -1.08, within an estimated uncertainty of at least 0.2), toy scaling exponents deviate from theory (Appendix B), and the 6.4b model doesn't fit. The relationship α_D = α_N/(α_N+1) is described as 'too messy to definitively support' (Appendix F)."
    136       }
    137     },
    138     "claims_and_evidence": {
    139       "abstract_claims_supported": {
    140         "applies": true,
    141         "answer": true,
    142         "justification": "Abstract claims are appropriately hedged: 'We tentatively find that the frequency at which these quanta are used... roughly follows a power law corresponding with the empirical scaling exponent.' Results sections support these hedged claims."
    143       },
    144       "causal_claims_justified": {
    145         "applies": true,
    146         "answer": true,
    147         "justification": "The paper's causal claims are primarily about the toy dataset (where the mechanism is fully controlled and verifiable). For LLMs, claims are appropriately framed as conjectures and hypotheses, not established causal relationships."
    148       },
    149       "generalization_bounded": {
    150         "applies": true,
    151         "answer": true,
    152         "justification": "The paper is explicit about boundaries: the toy dataset is a 'proof of concept' (Section 3.2), and for LLMs the results are 'tentative' with 'much work remains' (Section 7). The Limitations section identifies multiple ways the hypothesis might not hold."
    153       },
    154       "alternative_explanations_discussed": {
    155         "applies": true,
    156         "answer": true,
    157         "justification": "Section 6 discusses multiple alternative models of neural scaling (Sharma & Kaplan, Bahri et al., Maloney et al., Bordelon et al., Hutter). Section 7 discusses that smooth scaling could be modeled without discrete quanta, and that polygenicity undermines the simple model."
    158       },
    159       "proxy_outcome_distinction": {
    160         "applies": true,
    161         "answer": true,
    162         "justification": "The paper is careful about what QDG clusters actually measure vs what quanta are theoretically. Section 5 notes clusters 'will not give us a mechanistic understanding of the quanta, but simply provide examples' and assumes monogenicity which is 'likely unrealistic.'"
    163       }
    164     },
    165     "setup_transparency": {
    166       "model_versions_specified": {
    167         "applies": true,
    168         "answer": true,
    169         "justification": "Specific Pythia model sizes are stated (19m to 6.4b non-embedding parameters) from the Pythia suite (Biderman et al. 2023), which is a specific named model family with known training details."
    170       },
    171       "prompts_provided": {
    172         "applies": false,
    173         "answer": false,
    174         "justification": "The paper does not use prompting. It evaluates language models on next-token prediction loss directly."
    175       },
    176       "hyperparameters_reported": {
    177         "applies": true,
    178         "answer": true,
    179         "justification": "For toy experiments: Adam optimizer with lr=10^-3, batch size 20000, ntasks=500, n=100, k=3, α=0.4, training steps 2e5, hidden-layer widths 10-500, dataset sizes 1e4-5e6 (Section 3.2). QDG: n_clusters=400, loss threshold 0.1 nats (Section 5, Appendix C.1)."
    180       },
    181       "scaffolding_described": {
    182         "applies": false,
    183         "answer": false,
    184         "justification": "No agentic scaffolding is used. The paper trains and evaluates standard neural networks."
    185       },
    186       "data_preprocessing_documented": {
    187         "applies": true,
    188         "answer": true,
    189         "justification": "QDG filtering is described: tokens with <0.1 nats loss, induction samples filtered out via trigram matching, gradients exclude embed/unembed/layernorm (Appendix C.1). Toy data generation is fully specified."
    190       }
    191     },
    192     "limitations_and_scope": {
    193       "limitations_section_present": {
    194         "applies": true,
    195         "answer": true,
    196         "justification": "Section 7 (Discussion) contains a substantial 'Limitations' paragraph discussing multiple specific limitations of the work."
    197       },
    198       "threats_to_validity_specific": {
    199         "applies": true,
    200         "answer": true,
    201         "justification": "Specific threats discussed: 'Probably our riskiest assumption was that there is an underlying discreteness to everything that models learn,' larger networks are more efficient learners (not just more capacity), quanta modeled as independent when they may be hierarchical, QDG is 'neither very principled nor scalable' (Section 7)."
    202       },
    203       "scope_boundaries_stated": {
    204         "applies": true,
    205         "answer": true,
    206         "justification": "Multiple explicit scope boundaries: the hypothesis is validated on toy data but 'much work remains in investigating to what extent it holds for natural tasks' (Section 7). Cluster slope measurement has 'uncertainty of at least 0.2' (Section 5.1). Appendix F: empirical results are 'too messy to definitively support or contradict our model.'"
    207       }
    208     },
    209     "data_integrity": {
    210       "raw_data_available": {
    211         "applies": true,
    212         "answer": true,
    213         "justification": "Toy data is fully reproducible from the specification. Pythia models and The Pile test set are publicly available. Code repository is provided."
    214       },
    215       "data_collection_described": {
    216         "applies": true,
    217         "answer": true,
    218         "justification": "Toy data generation procedure is fully specified (Section 3.1). LLM evaluation: 'approximately 10 million tokens from the test set of The Pile' evaluated on Pythia models (Section 4). QDG: 10,000 tokens selected by loss threshold from The Pile test set (Appendix C.1)."
    219       },
    220       "recruitment_methods_described": {
    221         "applies": false,
    222         "answer": false,
    223         "justification": "No human participants. Data sources are standard public datasets/models."
    224       },
    225       "data_pipeline_documented": {
    226         "applies": true,
    227         "answer": true,
    228         "justification": "QDG pipeline is documented: select low-loss tokens → filter induction → compute gradients → normalize → compute affinity matrix → spectral clustering (Section 5, Appendix C.1). Toy data pipeline is fully specified."
    229       }
    230     },
    231     "conflicts_of_interest": {
    232       "funding_disclosed": {
    233         "applies": true,
    234         "answer": true,
    235         "justification": "Acknowledgments section lists: Foundational Questions Institute, Rothberg Family Fund for Cognitive Science, NSF Graduate Research Fellowship (Grant No. 2141064), and IAIFI through NSF grant PHY-2019786."
    236       },
    237       "affiliations_disclosed": {
    238         "applies": true,
    239         "answer": true,
    240         "justification": "Authors listed as affiliated with MIT & IAIFI. No product being evaluated that would create a conflict."
    241       },
    242       "funder_independent_of_outcome": {
    243         "applies": true,
    244         "answer": true,
    245         "justification": "Funders are academic foundations and NSF, which have no financial stake in the theoretical claims about neural scaling laws."
    246       },
    247       "financial_interests_declared": {
    248         "applies": true,
    249         "answer": false,
    250         "justification": "No competing interests statement is included in the paper."
    251       }
    252     },
    253     "contamination": {
    254       "training_cutoff_stated": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "The paper does not evaluate a pre-trained model's capability on a benchmark. It measures per-token loss statistics of Pythia models on their own test set, studying scaling properties rather than benchmark performance."
    258       },
    259       "train_test_overlap_discussed": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "The paper analyzes scaling curves on The Pile's test split rather than evaluating a pre-trained model's capability on a benchmark, so contamination in the benchmark sense is not applicable."
    263       },
    264       "benchmark_contamination_addressed": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No benchmark capability evaluation is performed. The paper studies loss distributions and scaling exponents."
    268       }
    269     },
    270     "human_studies": {
    271       "pre_registered": {
    272         "applies": false,
    273         "answer": false,
    274         "justification": "No human participants in this study."
    275       },
    276       "irb_or_ethics_approval": {
    277         "applies": false,
    278         "answer": false,
    279         "justification": "No human participants in this study."
    280       },
    281       "demographics_reported": {
    282         "applies": false,
    283         "answer": false,
    284         "justification": "No human participants in this study."
    285       },
    286       "inclusion_exclusion_criteria": {
    287         "applies": false,
    288         "answer": false,
    289         "justification": "No human participants in this study."
    290       },
    291       "randomization_described": {
    292         "applies": false,
    293         "answer": false,
    294         "justification": "No human participants in this study."
    295       },
    296       "blinding_described": {
    297         "applies": false,
    298         "answer": false,
    299         "justification": "No human participants in this study."
    300       },
    301       "attrition_reported": {
    302         "applies": false,
    303         "answer": false,
    304         "justification": "No human participants in this study."
    305       }
    306     },
    307     "cost_and_practicality": {
    308       "inference_cost_reported": {
    309         "applies": true,
    310         "answer": false,
    311         "justification": "No inference cost or latency figures reported for QDG or model evaluations, only qualitative notes like 'a few hours' in Appendix G."
    312       },
    313       "compute_budget_stated": {
    314         "applies": true,
    315         "answer": true,
    316         "justification": "Appendix G provides compute estimates: ~1450 GPU-hours for toy experiments (467 runs), Pythia evaluations likely <100 A100-hours, QDG takes 'a few hours' on A100 80GB. Hardware is specified (A100, heterogeneous cluster)."
    317       }
    318     },
    319     "experimental_rigor": {
    320       "seed_sensitivity_reported": {
    321         "applies": true,
    322         "answer": false,
    323         "justification": "No multi-seed analysis. The paper explicitly notes this limitation: 'if we had multiple runs with different random seeds for each model scale, we could better test whether the mean loss across seeds decreases smoothly' (Section 4.2)."
    324       },
    325       "number_of_runs_stated": {
    326         "applies": true,
    327         "answer": false,
    328         "justification": "Number of runs not explicitly stated for most experiments. Toy experiments appear to be single runs per configuration. Pythia evaluations are single evaluations per model."
    329       },
    330       "hyperparameter_search_budget": {
    331         "applies": true,
    332         "answer": false,
    333         "justification": "No hyperparameter search budget reported. The choice of n_clusters=400 and loss threshold 0.1 nats for QDG appears ad-hoc, with no discussion of alternatives tried beyond sweeping n_clusters for rank-frequency plots."
    334       },
    335       "best_config_selection_justified": {
    336         "applies": true,
    337         "answer": false,
    338         "justification": "The 6.4b Pythia model was excluded from scaling exponent measurement because it 'does not fit the scaling curve well' (Section 4.1) — this is post-hoc selection without clear justification."
    339       },
    340       "multiple_comparison_correction": {
    341         "applies": false,
    342         "answer": false,
    343         "justification": "No formal statistical tests are performed, so multiple comparison correction is not applicable."
    344       },
    345       "self_comparison_bias_addressed": {
    346         "applies": false,
    347         "answer": false,
    348         "justification": "The paper proposes a theoretical model and validates it on self-constructed toy data and public models. There is no baseline re-implementation bias concern."
    349       },
    350       "compute_budget_vs_performance": {
    351         "applies": false,
    352         "answer": false,
    353         "justification": "The paper is studying scaling itself — compute/parameters/data vs performance IS the main analysis. Not applicable as a fairness concern."
    354       },
    355       "benchmark_construct_validity": {
    356         "applies": true,
    357         "answer": true,
    358         "justification": "The paper extensively discusses what its toy benchmark and QDG method actually measure vs what it claims. Section 5 notes QDG clusters are proxies not mechanistic understanding; Appendix E analyzes estimation bias; Section 7 discusses whether discrete quanta are real."
    359       },
    360       "scaffold_confound_addressed": {
    361         "applies": false,
    362         "answer": false,
    363         "justification": "No scaffolding is involved in the experiments."
    364       }
    365     },
    366     "data_leakage": {
    367       "temporal_leakage_addressed": {
    368         "applies": false,
    369         "answer": false,
    370         "justification": "The paper evaluates Pythia models on The Pile test set (the models' own evaluation split), not on external benchmarks. Temporal leakage is not applicable."
    371       },
    372       "feature_leakage_addressed": {
    373         "applies": false,
    374         "answer": false,
    375         "justification": "No benchmark evaluation where feature leakage would be a concern. The paper studies loss distributions on held-out data."
    376       },
    377       "non_independence_addressed": {
    378         "applies": false,
    379         "answer": false,
    380         "justification": "The Pile's train/test split is pre-defined. The paper is not making benchmark claims where independence would be a concern."
    381       },
    382       "leakage_detection_method": {
    383         "applies": false,
    384         "answer": false,
    385         "justification": "No benchmark evaluation where leakage detection would be relevant."
    386       }
    387     }
    388   },
    389   "red_flags": [
    390     {
    391       "flag": "Post-hoc exclusion of data point",
    392       "detail": "The 6.4b Pythia model was excluded from the scaling exponent measurement because it 'does not fit the scaling curve well' (Section 4.1). This is a concerning post-hoc exclusion without rigorous justification."
    393     },
    394     {
    395       "flag": "Key claim relies on imprecise measurement",
    396       "detail": "The central empirical claim — that LLM quanta frequencies follow the predicted power law — rests on QDG cluster sizes with an admitted uncertainty of ≥0.2 on the exponent. The measured slope (-1.24) vs predicted (-1.08) is within this margin but the measurement is too noisy to be confirmatory."
    397     },
    398     {
    399       "flag": "No multi-seed experiments",
    400       "detail": "The paper explicitly acknowledges that multi-seed runs would be needed to distinguish genuine discrete transitions from noise in per-token scaling curves, but does not perform them."
    401     }
    402   ],
    403   "cited_papers": [
    404     {
    405       "title": "Scaling laws for neural language models",
    406       "authors": ["Jared Kaplan", "Sam McCandlish", "Tom Henighan", "Tom B Brown"],
    407       "year": 2020,
    408       "arxiv_id": "2001.08361",
    409       "relevance": "Foundational paper on neural scaling laws that this work aims to explain mechanistically."
    410     },
    411     {
    412       "title": "Emergent Abilities of Large Language Models",
    413       "authors": ["Jason Wei", "Yi Tay", "Rishi Bommasani"],
    414       "year": 2022,
    415       "relevance": "Documents emergent abilities in LLMs that the Quantization Model aims to explain as averaging over discrete skill acquisition."
    416     },
    417     {
    418       "title": "Training compute-optimal large language models",
    419       "authors": ["Jordan Hoffmann", "Sebastian Borgeaud", "Arthur Mensch"],
    420       "year": 2022,
    421       "arxiv_id": "2203.15556",
    422       "relevance": "Chinchilla scaling laws paper; this work proposes a relationship between its data and parameter scaling exponents."
    423     },
    424     {
    425       "title": "Are Emergent Abilities of Large Language Models a Mirage?",
    426       "authors": ["Rylan Schaeffer", "Brando Miranda", "Sanmi Koyejo"],
    427       "year": 2023,
    428       "arxiv_id": "2304.15004",
    429       "relevance": "Challenges the emergence narrative by suggesting it is a metric artifact; this paper offers an alternative explanation via polygenicity."
    430     },
    431     {
    432       "title": "Pythia: A suite for analyzing large language models across training and scaling",
    433       "authors": ["Stella Biderman", "Hailey Schoelkopf"],
    434       "year": 2023,
    435       "arxiv_id": "2304.01373",
    436       "relevance": "Provides the model suite used for all LLM experiments in this paper."
    437     },
    438     {
    439       "title": "Towards Monosemanticity: Decomposing Language Models With Dictionary Learning",
    440       "authors": ["Trenton Bricken", "Adly Templeton", "Joshua Batson"],
    441       "year": 2023,
    442       "relevance": "Mechanistic interpretability work on decomposing LLMs into interpretable features, closely related to the quanta decomposition idea."
    443     },
    444     {
    445       "title": "Beyond the imitation game: Quantifying and extrapolating the capabilities of language models",
    446       "authors": ["Aarohi Srivastava", "Abhinav Rastogi"],
    447       "year": 2022,
    448       "arxiv_id": "2206.04615",
    449       "relevance": "BIG-Bench benchmark documenting emergent abilities and scaling 'breakthroughness' that this model aims to explain."
    450     },
    451     {
    452       "title": "In-context Learning and Induction Heads",
    453       "authors": ["Catherine Olsson", "Nelson Elhage", "Neel Nanda"],
    454       "year": 2022,
    455       "relevance": "Found evidence for universality of induction heads across LLM scales, supporting the quantization hypothesis's universality assumption."
    456     },
    457     {
    458       "title": "Grokking: Generalization beyond overfitting on small algorithmic datasets",
    459       "authors": ["Alethea Power", "Yuri Burda", "Harri Edwards"],
    460       "year": 2022,
    461       "arxiv_id": "2201.02177",
    462       "relevance": "Phase transitions in learning algorithmic tasks, providing precedent for discrete learning dynamics underlying smooth aggregate performance."
    463     },
    464     {
    465       "title": "A theory for emergence of complex skills in language models",
    466       "authors": ["Sanjeev Arora", "Anirudh Goyal"],
    467       "year": 2023,
    468       "arxiv_id": "2307.15936",
    469       "relevance": "Complementary theoretical framework for skill emergence in LLMs through combination of underlying skills."
    470     }
    471   ]
    472 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs