scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (27937B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Improving Automated Program Repair with Domain Adaptation",
      6     "authors": [
      7       "Armin Zirak",
      8       "Hadi Hemmati"
      9     ],
     10     "year": 2022,
     11     "venue": "ACM Transactions on Software Engineering and Methodology",
     12     "arxiv_id": "2212.11414",
     13     "doi": "10.1145/3631972"
     14   },
     15   "checklist": {
     16     "claims_and_evidence": {
     17       "abstract_claims_supported": {
     18         "applies": true,
     19         "answer": true,
     20         "justification": "All major claims in the abstract are supported by experimental results (Tables 4-20), though there is a minor discrepancy between the abstract's '24.42%' and the results section's '23.42%' for the CodeXGLUE zero-shot improvement.",
     21         "source": "haiku"
     22       },
     23       "causal_claims_justified": {
     24         "applies": true,
     25         "answer": true,
     26         "justification": "The included/excluded experimental design holds test sets constant while varying training data, providing adequate design for the causal claim that domain adaptation improves accuracy; exposure bias analysis (RQ2.3) addresses the alternative explanation that improvements stem merely from more data.",
     27         "source": "haiku"
     28       },
     29       "generalization_bounded": {
     30         "applies": true,
     31         "answer": true,
     32         "justification": "The paper explicitly bounds conclusions to TFix (JS) and CodeXGLUE (Java), 19 and 11 projects respectively, and Section 5.6 acknowledges limitations in generalizing to other APR methods or datasets.",
     33         "source": "haiku"
     34       },
     35       "alternative_explanations_discussed": {
     36         "applies": true,
     37         "answer": true,
     38         "justification": "The paper discusses the alternative that accuracy gains may come from more data (addressed via RQ2.3 exposure bias analysis) and the alternative that TFix-Small's resistance is due to limited capacity rather than robustness.",
     39         "source": "haiku"
     40       },
     41       "proxy_outcome_distinction": {
     42         "applies": true,
     43         "answer": true,
     44         "justification": "The paper clearly frames 'exact match' as a proxy and discusses its limitations versus error removal metrics, acknowledging that correct patches not identical to reference fixes are not captured.",
     45         "source": "haiku"
     46       }
     47     },
     48     "limitations_and_scope": {
     49       "limitations_section_present": {
     50         "applies": true,
     51         "answer": true,
     52         "justification": "Section 5.6 'Limitations of the Proposed Methods and the Conducted Study' and Section 5.7 'Validity Threats' are dedicated sections, not merely a sentence in the conclusion.",
     53         "source": "haiku"
     54       },
     55       "threats_to_validity_specific": {
     56         "applies": true,
     57         "answer": true,
     58         "justification": "Threats include specific claims: the excluded set is only <2% of data (ruling out data-volume as explanation for the drop), CodeXGLUE has only 1 project with >150 samples making results less reliable, and the error-removal metric is unavailable due to unpublished source code, leaving exact match as the only accuracy metric.",
     59         "source": "haiku"
     60       },
     61       "scope_boundaries_stated": {
     62         "applies": true,
     63         "answer": true,
     64         "justification": "The paper explicitly limits scope to one-line logical errors, two specific APR models, JavaScript and Java, and cross-project domain shift only (not language-change or synthetic-to-real shift).",
     65         "source": "haiku"
     66       }
     67     },
     68     "conflicts_of_interest": {
     69       "funding_disclosed": {
     70         "applies": true,
     71         "answer": false,
     72         "justification": "No funding source is mentioned anywhere in the paper; no acknowledgments section is present in the provided text.",
     73         "source": "haiku"
     74       },
     75       "affiliations_disclosed": {
     76         "applies": true,
     77         "answer": true,
     78         "justification": "Both authors' affiliations (University of Calgary) and email addresses are clearly disclosed in the author block.",
     79         "source": "haiku"
     80       },
     81       "funder_independent_of_outcome": {
     82         "applies": false,
     83         "answer": false,
     84         "justification": "No funding is disclosed, so independence cannot be assessed.",
     85         "source": "haiku"
     86       },
     87       "financial_interests_declared": {
     88         "applies": true,
     89         "answer": false,
     90         "justification": "No competing interests, patent, or financial disclosure statement appears in the paper.",
     91         "source": "haiku"
     92       }
     93     },
     94     "scope_and_framing": {
     95       "key_terms_defined": {
     96         "applies": true,
     97         "answer": true,
     98         "justification": "APR, domain shift, domain adaptation, exact match, exposure bias, and curriculum learning are all explicitly defined with formal or operational definitions in Sections 2-3.",
     99         "source": "haiku"
    100       },
    101       "intended_contribution_clear": {
    102         "applies": true,
    103         "answer": true,
    104         "justification": "The paper states two explicit contributions: (1) a domain adaptation framework with three methods for APR, and (2) a transformer-based bug generator (TBug/CodeXBUG) for zero-shot DA via synthetic data.",
    105         "source": "haiku"
    106       },
    107       "engagement_with_prior_work": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "Section 6 compares the proposed approach with related work on cross-project defect prediction, semi-supervised APR (DrRepair, BugLab, SelfAPR, SamSeed), and NLP domain adaptation, explicitly differentiating this work from each.",
    111         "source": "haiku"
    112       }
    113     }
    114   },
    115   "type_checklist": {
    116     "empirical": {
    117       "artifacts": {
    118         "code_released": {
    119           "applies": true,
    120           "answer": true,
    121           "justification": "Footnote 1 links to https://github.com/arminzirak/TFix stating 'We publish all the source codes, models and results in a public repository.'",
    122           "source": "haiku"
    123         },
    124         "data_released": {
    125           "applies": true,
    126           "answer": true,
    127           "justification": "The paper uses the publicly available TFix JS dataset (Berabi et al.) and CodeXGLUE Java dataset (Tufano et al./Lu et al.); both are standard public benchmarks used unmodified except for train/test split changes.",
    128           "source": "haiku"
    129         },
    130         "environment_specified": {
    131           "applies": true,
    132           "answer": false,
    133           "justification": "The paper specifies the hardware (32GB V100 GPU, 4 CPU cores, 30GB RAM on ComputeCanada Cedar) but provides no requirements.txt, Dockerfile, or software dependency list.",
    134           "source": "haiku"
    135         },
    136         "reproduction_instructions": {
    137           "applies": true,
    138           "answer": false,
    139           "justification": "The paper states it uses scripts from TFix and CodeXGLUE original authors with only data split changes, but provides no step-by-step instructions in the paper itself for reproducing results.",
    140           "source": "haiku"
    141         }
    142       },
    143       "statistical_methodology": {
    144         "confidence_intervals_or_error_bars": {
    145           "applies": true,
    146           "answer": false,
    147           "justification": "All results are point estimates (exact match percentages); no confidence intervals or error bars are reported for any comparison.",
    148           "source": "haiku"
    149         },
    150         "significance_tests": {
    151           "applies": true,
    152           "answer": false,
    153           "justification": "No statistical significance tests are applied to any comparative claim; improvements are presented as raw percentage differences without p-values.",
    154           "source": "haiku"
    155         },
    156         "effect_sizes_reported": {
    157           "applies": true,
    158           "answer": true,
    159           "justification": "Percentage-point improvements over the default baseline are reported throughout (e.g., 13.05% for TFix-Large, 23.42% for CodeXGLUE small), providing effect sizes with clear baseline context.",
    160           "source": "haiku"
    161         },
    162         "sample_size_justified": {
    163           "applies": true,
    164           "answer": true,
    165           "justification": "The paper explicitly justifies project selection thresholds (≥150 samples for TFix, ≥50 for CodeXGLUE) by arguing that fewer samples produce unreliable test results and inadequate adaptation data.",
    166           "source": "haiku"
    167         },
    168         "variance_reported": {
    169           "applies": true,
    170           "answer": false,
    171           "justification": "No variance, standard deviation, or run-to-run variation is reported; all results appear to be from single experimental runs.",
    172           "source": "haiku"
    173         }
    174       },
    175       "evaluation_design": {
    176         "baselines_included": {
    177           "applies": true,
    178           "answer": true,
    179           "justification": "Two baselines are clearly defined: 'Default' (pretrained model applied as-is, representing no DA) and 'Baseline' (full retraining with target data included), providing both lower and upper bound comparisons.",
    180           "source": "haiku"
    181         },
    182         "baselines_contemporary": {
    183           "applies": true,
    184           "answer": true,
    185           "justification": "TFix (2021) and CodeXGLUE (2021) were state-of-the-art APR methods at the time of this 2022 study, and the paper verifies TFix outperforms SequenceR, CoCoNuT, and Hoppity.",
    186           "source": "haiku"
    187         },
    188         "ablation_study": {
    189           "applies": true,
    190           "answer": true,
    191           "justification": "The paper tests five DA methods (FFT, TLWAL, CLC, CLL, CLS) against each other and against Default/Baseline, effectively ablating the contribution of different DA strategies.",
    192           "source": "haiku"
    193         },
    194         "multiple_metrics": {
    195           "applies": true,
    196           "answer": true,
    197           "justification": "The paper uses exact match accuracy (primary), plus efficiency metrics (preparation time, model size, inference time) and exposure bias as secondary metrics.",
    198           "source": "haiku"
    199         },
    200         "human_evaluation": {
    201           "applies": false,
    202           "answer": false,
    203           "justification": "Human evaluation is not applicable; correctness is assessed automatically by comparing generated fixes to reference patches via exact match.",
    204           "source": "haiku"
    205         },
    206         "held_out_test_set": {
    207           "applies": true,
    208           "answer": true,
    209           "justification": "Test splits are defined upfront and kept identical across all RQs (1-3) to enable fair comparisons; target-test is never used during adaptation.",
    210           "source": "haiku"
    211         },
    212         "per_category_breakdown": {
    213           "applies": true,
    214           "answer": true,
    215           "justification": "Tables 4-6, 13-20 provide per-project breakdowns, and Table 12 provides per-error-type accuracy for the TBug bug generator across 50+ error types.",
    216           "source": "haiku"
    217         },
    218         "failure_cases_discussed": {
    219           "applies": true,
    220           "answer": true,
    221           "justification": "The paper discusses specific failure cases: Coprhd Controller with 0% accuracy across all methods, LivelyKernel as too small for reliable results, and projects where DA reduces accuracy.",
    222           "source": "haiku"
    223         },
    224         "negative_results_reported": {
    225           "applies": true,
    226           "answer": true,
    227           "justification": "Negative results are explicitly reported: CurriculumLearning methods generally underperform FullFineTuning; CodeXBUG shows less improvement than TBug; DA reduces accuracy in 1-2 projects per method.",
    228           "source": "haiku"
    229         }
    230       },
    231       "setup_transparency": {
    232         "model_versions_specified": {
    233           "applies": true,
    234           "answer": true,
    235           "justification": "Specific architecture sizes are identified (T5-Large, T5-Small, CodeBERT with 6 layers, 768-dimensional hidden states, 12 attention heads) and they use scripts from original published papers.",
    236           "source": "haiku"
    237         },
    238         "prompts_provided": {
    239           "applies": false,
    240           "answer": false,
    241           "justification": "This is a fine-tuning study, not an LLM prompting study; the input format for TFix is described ('fix error type / error message buggy line: error context ⟹ fixed line') but this is model input formatting, not LLM prompting.",
    242           "source": "haiku"
    243         },
    244         "hyperparameters_reported": {
    245           "applies": true,
    246           "answer": false,
    247           "justification": "The paper states it uses 'the same configurations and hyperparameters' as the original TFix/CodeXGLUE studies without reporting specific values (learning rate, batch size, etc.) in this paper.",
    248           "source": "haiku"
    249         },
    250         "scaffolding_described": {
    251           "applies": false,
    252           "answer": false,
    253           "justification": "No agentic scaffolding is present; this is a model training and fine-tuning study.",
    254           "source": "haiku"
    255         },
    256         "data_preprocessing_documented": {
    257           "applies": true,
    258           "answer": true,
    259           "justification": "Data split procedures are described in detail (80/20 train/test per error type per project, then aggregation), and TBug training data preparation (swapping inputs/outputs) is explained.",
    260           "source": "haiku"
    261         }
    262       },
    263       "data_integrity": {
    264         "raw_data_available": {
    265           "applies": true,
    266           "answer": true,
    267           "justification": "The underlying TFix and CodeXGLUE datasets are publicly available from original authors; their modified splits are included in the published code repository.",
    268           "source": "haiku"
    269         },
    270         "data_collection_described": {
    271           "applies": true,
    272           "answer": true,
    273           "justification": "The paper describes how the original datasets were created (5.5M GitHub commits filtered by ESLint + Myers Diff for TFix; Tufano et al.'s Java function commit-based extraction for CodeXGLUE) and how the project labels were added.",
    274           "source": "haiku"
    275         },
    276         "recruitment_methods_described": {
    277           "applies": false,
    278           "answer": false,
    279           "justification": "No human participants; data comes from public GitHub repositories via established benchmark datasets.",
    280           "source": "haiku"
    281         },
    282         "data_pipeline_documented": {
    283           "applies": true,
    284           "answer": true,
    285           "justification": "The full pipeline is documented: original dataset → project labeling → stratified split by error type and project → TBug training data preparation (swap buggy/fixed, add context and metadata).",
    286           "source": "haiku"
    287         }
    288       },
    289       "contamination": {
    290         "training_cutoff_stated": {
    291           "applies": true,
    292           "answer": false,
    293           "justification": "Pre-trained T5 and CodeBERT are used but their training data cutoffs are not stated; both were trained on large web/code corpora that overlap with the GitHub-derived test benchmarks.",
    294           "source": "haiku"
    295         },
    296         "train_test_overlap_discussed": {
    297           "applies": true,
    298           "answer": false,
    299           "justification": "The paper does not discuss potential overlap between T5's or CodeBERT's pre-training data and the TFix/CodeXGLUE benchmark datasets, both of which derive from public GitHub commits.",
    300           "source": "haiku"
    301         },
    302         "benchmark_contamination_addressed": {
    303           "applies": true,
    304           "answer": false,
    305           "justification": "The benchmark data is from GitHub commits that predate T5 and CodeBERT's training, but potential contamination through pre-training is never discussed.",
    306           "source": "haiku"
    307         }
    308       },
    309       "human_studies": {
    310         "pre_registered": {
    311           "applies": false,
    312           "answer": false,
    313           "justification": "No human participants.",
    314           "source": "haiku"
    315         },
    316         "irb_or_ethics_approval": {
    317           "applies": false,
    318           "answer": false,
    319           "justification": "No human participants.",
    320           "source": "haiku"
    321         },
    322         "demographics_reported": {
    323           "applies": false,
    324           "answer": false,
    325           "justification": "No human participants.",
    326           "source": "haiku"
    327         },
    328         "inclusion_exclusion_criteria": {
    329           "applies": false,
    330           "answer": false,
    331           "justification": "No human participants.",
    332           "source": "haiku"
    333         },
    334         "randomization_described": {
    335           "applies": false,
    336           "answer": false,
    337           "justification": "No human participants.",
    338           "source": "haiku"
    339         },
    340         "blinding_described": {
    341           "applies": false,
    342           "answer": false,
    343           "justification": "No human participants.",
    344           "source": "haiku"
    345         },
    346         "attrition_reported": {
    347           "applies": false,
    348           "answer": false,
    349           "justification": "No human participants.",
    350           "source": "haiku"
    351         }
    352       },
    353       "cost_and_practicality": {
    354         "inference_cost_reported": {
    355           "applies": true,
    356           "answer": true,
    357           "justification": "Table 9 reports inference time per bug fix: 0.39 sec for TFix-Small and 1.06 sec for TFix-Large.",
    358           "source": "haiku"
    359         },
    360         "compute_budget_stated": {
    361           "applies": true,
    362           "answer": true,
    363           "justification": "Training times are reported in Table 7 (e.g., Baseline takes 1d 19h for TFix-Large; FullFineTuning takes 7 min), and hardware is specified (32GB V100 GPU, 4 CPU cores, 30GB RAM).",
    364           "source": "haiku"
    365         }
    366       }
    367     }
    368   },
    369   "claims": [
    370     {
    371       "claim": "Domain shift reduces TFix-Large's weighted average exact match accuracy by 11.82% when tested on unseen projects (excluded design).",
    372       "evidence": "Table 4: weighted average drops from 66.01% (included) to 54.19% (excluded) for TFix-Large across 8 target projects.",
    373       "supported": "strong"
    374     },
    375     {
    376       "claim": "TFix-Small is relatively resistant to domain shift, losing only 1.47% weighted average accuracy under the excluded design.",
    377       "evidence": "Table 4: TFix-Small weighted average drops from 56.40% to 54.93%, with 4 of 8 projects showing no drop.",
    378       "supported": "strong"
    379     },
    380     {
    381       "claim": "FullFineTuning improves TFix-Large by 13.05% (weighted average) over the default approach, reaching 67.24% vs. 54.19%.",
    382       "evidence": "Table 5 shows FFT weighted average of 67.24% vs. Default 54.19%; median 64.59% vs. Default 48.58%.",
    383       "supported": "strong"
    384     },
    385     {
    386       "claim": "Domain adaptation with FullFineTuning improves CodeXGLUE by 23.42% (weighted average absolute) on the small Java dataset.",
    387       "evidence": "Table 17: FFT weighted average 45.04% vs. Default 5.40% — though this is a 39.64pp absolute improvement; the 23.42% figure appears to be for the zero-shot synthetic data condition (Table 19).",
    388       "supported": "moderate"
    389     },
    390     {
    391       "claim": "Adding bug-type metadata to TBug increases its exact match accuracy from 33.1% to 41.4% (Small) and from 27.13% to 44.68% (Large).",
    392       "evidence": "Table 12 reports weighted average exact match for TBug with and without metadata across 50+ error types.",
    393       "supported": "strong"
    394     },
    395     {
    396       "claim": "Synthetic data from TBug enables zero-shot domain adaptation, improving TFix-Large by 5.66% on average over the default approach.",
    397       "evidence": "Table 14: FFT with synthetic data achieves weighted average 59.85% vs. Default 54.19%; improvement varies widely across projects (0% to 27pp).",
    398       "supported": "moderate"
    399     },
    400     {
    401       "claim": "Adapted TFix-Large models do not suffer exposure bias — their accuracy on the general dataset slightly increases after adaptation (40.01% → ~46-48%).",
    402       "evidence": "Table 11 shows all DA methods score higher than the pretrained TFix-Large (40.01%) on 5000 source-set samples.",
    403       "supported": "strong"
    404     }
    405   ],
    406   "methodology_tags": [
    407     "benchmark-eval",
    408     "empirical"
    409   ],
    410   "key_findings": "APR models (TFix and CodeXGLUE) experience significant accuracy degradation under cross-project domain shift, with TFix-Large losing 11.82% weighted average exact match accuracy when tested on unseen projects. A domain adaptation framework using FullFineTuning or Adapter Layers effectively recovers and in many cases exceeds baseline performance (13.05% improvement for TFix-Large), while requiring only minutes of fine-tuning compared to days for full retraining. A novel bug generator model (TBug) enables zero-shot domain adaptation through synthetic data creation, achieving a 5.66% improvement for TFix-Large with no labeled target data. Critically, the paper shows TFix-Large and TFix-Small behave oppositely under domain shift: the larger model suffers more but benefits more from adaptation, leading to the counter-intuitive finding that TFix-Small is preferable when no adaptation is applied.",
    411   "red_flags": [
    412     {
    413       "flag": "No statistical tests",
    414       "detail": "All comparative claims are made without significance tests or confidence intervals; improvements are presented as raw point estimates across 8 projects, making it impossible to determine if differences are statistically meaningful."
    415     },
    416     {
    417       "flag": "Single run results",
    418       "detail": "No variance across multiple experimental runs is reported; results from stochastic training processes are presented as single point estimates without standard deviations."
    419     },
    420     {
    421       "flag": "No contamination discussion",
    422       "detail": "Pre-trained T5 and CodeBERT were trained on large GitHub-derived corpora; the benchmark datasets also come from GitHub commits, but potential train/test contamination through pre-training is never discussed."
    423     },
    424     {
    425       "flag": "Numerical inconsistency in abstract",
    426       "detail": "The abstract states '24.42%' improvement for CodeXGLUE zero-shot, but the results section and Table 19 report 23.42%; minor but indicates insufficient proofreading."
    427     },
    428     {
    429       "flag": "Small target project pool",
    430       "detail": "Only 8 target projects for TFix and fewer for CodeXGLUE; project-level comparisons are highly sensitive to outlier projects (ONM always 100%, Coprhd always 0%), and aggregated metrics can be driven by single outliers."
    431     }
    432   ],
    433   "cited_papers": [
    434     {
    435       "title": "TFix: Learning to Fix Coding Errors with a Text-to-Text Transformer",
    436       "relevance": "Primary APR model studied; baseline for domain adaptation experiments"
    437     },
    438     {
    439       "title": "CodeXGLUE: A Machine Learning Benchmark Dataset for Code Understanding and Generation",
    440       "relevance": "Second APR model studied; provides Java benchmark and CodeBERT-based APR method"
    441     },
    442     {
    443       "title": "CodeBERT: A Pre-Trained Model for Programming and Natural Languages",
    444       "relevance": "Underlying pre-trained model for CodeXGLUE; used as encoder and for embedding similarity in curriculum learning"
    445     },
    446     {
    447       "title": "SequenceR: Sequence-to-Sequence Learning for End-to-End Program Repair",
    448       "relevance": "Prior NMT-based APR method used as comparison point for TFix's superiority"
    449     },
    450     {
    451       "title": "CoCoNuT: Combining Context-Aware Neural Translation Models Using Ensemble for Program Repair",
    452       "relevance": "Competing APR method demonstrating the broader landscape of NMT-based repair"
    453     },
    454     {
    455       "title": "SelfAPR: Self-supervised Program Repair with Test Execution Diagnostics",
    456       "relevance": "Related semi-supervised APR work using perturbations for synthetic bug generation"
    457     },
    458     {
    459       "title": "On Distribution Shift in Learning-based Bug Detectors",
    460       "relevance": "Closely related work studying distribution shift in learning-based bug detection for synthetic-to-real transfer (He et al. 2022)"
    461     },
    462     {
    463       "title": "An Empirical Study on Learning Bug-Fixing Patches in the Wild via Neural Machine Translation",
    464       "relevance": "Foundational dataset (Tufano et al.) used for CodeXGLUE Java benchmark"
    465     }
    466   ],
    467   "engagement_factors": {
    468     "practical_relevance": {
    469       "score": 3,
    470       "justification": "Directly addresses the gap between academic APR evaluation and real-world deployment on new projects; the framework requires only minutes of fine-tuning and is computationally feasible on personal hardware."
    471     },
    472     "surprise_contrarian": {
    473       "score": 2,
    474       "justification": "Contradicts the original TFix paper's conclusion that TFix-Large is always the best architecture — under domain shift without adaptation, TFix-Small is equally effective and far more efficient."
    475     },
    476     "fear_safety": {
    477       "score": 0,
    478       "justification": "No AI safety or risk concerns raised; this is a software engineering tool improvement paper."
    479     },
    480     "drama_conflict": {
    481       "score": 1,
    482       "justification": "Explicitly argues against a conclusion in a published ICML paper (Berabi et al. 2021) regarding TFix-Large's superiority."
    483     },
    484     "demo_ability": {
    485       "score": 2,
    486       "justification": "Code is released publicly on GitHub and adapts to new projects in under 10 minutes; practitioners can apply it to their own projects."
    487     },
    488     "brand_recognition": {
    489       "score": 1,
    490       "justification": "University of Calgary researchers; no famous lab or industry affiliation, but published in a top software engineering journal (TOSEM)."
    491     }
    492   },
    493   "hn_data": {
    494     "threads": [
    495       {
    496         "hn_id": "38704982",
    497         "title": "LLM in a Flash: Efficient LLM Inference with Limited Memory",
    498         "points": 252,
    499         "comments": 53,
    500         "url": "https://news.ycombinator.com/item?id=38704982"
    501       },
    502       {
    503         "hn_id": "46280080",
    504         "title": "A quarter of US-trained scientists eventually leave",
    505         "points": 155,
    506         "comments": 172,
    507         "url": "https://news.ycombinator.com/item?id=46280080"
    508       },
    509       {
    510         "hn_id": "38695583",
    511         "title": "An In-depth Look at Gemini's Language Abilities",
    512         "points": 118,
    513         "comments": 70,
    514         "url": "https://news.ycombinator.com/item?id=38695583"
    515       },
    516       {
    517         "hn_id": "38710452",
    518         "title": "Efficient Large Language Model Inference with Limited Memory",
    519         "points": 50,
    520         "comments": 1,
    521         "url": "https://news.ycombinator.com/item?id=38710452"
    522       },
    523       {
    524         "hn_id": "39120456",
    525         "title": "LLM in a Flash: Efficient Large Language Model Inference with Limited Memory",
    526         "points": 18,
    527         "comments": 1,
    528         "url": "https://news.ycombinator.com/item?id=39120456"
    529       },
    530       {
    531         "hn_id": "38728018",
    532         "title": "LLM in a Flash: Efficient Large Language Model Inference with Limited Memory",
    533         "points": 12,
    534         "comments": 1,
    535         "url": "https://news.ycombinator.com/item?id=38728018"
    536       },
    537       {
    538         "hn_id": "38720719",
    539         "title": "LLM in a Flash: Efficient Large Language Model Inference with Limited Memory",
    540         "points": 5,
    541         "comments": 0,
    542         "url": "https://news.ycombinator.com/item?id=38720719"
    543       },
    544       {
    545         "hn_id": "38717116",
    546         "title": "LLM in a Flash: Efficient Large Language Model Inference with Limited Memory",
    547         "points": 4,
    548         "comments": 0,
    549         "url": "https://news.ycombinator.com/item?id=38717116"
    550       },
    551       {
    552         "hn_id": "38777354",
    553         "title": "LLM in a Flash: Efficient Large Language Model Inference with Limited Memory",
    554         "points": 3,
    555         "comments": 1,
    556         "url": "https://news.ycombinator.com/item?id=38777354"
    557       },
    558       {
    559         "hn_id": "39542164",
    560         "title": "Ask HN: How to execute an 180B+ LLM on a Turing machine?",
    561         "points": 1,
    562         "comments": 0,
    563         "url": "https://news.ycombinator.com/item?id=39542164"
    564       }
    565     ],
    566     "top_points": 252,
    567     "total_points": 618,
    568     "total_comments": 299
    569   }
    570 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs