scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (27062B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "TyFlow: A Type-Aware Approach to Neural Code Models",
      6     "authors": [
      7       "Zhechong Huang",
      8       "Zhao Zhang",
      9       "Ruyi Ji",
     10       "Tingxuan Xia",
     11       "Qihao Zhu",
     12       "Qinxiang Cao",
     13       "Zeyu Sun",
     14       "Wiggin Zhou",
     15       "Yingfei Xiong"
     16     ],
     17     "year": 2025,
     18     "venue": "arXiv (submitted to ACM TOSEM)",
     19     "arxiv_id": "2510.10216",
     20     "doi": null
     21   },
     22   "checklist": {
     23     "claims_and_evidence": {
     24       "abstract_claims_supported": {
     25         "applies": true,
     26         "answer": true,
     27         "justification": "The abstract claims TyFlow eliminates type errors and significantly improves functional correctness; Table 2 confirms CER=0% on SuFu and improved pass@k across both languages and model sizes.",
     28         "source": "haiku"
     29       },
     30       "causal_claims_justified": {
     31         "applies": true,
     32         "answer": true,
     33         "justification": "The paper makes causal claims supported by a sequential ablation study (Table 3) that incrementally adds components and quantifies each contribution, and by comparisons against rejection sampling and separated type-code generation variants.",
     34         "source": "haiku"
     35       },
     36       "generalization_bounded": {
     37         "applies": true,
     38         "answer": false,
     39         "justification": "The conclusion asserts 'broader applications, including safety verification and the generation of other structural data' and 'other domains where structural constraints matter,' going beyond the two evaluated languages (SuFu and a Java subset).",
     40         "source": "haiku"
     41       },
     42       "alternative_explanations_discussed": {
     43         "applies": true,
     44         "answer": false,
     45         "justification": "The paper does not consider whether performance gains stem from the structured decision-sequence representation itself rather than type reasoning specifically, or from the dual-encoding architecture versus the type-guided synthesis framework.",
     46         "source": "haiku"
     47       },
     48       "proxy_outcome_distinction": {
     49         "applies": true,
     50         "answer": true,
     51         "justification": "pass@k directly measures functional correctness via automated test cases and CER directly measures type/compilation errors; claimed outcomes match measured outcomes without conflation.",
     52         "source": "haiku"
     53       }
     54     },
     55     "limitations_and_scope": {
     56       "limitations_section_present": {
     57         "applies": true,
     58         "answer": false,
     59         "justification": "There is no dedicated limitations or threats-to-validity section; limitations appear only in passing within results text (e.g., residual Java CER due to uncaptured static analysis constraints).",
     60         "source": "haiku"
     61       },
     62       "threats_to_validity_specific": {
     63         "applies": true,
     64         "answer": false,
     65         "justification": "No systematic threat analysis is provided; observations such as the Java subset covering ~78% of programs and the SuFu test set of ~58 programs are scattered and not framed as validity threats.",
     66         "source": "haiku"
     67       },
     68       "scope_boundaries_stated": {
     69         "applies": true,
     70         "answer": false,
     71         "justification": "The paper does not explicitly state what results do NOT show; the Java evaluation uses a language subset but this restriction is not clearly framed as a scope boundary with stated implications for generalization.",
     72         "source": "haiku"
     73       }
     74     },
     75     "conflicts_of_interest": {
     76       "funding_disclosed": {
     77         "applies": true,
     78         "answer": false,
     79         "justification": "No funding source is disclosed anywhere in the paper.",
     80         "source": "haiku"
     81       },
     82       "affiliations_disclosed": {
     83         "applies": true,
     84         "answer": true,
     85         "justification": "Author affiliations are explicitly listed: Peking University, University of Michigan, Shanghai Jiao Tong University, Institute of Software CAS, and Tencent.",
     86         "source": "haiku"
     87       },
     88       "funder_independent_of_outcome": {
     89         "applies": false,
     90         "answer": false,
     91         "justification": "No funding is disclosed, making funder independence unverifiable; one author (Wiggin Zhou) is from Tencent, which could benefit commercially from code generation improvements.",
     92         "source": "haiku"
     93       },
     94       "financial_interests_declared": {
     95         "applies": true,
     96         "answer": false,
     97         "justification": "No competing interests statement or financial interest declaration appears in the paper.",
     98         "source": "haiku"
     99       }
    100     },
    101     "scope_and_framing": {
    102       "key_terms_defined": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "Key terms are formally defined: type correctness via CHCs and typing rules, synthesis decision sequences, synthesis derivation trees, and the isomorphism between them are all defined with mathematical precision.",
    106         "source": "haiku"
    107       },
    108       "intended_contribution_clear": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "Three explicit contributions are stated: the proof-construction observation enabling synthesis, the dual-encoding architecture, and the TyFlow meta-system; the intended addition over prior work is clearly articulated.",
    112         "source": "haiku"
    113       },
    114       "engagement_with_prior_work": {
    115         "applies": true,
    116         "answer": true,
    117         "justification": "Section 7 discusses three lines of related work and explicitly differentiates TyFlow from constrained decoding, GrammarT5, Tare, and Refine4LLM, explaining what is new rather than just listing references.",
    118         "source": "haiku"
    119       }
    120     }
    121   },
    122   "type_checklist": {
    123     "empirical": {
    124       "artifacts": {
    125         "code_released": {
    126           "applies": true,
    127           "answer": false,
    128           "justification": "No code repository or artifact release is mentioned anywhere in the paper.",
    129           "source": "haiku"
    130         },
    131         "data_released": {
    132           "applies": true,
    133           "answer": true,
    134           "justification": "Both evaluation datasets are publicly available: SuFu programs from jiry17/SuFu on GitHub, Java tasks from MBJP at amazon-science/mxeval; the augmented SuFu NL descriptions are not separately packaged.",
    135           "source": "haiku"
    136         },
    137         "environment_specified": {
    138           "applies": true,
    139           "answer": false,
    140           "justification": "Hardware is specified (AMD EPYC 9655, 8× RTX 4090, Ubuntu 22.04.5, CUDA 12.8) but no software dependencies, requirements file, or Dockerfile is provided to enable reproduction.",
    141           "source": "haiku"
    142         },
    143         "reproduction_instructions": {
    144           "applies": true,
    145           "answer": false,
    146           "justification": "No step-by-step reproduction instructions are provided; the approach is described formally but operational replication steps are absent.",
    147           "source": "haiku"
    148         }
    149       },
    150       "statistical_methodology": {
    151         "confidence_intervals_or_error_bars": {
    152           "applies": true,
    153           "answer": false,
    154           "justification": "No confidence intervals or error bars are reported in any of the result tables (Tables 2–5).",
    155           "source": "haiku"
    156         },
    157         "significance_tests": {
    158           "applies": true,
    159           "answer": false,
    160           "justification": "No statistical significance tests are used despite multiple comparative claims between TyFlow and baselines across two languages and model sizes.",
    161           "source": "haiku"
    162         },
    163         "effect_sizes_reported": {
    164           "applies": true,
    165           "answer": true,
    166           "justification": "Absolute performance values and deltas are reported in tables (e.g., pass@10 from 32.76% to 46.55% on SuFu), allowing direct effect size calculation with baseline context.",
    167           "source": "haiku"
    168         },
    169         "sample_size_justified": {
    170           "applies": true,
    171           "answer": false,
    172           "justification": "The SuFu test set (~58 programs) and Java test set (~61 programs) are very small; no power analysis or justification is provided for these sizes.",
    173           "source": "haiku"
    174         },
    175         "variance_reported": {
    176           "applies": true,
    177           "answer": false,
    178           "justification": "No variance or standard deviation across multiple runs is reported; single-run results are presented throughout.",
    179           "source": "haiku"
    180         }
    181       },
    182       "evaluation_design": {
    183         "baselines_included": {
    184           "applies": true,
    185           "answer": true,
    186           "justification": "Baselines include vanilla CodeT5-220M, T5Gemma2-2B, rejection sampling, and separated type-code generation variants (Type-First, Code-First).",
    187           "source": "haiku"
    188         },
    189         "baselines_contemporary": {
    190           "applies": true,
    191           "answer": true,
    192           "justification": "T5Gemma2-2B (2025) is recent; comparison with constrained decoding references Mündler et al. 2025, a contemporary approach; CodeT5 is the standard encoder-decoder code model for this setting.",
    193           "source": "haiku"
    194         },
    195         "ablation_study": {
    196           "applies": true,
    197           "answer": true,
    198           "justification": "Table 3 presents a sequential ablation on SuFu incrementally adding syntactic pruning, type pruning, and dynamic typing context, quantifying each component's contribution to pass@k, FSP, and CER.",
    199           "source": "haiku"
    200         },
    201         "multiple_metrics": {
    202           "applies": true,
    203           "answer": true,
    204           "justification": "Four metrics are used: pass@1, pass@10, First Success Position (FSP), and Compilation Error Rate (CER), covering functional correctness, ranking efficiency, and type validity.",
    205           "source": "haiku"
    206         },
    207         "human_evaluation": {
    208           "applies": false,
    209           "answer": false,
    210           "justification": "Human evaluation of system outputs is not applicable; automated unit test execution is the standard and appropriate evaluation method for code generation benchmarks with test cases.",
    211           "source": "haiku"
    212         },
    213         "held_out_test_set": {
    214           "applies": true,
    215           "answer": true,
    216           "justification": "SuFu uses a random 80/20 train/test split and Java uses a random 90/10 split; evaluation is reported on held-out test sets.",
    217           "source": "haiku"
    218         },
    219         "per_category_breakdown": {
    220           "applies": true,
    221           "answer": false,
    222           "justification": "No per-difficulty or per-type breakdown is provided within each language dataset; results are aggregated at the language level only.",
    223           "source": "haiku"
    224         },
    225         "failure_cases_discussed": {
    226           "applies": true,
    227           "answer": true,
    228           "justification": "The paper discusses why TyFlow retains CER=3.52% on Java, attributing it to static analysis errors outside the implemented type system (e.g., unreachable code), explaining a specific class of failures.",
    229           "source": "haiku"
    230         },
    231         "negative_results_reported": {
    232           "applies": true,
    233           "answer": true,
    234           "justification": "The marginal improvement of TyFlow-220M over CodeT5-220M on Java pass@1 (11.94% vs 10.45%) and zero improvement from rejection sampling on Java are reported without omission.",
    235           "source": "haiku"
    236         }
    237       },
    238       "setup_transparency": {
    239         "model_versions_specified": {
    240           "applies": true,
    241           "answer": true,
    242           "justification": "CodeT5-220M and T5Gemma2-2B are specified with parameter counts, layer configurations, hidden dimensions, attention heads, and citations to source papers.",
    243           "source": "haiku"
    244         },
    245         "prompts_provided": {
    246           "applies": true,
    247           "answer": false,
    248           "justification": "The input format (NL description + synthesis decision sequence + current synthesis goal) is described conceptually, but actual prompts or templates used during training and inference are not provided.",
    249           "source": "haiku"
    250         },
    251         "hyperparameters_reported": {
    252           "applies": true,
    253           "answer": false,
    254           "justification": "No hyperparameters (learning rate, batch size, optimizer, training epochs) are reported; the paper states only that models were 'fine-tuned until convergence' with 'the same set of hyperparameters.'",
    255           "source": "haiku"
    256         },
    257         "scaffolding_described": {
    258           "applies": true,
    259           "answer": true,
    260           "justification": "The TyFlow synthesis framework (synthesis rules, Algorithm 1 for tree construction, syntactic and type pruning, beam search integration) is thoroughly described with formal definitions and pseudocode in Sections 3–5.",
    261           "source": "haiku"
    262         },
    263         "data_preprocessing_documented": {
    264           "applies": true,
    265           "answer": false,
    266           "justification": "While dataset sources and split ratios are given, preprocessing details (tokenization, sequence length limits, OpenCoder initial fine-tuning procedure, conversion from programs to decision sequences) are not documented.",
    267           "source": "haiku"
    268         }
    269       },
    270       "data_integrity": {
    271         "raw_data_available": {
    272           "applies": true,
    273           "answer": false,
    274           "justification": "Model outputs and predictions are not released; the augmented SuFu dataset (programs + GPT-generated NL descriptions) is not packaged for release, limiting independent verification.",
    275           "source": "haiku"
    276         },
    277         "data_collection_described": {
    278           "applies": true,
    279           "answer": true,
    280           "justification": "Data sources are identified: SuFu programs from jiry17/SuFu GitHub with NL descriptions generated by GPT-o3-mini and human-reviewed; Java tasks from MBJP at amazon-science/mxeval.",
    281           "source": "haiku"
    282         },
    283         "recruitment_methods_described": {
    284           "applies": false,
    285           "answer": false,
    286           "justification": "No human participants; standard benchmarks are used without participant recruitment.",
    287           "source": "haiku"
    288         },
    289         "data_pipeline_documented": {
    290           "applies": true,
    291           "answer": false,
    292           "justification": "The pipeline from raw programs to synthesis decision sequences is described architecturally but not as a reproducible procedure with implementation details or intermediate artifact releases.",
    293           "source": "haiku"
    294         }
    295       },
    296       "contamination": {
    297         "training_cutoff_stated": {
    298           "applies": true,
    299           "answer": false,
    300           "justification": "Training data cutoffs for CodeT5 and T5Gemma2-2B pre-training corpora are not stated; this is relevant since MBJP is based on public MBPP and may appear in pre-training data.",
    301           "source": "haiku"
    302         },
    303         "train_test_overlap_discussed": {
    304           "applies": true,
    305           "answer": false,
    306           "justification": "Potential overlap between MBJP benchmark problems and the pre-training data of CodeT5 or T5Gemma2-2B is not discussed.",
    307           "source": "haiku"
    308         },
    309         "benchmark_contamination_addressed": {
    310           "applies": true,
    311           "answer": false,
    312           "justification": "MBJP is based on the public MBPP dataset available before T5Gemma2-2B's training cutoff; this potential contamination is not addressed.",
    313           "source": "haiku"
    314         }
    315       },
    316       "human_studies": {
    317         "pre_registered": {
    318           "applies": false,
    319           "answer": false,
    320           "justification": "No human participants in this study.",
    321           "source": "haiku"
    322         },
    323         "irb_or_ethics_approval": {
    324           "applies": false,
    325           "answer": false,
    326           "justification": "No human participants in this study.",
    327           "source": "haiku"
    328         },
    329         "demographics_reported": {
    330           "applies": false,
    331           "answer": false,
    332           "justification": "No human participants in this study.",
    333           "source": "haiku"
    334         },
    335         "inclusion_exclusion_criteria": {
    336           "applies": false,
    337           "answer": false,
    338           "justification": "No human participants in this study.",
    339           "source": "haiku"
    340         },
    341         "randomization_described": {
    342           "applies": false,
    343           "answer": false,
    344           "justification": "No human participants in this study.",
    345           "source": "haiku"
    346         },
    347         "blinding_described": {
    348           "applies": false,
    349           "answer": false,
    350           "justification": "No human participants in this study.",
    351           "source": "haiku"
    352         },
    353         "attrition_reported": {
    354           "applies": false,
    355           "answer": false,
    356           "justification": "No human participants in this study.",
    357           "source": "haiku"
    358         }
    359       },
    360       "cost_and_practicality": {
    361         "inference_cost_reported": {
    362           "applies": true,
    363           "answer": false,
    364           "justification": "Token usage reduction (~45-48% fewer tokens vs. separated approaches) is reported, but actual inference latency or monetary cost per query is not measured or reported.",
    365           "source": "haiku"
    366         },
    367         "compute_budget_stated": {
    368           "applies": true,
    369           "answer": false,
    370           "justification": "Hardware is specified but total training compute hours or GPU-hours are not reported.",
    371           "source": "haiku"
    372         }
    373       }
    374     }
    375   },
    376   "claims": [
    377     {
    378       "claim": "TyFlow eliminates type errors on SuFu (CER=0%) and dramatically reduces them on Java (from 38.51% to 3.52% for 220M model).",
    379       "evidence": "Table 2 reports CER=0.00% for both TyFlow-220M and TyFlow-2B on SuFu, and CER=3.52%/3.12% for the Java variants.",
    380       "supported": "strong"
    381     },
    382     {
    383       "claim": "TyFlow significantly improves functional correctness: pass@10 from 32.76% to 46.55% on SuFu (220M) and from 35.82% to 40.30% on Java (2B).",
    384       "evidence": "Table 2 reports these numbers; improvements are consistent across both model sizes on both languages.",
    385       "supported": "strong"
    386     },
    387     {
    388       "claim": "Internalizing type reasoning (TyFlow) is substantially more effective than post-hoc rejection sampling.",
    389       "evidence": "Table 4 shows rejection sampling provides zero improvement on Java (pass@10 stays 20.90%) and only 6.89pp on SuFu, versus TyFlow's improvements of 7.46pp on Java and 13.79pp on SuFu.",
    390       "supported": "moderate"
    391     },
    392     {
    393       "claim": "Integrated type-code generation outperforms separated type-code generation in both accuracy and token efficiency.",
    394       "evidence": "Table 5 shows TyFlow achieves higher pass@k with 45-48% fewer tokens than Type-First and Code-First variants on both languages.",
    395       "supported": "strong"
    396     },
    397     {
    398       "claim": "Type pruning is the critical component, responsible for eliminating compilation errors and contributing +10.34pp pass@1 in the ablation.",
    399       "evidence": "Table 3 shows adding type pruning causes the largest jump: pass@1 from 27.59% to 37.93% and CER from 72.13% to 0.00%.",
    400       "supported": "strong"
    401     },
    402     {
    403       "claim": "Benefits are more pronounced for low-resource languages with complex type systems (SuFu) than for common languages (Java).",
    404       "evidence": "SuFu pass@1 improvement (+13.79pp for 220M) is roughly 9x larger than Java pass@1 improvement (+1.49pp for 220M), consistent with the paper's framing of SuFu as a challenging low-resource case.",
    405       "supported": "moderate"
    406     }
    407   ],
    408   "methodology_tags": [
    409     "benchmark-eval",
    410     "empirical"
    411   ],
    412   "key_findings": "TyFlow introduces a proof-guided synthesis system maintaining a formal isomorphism between type derivation trees and synthesis derivation trees, enabling neural code models to internalize type reasoning rather than rely on external filtering. On SuFu (a low-resource functional language with complex types), TyFlow completely eliminates compilation errors (CER=0%) while improving pass@10 from 32.76% to 46.55% for the 220M model. On Java, improvements are more modest (+4-7pp pass@10) with residual CER (3.12-3.52%) from uncaptured static analysis constraints outside the implemented type system. An ablation study identifies type pruning as the critical component, and integrated type-code generation uses 45-48% fewer tokens than separated approaches while achieving higher accuracy.",
    413   "red_flags": [
    414     {
    415       "flag": "Tiny test sets",
    416       "detail": "SuFu test set contains ~58 programs (20% of 290) and Java test set ~61 programs (10% of 608); no error bars or significance tests accompany comparative claims at these sample sizes."
    417     },
    418     {
    419       "flag": "No statistical testing",
    420       "detail": "No statistical significance tests or confidence intervals are reported despite multiple comparative claims across baselines, ablation conditions, and two languages."
    421     },
    422     {
    423       "flag": "No code release",
    424       "detail": "The TyFlow implementation is not released, preventing independent reproduction despite the paper's reliance on a custom synthesis framework and training pipeline."
    425     },
    426     {
    427       "flag": "Missing hyperparameters",
    428       "detail": "No training hyperparameters (learning rate, batch size, optimizer, number of epochs) are reported despite results depending heavily on fine-tuning these large models."
    429     },
    430     {
    431       "flag": "Benchmark contamination unaddressed",
    432       "detail": "MBJP is based on the public MBPP dataset; potential overlap with T5Gemma2-2B pre-training data (a 2025 model trained on code) is not discussed."
    433     },
    434     {
    435       "flag": "No funding disclosure",
    436       "detail": "No funding source is disclosed; one author is from Tencent, which has commercial interest in code generation, but no conflict of interest statement is provided."
    437     }
    438   ],
    439   "cited_papers": [
    440     {
    441       "title": "Type-Constrained Code Generation with Language Models",
    442       "relevance": "Most directly related contemporary work on enforcing type correctness in LLM code generation via constrained decoding; TyFlow is explicitly compared against this approach."
    443     },
    444     {
    445       "title": "GrammarT5: Grammar-Integrated Pretrained Encoder-Decoder Neural Model for Code",
    446       "relevance": "Prior work on grammar-based code representation that TyFlow directly extends; paper explicitly positions TyFlow as generalizing grammar-based encoding to arbitrary CHC-representable constraints."
    447     },
    448     {
    449       "title": "Grammar-Based Code Representation: Is It a Worthy Pursuit for LLMs?",
    450       "relevance": "Evaluates grammar-based code representation approaches; foundational for understanding TyFlow's design choices and the value of structured representations."
    451     },
    452     {
    453       "title": "Tare: Type-Aware Neural Program Repair",
    454       "relevance": "Most closely related prior work using type awareness for code manipulation; TyFlow explicitly contrasts its approach against Tare's narrower scope and lack of type explicitness."
    455     },
    456     {
    457       "title": "Grammar-Aligned Decoding",
    458       "relevance": "Provides theoretical analysis showing constrained decoding can distort LM output distributions; key motivation cited for TyFlow's alternative internalization approach."
    459     },
    460     {
    461       "title": "Evaluating Large Language Models Trained on Code",
    462       "relevance": "Introduces the pass@k metric and code evaluation methodology used in this paper, along with the HumanEval benchmark; MBPP, on which MBJP is based, was introduced separately in 'Program Synthesis with Large Language Models'."
    463     },
    464     {
    465       "title": "An Empirical Evaluation of GitHub Copilot's Code Suggestions",
    466       "relevance": "Cited as empirical evidence that 24% of Copilot suggestions have compilation errors, motivating the type correctness problem addressed by TyFlow."
    467     },
    468     {
    469       "title": "GramTrans: A Better Code Representation Approach in Code Generation",
    470       "relevance": "Related work from the same research group supporting the hypothesis that assisting LMs with structural constraints during training improves overall performance."
    471     }
    472   ],
    473   "engagement_factors": {
    474     "practical_relevance": {
    475       "score": 2,
    476       "justification": "Type correctness in code generation is a real practitioner problem (24% Copilot compilation errors cited), but the approach requires per-language type system specification and a custom toolchain, limiting immediate drop-in applicability."
    477     },
    478     "surprise_contrarian": {
    479       "score": 2,
    480       "justification": "The finding that internalizing type reasoning outperforms external constrained decoding—and that rejection sampling provides no benefit on Java and only about half of TyFlow's gain on SuFu—challenges the dominant approach to enforcing code correctness."
    481     },
    482     "fear_safety": {
    483       "score": 0,
    484       "justification": "No AI safety or risk concerns raised; the paper focuses on correctness properties of code generation without broader societal implications."
    485     },
    486     "drama_conflict": {
    487       "score": 0,
    488       "justification": "Standard research contribution; comparisons with prior work are constructive rather than adversarial."
    489     },
    490     "demo_ability": {
    491       "score": 1,
    492       "justification": "No code or demo released; the approach requires implementing a full type system specification and training infrastructure, making it inaccessible for immediate experimentation."
    493     },
    494     "brand_recognition": {
    495       "score": 1,
    496       "justification": "Peking University is a recognized research institution and one author is affiliated with Tencent, but the work is not associated with a famous AI lab or a major AI product."
    497     }
    498   },
    499   "hn_data": {
    500     "threads": [
    501       {
    502         "hn_id": "38424009",
    503         "title": "Does GPT-4 Pass the Turing Test?",
    504         "points": 60,
    505         "comments": 88,
    506         "url": "https://news.ycombinator.com/item?id=38424009",
    507         "created_at": "2023-11-26T19:04:03Z"
    508       },
    509       {
    510         "hn_id": "45588116",
    511         "title": "Old Is Gold: Optimizing Single-Threaded Applications with Exgen-Malloc",
    512         "points": 16,
    513         "comments": 7,
    514         "url": "https://news.ycombinator.com/item?id=45588116",
    515         "created_at": "2025-10-15T04:33:36Z"
    516       },
    517       {
    518         "hn_id": "38093289",
    519         "title": "Does GPT-4 Pass the Turing Test?",
    520         "points": 5,
    521         "comments": 1,
    522         "url": "https://news.ycombinator.com/item?id=38093289",
    523         "created_at": "2023-11-01T00:45:13Z"
    524       },
    525       {
    526         "hn_id": "45793608",
    527         "title": "Old Is Gold: Optimizing Single-Threaded Applications with Exgen-Malloc",
    528         "points": 2,
    529         "comments": 0,
    530         "url": "https://news.ycombinator.com/item?id=45793608",
    531         "created_at": "2025-11-02T21:28:55Z"
    532       },
    533       {
    534         "hn_id": "37960574",
    535         "title": "Incorrect conclusions drawn for plausible looking diagrams",
    536         "points": 1,
    537         "comments": 0,
    538         "url": "https://news.ycombinator.com/item?id=37960574",
    539         "created_at": "2023-10-20T19:39:01Z"
    540       }
    541     ],
    542     "top_points": 60,
    543     "total_points": 84,
    544     "total_comments": 96
    545   }
    546 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs