scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (27949B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "GraphCodeAgent: Dual Graph-Guided LLM Agent for Retrieval-Augmented Repo-Level Code Generation",
      6     "authors": [
      7       "Jia Li",
      8       "Xianjie Shi",
      9       "Kechi Zhang",
     10       "Lei Li",
     11       "Ge Li",
     12       "Zhi Jin",
     13       "Huangzhao Zhang",
     14       "Fang Liu",
     15       "Yuwei Zhang",
     16       "Zhengwei Tao",
     17       "Yihong Dong",
     18       "Yuqi Zhu",
     19       "Chongyang Tao"
     20     ],
     21     "year": 2025,
     22     "venue": "arXiv",
     23     "arxiv_id": "2504.10046",
     24     "doi": "XXXXXXX.XXXXXXX"
     25   },
     26   "checklist": {
     27     "claims_and_evidence": {
     28       "abstract_claims_supported": {
     29         "applies": true,
     30         "answer": true,
     31         "justification": "All quantitative claims in the abstract (43.81% relative improvement with GPT-4o on DevEval, 39.15% with Gemini-1.5-Pro, 31.91% and 8.25% on CoderEval) are confirmed directly in Table 4.",
     32         "source": "haiku"
     33       },
     34       "causal_claims_justified": {
     35         "applies": true,
     36         "answer": true,
     37         "justification": "Ablation study (RQ2, Table 5) removes individual tools to establish their causal contribution; this design is appropriate for causal inference about component effects.",
     38         "source": "haiku"
     39       },
     40       "generalization_bounded": {
     41         "applies": true,
     42         "answer": false,
     43         "justification": "The paper claims the approach is 'universally effective' and has 'potential for practical application in complex software development workflows' based on only two Python-focused benchmarks and three LLMs, without stating language or domain scope boundaries.",
     44         "source": "haiku"
     45       },
     46       "alternative_explanations_discussed": {
     47         "applies": true,
     48         "answer": false,
     49         "justification": "No alternative explanations are discussed, including the key confound that baselines truncate context to fit LLM windows while GraphCodeAgent does not, potentially explaining the performance gap independently of graph structure.",
     50         "source": "haiku"
     51       },
     52       "proxy_outcome_distinction": {
     53         "applies": true,
     54         "answer": true,
     55         "justification": "Pass@1 via automated test-case execution directly measures functional correctness; the paper appropriately frames this as the primary metric for repo-level code generation without conflating it with productivity or quality.",
     56         "source": "haiku"
     57       }
     58     },
     59     "limitations_and_scope": {
     60       "limitations_section_present": {
     61         "applies": true,
     62         "answer": true,
     63         "justification": "Section 6.2 'Threats to Validity' is a dedicated subsection covering internal and external validity threats.",
     64         "source": "haiku"
     65       },
     66       "threats_to_validity_specific": {
     67         "applies": true,
     68         "answer": false,
     69         "justification": "Threats are largely generic: 'minor threat to hyperparameter settings' and benchmark quality descriptions. Key specific threats — reliance on LLM-generated requirement annotation quality, language restriction to Python, and absent variance reporting — are not discussed.",
     70         "source": "haiku"
     71       },
     72       "scope_boundaries_stated": {
     73         "applies": true,
     74         "answer": false,
     75         "justification": "The paper does not explicitly state that results are bounded to Python repositories or to the specific two benchmarks tested; conclusions are presented as generally applicable to 'real-world repo-level coding.'",
     76         "source": "haiku"
     77       }
     78     },
     79     "conflicts_of_interest": {
     80       "funding_disclosed": {
     81         "applies": true,
     82         "answer": false,
     83         "justification": "No funding source is disclosed anywhere in the paper.",
     84         "source": "haiku"
     85       },
     86       "affiliations_disclosed": {
     87         "applies": true,
     88         "answer": true,
     89         "justification": "All author affiliations are listed in the paper header: Wuhan University, Peking University, University of Hong Kong, Tsinghua University, Beihang University, ISCAS, and Academy of Military Sciences.",
     90         "source": "haiku"
     91       },
     92       "funder_independent_of_outcome": {
     93         "applies": false,
     94         "answer": false,
     95         "justification": "No funding is disclosed, so this criterion is not applicable.",
     96         "source": "haiku"
     97       },
     98       "financial_interests_declared": {
     99         "applies": true,
    100         "answer": false,
    101         "justification": "No competing interests statement or financial disclosure appears in the paper.",
    102         "source": "haiku"
    103       }
    104     },
    105     "scope_and_framing": {
    106       "key_terms_defined": {
    107         "applies": true,
    108         "answer": true,
    109         "justification": "Key terms are defined: 'repo-level code generation,' 'RACG,' 'Requirement Graph,' 'Structural-Semantic Code Graph,' and 'standalone vs. non-standalone' dependency types are all explicitly defined with formal notation.",
    110         "source": "haiku"
    111       },
    112       "intended_contribution_clear": {
    113         "applies": true,
    114         "answer": true,
    115         "justification": "Three explicit bullet-point contributions are listed in the introduction: the GraphCodeAgent system, the RG+SSCG dual graph construction with unified tools, and superior empirical performance on benchmarks.",
    116         "source": "haiku"
    117       },
    118       "engagement_with_prior_work": {
    119         "applies": true,
    120         "answer": true,
    121         "justification": "Section 7 provides a structured comparison with text-based, graph-based, and agent-based RACG, and Table 1 directly contrasts graph representation capabilities of six related methods.",
    122         "source": "haiku"
    123       }
    124     }
    125   },
    126   "type_checklist": {
    127     "empirical": {
    128       "artifacts": {
    129         "code_released": {
    130           "applies": true,
    131           "answer": true,
    132           "justification": "Section 9 states 'The code implementation and data are publicly available at figshare anonymous link' with URL https://figshare.com/s/4148a1c56d08804cd75a provided.",
    133           "source": "haiku"
    134         },
    135         "data_released": {
    136           "applies": true,
    137           "answer": true,
    138           "justification": "Both evaluation benchmarks (DevEval and CoderEval) are publicly available and used unmodified; Section 9 figshare link also includes data.",
    139           "source": "haiku"
    140         },
    141         "environment_specified": {
    142           "applies": true,
    143           "answer": false,
    144           "justification": "Specific tools are mentioned (tree-sitter, Neo4j, stella_en_400M_v5, Black) but no requirements.txt, Dockerfile, or formal environment specification is provided in the paper.",
    145           "source": "haiku"
    146         },
    147         "reproduction_instructions": {
    148           "applies": true,
    149           "answer": false,
    150           "justification": "No step-by-step reproduction instructions appear in the paper; Section 9 refers readers to a figshare link without providing detailed setup or execution instructions.",
    151           "source": "haiku"
    152         }
    153       },
    154       "statistical_methodology": {
    155         "confidence_intervals_or_error_bars": {
    156           "applies": true,
    157           "answer": false,
    158           "justification": "No confidence intervals or error bars are reported; experiments are run twice and averaged but variance across runs is not shown.",
    159           "source": "haiku"
    160         },
    161         "significance_tests": {
    162           "applies": true,
    163           "answer": false,
    164           "justification": "No statistical significance tests are used despite comparative performance claims across multiple baselines and benchmarks.",
    165           "source": "haiku"
    166         },
    167         "effect_sizes_reported": {
    168           "applies": true,
    169           "answer": true,
    170           "justification": "Relative percentage improvements are reported consistently throughout (e.g., 43.81% relative improvement on DevEval with GPT-4o), providing contextualized effect sizes with baseline reference points.",
    171           "source": "haiku"
    172         },
    173         "sample_size_justified": {
    174           "applies": true,
    175           "answer": false,
    176           "justification": "Sample sizes are determined by the existing benchmark sizes (DevEval: 1,825; CoderEval Python subset) with no power analysis or justification for adequacy.",
    177           "source": "haiku"
    178         },
    179         "variance_reported": {
    180           "applies": true,
    181           "answer": false,
    182           "justification": "Experiments are run twice and averaged, but standard deviation or variance across runs is not reported in any results table.",
    183           "source": "haiku"
    184         }
    185       },
    186       "evaluation_design": {
    187         "baselines_included": {
    188           "applies": true,
    189           "answer": true,
    190           "justification": "Six baselines are included: ScratchCG (no retrieval), Sparse RACG, Dense RACG, RepoCoder, GraphCoder, and CodeAgent, covering text-based, graph-based, and agent-based paradigms.",
    191           "source": "haiku"
    192         },
    193         "baselines_contemporary": {
    194           "applies": true,
    195           "answer": true,
    196           "justification": "Baselines include CodeAgent (2024), GraphCoder (2024), and RepoCoder (2023), all recent and competitive methods in the repo-level code generation space.",
    197           "source": "haiku"
    198         },
    199         "ablation_study": {
    200           "applies": true,
    201           "answer": true,
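    202           "justification": "RQ2 (Table 5) ablates individual tools (SSCGTraverse, WebSearch, CodeTesting), showing contributions from 0.51% (WebSearch) to 12.17% (SSCGTraverse); the 12.17% figure is the full system's relative gain over the variant without SSCGTraverse (58.14 vs 51.83 Pass@1).",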
    203           "source": "haiku"
    204         },
    205         "multiple_metrics": {
    206           "applies": true,
    207           "answer": false,
    208           "justification": "Only Pass@1 is used; no secondary metrics such as partial credit, BLEU/CodeBLEU, compilation rate, or Pass@k with k>1 are reported.",
    209           "source": "haiku"
    210         },
    211         "human_evaluation": {
    212           "applies": false,
    213           "answer": false,
    214           "justification": "Functional correctness via automated test-case execution is the appropriate metric for this task; human evaluation is not applicable.",
    215           "source": "haiku"
    216         },
    217         "held_out_test_set": {
    218           "applies": true,
    219           "answer": true,
    220           "justification": "DevEval (1,825 examples from 117 repositories) and CoderEval are used as held-out test sets with existing benchmark splits.",
    221           "source": "haiku"
    222         },
    223         "per_category_breakdown": {
    224           "applies": true,
    225           "answer": true,
    226           "justification": "Table 6 provides per-category results across dependency types: standalone, local-file, cross-file, and local+cross-file non-standalone scenarios.",
    227           "source": "haiku"
    228         },
    229         "failure_cases_discussed": {
    230           "applies": true,
    231           "answer": false,
    232           "justification": "Section 6.1 presents only a success case study; no failure cases are shown or analyzed despite the approach achieving below 60% Pass@1.",
    233           "source": "haiku"
    234         },
    235         "negative_results_reported": {
    236           "applies": true,
    237           "answer": true,
    238           "justification": "Table 5 shows WebSearch contributes only 0.51% and CodeTesting only 1.84%, implicitly reporting that these components have minimal impact.",
    239           "source": "haiku"
    240         }
    241       },
    242       "setup_transparency": {
    243         "model_versions_specified": {
    244           "applies": true,
    245           "answer": true,
    246           "justification": "GPT-4o (GPT-4o-2024-08-06) and QwQ-32B are specific version identifiers, while Gemini-1.5-Pro-latest is less precise. The embedding model (stella_en_400M_v5) and the requirement-generation model (DeepSeek-V2.5) are also specified.",
    247           "source": "haiku"
    248         },
    249         "prompts_provided": {
    250           "applies": true,
    251           "answer": false,
    252           "justification": "The paper references 'the link provided at Section 9' for prompts but does not include actual prompt templates in the paper itself.",
    253           "source": "haiku"
    254         },
    255         "hyperparameters_reported": {
    256           "applies": true,
    257           "answer": true,
    258           "justification": "Key hyperparameters are reported: temperature=0, maximum generation length=500 tokens, cosine similarity threshold ε=0.8 for RG construction, and top-k retrieval count equalized across baselines.",
    259           "source": "haiku"
    260         },
    261         "scaffolding_described": {
    262           "applies": true,
    263           "answer": true,
    264           "justification": "Section 3.4 describes the ReAct agent with five tools (RGRetrieval, DualGraphMapping, SSCGTraverse, WebSearch, CodeTesting) in detail with inputs, outputs, and multi-turn reasoning flow.",
    265           "source": "haiku"
    266         },
    267         "data_preprocessing_documented": {
    268           "applies": true,
    269           "answer": true,
    270           "justification": "Repository parsing with tree-sitter, embedding computation with stella_en_400M_v5, requirement generation with DeepSeek-V2.5, cosine similarity thresholding, and Neo4j storage are all documented.",
    271           "source": "haiku"
    272         }
    273       },
    274       "data_integrity": {
    275         "raw_data_available": {
    276           "applies": true,
    277           "answer": true,
    278           "justification": "DevEval and CoderEval are publicly available benchmarks; the figshare link in Section 9 also provides data.",
    279           "source": "haiku"
    280         },
    281         "data_collection_described": {
    282           "applies": true,
    283           "answer": true,
    284           "justification": "DevEval (117 GitHub repositories, 1,825 examples, 10 domains, developer-annotated) and CoderEval (open-source Python projects with contextual dependencies) are described with their origins and content.",
    285           "source": "haiku"
    286         },
    287         "recruitment_methods_described": {
    288           "applies": false,
    289           "answer": false,
    290           "justification": "Standard public benchmarks are used; no participant recruitment is involved.",
    291           "source": "haiku"
    292         },
    293         "data_pipeline_documented": {
    294           "applies": true,
    295           "answer": true,
    296           "justification": "The full pipeline from repository parsing (tree-sitter) through graph construction (RG nodes, SSCG nodes and edges) to retrieval storage (Neo4j) is documented in Sections 3.2 and 3.3.",
    297           "source": "haiku"
    298         }
    299       },
    300       "contamination": {
    301         "training_cutoff_stated": {
    302           "applies": true,
    303           "answer": false,
    304           "justification": "Training data cutoffs are not stated for GPT-4o, Gemini-1.5-Pro, or QwQ-32B in the paper.",
    305           "source": "haiku"
    306         },
    307         "train_test_overlap_discussed": {
    308           "applies": true,
    309           "answer": false,
    310           "justification": "Potential overlap between benchmark repositories and model training data is not discussed, despite DevEval sourcing from GitHub repositories that may have been in pretraining corpora.",
    311           "source": "haiku"
    312         },
    313         "benchmark_contamination_addressed": {
    314           "applies": true,
    315           "answer": false,
    316           "justification": "DevEval and CoderEval were published in 2024; whether these benchmarks or their source repositories were available before GPT-4o's or Gemini's training cutoffs is not addressed.",
    317           "source": "haiku"
    318         }
    319       },
    320       "human_studies": {
    321         "pre_registered": {
    322           "applies": false,
    323           "answer": false,
    324           "justification": "No human participants; not applicable.",
    325           "source": "haiku"
    326         },
    327         "irb_or_ethics_approval": {
    328           "applies": false,
    329           "answer": false,
    330           "justification": "No human participants; not applicable.",
    331           "source": "haiku"
    332         },
    333         "demographics_reported": {
    334           "applies": false,
    335           "answer": false,
    336           "justification": "No human participants; not applicable.",
    337           "source": "haiku"
    338         },
    339         "inclusion_exclusion_criteria": {
    340           "applies": false,
    341           "answer": false,
    342           "justification": "No human participants; not applicable.",
    343           "source": "haiku"
    344         },
    345         "randomization_described": {
    346           "applies": false,
    347           "answer": false,
    348           "justification": "No human participants; not applicable.",
    349           "source": "haiku"
    350         },
    351         "blinding_described": {
    352           "applies": false,
    353           "answer": false,
    354           "justification": "No human participants; not applicable.",
    355           "source": "haiku"
    356         },
    357         "attrition_reported": {
    358           "applies": false,
    359           "answer": false,
    360           "justification": "No human participants; not applicable.",
    361           "source": "haiku"
    362         }
    363       },
    364       "cost_and_practicality": {
    365         "inference_cost_reported": {
    366           "applies": true,
    367           "answer": false,
    368           "justification": "The paper mentions retrieval 'typically takes only a few seconds' but no API token costs, total experiment costs, or detailed latency measurements are reported.",
    369           "source": "haiku"
    370         },
    371         "compute_budget_stated": {
    372           "applies": true,
    373           "answer": false,
    374           "justification": "No computational budget (total API calls, token usage, or hardware specifications) is reported for the full set of experiments.",
    375           "source": "haiku"
    376         }
    377       }
    378     }
    379   },
    380   "claims": [
    381     {
    382       "claim": "GraphCodeAgent achieves 43.81% relative improvement over the strongest baseline (Dense RACG) with GPT-4o on DevEval (58.14% vs 40.43% Pass@1)",
    383       "evidence": "Table 4 shows DevEval Pass@1: Dense RACG 40.43%, GraphCodeAgent 58.14% with GPT-4o",
    384       "supported": "strong"
    385     },
    386     {
    387       "claim": "For cross-file dependency tasks GraphCodeAgent nearly doubles the best baseline (43.31% vs 22.29% Pass@1, 94.30% relative improvement)",
    388       "evidence": "Table 6 Cross-file column: RepoCoder 22.29, GraphCodeAgent 43.31, labeled 94.30% relative improvement",
    389       "supported": "strong"
    390     },
    391     {
    392       "claim": "SSCGTraverse is the most critical component; its removal causes a 12.17% relative performance drop (58.14 → 51.83 Pass@1)",
    393       "evidence": "Table 5 ablation: w/o SSCGTraverse scores 51.83 vs full system 58.14, reported as 12.17% relative drop",
    394       "supported": "moderate"
    395     },
    396     {
    397       "claim": "GraphCodeAgent generalizes to reasoning models, achieving 10.65% relative improvement over RepoCoder with QwQ-32B on DevEval",
    398       "evidence": "Table 7: RepoCoder 48.93, GraphCodeAgent 54.14 on QwQ-32B; improvement calculated from these values",
    399       "supported": "strong"
    400     },
    401     {
    402       "claim": "WebSearch and CodeTesting contribute minimally (0.51% and 1.84% relative), while the graph traversal component dominates",
    403       "evidence": "Table 5 ablation shows marginal drops for w/o WebSearch and w/o CodeTesting versus 12.17% drop for w/o SSCGTraverse",
    404       "supported": "moderate"
    405     },
    406     {
    407       "claim": "The dual-graph approach bridges the NL-to-code gap by retrieving implicit dependencies not expressible through lexical/semantic matching",
    408       "evidence": "Figure 5 case study demonstrates multi-hop retrieval; Table 3 shows GraphCodeAgent is the only approach retrieving all four knowledge types",
    409       "supported": "moderate"
    410     }
    411   ],
    412   "methodology_tags": [
    413     "benchmark-eval",
    414     "case-study"
    415   ],
    416   "key_findings": "GraphCodeAgent introduces a dual-graph approach (Requirement Graph + Structural-Semantic Code Graph) for retrieval-augmented repo-level code generation, achieving up to 43.81% relative improvement over the strongest baseline on DevEval with GPT-4o. The approach shows particularly large gains for code with cross-file dependencies (94.30% relative improvement over RepoCoder), validating the core hypothesis that graph-guided multi-hop retrieval addresses the gap between natural language requirements and implicit code dependencies that lexical/semantic matching cannot capture. Ablation reveals SSCGTraverse as the critical component (12.17% relative drop on removal) while web search contributes only 0.51%, suggesting the graph traversal mechanism rather than external knowledge is the primary driver. The method generalizes across both non-reasoning and reasoning LLMs.",
    417   "red_flags": [
    418     {
    419       "flag": "No statistical significance tests",
    420       "detail": "All performance comparisons lack significance tests despite comparative claims; experiments are run only twice with averages reported and no variance shown."
    421     },
    422     {
    423       "flag": "Context-length confound unaddressed",
    424       "detail": "Baselines truncate retrieved code to fit LLM context windows while GraphCodeAgent's multi-turn interaction is exempt from this constraint, potentially explaining performance gaps independently of graph structure."
    425     },
    426     {
    427       "flag": "Ablation cannot remove core components",
    428       "detail": "RGRetrieval and DualGraphMapping are declared essential first steps and excluded from ablation, leaving their individual contributions unmeasured."
    429     },
    430     {
    431       "flag": "Single metric only",
    432       "detail": "Only Pass@1 is reported; no partial-credit, similarity, or compilation-rate metrics are provided that could reveal finer-grained quality differences or expose cases where GraphCodeAgent generates syntactically valid but semantically wrong code."
    433     },
    434     {
    435       "flag": "No contamination analysis",
    436       "detail": "DevEval and CoderEval source from GitHub repositories that may overlap with GPT-4o and Gemini training data; no training cutoffs are stated and no overlap analysis is performed."
    437     },
    438     {
    439       "flag": "LLM-generated requirement graph quality unvalidated at scale",
    440       "detail": "RG construction relies on DeepSeek-V2.5 to generate requirements and relationships; only 'two PhD candidates' manually verified an unspecified subset with no quantitative quality metrics reported."
    441     }
    442   ],
    443   "cited_papers": [
    444     {
    445       "title": "DevEval: A Manually-Annotated Code Generation Benchmark Aligned with Real-World Code Repositories",
    446       "relevance": "Primary benchmark for evaluation; 1,825 test examples from 117 real-world repositories across 10 domains"
    447     },
    448     {
    449       "title": "CoderEval: A Benchmark of Pragmatic Code Generation with Generative Pre-Trained Models",
    450       "relevance": "Second primary benchmark; provides project-level execution platform for functional correctness assessment"
    451     },
    452     {
    453       "title": "CodeAgent: Enhancing Code Generation with Tool-Integrated Agent Systems for Real-World Repo-Level Coding Challenges",
    454       "relevance": "Key baseline and prior work on agent-based RACG with five programming tools and four agent strategies"
    455     },
    456     {
    457       "title": "GraphCoder: Enhancing Repository-Level Code Completion via Code Context Graph-Based Retrieval and Language Model",
    458       "relevance": "Key baseline for graph-based RACG using control-flow and data-dependency graphs for code completion"
    459     },
    460     {
    461       "title": "RepoCoder: Repository-Level Code Completion through Iterative Retrieval and Generation",
    462       "relevance": "Baseline for iterative retrieval-generation; also introduces RepoEval benchmark"
    463     },
    464     {
    465       "title": "CodexGraph: Bridging Large Language Models and Code Repositories via Code Graph Databases",
    466       "relevance": "Related graph-based RACG approach using static analysis to build code graph databases"
    467     },
    468     {
    469       "title": "ReAct: Synergizing Reasoning and Acting in Language Models",
    470       "relevance": "Foundation for the ReAct agent reasoning strategy employed by GraphCodeAgent"
    471     },
    472     {
    473       "title": "Agentless: Demystifying LLM-based Software Engineering Agents",
    474       "relevance": "Related agentic framework for issue resolution that preprocesses repository structure for LLM navigation"
    475     }
    476   ],
    477   "engagement_factors": {
    478     "practical_relevance": {
    479       "score": 2,
    480       "justification": "Addresses real-world repo-level code generation with a public code release and evaluation on realistic benchmarks, though deployment requires a Neo4j graph database and LLM API access."
    481     },
    482     "surprise_contrarian": {
    483       "score": 1,
    484       "justification": "Graph-guided retrieval improving code generation is an incremental advance; the specific combination of requirement-level and structural graphs is novel but not surprising or contrarian."
    485     },
    486     "fear_safety": {
    487       "score": 0,
    488       "justification": "No AI safety or risk concerns raised; purely a code generation performance paper."
    489     },
    490     "drama_conflict": {
    491       "score": 0,
    492       "justification": "No controversy or conflict angle; straightforward performance improvement paper with no competing claims."
    493     },
    494     "demo_ability": {
    495       "score": 2,
    496       "justification": "Code is released on figshare and the approach can be applied to custom repositories, though setup requires Neo4j, tree-sitter, and multiple model APIs."
    497     },
    498     "brand_recognition": {
    499       "score": 1,
    500       "justification": "Authors from Peking University, Wuhan University, and other Chinese institutions have moderate recognition in the code intelligence community; no major Western lab affiliation."
    501     }
    502   },
    503   "hn_data": {
    504     "threads": [
    505       {
    506         "hn_id": "40120846",
    507         "title": "FPGA Architecture for Deep Learning: Survey and Future Directions",
    508         "points": 128,
    509         "comments": 52,
    510         "url": "https://news.ycombinator.com/item?id=40120846",
    511         "created_at": "2024-04-22T21:13:51Z"
    512       },
    513       {
    514         "hn_id": "46728063",
    515         "title": "New York Times games are hard: A computational perspective",
    516         "points": 73,
    517         "comments": 33,
    518         "url": "https://news.ycombinator.com/item?id=46728063",
    519         "created_at": "2026-01-23T03:31:44Z"
    520       },
    521       {
    522         "hn_id": "44819042",
    523         "title": "Solving the compute crisis with physics-based ASICs",
    524         "points": 5,
    525         "comments": 2,
    526         "url": "https://news.ycombinator.com/item?id=44819042",
    527         "created_at": "2025-08-06T23:29:40Z"
    528       },
    529       {
    530         "hn_id": "42799267",
    531         "title": "The Mathematics of Artificial Intelligence",
    532         "points": 4,
    533         "comments": 0,
    534         "url": "https://news.ycombinator.com/item?id=42799267",
    535         "created_at": "2025-01-23T00:51:02Z"
    536       },
    537       {
    538         "hn_id": "22958362",
    539         "title": "Chip Placement with Deep Reinforcement Learning",
    540         "points": 4,
    541         "comments": 0,
    542         "url": "https://news.ycombinator.com/item?id=22958362",
    543         "created_at": "2020-04-23T17:21:46Z"
    544       },
    545       {
    546         "hn_id": "35662520",
    547         "title": "Learning to Program with Natural Language",
    548         "points": 3,
    549         "comments": 2,
    550         "url": "https://news.ycombinator.com/item?id=35662520",
    551         "created_at": "2023-04-22T01:45:40Z"
    552       },
    553       {
    554         "hn_id": "44020812",
    555         "title": "AI Agents vs. Agentic AI: A Conceptual Taxonomy, Applications and Challenges",
    556         "points": 3,
    557         "comments": 0,
    558         "url": "https://news.ycombinator.com/item?id=44020812",
    559         "created_at": "2025-05-18T12:20:18Z"
    560       },
    561       {
    562         "hn_id": "44002229",
    563         "title": "Superposition of Features Creates Power Law Performance in LLMs",
    564         "points": 3,
    565         "comments": 0,
    566         "url": "https://news.ycombinator.com/item?id=44002229",
    567         "created_at": "2025-05-16T05:58:20Z"
    568       },
    569       {
    570         "hn_id": "35658291",
    571         "title": "The Enmity Paradox",
    572         "points": 3,
    573         "comments": 0,
    574         "url": "https://news.ycombinator.com/item?id=35658291",
    575         "created_at": "2023-04-21T18:52:22Z"
    576       },
    577       {
    578         "hn_id": "24889330",
    579         "title": "Chip Placement with Deep Reinforcement Learning",
    580         "points": 3,
    581         "comments": 0,
    582         "url": "https://news.ycombinator.com/item?id=24889330",
    583         "created_at": "2020-10-25T19:50:29Z"
    584       }
    585     ],
    586     "top_points": 128,
    587     "total_points": 229,
    588     "total_comments": 89
    589   }
    590 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs