scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (26722B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "LD-Scene: LLM-Guided Diffusion for Controllable Generation of Adversarial Safety-Critical Driving Scenarios",
      6     "authors": [
      7       "Mingxing Peng",
      8       "Yuting Xie",
      9       "Xusen Guo",
     10       "Ruoyu Yao",
     11       "Hai Yang",
     12       "Jun Ma"
     13     ],
     14     "year": 2025,
     15     "venue": "arXiv.org",
     16     "arxiv_id": "2505.11247",
     17     "doi": "10.48550/arXiv.2505.11247"
     18   },
     19   "checklist": {
     20     "claims_and_evidence": {
     21       "abstract_claims_supported": {
     22         "applies": true,
     23         "answer": true,
     24         "justification": "Abstract claims of state-of-the-art adversarial effectiveness are supported by Table 1 (40.75% Adv-Ego Coll vs. 27.81% best baseline). Fine-grained control claims are supported by Tables 2-3 and case studies in Figures 6-7.",
     25         "source": "haiku"
     26       },
     27       "causal_claims_justified": {
     28         "applies": true,
     29         "answer": true,
     30         "justification": "Ablation studies (Table 2 and Fig. 5a) isolate the contributions of the guidance components and of the debugger module, the latter over 500 query trials, providing adequate causal evidence for the system-level claims.",
     31         "source": "haiku"
     32       },
     33       "generalization_bounded": {
     34         "applies": true,
     35         "answer": false,
     36         "justification": "Results are only on the nuScenes dataset, but the conclusion states LD-Scene 'outperforms existing adversarial scenario generation baselines' without qualifying this to nuScenes specifically, implying broader generalization not demonstrated.",
     37         "source": "haiku"
     38       },
     39       "alternative_explanations_discussed": {
     40         "applies": true,
     41         "answer": false,
     42         "justification": "The paper does not discuss whether improvements over baselines stem from the LDM architecture, the LLM guidance, the VAE pretraining, or their combination; only the ablation over guidance loss components is considered.",
     43         "source": "haiku"
     44       },
     45       "proxy_outcome_distinction": {
     46         "applies": true,
     47         "answer": true,
     48         "justification": "The paper directly measures adversarial collision rate and offroad rate, which are direct measures of the claimed adversariality and realism goals; there is no conflation of a proxy with a distal outcome.",
     49         "source": "haiku"
     50       }
     51     },
     52     "limitations_and_scope": {
     53       "limitations_section_present": {
     54         "applies": true,
     55         "answer": false,
     56         "justification": "There is no dedicated limitations or threats-to-validity section; the conclusion only describes positive outcomes without acknowledging weaknesses.",
     57         "source": "haiku"
     58       },
     59       "threats_to_validity_specific": {
     60         "applies": true,
     61         "answer": false,
     62         "justification": "No threats to validity are discussed — the single-dataset evaluation, the synthetic query generation via GPT-4o for debugger testing, and the sensitivity to the rule-based ego planner are all unacknowledged threats.",
     63         "source": "haiku"
     64       },
     65       "scope_boundaries_stated": {
     66         "applies": true,
     67         "answer": false,
     68         "justification": "The paper does not state what settings the results do not apply to, such as other datasets, different ego planners, or non-urban scenarios.",
     69         "source": "haiku"
     70       }
     71     },
     72     "conflicts_of_interest": {
     73       "funding_disclosed": {
     74         "applies": true,
     75         "answer": false,
     76         "justification": "No funding acknowledgment section appears in the paper.",
     77         "source": "haiku"
     78       },
     79       "affiliations_disclosed": {
     80         "applies": true,
     81         "answer": true,
     82         "justification": "All authors list their institutional affiliations (HKUST Guangzhou, Sun Yat-sen University, HKUST) on the title page.",
     83         "source": "haiku"
     84       },
     85       "funder_independent_of_outcome": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "No funding disclosed; cannot assess funder independence.",
     89         "source": "haiku"
     90       },
     91       "financial_interests_declared": {
     92         "applies": true,
     93         "answer": false,
     94         "justification": "No competing interests or financial interests statement appears anywhere in the paper.",
     95         "source": "haiku"
     96       }
     97     },
     98     "scope_and_framing": {
     99       "key_terms_defined": {
    100         "applies": true,
    101         "answer": true,
    102         "justification": "Section 3.1 formally defines the scenario structure, adversarial vehicle role, and ego planner; adversarial levels (Weak/Medium/Strong) are operationally defined with specific intensity criteria.",
    103         "source": "haiku"
    104       },
    105       "intended_contribution_clear": {
    106         "applies": true,
    107         "answer": true,
    108         "justification": "The introduction lists three explicit contributions: the LD-Scene framework, the LLM guidance generation module with CoT debugger, and nuScenes benchmark results.",
    109         "source": "haiku"
    110       },
    111       "engagement_with_prior_work": {
    112         "applies": true,
    113         "answer": true,
    114         "justification": "A structured related work section (Section 2) covers three relevant areas, with specific comparisons to CTG++, Strive, AdvDiffuser, and Safe-Sim, articulating what each prior work lacks and how LD-Scene addresses those gaps.",
    115         "source": "haiku"
    116       }
    117     }
    118   },
    119   "type_checklist": {
    120     "empirical": {
    121       "artifacts": {
    122         "code_released": {
    123           "applies": true,
    124           "answer": false,
    125           "justification": "No code repository or release is mentioned anywhere in the paper.",
    126           "source": "haiku"
    127         },
    128         "data_released": {
    129           "applies": true,
    130           "answer": true,
    131           "justification": "nuScenes is a publicly available standard benchmark dataset used without modification.",
    132           "source": "haiku"
    133         },
    134         "environment_specified": {
    135           "applies": true,
    136           "answer": false,
    137           "justification": "PyTorch framework and GPU hardware (4x RTX 4090) are mentioned, but no requirements.txt, conda environment, or Dockerfile is provided.",
    138           "source": "haiku"
    139         },
    140         "reproduction_instructions": {
    141           "applies": true,
    142           "answer": false,
    143           "justification": "Implementation details (epochs, optimizer, learning rate) are given, but no step-by-step instructions for reproducing experiments from data preprocessing through evaluation.",
    144           "source": "haiku"
    145         }
    146       },
    147       "statistical_methodology": {
    148         "confidence_intervals_or_error_bars": {
    149           "applies": true,
    150           "answer": false,
    151           "justification": "Table 1 reports point estimates only; no confidence intervals or error bars accompany any reported metric.",
    152           "source": "haiku"
    153         },
    154         "significance_tests": {
    155           "applies": true,
    156           "answer": false,
    157           "justification": "No statistical significance tests (t-tests, bootstrap, etc.) are applied to any comparative claim against baselines.",
    158           "source": "haiku"
    159         },
    160         "effect_sizes_reported": {
    161           "applies": true,
    162           "answer": true,
    163           "justification": "Percentage improvements with clear absolute numbers and baseline context are reported throughout (e.g., 40.75% vs. 27.81% collision rate; 95.0% vs. 69.4% success rate).",
    164           "source": "haiku"
    165         },
    166         "sample_size_justified": {
    167           "applies": true,
    168           "answer": false,
    169           "justification": "nuScenes validation split size is not stated and no power analysis or sample size justification is provided for the main evaluation.",
    170           "source": "haiku"
    171         },
    172         "variance_reported": {
    173           "applies": true,
    174           "answer": false,
    175           "justification": "No variance, standard deviation, or spread across runs is reported for any metric in Tables 1-3.",
    176           "source": "haiku"
    177         }
    178       },
    179       "evaluation_design": {
    180         "baselines_included": {
    181           "applies": true,
    182           "answer": true,
    183           "justification": "Four baselines are included: AdvSim, Strive, DiffScene, and Safe-Sim, covering optimization-based and diffusion-based approaches.",
    184           "source": "haiku"
    185         },
    186         "baselines_contemporary": {
    187           "applies": true,
    188           "answer": true,
    189           "justification": "Safe-Sim (ECCV 2024) and DiffScene (2023) are recent; Strive (CVPR 2022) is still widely cited. Baselines are competitive and recent.",
    190           "source": "haiku"
    191         },
    192         "ablation_study": {
    193           "applies": true,
    194           "answer": true,
    195           "justification": "Two ablation studies are performed: Table 2 ablates guidance loss components and Figure 5 ablates the debugger module across 500 queries and multiple LLMs.",
    196           "source": "haiku"
    197         },
    198         "multiple_metrics": {
    199           "applies": true,
    200           "answer": true,
    201           "justification": "Eight metrics are reported across three dimensions: adversariality (Adv-Ego Coll, Adv Acc), behavior plausibility (four offroad/collision rates), and efficiency (simulation time).",
    202           "source": "haiku"
    203         },
    204         "human_evaluation": {
    205           "applies": false,
    206           "answer": false,
    207           "justification": "Human evaluation of generated scenario quality is not conducted; automated simulation metrics are used throughout.",
    208           "source": "haiku"
    209         },
    210         "held_out_test_set": {
    211           "applies": true,
    212           "answer": true,
    213           "justification": "The model is trained on nuScenes training split and evaluated on the validation split, following standard challenge guidelines.",
    214           "source": "haiku"
    215         },
    216         "per_category_breakdown": {
    217           "applies": true,
    218           "answer": true,
    219           "justification": "Table 3 provides breakdown across adversarial intensity levels (Weak/Medium/Strong); Figure 6 shows per-level TTC and acceleration distributions.",
    220           "source": "haiku"
    221         },
    222         "failure_cases_discussed": {
    223           "applies": true,
    224           "answer": true,
    225           "justification": "The paper notes that Strong adversarial level (39.33% coll) does not outperform Medium (40.75%) and explains that high-speed overtaking may miss collision timing in low-speed scenarios.",
    226           "source": "haiku"
    227         },
    228         "negative_results_reported": {
    229           "applies": true,
    230           "answer": true,
    231           "justification": "Figure 8b shows increasing diffusion steps degrades both adversariality and realism, and Figure 8a shows diminishing returns with more samples; both are reported candidly.",
    232           "source": "haiku"
    233         }
    234       },
    235       "setup_transparency": {
    236         "model_versions_specified": {
    237           "applies": true,
    238           "answer": false,
    239           "justification": "GPT-4o is named as the LLM used for both code generator and debugger, but no snapshot date or API version is specified.",
    240           "source": "haiku"
    241         },
    242         "prompts_provided": {
    243           "applies": true,
    244           "answer": true,
    245           "justification": "Figure 2 shows the complete system prompt, code generation prompt, reasoning prompt, and debugger prompt with actual content, not just templates.",
    246           "source": "haiku"
    247         },
    248         "hyperparameters_reported": {
    249           "applies": true,
    250           "answer": true,
    251           "justification": "Learning rate (5e-4), optimizer (Adam), epochs (200), diffusion steps (20), test samples (10), and GPU setup (4x RTX 4090, 6 hours) are all reported.",
    252           "source": "haiku"
    253         },
    254         "scaffolding_described": {
    255           "applies": true,
    256           "answer": true,
    257           "justification": "The CoT code generation pipeline, three-step reasoning process, closed-loop unit testing debugger, and iterative refinement loop are all described in detail in Section 3.3.",
    258           "source": "haiku"
    259         },
    260         "data_preprocessing_documented": {
    261           "applies": true,
    262           "answer": true,
    263           "justification": "Standard nuScenes prediction challenge guidelines are followed: 2s (4 steps) past, 6s (12 steps) future, standard train/val splits; the adversarial vehicle selection strategy is also specified.",
    264           "source": "haiku"
    265         }
    266       },
    267       "data_integrity": {
    268         "raw_data_available": {
    269           "applies": true,
    270           "answer": true,
    271           "justification": "nuScenes is a publicly available dataset; raw data can be independently obtained from the nuScenes website.",
    272           "source": "haiku"
    273         },
    274         "data_collection_described": {
    275           "applies": true,
    276           "answer": true,
    277           "justification": "nuScenes dataset characteristics are described (1000 scenes, 20s each, 2Hz, 5.5 hours, Boston and Singapore). The 500 test user queries are noted as 'automatically generated by GPT-4o' though generation details are sparse.",
    278           "source": "haiku"
    279         },
    280         "recruitment_methods_described": {
    281           "applies": false,
    282           "answer": false,
    283           "justification": "No human participants; standard benchmark dataset used.",
    284           "source": "haiku"
    285         },
    286         "data_pipeline_documented": {
    287           "applies": true,
    288           "answer": false,
    289           "justification": "The generation pipeline is documented architecturally, but the 500 query generation process for debugger testing lacks documentation of prompt templates, diversity controls, or filtering.",
    290           "source": "haiku"
    291         }
    292       },
    293       "contamination": {
    294         "training_cutoff_stated": {
    295           "applies": false,
    296           "answer": false,
    297           "justification": "The paper evaluates adversarial scenario generation quality, not LLM capability on standard benchmarks; contamination of nuScenes by diffusion model training is not the relevant concern.",
    298           "source": "haiku"
    299         },
    300         "train_test_overlap_discussed": {
    301           "applies": false,
    302           "answer": false,
    303           "justification": "Standard train/val split of nuScenes is used; contamination in the benchmark sense is not applicable.",
    304           "source": "haiku"
    305         },
    306         "benchmark_contamination_addressed": {
    307           "applies": false,
    308           "answer": false,
    309           "justification": "Not evaluating LLM capabilities on benchmarks; scenario generation task does not have contamination exposure in the relevant sense.",
    310           "source": "haiku"
    311         }
    312       },
    313       "human_studies": {
    314         "pre_registered": {
    315           "applies": false,
    316           "answer": false,
    317           "justification": "No human participants in this study.",
    318           "source": "haiku"
    319         },
    320         "irb_or_ethics_approval": {
    321           "applies": false,
    322           "answer": false,
    323           "justification": "No human participants in this study.",
    324           "source": "haiku"
    325         },
    326         "demographics_reported": {
    327           "applies": false,
    328           "answer": false,
    329           "justification": "No human participants in this study.",
    330           "source": "haiku"
    331         },
    332         "inclusion_exclusion_criteria": {
    333           "applies": false,
    334           "answer": false,
    335           "justification": "No human participants in this study.",
    336           "source": "haiku"
    337         },
    338         "randomization_described": {
    339           "applies": false,
    340           "answer": false,
    341           "justification": "No human participants in this study.",
    342           "source": "haiku"
    343         },
    344         "blinding_described": {
    345           "applies": false,
    346           "answer": false,
    347           "justification": "No human participants in this study.",
    348           "source": "haiku"
    349         },
    350         "attrition_reported": {
    351           "applies": false,
    352           "answer": false,
    353           "justification": "No human participants in this study.",
    354           "source": "haiku"
    355         }
    356       },
    357       "cost_and_practicality": {
    358         "inference_cost_reported": {
    359           "applies": true,
    360           "answer": true,
    361           "justification": "Closed-loop simulation time is reported (229.40s for LD-Scene vs baselines), and Figure 5c reports total LLM API cost for each model tested.",
    362           "source": "haiku"
    363         },
    364         "compute_budget_stated": {
    365           "applies": true,
    366           "answer": true,
    367           "justification": "Training is reported as 4x GeForce RTX 4090 GPUs for 6 hours, giving a clear compute budget.",
    368           "source": "haiku"
    369         }
    370       }
    371     }
    372   },
    373   "claims": [
    374     {
    375       "claim": "LD-Scene achieves 40.75% Adv-Ego collision rate, substantially outperforming all baselines including Safe-Sim (27.81%) and AdvSim (24.72%)",
    376       "evidence": "Table 1 reports point estimates for all five models; LD-Scene leads by a large margin on adversariality.",
    377       "supported": "moderate"
    378     },
    379     {
    380       "claim": "LD-Scene achieves lower adversarial offroad rate (12.52%) than all baselines, indicating better realism",
    381       "evidence": "Table 1 shows Adv Offroad values: AdvSim 15.60%, Strive 18.94%, DiffScene 19.71%, Safe-Sim 21.79%, LD-Scene 12.52%.",
    382       "supported": "moderate"
    383     },
    384     {
    385       "claim": "The LLM-based code debugger improves guidance code execution success rate from 69.4% to 95.0% for GPT-4o across 500 user queries",
    386       "evidence": "Figure 5a shows success rate comparison with/without debugger for GPT-4o, Claude, and Gemini models on 500 GPT-4o-generated queries.",
    387       "supported": "moderate"
    388     },
    389     {
    390       "claim": "CoT reasoning enables controllable adversarial intensity — weak/medium/strong levels produce statistically distinguishable TTC and acceleration profiles",
    391       "evidence": "Table 3 and Figures 6a-6c show progressively shorter TTC (2.06→1.98→1.91s) and higher accelerations across the three levels.",
    392       "supported": "weak"
    393     },
    394     {
    395       "claim": "LD-Scene generates more efficient scenarios than test-time optimization baselines (229.40s vs. Strive's 609.72s)",
    396       "evidence": "Table 1 Sim Time column; LD-Scene is faster than Strive but slower than DiffScene (199.01s) and Safe-Sim (193.59s).",
    397       "supported": "weak"
    398     },
    399     {
    400       "claim": "The framework requires no expert knowledge, enabling user-friendly scenario specification via natural language",
    401       "evidence": "Case studies (Figure 7) demonstrate three natural language queries producing distinct behaviors, but no user study validates the 'user-friendly' claim.",
    402       "supported": "unsupported"
    403     }
    404   ],
    405   "methodology_tags": [
    406     "benchmark-eval",
    407     "case-study"
    408   ],
    409   "key_findings": "LD-Scene integrates a pretrained graph-based latent diffusion model with LLM-generated guidance loss functions to achieve 40.75% adversarial ego-collision rate on nuScenes, a 46.5% relative improvement over the next-best baseline (Safe-Sim: 27.81%), while reducing adversarial offroad rate below all baselines. The LLM-based code debugger raises guidance code execution success from 69.4% to 95.0% for GPT-4o over 500 synthetic queries. CoT prompting enables meaningful modulation of adversarial intensity across three predefined levels. However, strong-level intensity does not consistently exceed medium-level collision rate, and more diffusion steps degrade both adversariality and realism.",
    410   "red_flags": [
    411     {
    412       "flag": "No statistical tests",
    413       "detail": "All comparative results in Table 1 are raw point estimates with no confidence intervals, significance tests, or variance across runs, making it impossible to assess whether differences are meaningful."
    414     },
    415     {
    416       "flag": "Synthetic debugger evaluation",
    417       "detail": "The 500 user queries used to benchmark the debugger were 'automatically generated by GPT-4o' — the same model being evaluated — which introduces circularity and offers no guarantee that the queries reflect a real-user distribution."
    418     },
    419     {
    420       "flag": "GPT-4o version unspecified",
    421       "detail": "The paper names 'GPT-4o' as the LLM backbone but provides no snapshot date or API version, making results non-reproducible as the model updates."
    422     },
    423     {
    424       "flag": "No limitations section",
    425       "detail": "The paper contains no dedicated limitations or threats-to-validity section; the conclusion discusses only strengths."
    426     },
    427     {
    428       "flag": "Single dataset evaluation",
    429       "detail": "All quantitative results are on nuScenes only; no cross-dataset validation despite broad claims of outperforming existing methods."
    430     },
    431     {
    432       "flag": "No code release",
    433       "detail": "No code repository is mentioned, preventing independent reproduction of the reported results."
    434     },
    435     {
    436       "flag": "Rule-based ego planner confound",
    437       "detail": "All experiments use a single rule-based lane-graph planner as the ego; adversarial effectiveness may differ substantially against learned planners or neural policies."
    438     }
    439   ],
    440   "cited_papers": [
    441     {
    442       "title": "Generating useful accident-prone driving scenarios via a learned traffic prior (Strive)",
    443       "relevance": "Key baseline and foundational VAE architecture adopted by LD-Scene for latent space representation"
    444     },
    445     {
    446       "title": "SAFE-SIM: Safety-critical closed-loop traffic simulation with diffusion-controllable adversaries",
    447       "relevance": "Primary competing diffusion-based baseline; LD-Scene directly improves over Safe-Sim's adversariality score"
    448     },
    449     {
    450       "title": "Language-guided traffic simulation via scene-level diffusion (CTG++)",
    451       "relevance": "Prior LLM+diffusion work LD-Scene explicitly builds on and addresses limitations of (instability in code generation)"
    452     },
    453     {
    454       "title": "AdvDiffuser: Generating adversarial safety-critical driving scenarios via guided diffusion",
    455       "relevance": "Competing RL-guided diffusion approach whose retraining limitation LD-Scene claims to solve"
    456     },
    457     {
    458       "title": "DiffScene: Diffusion-based safety-critical scenario generation for autonomous vehicles",
    459       "relevance": "Diffusion baseline with human-designed safety guidance; compared in Table 1"
    460     },
    461     {
    462       "title": "nuScenes: A multimodal dataset for autonomous driving",
    463       "relevance": "Primary evaluation dataset used throughout all experiments"
    464     },
    465     {
    466       "title": "Denoising diffusion probabilistic models (Ho et al. 2020)",
    467       "relevance": "Foundational diffusion model method underlying the LDM architecture"
    468     },
    469     {
    470       "title": "Chain-of-thought prompting elicits reasoning in large language models",
    471       "relevance": "Methodological basis for CoT code generation strategy used in the guidance module"
    472     }
    473   ],
    474   "engagement_factors": {
    475     "practical_relevance": {
    476       "score": 2,
    477       "justification": "Directly useful for AV safety teams needing test scenario generation, but requires GPT-4o API access and multi-GPU training infrastructure."
    478     },
    479     "surprise_contrarian": {
    480       "score": 1,
    481       "justification": "Incremental combination of existing techniques (LDMs + LLMs + CoT); no finding contradicts established expectations."
    482     },
    483     "fear_safety": {
    484       "score": 2,
    485       "justification": "Provides tools to expose AV vulnerabilities, raising awareness of how easily adversarial scenarios can be generated via natural language."
    486     },
    487     "drama_conflict": {
    488       "score": 1,
    489       "justification": "No controversy; straightforward engineering contribution with competitive baseline comparison."
    490     },
    491     "demo_ability": {
    492       "score": 2,
    493       "justification": "Natural language interface for scenario generation is demonstrable in principle, but no public demo or code is released."
    494     },
    495     "brand_recognition": {
    496       "score": 1,
    497       "justification": "HKUST is a reputable institution but not a high-profile AI lab; no famous authors or industry affiliation."
    498     }
    499   },
    500   "hn_data": {
    501     "threads": [
    502       {
    503         "hn_id": "44582855",
    504         "title": "Chain of thought monitorability: A new and fragile opportunity for AI safety",
    505         "points": 134,
    506         "comments": 64,
    507         "url": "https://news.ycombinator.com/item?id=44582855",
    508         "created_at": "2025-07-16T14:39:55Z"
    509       },
    510       {
    511         "hn_id": "40497235",
    512         "title": "An Introduction to Vision-Language Modeling",
    513         "points": 13,
    514         "comments": 0,
    515         "url": "https://news.ycombinator.com/item?id=40497235",
    516         "created_at": "2024-05-28T04:09:15Z"
    517       },
    518       {
    519         "hn_id": "44627742",
    520         "title": "AIOps in the Era of LLMs",
    521         "points": 3,
    522         "comments": 0,
    523         "url": "https://news.ycombinator.com/item?id=44627742",
    524         "created_at": "2025-07-20T18:13:31Z"
    525       },
    526       {
    527         "hn_id": "44534854",
    528         "title": "Potential Danger to Satellites from a 2032 Lunar Impact by Asteroid 2024 YR4",
    529         "points": 3,
    530         "comments": 0,
    531         "url": "https://news.ycombinator.com/item?id=44534854",
    532         "created_at": "2025-07-11T17:27:57Z"
    533       },
    534       {
    535         "hn_id": "37940798",
    536         "title": "Curve Your Enthusiasm: Concurvity Regularization in Differentiable GAMs",
    537         "points": 2,
    538         "comments": 1,
    539         "url": "https://news.ycombinator.com/item?id=37940798",
    540         "created_at": "2023-10-19T10:16:08Z"
    541       },
    542       {
    543         "hn_id": "40503425",
    544         "title": "An Introduction to Vision-Language Modeling",
    545         "points": 2,
    546         "comments": 0,
    547         "url": "https://news.ycombinator.com/item?id=40503425",
    548         "created_at": "2024-05-28T17:49:36Z"
    549       },
    550       {
    551         "hn_id": "40502854",
    552         "title": "An Introduction to Vision-Language Modeling",
    553         "points": 2,
    554         "comments": 0,
    555         "url": "https://news.ycombinator.com/item?id=40502854",
    556         "created_at": "2024-05-28T17:00:36Z"
    557       },
    558       {
    559         "hn_id": "30579179",
    560         "title": "A Flawed Dataset for Symbolic Equation Verification",
    561         "points": 2,
    562         "comments": 0,
    563         "url": "https://news.ycombinator.com/item?id=30579179",
    564         "created_at": "2022-03-06T17:29:28Z"
    565       },
    566       {
    567         "hn_id": "43843140",
    568         "title": "Physical Principles of Quantum Biology",
    569         "points": 1,
    570         "comments": 0,
    571         "url": "https://news.ycombinator.com/item?id=43843140",
    572         "created_at": "2025-04-30T10:13:25Z"
    573       },
    574       {
    575         "hn_id": "43773846",
    576         "title": "UI-E2I-Synth: Advancing GUI Grounding with Large-Scale Instruction Synthesis",
    577         "points": 1,
    578         "comments": 0,
    579         "url": "https://news.ycombinator.com/item?id=43773846",
    580         "created_at": "2025-04-23T16:23:19Z"
    581       }
    582     ],
    583     "top_points": 134,
    584     "total_points": 163,
    585     "total_comments": 65
    586   }
    587 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs