scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (27519B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Fuzz4All: Universal Fuzzing with Large Language Models",
      6     "authors": [
      7       "Chunqiu Steven Xia",
      8       "Matteo Paltenghi",
      9       "Jia Le Tian",
     10       "Michael Pradel",
     11       "Lingming Zhang"
     12     ],
     13     "year": 2024,
     14     "venue": "ICSE 2024",
     15     "arxiv_id": "2308.04748",
     16     "doi": "10.1145/3597503.3639121"
     17   },
     18   "checklist": {
     19     "claims_and_evidence": {
     20       "abstract_claims_supported": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "Abstract claims (higher coverage across all 6 languages, 98 bugs found with 64 confirmed) are directly supported by Tables 2 and 5 and the ablation study in Section 5.",
     24         "source": "haiku"
     25       },
     26       "causal_claims_justified": {
     27         "applies": true,
     28         "answer": true,
     29         "justification": "Causal claims like 'autoprompting improves coverage' and 'fuzzing loop diversity increases coverage' are supported by ablation study RQ3 (Table 4) isolating each component.",
     30         "source": "haiku"
     31       },
     32       "generalization_bounded": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "The paper claims to be the 'first universal fuzzer' and uses 'universal' broadly, but only evaluates on 9 SUTs across 6 languages; generalizability to arbitrary languages/SUTs beyond this scope is not demonstrated.",
     36         "source": "haiku"
     37       },
     38       "alternative_explanations_discussed": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "The paper does not discuss whether coverage improvements could be attributable to StarCoder's training data containing known bug-triggering patterns for GCC/Clang/Z3, or whether the LLM is effectively replaying memorized test patterns.",
     42         "source": "haiku"
     43       },
     44       "proxy_outcome_distinction": {
     45         "applies": true,
     46         "answer": true,
     47         "justification": "The paper explicitly uses code coverage as the primary metric and separately reports bug count (the ultimate goal), acknowledging they are distinct measures in Section 4.3.",
     48         "source": "haiku"
     49       }
     50     },
     51     "limitations_and_scope": {
     52       "limitations_section_present": {
     53         "applies": true,
     54         "answer": true,
     55         "justification": "Section 6 'Threats to Validity' is a dedicated section covering internal and external threats.",
     56         "source": "haiku"
     57       },
     58       "threats_to_validity_specific": {
     59         "applies": true,
     60         "answer": true,
     61         "justification": "Specific threats include the LLM's training data becoming outdated, which could degrade future effectiveness, and LLM hallucination producing inaccurate prompts—these are more specific than boilerplate generalization disclaimers.",
     62         "source": "haiku"
     63       },
     64       "scope_boundaries_stated": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "The paper defines scope as 'SUTs that take programming or formal language as inputs' but does not explicitly state what Fuzz4All cannot do or what the results do not show beyond general LLM limitations.",
     68         "source": "haiku"
     69       }
     70     },
     71     "conflicts_of_interest": {
     72       "funding_disclosed": {
     73         "applies": true,
     74         "answer": true,
     75         "justification": "Funding explicitly disclosed in acknowledgment: NSF grants CCF-2131943 and CCF-2141474, Kwai Inc., ERC grant 851895, and German Research Foundation.",
     76         "source": "haiku"
     77       },
     78       "affiliations_disclosed": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "All five authors list their institutional affiliations (UIUC and University of Stuttgart) on the first page.",
     82         "source": "haiku"
     83       },
     84       "funder_independent_of_outcome": {
     85         "applies": true,
     86         "answer": true,
     87         "justification": "Kwai Inc. funded the work, but Fuzz4All uses GPT-4/StarCoder to fuzz publicly available compiler/solver systems; no funder product is being evaluated favorably.",
     88         "source": "haiku"
     89       },
     90       "financial_interests_declared": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "No competing interests statement or declaration of financial interests (patents, equity, consulting) appears in the paper.",
     94         "source": "haiku"
     95       }
     96     },
     97     "scope_and_framing": {
     98       "key_terms_defined": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "'Universal fuzzing' is explicitly defined as targeting many different input languages and features; SUT, autoprompting, validity rate, and hit rate are all defined in context.",
    102         "source": "haiku"
    103       },
    104       "intended_contribution_clear": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "Four explicit contributions are bulleted at the end of Section 1: universal fuzzing, autoprompting technique, LLM-powered fuzzing loop, and empirical evidence of real-world effectiveness.",
    108         "source": "haiku"
    109       },
    110       "engagement_with_prior_work": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "Section 2 explicitly contrasts Fuzz4All with generation-based fuzzers (Csmith, YARPGen), mutation-based fuzzers (GrayC, go-fuzz), general-purpose fuzzers (AFL, libFuzzer), and prior LLM-based fuzzers (TitanFuzz), explaining specific differentiators.",
    114         "source": "haiku"
    115       }
    116     }
    117   },
    118   "type_checklist": {
    119     "empirical": {
    120       "artifacts": {
    121         "code_released": {
    122           "applies": true,
    123           "answer": true,
    124           "justification": "Paper explicitly states 'Our code and data are available at: https://doi.org/10.5281/zenodo.10456883 and https://github.com/fuzz4all/fuzz4all' in the Data Availability section.",
    125           "source": "haiku"
    126         },
    127         "data_released": {
    128           "applies": true,
    129           "answer": true,
    130           "justification": "Evaluation targets (GCC, Clang, Z3, CVC5, OpenJDK, Qiskit) are publicly available systems; bug reports and full artifact are linked at Zenodo.",
    131           "source": "haiku"
    132         },
    133         "environment_specified": {
    134           "applies": true,
    135           "answer": false,
    136           "justification": "Hardware and OS details (Ubuntu 20.04.5 LTS, 256 GB RAM, 4 NVIDIA RTX A6000 GPUs) and SUT versions are given, but no Dockerfile or requirements.txt is mentioned in the paper text.",
    137           "source": "haiku"
    138         },
    139         "reproduction_instructions": {
    140           "applies": true,
    141           "answer": false,
    142           "justification": "The paper provides algorithms and parameter descriptions but no step-by-step reproduction instructions; these may exist in the linked artifact but are not in the paper itself.",
    143           "source": "haiku"
    144         }
    145       },
    146       "statistical_methodology": {
    147         "confidence_intervals_or_error_bars": {
    148           "applies": true,
    149           "answer": true,
    150           "justification": "Figure 4 shows shaded areas indicating minimum and maximum coverage across five runs for the 24-hour fuzzing campaigns.",
    151           "source": "haiku"
    152         },
    153         "significance_tests": {
    154           "applies": true,
    155           "answer": true,
    156           "justification": "Mann-Whitney U-test is used for statistical significance at p < 0.05, with significant results marked with * in Tables 2 and 4.",
    157           "source": "haiku"
    158         },
    159         "effect_sizes_reported": {
    160           "applies": true,
    161           "answer": true,
    162           "justification": "Relative coverage improvements are reported for each target (e.g., +18.8% for GCC, +75.6% for Qiskit, +36.8% average) with absolute baseline values for context.",
    163           "source": "haiku"
    164         },
    165         "sample_size_justified": {
    166           "applies": true,
    167           "answer": false,
    168           "justification": "Five repetitions for 24-hour campaigns and four repetitions for ablation are justified by reference to prior work conventions, not by statistical power analysis.",
    169           "source": "haiku"
    170         },
    171         "variance_reported": {
    172           "applies": true,
    173           "answer": false,
    174           "justification": "Main comparison Table 2 and ablation Table 4 report only point estimates; variance (std dev or CIs) is shown only as min/max ranges in Figure 4.",
    175           "source": "haiku"
    176         }
    177       },
    178       "evaluation_design": {
    179         "baselines_included": {
    180           "applies": true,
    181           "answer": true,
    182           "justification": "Each SUT is compared against one or more state-of-the-art baselines (GrayC, Csmith, YARPGen, TypeFuzz, go-fuzz, Hephaestus, MorphQ) as detailed in Table 1.",
    183           "source": "haiku"
    184         },
    185         "baselines_contemporary": {
    186           "applies": true,
    187           "answer": true,
    188           "justification": "Baselines include GrayC (ISSTA 2023), MorphQ (ICSE 2023), and Hephaestus (PLDI 2022), all recent state-of-the-art tools for their respective domains.",
    189           "source": "haiku"
    190         },
    191         "ablation_study": {
    192           "applies": true,
    193           "answer": true,
    194           "justification": "RQ3 (Section 5.3) conducts a systematic ablation of autoprompting variants (no input, raw prompt, autoprompt) and fuzzing loop variants (w/o example, w/ example, full Fuzz4All).",
    195           "source": "haiku"
    196         },
    197         "multiple_metrics": {
    198           "applies": true,
    199           "answer": true,
    200           "justification": "Evaluation uses code coverage, validity rate (% valid), hit rate for targeted fuzzing, and bug count (total + confirmed) as complementary metrics.",
    201           "source": "haiku"
    202         },
    203         "human_evaluation": {
    204           "applies": false,
    205           "answer": false,
    206           "justification": "This is a fully automated fuzzing tool evaluated by compiler/SUT crashes and coverage; human evaluation is not relevant.",
    207           "source": "haiku"
    208         },
    209         "held_out_test_set": {
    210           "applies": false,
    211           "answer": false,
    212           "justification": "Not a prediction task; fuzzing generates inputs dynamically rather than being evaluated on a fixed test set.",
    213           "source": "haiku"
    214         },
    215         "per_category_breakdown": {
    216           "applies": true,
    217           "answer": true,
    218           "justification": "Results are broken down per language and per SUT in Tables 2, 3, 4, and 5, and per targeted feature in Table 3.",
    219           "source": "haiku"
    220         },
    221         "failure_cases_discussed": {
    222           "applies": true,
    223           "answer": false,
    224           "justification": "Bug examples showcase successes; the paper does not systematically discuss cases where Fuzz4All failed to outperform baselines or produced unhelpful inputs.",
    225           "source": "haiku"
    226         },
    227         "negative_results_reported": {
    228           "applies": true,
    229           "answer": false,
    230           "justification": "Fuzz4All achieves the highest coverage on every single SUT in Table 2; no conditions under which baselines were competitive or Fuzz4All underperformed are reported.",
    231           "source": "haiku"
    232         }
    233       },
    234       "setup_transparency": {
    235         "model_versions_specified": {
    236           "applies": true,
    237           "answer": true,
    238           "justification": "Exact model checkpoint specified: 'gpt-4-0613' with max_token=500 via OpenAI API, and 'StarCoder' via Hugging Face implementation.",
    239           "source": "haiku"
    240         },
    241         "prompts_provided": {
    242           "applies": true,
    243           "answer": true,
    244           "justification": "The autoprompting instruction is given verbatim in Section 3.1.1; the three generation strategy instructions are listed in full in Figure 3.",
    245           "source": "haiku"
    246         },
    247         "hyperparameters_reported": {
    248           "applies": true,
    249           "answer": true,
    250           "justification": "Temperature (0 for greedy, 1 for sampling), top-p=1, batch_size=30, max_output_length=1024, 4 candidate prompts, 30 fuzzing inputs per scoring are all stated in Section 4.1.",
    251           "source": "haiku"
    252         },
    253         "scaffolding_described": {
    254           "applies": true,
    255           "answer": true,
    256           "justification": "The two-stage pipeline (autoprompting and fuzzing loop) is described via formal algorithms (Algorithms 1 and 2) with pseudocode and worked examples in Figures 2 and 3.",
    257           "source": "haiku"
    258         },
    259         "data_preprocessing_documented": {
    260           "applies": true,
    261           "answer": true,
    262           "justification": "The autoprompting stage's transformation of raw user input (documentation, example code, specifications) into the distilled prompt is documented in Section 3.1.",
    263           "source": "haiku"
    264         }
    265       },
    266       "data_integrity": {
    267         "raw_data_available": {
    268           "applies": true,
    269           "answer": true,
    270           "justification": "Artifact at Zenodo and GitHub includes code and data; bug reports include issue links to developer trackers as mentioned in Section 5.4.",
    271           "source": "haiku"
    272         },
    273         "data_collection_described": {
    274           "applies": true,
    275           "answer": true,
    276           "justification": "Fuzzing campaign parameters (24-hour budget, 5 repetitions, 64-core workstation, specific SUT versions, default baseline settings) are described in Section 4.3.",
    277           "source": "haiku"
    278         },
    279         "recruitment_methods_described": {
    280           "applies": false,
    281           "answer": false,
    282           "justification": "No human participants; evaluation is fully automated on publicly available compiler/solver systems.",
    283           "source": "haiku"
    284         },
    285         "data_pipeline_documented": {
    286           "applies": true,
    287           "answer": true,
    288           "justification": "The full pipeline from user input → autoprompting → fuzzing loop → oracle checking → bug reporting is documented via algorithms and Section 3.",
    289           "source": "haiku"
    290         }
    291       },
    292       "contamination": {
    293         "training_cutoff_stated": {
    294           "applies": true,
    295           "answer": false,
    296           "justification": "The gpt-4-0613 checkpoint and StarCoder are named but their training data cutoff dates are not stated; the threats section only vaguely refers to 'the last year.'",
    297           "source": "haiku"
    298         },
    299         "train_test_overlap_discussed": {
    300           "applies": true,
    301           "answer": false,
    302           "justification": "The threats section discusses future data shift but does not address whether StarCoder or GPT-4 training data includes known bug-triggering code patterns for GCC/Clang/Z3, which could inflate apparent discovery rates.",
    303           "source": "haiku"
    304         },
    305         "benchmark_contamination_addressed": {
    306           "applies": false,
    307           "answer": false,
    308           "justification": "The evaluation targets are live compiler/solver systems, not static benchmarks; contamination of a fixed benchmark dataset is not the relevant concern here.",
    309           "source": "haiku"
    310         }
    311       },
    312       "human_studies": {
    313         "pre_registered": {
    314           "applies": false,
    315           "answer": false,
    316           "justification": "No human participants.",
    317           "source": "haiku"
    318         },
    319         "irb_or_ethics_approval": {
    320           "applies": false,
    321           "answer": false,
    322           "justification": "No human participants.",
    323           "source": "haiku"
    324         },
    325         "demographics_reported": {
    326           "applies": false,
    327           "answer": false,
    328           "justification": "No human participants.",
    329           "source": "haiku"
    330         },
    331         "inclusion_exclusion_criteria": {
    332           "applies": false,
    333           "answer": false,
    334           "justification": "No human participants.",
    335           "source": "haiku"
    336         },
    337         "randomization_described": {
    338           "applies": false,
    339           "answer": false,
    340           "justification": "No human participants.",
    341           "source": "haiku"
    342         },
    343         "blinding_described": {
    344           "applies": false,
    345           "answer": false,
    346           "justification": "No human participants.",
    347           "source": "haiku"
    348         },
    349         "attrition_reported": {
    350           "applies": false,
    351           "answer": false,
    352           "justification": "No human participants.",
    353           "source": "haiku"
    354         }
    355       },
    356       "cost_and_practicality": {
    357         "inference_cost_reported": {
    358           "applies": true,
    359           "answer": false,
    360           "justification": "GPT-4 API is used extensively (autoprompting + scoring) but no API cost per fuzzing campaign is reported; only autoprompting wall-clock time (2.3 min avg) is given.",
    361           "source": "haiku"
    362         },
    363         "compute_budget_stated": {
    364           "applies": true,
    365           "answer": false,
    366           "justification": "Hardware is specified (64-core, 256 GB RAM, 4 RTX A6000, one GPU per run) but total GPU-hours or compute cost for the full evaluation is not stated.",
    367           "source": "haiku"
    368         }
    369       }
    370     }
    371   },
    372   "claims": [
    373     {
    374       "claim": "Fuzz4All achieves 36.8% higher code coverage on average compared to the best state-of-the-art baseline across all six languages.",
    375       "evidence": "Table 2 shows per-SUT improvements from +13.7% (Go) to +75.6% (Qiskit), all statistically significant by Mann-Whitney U-test (p < 0.05).",
    376       "supported": "strong"
    377     },
    378     {
    379       "claim": "Fuzz4All found 98 bugs in widely used systems (GCC, Clang, Z3, CVC5, OpenJDK, Qiskit), with 64 confirmed as previously unknown.",
    380       "evidence": "Table 5 breaks down bugs by system and confirmation status; Section 5.4.1 gives four exemplary bug descriptions with developer confirmation.",
    381       "supported": "strong"
    382     },
    383     {
    384       "claim": "Targeted fuzzing achieves an 83% average hit rate for specific language features.",
    385       "evidence": "Table 3 shows per-feature hit rates across all six languages, with diagonal entries (targeted feature's own run) averaging 83%.",
    386       "supported": "strong"
    387     },
    388     {
    389       "claim": "Autoprompting improves coverage over directly using raw user input as a prompt.",
    390       "evidence": "Table 4 ablation shows autoprompt consistently outperforms raw prompt in coverage across all 6 languages (e.g., C: 182,530 vs 137,204).",
    391       "supported": "strong"
    392     },
    393     {
    394       "claim": "Despite generating 43% fewer fuzzing inputs and achieving only 37-47% validity rates, Fuzz4All's diversity drives higher coverage than fuzzers with near-100% validity rates.",
    395       "evidence": "Table 2 directly juxtaposes # programs and % valid alongside coverage; Csmith generates 61,883 valid inputs vs Fuzz4All's 44,324 (many invalid), yet Fuzz4All achieves 78% more coverage on GCC.",
    396       "supported": "moderate"
    397     },
    398     {
    399       "claim": "Fuzz4All's coverage continues to increase throughout the 24-hour campaign without plateauing, unlike baselines.",
    400       "evidence": "Figure 4 shows baseline coverage curves flattening while Fuzz4All's continues rising across all 6 SUT plots.",
    401       "supported": "strong"
    402     },
    403     {
    404       "claim": "The LLM can generate valid fuzzing inputs for quantum computing (Qiskit) despite limited training exposure to quantum APIs.",
    405       "evidence": "Table 2 shows 24.90% validity rate for Qiskit but +75.6% coverage improvement over MorphQ; Section 5.1.2 discusses the low LLM exposure to quantum code.",
    406       "supported": "moderate"
    407     }
    408   ],
    409   "methodology_tags": [
    410     "benchmark-eval",
    411     "case-study"
    412   ],
    413   "key_findings": "Fuzz4All achieves 36.8% higher code coverage than state-of-the-art language-specific fuzzers across 6 programming languages and 9 systems under test, despite generating fewer total inputs at lower validity rates. The autoprompting technique and iterative fuzzing loop with generation strategies are both independently validated as contributing to coverage gains. The system found 98 real-world bugs with 64 confirmed as previously unknown across GCC, Clang, Z3, CVC5, OpenJDK, and Qiskit, demonstrating practical effectiveness beyond coverage metrics.",
    414   "red_flags": [
    415     {
    416       "flag": "Universal overclaim",
    417       "detail": "'Universal' in the title implies any language/SUT, but the evaluation covers only 9 SUTs in 6 languages; generalizability to others is assumed but not demonstrated."
    418     },
    419     {
    420       "flag": "LLM training data contamination unaddressed",
    421       "detail": "StarCoder was trained on billions of code tokens from the same open-source projects (GCC, LLVM, etc.); whether generated bug-triggering patterns reflect memorized historical test cases rather than novel discovery is not discussed."
    422     },
    423     {
    424       "flag": "Variance absent from main tables",
    425       "detail": "Tables 2, 3, 4 report only point estimates; statistical spread appears only as min/max bands in Figure 4, making it impossible to assess result stability from the tables alone."
    426     },
    427     {
    428       "flag": "Inference cost omitted",
    429       "detail": "GPT-4 API is called extensively for autoprompting and scoring (30 inputs × 4 candidates per campaign) but no API cost per fuzzing run is reported, making practical adoption cost unknown."
    430     },
    431     {
    432       "flag": "No negative results",
    433       "detail": "Fuzz4All outperforms all baselines on every single SUT; no conditions under which it failed or baselines were competitive are reported, suggesting possible evaluation cherry-picking."
    434     }
    435   ],
    436   "cited_papers": [
    437     {
    438       "title": "Large Language Models are Zero-Shot Fuzzers: Fuzzing Deep-Learning Libraries via Large Language Models (TitanFuzz)",
    439       "relevance": "Direct predecessor using LLMs for fuzzing DL libraries; Fuzz4All extends this to universal language support"
    440     },
    441     {
    442       "title": "GrayC: Greybox Fuzzing of Compilers and Analysers for C",
    443       "relevance": "Primary C fuzzing baseline used in evaluation; represents state-of-the-art mutation-based compiler fuzzing"
    444     },
    445     {
    446       "title": "MorphQ: Metamorphic Testing of the Qiskit Quantum Computing Platform",
    447       "relevance": "Direct baseline for Qiskit fuzzing; Fuzz4All improves on it by +75.6% coverage"
    448     },
    449     {
    450       "title": "Finding typing compiler bugs (Hephaestus)",
    451       "relevance": "JVM compiler fuzzer baseline; represents combined generation/mutation approach for type-related bugs"
    452     },
    453     {
    454       "title": "Random testing for C and C++ compilers with YARPGen",
    455       "relevance": "C++ compiler fuzzing baseline; generation-based approach Fuzz4All outperforms"
    456     },
    457     {
    458       "title": "Generative type-aware mutation for testing SMT solvers (TypeFuzz)",
    459       "relevance": "SMT solver fuzzing baseline using type-aware mutations"
    460     },
    461     {
    462       "title": "CODAMOSA: Escaping Coverage Plateaus in Test Generation with Pre-Trained Large Language Models",
    463       "relevance": "Contemporaneous LLM-based testing approach; represents the broader trend of LLMs for test generation"
    464     },
    465     {
    466       "title": "Evaluating Fuzz Testing (Klees et al. 2018)",
    467       "relevance": "Methodology reference for fuzzing evaluation; authors follow its guidance on using Mann-Whitney U-test and 24-hour campaigns"
    468     },
    469     {
    470       "title": "StarCoder: may the source be with you!",
    471       "relevance": "The generation LLM used in Fuzz4All; key infrastructure component"
    472     },
    473     {
    474       "title": "Finding and understanding bugs in C compilers (Csmith)",
    475       "relevance": "Classic generation-based C compiler fuzzer used as baseline; represents 80K+ LoC manual effort that Fuzz4All's 872 LoC replaces"
    476     }
    477   ],
    478   "engagement_factors": {
    479     "practical_relevance": {
    480       "score": 3,
    481       "justification": "Found 64 confirmed new bugs in GCC, Clang, Z3, and other foundational tools; code is released on GitHub for immediate use."
    482     },
    483     "surprise_contrarian": {
    484       "score": 2,
    485       "justification": "Counterintuitive finding that lower validity rates (~37%) combined with higher diversity produce better coverage than near-100% validity fuzzers."
    486     },
    487     "fear_safety": {
    488       "score": 1,
    489       "justification": "Bug discovery in fundamental compiler/solver infrastructure has security implications, but the paper frames it as software quality rather than safety risk."
    490     },
    491     "drama_conflict": {
    492       "score": 1,
    493       "justification": "Strong claims against all prior fuzzers with consistent wins, but no adversarial critique of existing work that would generate controversy."
    494     },
    495     "demo_ability": {
    496       "score": 3,
    497       "justification": "Full code released on GitHub with documented usage; practitioners can target any SUT by providing documentation and running the tool."
    498     },
    499     "brand_recognition": {
    500       "score": 2,
    501       "justification": "Published at ICSE 2024 (top SE venue) by UIUC/Stuttgart groups with established reputations in software testing; uses GPT-4 brand."
    502     }
    503   },
    504   "hn_data": {
    505     "threads": [
    506       {
    507         "hn_id": "43975423",
    508         "title": "Show HN: HelixDB – Open-source vector-graph database for AI applications (Rust)",
    509         "points": 237,
    510         "comments": 112,
    511         "url": "https://news.ycombinator.com/item?id=43975423",
    512         "created_at": "2025-05-13T17:26:38Z"
    513       },
    514       {
    515         "hn_id": "41321960",
    516         "title": "Why is this a research paper? HybridRAG = VectorRAG context and GraphRAG context",
    517         "points": 4,
    518         "comments": 1,
    519         "url": "https://news.ycombinator.com/item?id=41321960",
    520         "created_at": "2024-08-22T16:31:57Z"
    521       },
    522       {
    523         "hn_id": "37079205",
    524         "title": "Effective Model for LK-99: Potential Avenue for New Superconductivity Research",
    525         "points": 3,
    526         "comments": 2,
    527         "url": "https://news.ycombinator.com/item?id=37079205",
    528         "created_at": "2023-08-10T17:39:55Z"
    529       },
    530       {
    531         "hn_id": "43702068",
    532         "title": "HybridRAG: Integrating Knowledge Graphs and Vector RAG",
    533         "points": 2,
    534         "comments": 0,
    535         "url": "https://news.ycombinator.com/item?id=43702068",
    536         "created_at": "2025-04-16T06:15:57Z"
    537       },
    538       {
    539         "hn_id": "35080011",
    540         "title": "Magnushammer: A Transformer-Based Approach to Premise Selection",
    541         "points": 2,
    542         "comments": 0,
    543         "url": "https://news.ycombinator.com/item?id=35080011",
    544         "created_at": "2023-03-09T10:42:59Z"
    545       },
    546       {
    547         "hn_id": "32339463",
    548         "title": "A Fast Text-Driven Approach for Generating Artistic Content",
    549         "points": 2,
    550         "comments": 0,
    551         "url": "https://news.ycombinator.com/item?id=32339463",
    552         "created_at": "2022-08-04T04:43:48Z"
    553       },
    554       {
    555         "hn_id": "40516562",
    556         "title": "Frugal random exploration strategy for shape recognition using stat. geom",
    557         "points": 1,
    558         "comments": 1,
    559         "url": "https://news.ycombinator.com/item?id=40516562",
    560         "created_at": "2024-05-29T20:23:09Z"
    561       },
    562       {
    563         "hn_id": "34364362",
    564         "title": "Bug Hunters Perspectives: Challenges and Benefits of the Bug Bounty Ecosystem",
    565         "points": 1,
    566         "comments": 1,
    567         "url": "https://news.ycombinator.com/item?id=34364362",
    568         "created_at": "2023-01-13T05:44:13Z"
    569       },
    570       {
    571         "hn_id": "35181128",
    572         "title": "Quantum Microscopy of Cancer Cells at the Heisenberg Limit",
    573         "points": 1,
    574         "comments": 0,
    575         "url": "https://news.ycombinator.com/item?id=35181128",
    576         "created_at": "2023-03-16T12:45:37Z"
    577       }
    578     ],
    579     "top_points": 237,
    580     "total_points": 253,
    581     "total_comments": 117
    582   }
    583 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs