scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (27187B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "DLAP: A Deep Learning Augmented Large Language Model Prompting Framework for Software Vulnerability Detection",
      6     "authors": [
      7       "Yanjing Yang",
      8       "Xin Zhou",
      9       "Runfeng Mao",
     10       "Jinwei Xu",
     11       "Lanxin Yang",
     12       "Yu Zhang",
     13       "Haifeng Shen",
     14       "He Zhang"
     15     ],
     16     "year": 2024,
     17     "venue": "Journal of Systems and Software",
     18     "arxiv_id": "2405.01202",
     19     "doi": "10.48550/arXiv.2405.01202"
     20   },
     21   "checklist": {
     22     "claims_and_evidence": {
     23       "abstract_claims_supported": {
     24         "applies": true,
     25         "answer": true,
     26         "justification": "Abstract claims of 10% higher F1 and 20% higher MCC over baselines are supported by Table 5; the '90% of fine-tuning' claim is directionally supported by Table 6 though DLAP actually exceeds fine-tuning on small datasets.",
     27         "source": "haiku"
     28       },
     29       "causal_claims_justified": {
     30         "applies": true,
     31         "answer": true,
     32         "justification": "Comparative experiments with held-out test sets and DL model selection experiments (RQ1–RQ3) provide adequate design for causal performance claims; the implicit fine-tuning mechanism is supported mathematically but with a softmax simplification.",
     33         "source": "haiku"
     34       },
     35       "generalization_bounded": {
     36         "applies": true,
     37         "answer": false,
     38         "justification": "Paper tests on 4 C/C++ projects but the conclusion declares 'superior and stable performance in software vulnerability detection tasks' broadly; Section 6.2 extends claims to other ASAT tasks without empirical support.",
     39         "source": "haiku"
     40       },
     41       "alternative_explanations_discussed": {
     42         "applies": true,
     43         "answer": false,
     44         "justification": "The paper does not discuss that the DL model's advantage may stem from being trained on the same project's data as the test set, nor that GPT-3.5 may have memorized public vulnerability code during pretraining.",
     45         "source": "haiku"
     46       },
     47       "proxy_outcome_distinction": {
     48         "applies": true,
     49         "answer": true,
     50         "justification": "Claims are about vulnerability detection accuracy and metrics (F1, MCC, FPR) directly measure that; no conflation of proxy metrics with broader software security outcomes.",
     51         "source": "haiku"
     52       }
     53     },
     54     "limitations_and_scope": {
     55       "limitations_section_present": {
     56         "applies": true,
     57         "answer": true,
     58         "justification": "Section 7 'Threats to Validity' is a dedicated section covering internal, construct, and external validity with multiple paragraphs.",
     59         "source": "haiku"
     60       },
     61       "threats_to_validity_specific": {
     62         "applies": true,
     63         "answer": false,
     64         "justification": "Threats mentioned (DL model quality, closed-source LLM internals, LLM choice) are somewhat generic; key threats like C/C++-only scope, GPT contamination of public repo code, and no true ablation are absent.",
     65         "source": "haiku"
     66       },
     67       "scope_boundaries_stated": {
     68         "applies": true,
     69         "answer": false,
     70         "justification": "No explicit statement that results are bounded to C/C++ function-level detection on these 4 specific projects; Section 6.2 expansively discusses adapting DLAP to other tasks without bounding current findings.",
     71         "source": "haiku"
     72       }
     73     },
     74     "conflicts_of_interest": {
     75       "funding_disclosed": {
     76         "applies": true,
     77         "answer": false,
     78         "justification": "No funding source is mentioned anywhere in the paper.",
     79         "source": "haiku"
     80       },
     81       "affiliations_disclosed": {
     82         "applies": true,
     83         "answer": true,
     84         "justification": "Authors are identified as affiliated with Software Institute, Nanjing University and Faculty of Science and Engineering, Southern Cross University.",
     85         "source": "haiku"
     86       },
     87       "funder_independent_of_outcome": {
     88         "applies": false,
     89         "answer": false,
     90         "justification": "No funding disclosed, so independence cannot be assessed.",
     91         "source": "haiku"
     92       },
     93       "financial_interests_declared": {
     94         "applies": true,
     95         "answer": false,
     96         "justification": "No competing interests or financial interests statement appears in the paper.",
     97         "source": "haiku"
     98       }
     99     },
    100     "scope_and_framing": {
    101       "key_terms_defined": {
    102         "applies": true,
    103         "answer": true,
    104         "justification": "'DL model' is explicitly distinguished from LLMs in footnote 1; 'implicit fine-tuning' is defined mathematically in Section 3.3 and the Appendix; vulnerability detection is framed as binary classification.",
    105         "source": "haiku"
    106       },
    107       "intended_contribution_clear": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "Three explicit contributions are listed: the DLAP framework, experiments on DL model selection, and empirical comparison of prompting vs. fine-tuning for vulnerability detection.",
    111         "source": "haiku"
    112       },
    113       "engagement_with_prior_work": {
    114         "applies": true,
    115         "answer": true,
    116         "justification": "Section 2 thoroughly reviews DL-based and LLM-based vulnerability detection, explicitly positioning DLAP against GRACE and four other prompting frameworks, explaining how DLAP builds on each.",
    117         "source": "haiku"
    118       }
    119     }
    120   },
    121   "type_checklist": {
    122     "empirical": {
    123       "artifacts": {
    124         "code_released": {
    125           "applies": true,
    126           "answer": true,
    127           "justification": "Source code and COT template library are stated as publicly available at https://github.com/Yang-Yanjing/DLAP.git, cited twice in the paper with a 'Data and materials' label.",
    128           "source": "haiku"
    129         },
    130         "data_released": {
    131           "applies": true,
    132           "answer": true,
    133           "justification": "GitHub footnote explicitly states 'Data and materials' are at the repository link; base datasets are from publicly available prior works (Fan et al., Chakraborty et al.); specific preprocessed splits are not confirmed released.",
    134           "source": "haiku"
    135         },
    136         "environment_specified": {
    137           "applies": true,
    138           "answer": false,
    139           "justification": "Table 2 provides hyperparameters and some version info (Java 8, Joern 0.3.1/2.0.157) but no requirements.txt, Dockerfile, or full system environment specification is provided.",
    140           "source": "haiku"
    141         },
    142         "reproduction_instructions": {
    143           "applies": true,
    144           "answer": false,
    145           "justification": "Algorithm 1 describes the algorithmic procedure at a high level; the paper provides no step-by-step instructions covering environment setup, data preparation, model training, and evaluation execution.",
    146           "source": "haiku"
    147         }
    148       },
    149       "statistical_methodology": {
    150         "confidence_intervals_or_error_bars": {
    151           "applies": true,
    152           "answer": false,
    153           "justification": "All results in Tables 3, 5, 6, 7 are single-point estimates; no confidence intervals or error bars are reported for any result.",
    154           "source": "haiku"
    155         },
    156         "significance_tests": {
    157           "applies": true,
    158           "answer": false,
    159           "justification": "No statistical significance tests are applied; all superiority claims are made from raw metric comparisons with no p-values or hypothesis testing.",
    160           "source": "haiku"
    161         },
    162         "effect_sizes_reported": {
    163           "applies": true,
    164           "answer": true,
    165           "justification": "Percentage differences are reported throughout (e.g., 'surpasses by an average of 7.2% and 10.5% on F1 and MCC') alongside absolute metric values that convey effect magnitude.",
    166           "source": "haiku"
    167         },
    168         "sample_size_justified": {
    169           "applies": true,
    170           "answer": false,
    171           "justification": "Dataset sizes are reported in Table 1 but no power analysis or justification that test set sizes are sufficient for the comparative conclusions is provided.",
    172           "source": "haiku"
    173         },
    174         "variance_reported": {
    175           "applies": true,
    176           "answer": false,
    177           "justification": "CV is used only to characterize DL model probability distributions for model selection, not to report variance across experimental runs; no standard deviation across runs of the main evaluation.",
    178           "source": "haiku"
    179         }
    180       },
    181       "evaluation_design": {
    182         "baselines_included": {
    183           "applies": true,
    184           "answer": true,
    185           "justification": "Four prompting baselines (PRol, PAux, PCot, GRACE) and LoRA fine-tuning (Vicuna-13B) are included as explicit comparisons.",
    186           "source": "haiku"
    187         },
    188         "baselines_contemporary": {
    189           "applies": true,
    190           "answer": true,
    191           "justification": "Baselines include GRACE (2024 JSS) and GPT-based prompting frameworks from 2023, which are contemporary with this 2024 submission.",
    192           "source": "haiku"
    193         },
    194         "ablation_study": {
    195           "applies": true,
    196           "answer": false,
    197           "justification": "No ablation isolates DLAP's components (ICL vs. COT vs. static tool input vs. DL augmentation); RQ1 selects among DL model types but does not test DLAP with components removed.",
    198           "source": "haiku"
    199         },
    200         "multiple_metrics": {
    201           "applies": true,
    202           "answer": true,
    203           "justification": "Five evaluation metrics are reported with rationale: Precision, Recall, F1, FPR, and MCC—the last specifically justified for class-imbalanced binary classification.",
    204           "source": "haiku"
    205         },
    206         "human_evaluation": {
    207           "applies": false,
    208           "answer": false,
    209           "justification": "No human evaluation is performed; Figure 8 shows one qualitative example but no systematic human assessment of detection outputs.",
    210           "source": "haiku"
    211         },
    212         "held_out_test_set": {
    213           "applies": true,
    214           "answer": true,
    215           "justification": "Datasets are split 80/20 train/test explicitly: 'we divided the dataset into training and testing sets with the 8:2 proportion.'",
    216           "source": "haiku"
    217         },
    218         "per_category_breakdown": {
    219           "applies": true,
    220           "answer": true,
    221           "justification": "All results in Tables 3, 5, and 6 are broken down per project (Chrome, Android, Linux, Qemu) with totals.",
    222           "source": "haiku"
    223         },
    224         "failure_cases_discussed": {
    225           "applies": true,
    226           "answer": false,
    227           "justification": "Figure 8 shows a success example only; failure cases are not shown or systematically discussed.",
    228           "source": "haiku"
    229         },
    230         "negative_results_reported": {
    231           "applies": true,
    232           "answer": true,
    233           "justification": "Table 6 explicitly reports that fine-tuning outperforms DLAP on large datasets (Chrome F1 82.0 vs 52.1; Linux F1 70.3 vs 65.4); the paper directly acknowledges 'fine-tuning an LLM on a large project has a higher F1 than DLAP.'",
    234           "source": "haiku"
    235         }
    236       },
    237       "setup_transparency": {
    238         "model_versions_specified": {
    239           "applies": true,
    240           "answer": true,
    241           "justification": "GPT-3.5-turbo-0125 (specific snapshot), Linevul with codeBERT, Llama-13B, and Vicuna-13B are all named with sufficient precision.",
    242           "source": "haiku"
    243         },
    244         "prompts_provided": {
    245           "applies": true,
    246           "answer": true,
    247         "justification": "Full verbatim prompts are provided for the three baseline frameworks PRol, PAux, and PCot, and DLAP's COT template library is available on GitHub.",
    248           "source": "haiku"
    249         },
    250         "hyperparameters_reported": {
    251           "applies": true,
    252           "answer": true,
    253           "justification": "Table 2 provides comprehensive hyperparameters for all three DL models: batch size, epochs, optimizer, loss function, embedding algorithm, architecture details.",
    254           "source": "haiku"
    255         },
    256         "scaffolding_described": {
    257           "applies": true,
    258           "answer": true,
    259           "justification": "The two-part DLAP framework (ICL in Section 3.3, COT in Section 3.4) is described in detail with pseudocode (Algorithm 1) and example figures.",
    260           "source": "haiku"
    261         },
    262         "data_preprocessing_documented": {
    263           "applies": true,
    264           "answer": true,
    265           "justification": "Preprocessing steps are documented: random undersampling of non-vulnerable samples to address class imbalance, 80/20 train/test split, and explicit project selection criteria.",
    266           "source": "haiku"
    267         }
    268       },
    269       "data_integrity": {
    270         "raw_data_available": {
    271           "applies": true,
    272           "answer": false,
    273           "justification": "While GitHub is cited for 'Data and materials,' the specific preprocessed datasets with vulnerability labels and train/test splits used in experiments are not confirmed released; base open-source code is available but not the labeled vulnerability dataset.",
    274           "source": "haiku"
    275         },
    276         "data_collection_described": {
    277           "applies": true,
    278           "answer": true,
    279           "justification": "Section 4.2 describes project selection criteria (used by prior work, >3000 functions, traceable vulnerability fix records) and references prior datasets [4, 12, 49] for methodology.",
    280           "source": "haiku"
    281         },
    282         "recruitment_methods_described": {
    283           "applies": false,
    284           "answer": false,
    285           "justification": "No human participants; data is derived from open-source software repositories.",
    286           "source": "haiku"
    287         },
    288         "data_pipeline_documented": {
    289           "applies": true,
    290           "answer": false,
    291           "justification": "Only high-level preprocessing (undersampling, 80/20 split) is described; the full pipeline from raw source repositories to labeled vulnerability functions with CVE-to-function mapping is not independently documented.",
    292           "source": "haiku"
    293         }
    294       },
    295       "contamination": {
    296         "training_cutoff_stated": {
    297           "applies": true,
    298           "answer": false,
    299           "justification": "GPT-3.5-turbo-0125 is used but its training data cutoff is never stated; the vulnerability code from public repositories (Chrome, Linux, Android, Qemu) predates GPT's training.",
    300           "source": "haiku"
    301         },
    302         "train_test_overlap_discussed": {
    303           "applies": true,
    304           "answer": false,
    305           "justification": "No discussion of whether GPT-3.5 may have seen the test functions from well-known public repositories during pretraining; this is a significant unaddressed contamination risk.",
    306           "source": "haiku"
    307         },
    308         "benchmark_contamination_addressed": {
    309           "applies": true,
    310           "answer": false,
    311           "justification": "All four evaluated projects (Chrome, Linux, Android, Qemu) are major public repositories whose code predates GPT-3.5's training cutoff; potential memorization is not addressed.",
    312           "source": "haiku"
    313         }
    314       },
    315       "human_studies": {
    316         "pre_registered": {
    317           "applies": false,
    318           "answer": false,
    319           "justification": "No human participants.",
    320           "source": "haiku"
    321         },
    322         "irb_or_ethics_approval": {
    323           "applies": false,
    324           "answer": false,
    325           "justification": "No human participants.",
    326           "source": "haiku"
    327         },
    328         "demographics_reported": {
    329           "applies": false,
    330           "answer": false,
    331           "justification": "No human participants.",
    332           "source": "haiku"
    333         },
    334         "inclusion_exclusion_criteria": {
    335           "applies": false,
    336           "answer": false,
    337           "justification": "No human participants.",
    338           "source": "haiku"
    339         },
    340         "randomization_described": {
    341           "applies": false,
    342           "answer": false,
    343           "justification": "No human participants.",
    344           "source": "haiku"
    345         },
    346         "blinding_described": {
    347           "applies": false,
    348           "answer": false,
    349           "justification": "No human participants.",
    350           "source": "haiku"
    351         },
    352         "attrition_reported": {
    353           "applies": false,
    354           "answer": false,
    355           "justification": "No human participants.",
    356           "source": "haiku"
    357         }
    358       },
    359       "cost_and_practicality": {
    360         "inference_cost_reported": {
    361           "applies": true,
    362           "answer": false,
    363           "justification": "Cost constraints motivating GPT-3.5-turbo selection are mentioned qualitatively but no actual API cost ($ per query or total) is reported.",
    364           "source": "haiku"
    365         },
    366         "compute_budget_stated": {
    367           "applies": true,
    368           "answer": true,
    369           "justification": "Table 7 provides GPU memory (GB) and training time (hours) for both DLAP and LoRA fine-tuning across all four datasets.",
    370           "source": "haiku"
    371         }
    372       }
    373     }
    374   },
    375   "claims": [
    376     {
    377       "claim": "DLAP outperforms state-of-the-art prompting frameworks by ~10% in F1 and ~20% in MCC across four C/C++ projects",
    378       "evidence": "Table 5: DLAP F1 52.1/49.3/65.4/66.7 vs best baseline GRACE 32.6/38.4/37.6/28.9 across Chrome/Android/Linux/Qemu",
    379       "supported": "strong"
    380     },
    381     {
    382       "claim": "Linevul (Transformer-based) is the optimal DL model for DLAP, outperforming Devign by 7.2% F1 and 10.5% MCC on average",
    383       "evidence": "Table 3 shows consistent Linevul superiority across all 4 projects; Table 4 shows Linevul has highest coefficient of variation (2.7 avg) indicating most discrete probability distribution",
    384       "supported": "strong"
    385     },
    386     {
    387       "claim": "DLAP achieves approximately 90% of fine-tuning performance at substantially lower computational cost",
    388       "evidence": "Table 6 shows DLAP total F1 58.4 vs fine-tuning 52.8 overall, but DLAP is much worse on large datasets (Chrome: 52.1 vs 82.0); Table 7 shows ~5x less GPU memory",
    389       "supported": "weak"
    390     },
    391     {
    392       "claim": "ICL prompts from DL models stimulate 'implicit fine-tuning' in LLMs by altering attention layer representations",
    393       "evidence": "Mathematical derivation in Section 3.3/Appendix using simplified linear attention (softmax removed); Figure 7 shows similar probability distributions between DLAP and fine-tuning",
    394       "supported": "weak"
    395     },
    396     {
    397       "claim": "DLAP generates more interpretable vulnerability detection outputs than fine-tuning",
    398       "evidence": "Figure 8 shows one qualitative example comparing DLAP explanatory output vs. fine-tuned LLM yes/no response; no systematic evaluation",
    399       "supported": "weak"
    400     }
    401   ],
    402   "methodology_tags": [
    403     "benchmark-eval"
    404   ],
    405   "key_findings": "DLAP combines pre-trained DL models (Linevul selected as optimal via CV analysis) with LLMs through ICL and COT prompting, consistently outperforming other LLM-based prompting frameworks by 10–20% in F1 and MCC on four C/C++ vulnerability datasets. The framework requires significantly less compute than LoRA fine-tuning (~5x less GPU memory) and achieves better performance on small/imbalanced datasets (Qemu), though fine-tuning wins on large datasets (Chrome, Linux). The central theoretical claim—that DL-augmented ICL induces 'implicit fine-tuning' via attention modification—relies on removing softmax from the attention mechanism, leaving the mechanistic explanation partially unverified.",
    406   "red_flags": [
    407     {
    408       "flag": "No statistical significance tests",
    409       "detail": "All superiority claims derive from raw metric comparisons across 4 datasets with no p-values, confidence intervals, or significance testing despite making comparative performance claims."
    410     },
    411     {
    412       "flag": "GPT-3.5 contamination unaddressed",
    413       "detail": "Test code from well-known public repositories (Chrome, Linux, Android, Qemu) predates GPT-3.5-turbo's training cutoff; the paper does not discuss whether the LLM may have memorized evaluated functions."
    414     },
    415     {
    416       "flag": "No component ablation study",
    417       "detail": "DLAP combines ICL, COT, static analysis tools, and DL model outputs, but there is no ablation isolating each component's contribution; it is unknown which components drive the observed gains."
    418     },
    419     {
    420       "flag": "DL model trained on same-project data creates informational advantage",
    421       "detail": "Linevul is trained on each project's training split then used to augment prompts for the same project's test set; prompting baselines do not have equivalent project-specific training, creating an uncontrolled advantage."
    422     },
    423     {
    424       "flag": "Implicit fine-tuning requires softmax removal",
    425       "detail": "The mathematical justification for implicit fine-tuning requires removing softmax from the attention mechanism; the authors acknowledge they 'cannot strictly demonstrate' gradient descent optimization occurs."
    426     },
    427     {
    428       "flag": "Single LLM evaluated",
    429       "detail": "Only GPT-3.5-turbo-0125 is used in the main evaluation; despite acknowledging this as an external validity threat, no additional LLMs are tested to assess robustness of the claimed improvements."
    430     }
    431   ],
    432   "cited_papers": [
    433     {
    434       "title": "GRACE: Empowering LLM-based software vulnerability detection with graph structure and in-context learning",
    435       "relevance": "Primary competing baseline and most direct predecessor; DLAP explicitly positions against GRACE's graph-based ICL approach"
    436     },
    437     {
    438       "title": "An Empirical Study of Deep Learning Models for Vulnerability Detection",
    439       "relevance": "Motivates DLAP by demonstrating variability between DL model runs and low inter-model agreement; cited for generalization issues"
    440     },
    441     {
    442       "title": "Deep Learning Based Vulnerability Detection: Are We There Yet",
    443       "relevance": "Demonstrates 73% average performance degradation on cross-project datasets; core motivation for combining DL with LLMs"
    444     },
    445     {
    446       "title": "LineVul: A Transformer-based Line-Level Vulnerability Prediction",
    447       "relevance": "The DL model selected as DLAP's core augmentation component; critical to understanding DLAP's architecture"
    448     },
    449     {
    450       "title": "A C/C++ Code Vulnerability Dataset with Code Changes and CVE Summaries",
    451       "relevance": "Source of Linux and Android vulnerability datasets used in evaluation"
    452     },
    453     {
    454       "title": "Chain-of-Thought Prompting Elicits Reasoning in Large Language Models",
    455       "relevance": "Foundational technique for DLAP's COT component"
    456     },
    457     {
    458       "title": "Why Can GPT Learn In-Context? Language Models Secretly Perform Gradient Descent as Meta-Optimizers",
    459       "relevance": "Theoretical basis for the 'implicit fine-tuning' mechanism that is central to DLAP's design rationale"
    460     },
    461     {
    462       "title": "Prompt-enhanced Software Vulnerability Detection using ChatGPT",
    463       "relevance": "Direct prior work providing three of the four prompting baselines (PRol, PAux, PCot) and evaluation methodology"
    464     }
    465   ],
    466   "engagement_factors": {
    467     "practical_relevance": {
    468       "score": 2,
    469       "justification": "Directly applicable to security practitioners; code and COT templates publicly available on GitHub for deployment on C/C++ projects."
    470     },
    471     "surprise_contrarian": {
    472       "score": 1,
    473       "justification": "Finding that low-cost prompting rivals expensive fine-tuning on small datasets is mildly interesting but aligns with growing evidence in the broader LLM literature."
    474     },
    475     "fear_safety": {
    476       "score": 2,
    477       "justification": "Addresses automated detection of software vulnerabilities with direct security implications; framed around protecting systems from exploitation."
    478     },
    479     "drama_conflict": {
    480       "score": 1,
    481       "justification": "Mild framing tension between prompting vs. fine-tuning paradigms, but presented cooperatively rather than confrontationally."
    482     },
    483     "demo_ability": {
    484       "score": 2,
    485       "justification": "Code available on GitHub; practitioners can test on their own C/C++ codebases, though GPT API access and project-specific DL model training are required."
    486     },
    487     "brand_recognition": {
    488       "score": 0,
    489       "justification": "Academic paper from Nanjing University and Southern Cross University with no famous lab, product, or industry partner involved."
    490     }
    491   },
    492   "hn_data": {
    493     "threads": [
    494       {
    495         "hn_id": "41873968",
    496         "title": "Why do random forests work? They are self-regularizing adaptive smoothers",
    497         "points": 295,
    498         "comments": 41,
    499         "url": "https://news.ycombinator.com/item?id=41873968"
    500       },
    501       {
    502         "hn_id": "40727755",
    503         "title": "Adversarial Perturbations Cannot Reliably Protect Artists from Generative AI",
    504         "points": 5,
    505         "comments": 0,
    506         "url": "https://news.ycombinator.com/item?id=40727755"
    507       },
    508       {
    509         "hn_id": "40858891",
    510         "title": "AI Agents That Matter",
    511         "points": 4,
    512         "comments": 0,
    513         "url": "https://news.ycombinator.com/item?id=40858891"
    514       },
    515       {
    516         "hn_id": "31257990",
    517         "title": "Physics-Based Inverse Rendering Using Combined Implicit and Explicit Geometries",
    518         "points": 2,
    519         "comments": 0,
    520         "url": "https://news.ycombinator.com/item?id=31257990"
    521       },
    522       {
    523         "hn_id": "42433386",
    524         "title": "Autonomous Intelligent Systems: From Illusion of Control to Inescapable Delusion",
    525         "points": 1,
    526         "comments": 0,
    527         "url": "https://news.ycombinator.com/item?id=42433386"
    528       },
    529       {
    530         "hn_id": "41649192",
    531         "title": "Sharing Dependencies for Accelerating Cold Starts in Serverless Functions",
    532         "points": 1,
    533         "comments": 0,
    534         "url": "https://news.ycombinator.com/item?id=41649192"
    535       },
    536       {
    537         "hn_id": "40220945",
    538         "title": "Search for gravitationally lensed interstellar transmissions",
    539         "points": 1,
    540         "comments": 0,
    541         "url": "https://news.ycombinator.com/item?id=40220945"
    542       },
    543       {
    544         "hn_id": "39973513",
    545         "title": "Search for Gravitationally Lensed Interstellar Transmissions",
    546         "points": 1,
    547         "comments": 0,
    548         "url": "https://news.ycombinator.com/item?id=39973513"
    549       },
    550       {
    551         "hn_id": "39589862",
    552         "title": "Understanding Tree Ensembles as Self-Regularizing Adaptive Smoothers",
    553         "points": 1,
    554         "comments": 0,
    555         "url": "https://news.ycombinator.com/item?id=39589862"
    556       },
    557       {
    558         "hn_id": "31269012",
    559         "title": "Pik-Fix: Restoring and Colorizing Old Photo",
    560         "points": 1,
    561         "comments": 0,
    562         "url": "https://news.ycombinator.com/item?id=31269012"
    563       }
    564     ],
    565     "top_points": 295,
    566     "total_points": 312,
    567     "total_comments": 41
    568   }
    569 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs