scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (27254B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "HITS: High-coverage LLM-based Unit Test Generation via Method Slicing",
      6     "authors": [
      7       "Zejun Wang",
      8       "Kaibo Liu",
      9       "Ge Li",
     10       "Zhi Jin"
     11     ],
     12     "year": 2024,
     13     "venue": "International Conference on Automated Software Engineering",
     14     "arxiv_id": "2408.11324",
     15     "doi": "10.1145/3691620.3695501"
     16   },
     17   "checklist": {
     18     "claims_and_evidence": {
     19       "abstract_claims_supported": {
     20         "applies": true,
     21         "answer": true,
     22         "justification": "Abstract claims HITS 'significantly outperforms' LLM and SBST baselines; Tables 4-5 confirm avg line coverage 55.09% vs best baseline 39.10% (EvoSuite) across all 10 projects.",
     23         "source": "haiku"
     24       },
     25       "causal_claims_justified": {
     26         "applies": true,
     27         "answer": true,
     28         "justification": "RQ3 ablation study (Table 7) systematically removes slicing workflow and prompt engineering separately, providing adequate evidence for causal attribution of coverage improvements to specific HITS components.",
     29         "source": "haiku"
     30       },
     31       "generalization_bounded": {
     32         "applies": true,
     33         "answer": false,
     34         "justification": "Abstract and conclusion claim broad superiority without bounding to the specific setting; evaluation covers only 10 small Java projects (1-30 complex methods each) with a single LLM, but claims are not explicitly scoped to these conditions in the main text.",
     35         "source": "haiku"
     36       },
     37       "alternative_explanations_discussed": {
     38         "applies": true,
     39         "answer": false,
     40         "justification": "The paper does not consider that HITS makes more API calls per method than baselines (one per slice), which could independently increase coverage via sampling diversity rather than the slicing insight per se.",
     41         "source": "haiku"
     42       },
     43       "proxy_outcome_distinction": {
     44         "applies": true,
     45         "answer": true,
     46         "justification": "Line and branch coverage are directly measured with Jacoco and are exactly what the paper claims to improve; no proxy conflation present.",
     47         "source": "haiku"
     48       }
     49     },
     50     "limitations_and_scope": {
     51       "limitations_section_present": {
     52         "applies": true,
     53         "answer": true,
     54         "justification": "Section 5.4 'Threats to Validity' is a dedicated subsection discussing dataset size and LLM generalizability limitations.",
     55         "source": "haiku"
     56       },
     57       "threats_to_validity_specific": {
     58         "applies": true,
     59         "answer": true,
     60         "justification": "Specific threats: (1) dataset limited to projects with 1-30 complex methods due to budget constraints excluding larger projects; (2) only gpt-3.5-turbo evaluated because other LLMs are 'either incapable of understanding the instructions or are much more expensive.'",
     61         "source": "haiku"
     62       },
     63       "scope_boundaries_stated": {
     64         "applies": true,
     65         "answer": true,
     66         "justification": "Scope explicitly bounded to complex Java methods (cyclomatic complexity > 10), one LLM (gpt-3.5-turbo-0125), and 10 specific open-source projects; future work explicitly names extending dataset and testing other LLMs.",
     67         "source": "haiku"
     68       }
     69     },
     70     "conflicts_of_interest": {
     71       "funding_disclosed": {
     72         "applies": true,
     73         "answer": false,
     74         "justification": "No funding acknowledgment section appears in the paper. Authors mention 'limited budget' for experiments but disclose no funding source.",
     75         "source": "haiku"
     76       },
     77       "affiliations_disclosed": {
     78         "applies": true,
     79         "answer": true,
     80         "justification": "All four authors list affiliation with 'Key Lab of HCST (PKU), MOE; SCS, Beijing, China'; no commercial affiliations to disclose.",
     81         "source": "haiku"
     82       },
     83       "funder_independent_of_outcome": {
     84         "applies": false,
     85         "answer": false,
     86         "justification": "No funder is disclosed; independence cannot be assessed.",
     87         "source": "haiku"
     88       },
     89       "financial_interests_declared": {
     90         "applies": true,
     91         "answer": false,
     92         "justification": "No competing interests or financial interests statement appears anywhere in the paper.",
     93         "source": "haiku"
     94       }
     95     },
     96     "scope_and_framing": {
     97       "key_terms_defined": {
     98         "applies": true,
     99         "answer": true,
    100         "justification": "'Complex methods' defined as functions with cyclomatic complexity > 10. 'Method slicing' and 'code slice' are defined and illustrated with a concrete example in Figure 1.",
    101         "source": "haiku"
    102       },
    103       "intended_contribution_clear": {
    104         "applies": true,
    105         "answer": true,
    106         "justification": "Three explicit contributions listed: proposing slice-based test generation with LLMs, implementing HITS for complex Java methods, and comprehensive evaluation against baselines.",
    107         "source": "haiku"
    108       },
    109       "engagement_with_prior_work": {
    110         "applies": true,
    111         "answer": true,
    112         "justification": "Section 2 reviews SBST methods, symbolic execution, DL approaches, and LLM-based methods; HITS is explicitly contrasted with ChatUniTest (which 'directly inspires' it) and SymPrompt's path-based alternative.",
    113         "source": "haiku"
    114       }
    115     }
    116   },
    117   "type_checklist": {
    118     "empirical": {
    119       "artifacts": {
    120         "code_released": {
    121           "applies": true,
    122           "answer": false,
    123           "justification": "Code is shared at an anonymous review link (anonymous.4open.science/r/SlicePromptTest4J-6CF1/) which is not a permanent public repository; such links typically expire after peer review.",
    124           "source": "haiku"
    125         },
    126         "data_released": {
    127           "applies": true,
    128           "answer": false,
    129           "justification": "The 10 evaluated projects are public on GitHub with versions listed, but the extracted complex method dataset is not explicitly released as a downloadable artifact.",
    130           "source": "haiku"
    131         },
    132         "environment_specified": {
    133           "applies": true,
    134           "answer": false,
    135           "justification": "Paper mentions GPT-3.5-turbo-0125, JUnit 5, Jacoco, and Mockito but provides no requirements file, Dockerfile, or complete dependency specification.",
    136           "source": "haiku"
    137         },
    138         "reproduction_instructions": {
    139           "applies": true,
    140           "answer": false,
    141           "justification": "No step-by-step reproduction instructions in the paper; the anonymous repository may contain them but is not verifiably accessible.",
    142           "source": "haiku"
    143         }
    144       },
    145       "statistical_methodology": {
    146         "confidence_intervals_or_error_bars": {
    147           "applies": true,
    148           "answer": false,
    149           "justification": "All coverage results are single point estimates with no confidence intervals or error bars across any table.",
    150           "source": "haiku"
    151         },
    152         "significance_tests": {
    153           "applies": true,
    154           "answer": false,
    155           "justification": "No statistical significance tests applied to comparative claims despite average coverage gaps of roughly 10-34 percentage points between HITS and the baselines.",
    156           "source": "haiku"
    157         },
    158         "effect_sizes_reported": {
    159           "applies": true,
    160           "answer": true,
    161           "justification": "Percentage improvements are reported throughout (e.g., HITS 55.09% vs ChatUniTest 32.48% line coverage; abstract states '10 to 20 percent' improvement range with baseline context).",
    162           "source": "haiku"
    163         },
    164         "sample_size_justified": {
    165           "applies": true,
    166           "answer": false,
    167           "justification": "Dataset size (10 projects, ~120 complex methods) is dictated by budget constraints, not statistical power analysis.",
    168           "source": "haiku"
    169         },
    170         "variance_reported": {
    171           "applies": true,
    172           "answer": false,
    173           "justification": "Results are single-run greedy-decoding outputs; no variance across runs is reported or discussed.",
    174           "source": "haiku"
    175         }
    176       },
    177       "evaluation_design": {
    178         "baselines_included": {
    179           "applies": true,
    180           "answer": true,
    181           "justification": "Four baselines included: ChatUniTest, ChatTester, SymPrompt (all LLM-based), and EvoSuite (SBST).",
    182           "source": "haiku"
    183         },
    184         "baselines_contemporary": {
    185           "applies": true,
    186           "answer": true,
    187           "justification": "ChatUniTest (2023), ChatTester (2023), and SymPrompt (2024) are contemporary; EvoSuite is older but remains the standard SBST reference in this research area.",
    188           "source": "haiku"
    189         },
    190         "ablation_study": {
    191           "applies": true,
    192           "answer": true,
    193           "justification": "RQ3 ablation tests 'w/o slicing' and 'w/o slicing & PE' configurations, confirming contribution of each component in Table 7.",
    194           "source": "haiku"
    195         },
    196         "multiple_metrics": {
    197           "applies": true,
    198           "answer": true,
    199           "justification": "Three metrics used: line coverage, branch coverage, and pass rate (execution correctness), all reported across all 10 projects.",
    200           "source": "haiku"
    201         },
    202         "human_evaluation": {
    203           "applies": false,
    204           "answer": false,
    205           "justification": "Human evaluation of generated test quality is not standard in automated test generation; coverage metrics are the accepted automated proxy in this research area.",
    206           "source": "haiku"
    207         },
    208         "held_out_test_set": {
    209           "applies": true,
    210           "answer": true,
    211           "justification": "Authors explicitly partition projects into 'Learned' (6 projects in LLM training data) and 'Not Learned' (4 newer projects), providing a contamination-aware held-out evaluation split.",
    212           "source": "haiku"
    213         },
    214         "per_category_breakdown": {
    215           "applies": true,
    216           "answer": true,
    217           "justification": "Results broken down per-project for all 10 projects in every table, distinguishing learned vs. not-learned groups.",
    218           "source": "haiku"
    219         },
    220         "failure_cases_discussed": {
    221           "applies": true,
    222           "answer": true,
    223           "justification": "Section 5.1 case study shows ChatUniTest's failure on Parser.parse(); Section 5.3 analyzes compilation vs. runtime error distributions; underperformance on Commons-collections is analyzed.",
    224           "source": "haiku"
    225         },
    226         "negative_results_reported": {
    227           "applies": true,
    228           "answer": true,
    229           "justification": "HITS underperforms ChatUniTest on Commons-collections (43.71% vs 46.21% line coverage) and underperforms EvoSuite on 'Datafaker'; authors analyze reasons for both.",
    230           "source": "haiku"
    231         }
    232       },
    233       "setup_transparency": {
    234         "model_versions_specified": {
    235           "applies": true,
    236           "answer": true,
    237           "justification": "Exact snapshot version 'gpt-3.5-turbo-0125' specified; all baselines use the same model version for fair comparison.",
    238           "source": "haiku"
    239         },
    240         "prompts_provided": {
    241           "applies": true,
    242           "answer": true,
    243           "justification": "Figure 2 shows the prompt templates for slicing, test generation, and fixing including their chain-of-thought structure and key content; fill values (focal method, dependencies) are described in Section 3.",
    244           "source": "haiku"
    245         },
    246         "hyperparameters_reported": {
    247           "applies": true,
    248           "answer": false,
    249           "justification": "Only 'greedy generation' is mentioned; top-p is noted as being 'raised slowly' on format violations but no specific values are given for temperature or top-p.",
    250           "source": "haiku"
    251         },
    252         "scaffolding_described": {
    253           "applies": true,
    254           "answer": true,
    255           "justification": "Multi-step workflow (static analysis → decompose via CoT → slice-by-slice generation → executable validation → self-debug repair) is described in detail in Section 3 and Figure 2.",
    256           "source": "haiku"
    257         },
    258         "data_preprocessing_documented": {
    259           "applies": true,
    260           "answer": true,
    261           "justification": "Preprocessing documented: cyclomatic complexity > 10 threshold for method selection, static analysis for context retrieval (dependent classes, field declarations, Javadocs), and 3 explicit project selection criteria.",
    262           "source": "haiku"
    263         }
    264       },
    265       "data_integrity": {
    266         "raw_data_available": {
    267           "applies": true,
    268           "answer": false,
    269           "justification": "The 10 GitHub projects are publicly accessible, but the extracted complex method dataset is not explicitly released; the anonymous repository's persistence is uncertain.",
    270           "source": "haiku"
    271         },
    272         "data_collection_described": {
    273           "applies": true,
    274           "answer": true,
    275           "justification": "Data collection described with 3 explicit criteria: domain overlap with prior work, 1-30 complex methods per project, and GitHub star ranking. Table 2 provides project names, versions, and statistics.",
    276           "source": "haiku"
    277         },
    278         "recruitment_methods_described": {
    279           "applies": false,
    280           "answer": false,
    281           "justification": "No human participants; standard open-source benchmark projects used.",
    282           "source": "haiku"
    283         },
    284         "data_pipeline_documented": {
    285           "applies": true,
    286           "answer": true,
    287           "justification": "Pipeline documented: crawl projects → extract complex methods (cyclomatic complexity > 10) → retrieve context via static analysis. Table 2 shows resulting dataset statistics (#MUTs, avg line count, avg complexity).",
    288           "source": "haiku"
    289         }
    290       },
    291       "contamination": {
    292         "training_cutoff_stated": {
    293           "applies": true,
    294           "answer": false,
    295           "justification": "Authors categorize projects relative to gpt-3.5-turbo's training cutoff but never state the specific cutoff date in the paper.",
    296           "source": "haiku"
    297         },
    298         "train_test_overlap_discussed": {
    299           "applies": true,
    300           "answer": true,
    301           "justification": "Explicitly addressed: 6 projects created before training cutoff labeled 'Learned', 4 created after labeled 'Not Learned'. Authors analyze that LLMs can 'recall' tests for learned projects, affecting performance.",
    302           "source": "haiku"
    303         },
    304         "benchmark_contamination_addressed": {
    305           "applies": true,
    306           "answer": true,
    307           "justification": "The learned/not-learned split is a deliberate contamination mitigation strategy. Ablation analysis notes consistent improvements on not-learned subset, and performance gaps on learned projects are attributed to memorization.",
    308           "source": "haiku"
    309         }
    310       },
    311       "human_studies": {
    312         "pre_registered": {
    313           "applies": false,
    314           "answer": false,
    315           "justification": "No human participants.",
    316           "source": "haiku"
    317         },
    318         "irb_or_ethics_approval": {
    319           "applies": false,
    320           "answer": false,
    321           "justification": "No human participants.",
    322           "source": "haiku"
    323         },
    324         "demographics_reported": {
    325           "applies": false,
    326           "answer": false,
    327           "justification": "No human participants.",
    328           "source": "haiku"
    329         },
    330         "inclusion_exclusion_criteria": {
    331           "applies": false,
    332           "answer": false,
    333           "justification": "No human participants.",
    334           "source": "haiku"
    335         },
    336         "randomization_described": {
    337           "applies": false,
    338           "answer": false,
    339           "justification": "No human participants.",
    340           "source": "haiku"
    341         },
    342         "blinding_described": {
    343           "applies": false,
    344           "answer": false,
    345           "justification": "No human participants.",
    346           "source": "haiku"
    347         },
    348         "attrition_reported": {
    349           "applies": false,
    350           "answer": false,
    351           "justification": "No human participants.",
    352           "source": "haiku"
    353         }
    354       },
    355       "cost_and_practicality": {
    356         "inference_cost_reported": {
    357           "applies": true,
    358           "answer": false,
    359           "justification": "API costs not reported; 'limited budget' is mentioned as a constraint but no dollar amounts or token counts are provided.",
    360           "source": "haiku"
    361         },
    362         "compute_budget_stated": {
    363           "applies": true,
    364           "answer": false,
    365           "justification": "Total compute budget not stated; 'limited budget' is cited as reason for excluding large projects but no specific figures given.",
    366           "source": "haiku"
    367         }
    368       }
    369     }
    370   },
    371   "claims": [
    372     {
    373       "claim": "HITS significantly outperforms all LLM-based baselines and EvoSuite in line and branch coverage for complex Java methods",
    374       "evidence": "Tables 4-5: HITS avg 55.09% line / 48.12% branch vs ChatUniTest 32.48%/27.07%, ChatTester 20.71%/18.20%, SymPrompt 26.32%/25.10%, EvoSuite 39.10%/38.46% across 10 projects",
    375       "supported": "strong"
    376     },
    377     {
    378       "claim": "LLM-based methods without slicing underperform EvoSuite on complex methods, reversing the LLM advantage seen on all-method evaluations",
    379       "evidence": "EvoSuite (39.10% avg line) beats all LLM baselines (max 32.48%) on complex methods; prior work shows LLMs outperform EvoSuite on all-method averages (e.g., ChatUniTest 89.36% vs EvoSuite 80.02%)",
    380       "supported": "strong"
    381     },
    382     {
    383       "claim": "The slicing workflow is the primary contributor to coverage improvement, more than prompt engineering or post-processing",
    384       "evidence": "Table 7 ablation: HITS 55.09% → w/o slicing 50.61% (−4.5pp) → w/o slicing & PE 48.56% (−2pp more). However, post-processing alone lifts ChatUniTest from 32.48% to ~48.56%, providing most of the raw gain.",
    385       "supported": "moderate"
    386     },
    387     {
    388       "claim": "Low executable test pass rates cause low coverage, but high pass rates do not guarantee high coverage",
    389       "evidence": "RQ2: ChatUniTest on Commons-CLI achieves second-best pass rate yet lower coverage than EvoSuite. WIN project shows HITS 95.83% pass rate; analysis discusses need for coverage-directed guidance beyond pass rate.",
    390       "supported": "strong"
    391     },
    392     {
    393       "claim": "HITS achieves higher executable test pass rates than all LLM-based baselines",
    394       "evidence": "Table 6: HITS avg pass rate 69.11% vs ChatUniTest 41.82%, SymPrompt 21.00%, ChatTester 16.10%",
    395       "supported": "strong"
    396     }
    397   ],
    398   "methodology_tags": [
    399     "benchmark-eval",
    400     "empirical"
    401   ],
    402   "key_findings": "HITS achieves substantially higher coverage for complex Java methods (cyclomatic complexity > 10) by decomposing focal methods into slices and generating tests slice-by-slice: 55.09% avg line coverage and 48.12% branch coverage vs. the best baseline (EvoSuite) at 39.10%/38.46%. A key finding is that without HITS, all LLM-based test generation methods actually underperform traditional SBST (EvoSuite) on complex methods, reversing the advantage LLMs typically show on all-method evaluations. The ablation study shows the slicing workflow provides ~4.5pp improvement while post-processing alone accounts for ~16pp of the total 22.6pp gap over the naive ChatUniTest baseline. HITS also achieves higher executable test pass rates (69.11% avg) and demonstrates that high pass rates are necessary but not sufficient for high coverage.",
    403   "red_flags": [
    404     {
    405       "flag": "No statistical significance testing",
    406       "detail": "Coverage results across 10 projects are reported as point estimates with no confidence intervals, error bars, or significance tests despite making strong comparative superiority claims."
    407     },
    408     {
    409       "flag": "Post-processing conflates attribution",
    410       "detail": "The ablation shows 'ChatUniTest + PP' achieves 48.56% avg line coverage vs ChatUniTest's 32.48%, meaning HITS's post-processing alone accounts for ~16pp of the 22.6pp total gain. Main Tables 4-5 compare HITS against ChatUniTest without this post-processing, substantially inflating the apparent advantage of the slicing approach over the baseline."
    411     },
    412     {
    413       "flag": "SymPrompt reimplemented by authors",
    414       "detail": "Authors state 'We implement SymPrompt since we have found no implementations.' An in-house reimplementation of a baseline may not faithfully reproduce the original, potentially underestimating its performance."
    415     },
    416     {
    417       "flag": "Very small, budget-constrained dataset",
    418       "detail": "Only 10 projects with 1-30 complex methods each (~120 total methods) selected partly due to budget constraints rather than systematic sampling, limiting statistical power and generalizability."
    419     },
    420     {
    421       "flag": "Single LLM tested",
    422       "detail": "Only GPT-3.5-turbo-0125 evaluated; effectiveness on GPT-4, Claude, or open-source LLMs is entirely untested. Acknowledged as future work but limits the scope of claims."
    423     },
    424     {
    425       "flag": "Unreliable code artifact",
    426       "detail": "Code released only at an anonymous review link (anonymous.4open.science) which is not a permanent public repository; post-publication accessibility is uncertain, making reproduction difficult."
    427     }
    428   ],
    429   "cited_papers": [
    430     {
    431       "title": "ChatUniTest: a ChatGPT-based automated unit test generation tool",
    432       "relevance": "Primary baseline and direct inspiration for HITS; provides the foundational LLM-based unit test generation workflow that HITS extends with slicing."
    433     },
    434     {
    435       "title": "No More Manual Tests? Evaluating and Improving ChatGPT for Unit Test Generation (ChatTester)",
    436       "relevance": "LLM-based baseline using incremental context construction for unit test generation; key comparative evaluation point."
    437     },
    438     {
    439       "title": "Code-Aware Prompting: A study of Coverage Guided Test Generation in Regression Setting using LLM (SymPrompt)",
    440       "relevance": "Contemporary baseline using control-flow paths as LLM scaffolds; most closely related alternative approach to coverage-guided LLM test generation."
    441     },
    442     {
    443       "title": "Evolutionary Generation of Whole Test Suites (EvoSuite)",
    444       "relevance": "Primary SBST baseline using genetic algorithms; the standard automated test generation comparison in this research space."
    445     },
    446     {
    447       "title": "Teaching Large Language Models to Self-Debug",
    448       "relevance": "Self-Debug technique adopted by HITS for iterative repair of non-executable generated tests."
    449     },
    450     {
    451       "title": "An empirical evaluation of using large language models for automated unit test generation",
    452       "relevance": "Related empirical study contextualizing LLM capabilities for unit test generation that HITS builds upon."
    453     },
    454     {
    455       "title": "CodaMosa: Escaping coverage plateaus in test generation with pre-trained large language models",
    456       "relevance": "Hybrid SBST+LLM approach for Python test generation; related work combining traditional and LLM-based methods."
    457     },
    458     {
    459       "title": "Unit test case generation with transformers and focal context (AthenaTest)",
    460       "relevance": "Early deep learning approach to unit test generation from focal context; establishes the transformer-based generation paradigm HITS builds on."
    461     }
    462   ],
    463   "engagement_factors": {
    464     "practical_relevance": {
    465       "score": 3,
    466       "justification": "Unit test generation for complex methods is a direct practitioner need; HITS targets exactly the cases where existing tools fail most severely."
    467     },
    468     "surprise_contrarian": {
    469       "score": 2,
    470       "justification": "The finding that LLMs underperform SBST on complex methods challenges the prevailing narrative of broad LLM superiority in software engineering tasks."
    471     },
    472     "fear_safety": {
    473       "score": 0,
    474       "justification": "No AI safety or risk concerns; purely a software testing methodology paper."
    475     },
    476     "drama_conflict": {
    477       "score": 0,
    478       "justification": "No controversy or conflict; standard empirical comparison paper."
    479     },
    480     "demo_ability": {
    481       "score": 2,
    482       "justification": "HITS tool exists targeting Java complex methods, but anonymous repository makes immediate tryability uncertain post-review."
    483     },
    484     "brand_recognition": {
    485       "score": 1,
    486       "justification": "Peking University is a top Chinese research institution with some recognition, but not a major AI lab with brand pull."
    487     }
    488   },
    489   "hn_data": {
    490     "threads": [
    491       {
    492         "hn_id": "41669522",
    493         "title": "LlamaF: An Efficient Llama2 Architecture Accelerator on Embedded FPGAs",
    494         "points": 124,
    495         "comments": 29,
    496         "url": "https://news.ycombinator.com/item?id=41669522"
    497       },
    498       {
    499         "hn_id": "34125851",
    500         "title": "SwinFIR: Revisiting the SwinIR with Fast Fourier Convolution",
    501         "points": 23,
    502         "comments": 0,
    503         "url": "https://news.ycombinator.com/item?id=34125851"
    504       },
    505       {
    506         "hn_id": "42238858",
    507         "title": "Telepathic Datacenters: Fast RPCs Using Shared CXL Memory",
    508         "points": 4,
    509         "comments": 0,
    510         "url": "https://news.ycombinator.com/item?id=42238858"
    511       },
    512       {
    513         "hn_id": "41703093",
    514         "title": "Math framework of intelligence and consciousness based on Riemannian Geometry",
    515         "points": 4,
    516         "comments": 0,
    517         "url": "https://news.ycombinator.com/item?id=41703093"
    518       },
    519       {
    520         "hn_id": "42265487",
    521         "title": "Fast RPCs Using Shared CXL Memory [pdf]",
    522         "points": 2,
    523         "comments": 0,
    524         "url": "https://news.ycombinator.com/item?id=42265487"
    525       },
    526       {
    527         "hn_id": "41528121",
    528         "title": "Telepathic Datacenters: Fast RPCs Using Shared CXL Memory",
    529         "points": 2,
    530         "comments": 0,
    531         "url": "https://news.ycombinator.com/item?id=41528121"
    532       },
    533       {
    534         "hn_id": "41418082",
    535         "title": "Data Exposure from LLM Apps: An In-Depth Investigation of OpenAI's GPTs",
    536         "points": 2,
    537         "comments": 0,
    538         "url": "https://news.ycombinator.com/item?id=41418082"
    539       },
    540       {
    541         "hn_id": "41408373",
    542         "title": "Data Exposure from LLM Apps: An In-Depth Investigation of OpenAI's GPTs",
    543         "points": 2,
    544         "comments": 0,
    545         "url": "https://news.ycombinator.com/item?id=41408373"
    546       },
    547       {
    548         "hn_id": "41788577",
    549         "title": "Input-Dependent Power Usage in GPUs",
    550         "points": 2,
    551         "comments": 0,
    552         "url": "https://news.ycombinator.com/item?id=41788577"
    553       },
    554       {
    555         "hn_id": "39958822",
    556         "title": "In-Context Learning with Retrieved Demonstrations for Language Models",
    557         "points": 1,
    558         "comments": 0,
    559         "url": "https://news.ycombinator.com/item?id=39958822"
    560       }
    561     ],
    562     "top_points": 124,
    563     "total_points": 166,
    564     "total_comments": 29
    565   }
    566 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs