scan-v5.json (28242B)
1 { 2 "scan_version": 5, 3 "paper_type": "empirical", 4 "paper": { 5 "title": "Fixing 7,400 Bugs for 1$: Cheap Crash-Site Program Repair", 6 "authors": [ 7 "Han Zheng", 8 "Ilia Shumailov", 9 "Tianqi Fan", 10 "Aiden Hall", 11 "Mathias Payer" 12 ], 13 "year": 2025, 14 "venue": "arXiv.org", 15 "arxiv_id": "2505.13103", 16 "doi": "10.48550/arXiv.2505.13103" 17 }, 18 "checklist": { 19 "claims_and_evidence": { 20 "abstract_claims_supported": { 21 "applies": true, 22 "answer": false, 23 "justification": "The title claims '7,400 bugs' but the evaluation covers only 358 bugs — this number is never derived in the paper. The abstract's '73.5%' combined fixing rate cannot be reconciled with the body (195 + 60 additional = 255/358 ≈ 71.2%). The conclusion also swaps the 29.6% and 45.9% figures relative to Section 5.2.", 24 "source": "haiku" 25 }, 26 "causal_claims_justified": { 27 "applies": true, 28 "answer": true, 29 "justification": "Comparative experiments against CodeRover-S, Agentless, and VulMaster on the same 358-bug benchmark with identical evaluation metrics adequately support the causal claim that WILLIAMT's design reduces cost while maintaining repair rate.", 30 "source": "haiku" 31 }, 32 "generalization_bounded": { 33 "applies": true, 34 "answer": false, 35 "justification": "The title 'Fixing 7,400 Bugs' dramatically overstates the 358-bug evaluation scope; the paper claims 'broad applicability and scalability' beyond what the ARVO benchmark (4 memory corruption types from OSS-Fuzz) can support.", 36 "source": "haiku" 37 }, 38 "alternative_explanations_discussed": { 39 "applies": true, 40 "answer": false, 41 "justification": "The paper does not discuss why WILLIAMT and CodeRover-S fix largely disjoint sets of bugs, nor whether ARVO's 15-minute compilation filter systematically selects simpler bugs where templates work better.", 42 "source": "haiku" 43 }, 44 "proxy_outcome_distinction": { 45 "applies": true, 46 "answer": true, 47 "justification": "Appendix A 
explicitly analyzes the gap between 'plausible' fixes (PoC does not crash) and actual fixes, showing only 56/165 plausible patches pass manual review with broader inputs; the authors recommend manual developer verification.", 48 "source": "haiku" 49 } 50 }, 51 "limitations_and_scope": { 52 "limitations_section_present": { 53 "applies": true, 54 "answer": true, 55 "justification": "Section 6 'Discussion' substantively covers three limitations: incorrect crash site analysis, semantically disruptive patch insertion within conditionals, and the imprecise plausible fix metric.", 56 "source": "haiku" 57 }, 58 "threats_to_validity_specific": { 59 "applies": true, 60 "answer": true, 61 "justification": "Specific threats are named with examples: LLM variable identification fails when required variables are beyond the crash frame; patch insertion above a crash site inside an if-statement breaks control flow; the plausible metric doesn't test inputs beyond the PoC.", 62 "source": "haiku" 63 }, 64 "scope_boundaries_stated": { 65 "applies": true, 66 "answer": false, 67 "justification": "The paper does not explicitly state that results are bounded to memory corruption bugs (HBO, GBO, SBO, UAF) and do not apply to logic bugs or other vulnerability classes; the broad title actively misleads about scope.", 68 "source": "haiku" 69 } 70 }, 71 "conflicts_of_interest": { 72 "funding_disclosed": { 73 "applies": true, 74 "answer": false, 75 "justification": "No funding acknowledgment appears anywhere in the paper; two authors are employed at Google/Google DeepMind and two at EPFL, but no grant numbers or sponsorship are disclosed.", 76 "source": "haiku" 77 }, 78 "affiliations_disclosed": { 79 "applies": true, 80 "answer": true, 81 "justification": "Author affiliations (EPFL, Google DeepMind, Google Zurich, Google New York) are clearly listed in the paper header.", 82 "source": "haiku" 83 }, 84 "funder_independent_of_outcome": { 85 "applies": false, 86 "answer": false, 87 "justification": 
"No funding is disclosed; N/A.", 88 "source": "haiku" 89 }, 90 "financial_interests_declared": { 91 "applies": true, 92 "answer": false, 93 "justification": "No competing interests or financial interests statement appears in the paper.", 94 "source": "haiku" 95 } 96 }, 97 "scope_and_framing": { 98 "key_terms_defined": { 99 "applies": true, 100 "answer": true, 101 "justification": "Key terms are clearly defined: 'crash-site repair' vs 'root cause analysis', 'plausible fix' metric, spatial vs temporal memory corruption, and the three-stage repair objective framework (graceful crash, bail-out, root cause).", 102 "source": "haiku" 103 }, 104 "intended_contribution_clear": { 105 "applies": true, 106 "answer": true, 107 "justification": "Four explicit bullet points at the end of Section 1 state contributions: crash-site repair proposal, template-guided patch generation, WILLIAMT prototype evaluation, and combined cost/fixing rate results.", 108 "source": "haiku" 109 }, 110 "engagement_with_prior_work": { 111 "applies": true, 112 "answer": true, 113 "justification": "Section 2 provides background on LLM-based APR and OSS-Fuzz; direct comparisons with Agentless, CodeRover-S, and VulMaster situate the contribution clearly within SoTA.", 114 "source": "haiku" 115 } 116 } 117 }, 118 "type_checklist": { 119 "empirical": { 120 "artifacts": { 121 "code_released": { 122 "applies": true, 123 "answer": false, 124 "justification": "'We promise to fully release WILLIAMT upon paper acceptance' — this is a future promise, not a current release; the code is unavailable.", 125 "source": "haiku" 126 }, 127 "data_released": { 128 "applies": true, 129 "answer": true, 130 "justification": "ARVO is a publicly available benchmark; OSS-Fuzz bugs are publicly accessible. 
The evaluation data can be independently obtained.", 131 "source": "haiku" 132 }, 133 "environment_specified": { 134 "applies": true, 135 "answer": false, 136 "justification": "Hardware is described (Ubuntu 22.04, AMD EPYC 7302P, 64GB RAM, RTX 4090) but no requirements.txt, Dockerfile, or dependency specification for WILLIAMT itself is provided.", 137 "source": "haiku" 138 }, 139 "reproduction_instructions": { 140 "applies": true, 141 "answer": false, 142 "justification": "Appendix C describes the regex logic at a high level and shows one prompt example, but no step-by-step reproduction instructions are provided and the code is not released.", 143 "source": "haiku" 144 } 145 }, 146 "statistical_methodology": { 147 "confidence_intervals_or_error_bars": { 148 "applies": true, 149 "answer": false, 150 "justification": "All results are reported as single-run point estimates (46.1%, 54.5%, etc.) with no confidence intervals or error bars across any comparison.", 151 "source": "haiku" 152 }, 153 "significance_tests": { 154 "applies": true, 155 "answer": false, 156 "justification": "No statistical significance tests are performed for any comparative claim between WILLIAMT and baselines.", 157 "source": "haiku" 158 }, 159 "effect_sizes_reported": { 160 "applies": true, 161 "answer": true, 162 "justification": "Effect sizes are reported as percentage differences (46.1% vs 54.5% fix rate, 99.7% token reduction, 45.9% cost reduction) with baseline values provided for context.", 163 "source": "haiku" 164 }, 165 "sample_size_justified": { 166 "applies": true, 167 "answer": false, 168 "justification": "The 358-bug subset is chosen by a 15-minute compilation filter 'following recommended practice [60]', but no power analysis or justification for why 358 is sufficient is provided.", 169 "source": "haiku" 170 }, 171 "variance_reported": { 172 "applies": true, 173 "answer": false, 174 "justification": "No variance, standard deviation, or spread is reported for any result; all 
figures show single-run point estimates with no indication of run-to-run stability.", 175 "source": "haiku" 176 } 177 }, 178 "evaluation_design": { 179 "baselines_included": { 180 "applies": true, 181 "answer": true, 182 "justification": "Three baselines are compared: CodeRover-S (best SoTA), Agentless, and VulMaster, all evaluated on the same 358-bug ARVO subset with the same plausible fix metric.", 183 "source": "haiku" 184 }, 185 "baselines_contemporary": { 186 "applies": true, 187 "answer": true, 188 "justification": "All baselines are from 2024 publications (CodeRover-S arXiv Nov 2024, Agentless ISSTA 2024, VulMaster ICSE 2024), contemporary with the 2025 preprint.", 189 "source": "haiku" 190 }, 191 "ablation_study": { 192 "applies": true, 193 "answer": false, 194 "justification": "No ablation study is conducted; the contributions of regex-based context retrieval vs. template-guided patch generation are never separated to measure individual impact.", 195 "source": "haiku" 196 }, 197 "multiple_metrics": { 198 "applies": true, 199 "answer": true, 200 "justification": "Multiple metrics are used: plausible fix rate, token cost ($), execution time, per-LLM performance breakdown (Figure 6), and manual review pass rate (Appendix A).", 201 "source": "haiku" 202 }, 203 "human_evaluation": { 204 "applies": true, 205 "answer": true, 206 "justification": "Appendix A includes manual review of all 165 plausible patches from WILLIAMT-GPT-4o, determining which preserve behavior with broader inputs beyond the PoC.", 207 "source": "haiku" 208 }, 209 "held_out_test_set": { 210 "applies": false, 211 "answer": false, 212 "justification": "This is a benchmark evaluation of a repair system, not a prediction model requiring train/test split; N/A.", 213 "source": "haiku" 214 }, 215 "per_category_breakdown": { 216 "applies": true, 217 "answer": false, 218 "justification": "Results are not broken down by memory corruption type (HBO, GBO, SBO, UAF) — Figures 4 and 6 show aggregate fixing 
rates only, despite the system being designed per-category with different templates.", 219 "source": "haiku" 220 }, 221 "failure_cases_discussed": { 222 "applies": true, 223 "answer": true, 224 "justification": "Section 6 analyzes failure modes with specific examples; Appendix A quantifies 70 early-exit failures and 39 patches that block valid inputs beyond the PoC.", 225 "source": "haiku" 226 }, 227 "negative_results_reported": { 228 "applies": true, 229 "answer": true, 230 "justification": "VulMaster's near-complete failure (5 bugs fixed) is reported; WILLIAMT's ~37% 'No Patch' rate is acknowledged; Gemma3:1B fixes only 11/358 bugs.", 231 "source": "haiku" 232 } 233 }, 234 "setup_transparency": { 235 "model_versions_specified": { 236 "applies": true, 237 "answer": true, 238 "justification": "Specific model versions are stated: gpt-4o-2024-08-06, DeepSeek-V3, DeepSeek-R1, Claude 3.5-Haiku, Claude 3.7-Sonnet, Gemma3 1B/4B/12B/27B.", 239 "source": "haiku" 240 }, 241 "prompts_provided": { 242 "applies": true, 243 "answer": true, 244 "justification": "Figure 15 shows the complete actual prompt used for crash-site variable analysis including structured output requirements, constraints, and an example for a global-buffer-overflow bug.", 245 "source": "haiku" 246 }, 247 "hyperparameters_reported": { 248 "applies": true, 249 "answer": false, 250 "justification": "No temperature, top-p, or other LLM sampling hyperparameters are reported for any model.", 251 "source": "haiku" 252 }, 253 "scaffolding_described": { 254 "applies": true, 255 "answer": true, 256 "justification": "The two-component scaffolding (regex-based context retrieval and template-guided patch generation) is described with workflow diagrams, appendices showing regex logic, and template code (Figures 12–15).", 257 "source": "haiku" 258 }, 259 "data_preprocessing_documented": { 260 "applies": true, 261 "answer": true, 262 "justification": "Appendix C documents the preprocessing pipeline: PoC reproduction in 
Docker, ASan report parsing via regex, crash frame identification, source line extraction, and code window selection.", 263 "source": "haiku" 264 } 265 }, 266 "data_integrity": { 267 "raw_data_available": { 268 "applies": true, 269 "answer": false, 270 "justification": "ARVO benchmark is public, but WILLIAMT's per-bug patch outputs and intermediate results are not released; the code is promised but not yet available for independent verification.", 271 "source": "haiku" 272 }, 273 "data_collection_described": { 274 "applies": true, 275 "answer": true, 276 "justification": "The selection of 358 bugs from ARVO (all heap-buffer-overflow, stack-buffer-overflow, global-buffer-overflow, and use-after-free bugs compilable within 15 minutes) is clearly stated with reference to ARVO's curation methodology.", 277 "source": "haiku" 278 }, 279 "recruitment_methods_described": { 280 "applies": false, 281 "answer": false, 282 "justification": "No human participant recruitment; evaluation uses a static benchmark dataset.", 283 "source": "haiku" 284 }, 285 "data_pipeline_documented": { 286 "applies": true, 287 "answer": true, 288 "justification": "The full pipeline from ClusterFuzz PoC → Docker reproduction → ASan report → regex extraction → LLM analysis → patch insertion → compilation → PoC re-execution is described in the paper and appendices.", 289 "source": "haiku" 290 } 291 }, 292 "contamination": { 293 "training_cutoff_stated": { 294 "applies": true, 295 "answer": false, 296 "justification": "The paper uses multiple LLMs but states no training data cutoffs, despite evaluating on public OSS-Fuzz bugs (with known CVEs and fix commits) that predate most model training cutoffs.", 297 "source": "haiku" 298 }, 299 "train_test_overlap_discussed": { 300 "applies": true, 301 "answer": false, 302 "justification": "ARVO bugs are real, public OSS-Fuzz reports with public fix commits; LLMs may have memorized these bugs and their fixes during training. 
This threat is never discussed.", 303 "source": "haiku" 304 }, 305 "benchmark_contamination_addressed": { 306 "applies": true, 307 "answer": false, 308 "justification": "ARVO bugs are from OSS-Fuzz reports predating model training cutoffs; the possibility that LLMs have seen these specific bug reports and reference fixes is not addressed.", 309 "source": "haiku" 310 } 311 }, 312 "human_studies": { 313 "pre_registered": { 314 "applies": false, 315 "answer": false, 316 "justification": "No human participants.", 317 "source": "haiku" 318 }, 319 "irb_or_ethics_approval": { 320 "applies": false, 321 "answer": false, 322 "justification": "No human participants.", 323 "source": "haiku" 324 }, 325 "demographics_reported": { 326 "applies": false, 327 "answer": false, 328 "justification": "No human participants.", 329 "source": "haiku" 330 }, 331 "inclusion_exclusion_criteria": { 332 "applies": false, 333 "answer": false, 334 "justification": "No human participants.", 335 "source": "haiku" 336 }, 337 "randomization_described": { 338 "applies": false, 339 "answer": false, 340 "justification": "No human participants.", 341 "source": "haiku" 342 }, 343 "blinding_described": { 344 "applies": false, 345 "answer": false, 346 "justification": "No human participants.", 347 "source": "haiku" 348 }, 349 "attrition_reported": { 350 "applies": false, 351 "answer": false, 352 "justification": "No human participants.", 353 "source": "haiku" 354 } 355 }, 356 "cost_and_practicality": { 357 "inference_cost_reported": { 358 "applies": true, 359 "answer": true, 360 "justification": "Inference cost is a central focus: Figure 5a compares $/bug ($0.0026 vs $0.93), Figure 7 shows cost per model in cents, and total cost for 358 bugs ($0.68) is explicitly stated.", 361 "source": "haiku" 362 }, 363 "compute_budget_stated": { 364 "applies": true, 365 "answer": false, 366 "justification": "Hardware specs are described (AMD EPYC 7302P, RTX 4090) and per-bug API costs are given, but the total compute 
budget across all experimental conditions is not summed or stated.", 367 "source": "haiku" 368 } 369 } 370 } 371 }, 372 "claims": [ 373 { 374 "claim": "WILLIAMT reduces token cost by 99.7% vs CodeRover-S while retaining over 86.7% of its fixing rate", 375 "evidence": "WILLIAMT fixes 165/358 (46.1%) vs CodeRover-S 195/358 (54.5%) on gpt-4o; average cost $0.0026 vs $0.93 per bug (Section 5.1, Figure 5a)", 376 "supported": "strong" 377 }, 378 { 379 "claim": "Combined WILLIAMT+CodeRover-S pipeline achieves 29.6% more fixes and 45.9% lower total cost than CodeRover-S alone", 380 "evidence": "Section 5.2 states 60 additional plausible fixes and 45.9% cost reduction; though the conclusion erroneously swaps these two percentages", 381 "supported": "moderate" 382 }, 383 { 384 "claim": "WILLIAMT can fix all 358 benchmark bugs for under $0.68 using GPT-4o — less than the cost of fixing one bug with CodeRover-S", 385 "evidence": "Directly stated in Section 5.2; note, however, that the reported average of $0.0026/bug × 358 ≈ $0.93 total, which is higher than the stated $0.68 and about equal to CodeRover-S's $0.93/bug, so the stated total does not follow from the reported per-bug average", 386 "supported": "strong" 387 }, 388 { 389 "claim": "Gemma3:27B local model achieves 96.4% of GPT-4o's fixing performance on consumer hardware (RTX 4090 or Mac Mini M4)", 390 "evidence": "Figure 6: Gemma3:27B fixes 163 bugs vs GPT-4o's 165 (163/165 ≈ 98.8%, which does not match the claimed 96.4%); Mac Mini M4 performance stated to be on par with RTX 4090", 391 "supported": "strong" 392 }, 393 { 394 "claim": "Plausible fix metric significantly overestimates actual fix quality: only 56/165 WILLIAMT plausible patches pass manual review", 395 "evidence": "Appendix A: 165 plausible → 95 avoid early exit → 56 pass manual review for broader inputs; 39 introduce early exits on non-PoC inputs", 396 "supported": "strong" 397 }, 398 { 399 "claim": "Reasoning models cost 5-6x more than non-reasoning models without fixing more bugs on this task", 400 "evidence": "Figure 7: DeepSeek-R1 costs 0.70 cent/bug vs Claude-haiku 0.09 cent/bug; fix rates are comparable (159 vs 170); Claude-haiku non-reasoning achieves the 
highest fix rate", 401 "supported": "strong" 402 } 403 ], 404 "methodology_tags": [ 405 "benchmark-eval", 406 "case-study" 407 ], 408 "key_findings": "WILLIAMT demonstrates that crash-site repair — inserting assertions directly before the crash point rather than fixing the root cause — achieves 86.7% of state-of-the-art APR performance at 0.3% of the token cost. When used as a pre-filter before CodeRover-S, the combined pipeline fixes 29.6% more bugs while reducing total cost by 45.9%. Local models (Gemma3:27B on RTX 4090 or Mac Mini M4) achieve near-parity with frontier models, and non-reasoning models outperform reasoning models on this template-constrained task. However, the plausible fix metric substantially overstates actual repair quality: only 34% (56/165) of 'plausible' patches pass manual review with broader inputs, meaning real-world utility requires human verification.", 409 "red_flags": [ 410 { 411 "flag": "Title exaggeration", 412 "detail": "The title claims '7,400 bugs' but the evaluation covers only 358 bugs. The 7,400 figure is never derived or explained anywhere in the paper body." 413 }, 414 { 415 "flag": "Abstract-body numerical inconsistency", 416 "detail": "The abstract states a '73.5%' combined fixing rate, but 195 (CodeRover-S) + 60 (additional) = 255/358 ≈ 71.2%. The 73.5% figure cannot be reconciled with the evidence in the body." 417 }, 418 { 419 "flag": "Conclusion swaps key results", 420 "detail": "The conclusion states 'reduces token usage by 29.6% and improves fixing rate by 45.9%' — these figures are transposed relative to the correct values in Section 5.2 (45.9% cost reduction, 29.6% fixing rate improvement)." 421 }, 422 { 423 "flag": "No statistical significance testing", 424 "detail": "No confidence intervals, significance tests, or variance estimates are reported across any comparison. All results are single-run point estimates on 358 bugs." 
425 }, 426 { 427 "flag": "Benchmark contamination unaddressed", 428 "detail": "ARVO bugs are real public OSS-Fuzz reports with known CVEs and fix commits predating model training cutoffs. LLMs may have seen these bug reports and reference patches during training. This threat is never discussed." 429 }, 430 { 431 "flag": "Code not released", 432 "detail": "WILLIAMT is promised for open-source release upon paper acceptance but is currently unavailable, making independent reproduction impossible." 433 }, 434 { 435 "flag": "No ablation study", 436 "detail": "The two core components (regex-based context retrieval, template-guided patch generation) are never evaluated independently to determine each component's contribution to the results." 437 }, 438 { 439 "flag": "Plausible metric acknowledged weak but still used for comparisons", 440 "detail": "The paper itself shows only 56/165 (34%) plausible patches pass manual review, yet all SoTA comparisons use this metric — comparisons with CodeRover-S and Agentless use a metric the paper demonstrates is unreliable." 
441 } 442 ], 443 "cited_papers": [ 444 { 445 "title": "Fixing Security Vulnerabilities with AI in OSS-Fuzz (CodeRover-S)", 446 "relevance": "Primary baseline and the SoTA APR tool; WILLIAMT is benchmarked against it for both cost and fixing rate on ARVO" 447 }, 448 { 449 "title": "Automated Program Repair via Conversation: Fixing 162 out of 337 Bugs for $0.42 each using ChatGPT (Agentless)", 450 "relevance": "Key baseline APR system compared on the same ARVO benchmark; also inspired the cost-per-bug framing" 451 }, 452 { 453 "title": "ARVO: Atlas of Reproducible Vulnerabilities for Open Source Software", 454 "relevance": "The benchmark dataset used for all evaluation; provides 5,000+ reproducible OSS-Fuzz memory corruption bugs" 455 }, 456 { 457 "title": "Out of Sight, Out of Mind: Better Automatic Vulnerability Repair by Broadening Input Ranges and Sources (VulMaster)", 458 "relevance": "Third baseline evaluated; its near-complete failure (5 bugs) provides important context for the difficulty of the task" 459 }, 460 { 461 "title": "AutoCodeRover: Autonomous Program Improvement", 462 "relevance": "Foundation of the CodeRover-S system; describes the multi-iteration repair loop that WILLIAMT's one-shot approach simplifies" 463 }, 464 { 465 "title": "Template-Guided Program Repair in the Era of Large Language Models", 466 "relevance": "Related prior work on template-guided repair that WILLIAMT extends to crash-site-specific repair" 467 }, 468 { 469 "title": "AddressSanitizer: A Fast Address Sanity Checker", 470 "relevance": "Core dependency — ASan reports are the primary structured input to WILLIAMT's regex-based context retrieval" 471 }, 472 { 473 "title": "Code Repair with LLMs Gives an Exploration-Exploitation Tradeoff", 474 "relevance": "Related APR work examining LLM-guided repair strategies; contextualizes the multi-attempt vs one-shot tradeoff" 475 } 476 ], 477 "engagement_factors": { 478 "practical_relevance": { 479 "score": 3, 480 "justification": "Directly 
applicable to any developer maintaining OSS with OSS-Fuzz integration; the Mac Mini M4 deployment and $0.68 total cost make it immediately accessible to individual developers." 481 }, 482 "surprise_contrarian": { 483 "score": 2, 484 "justification": "The core thesis — that crash-site repair (blocking exploitation) is sufficient and far cheaper than root-cause repair — is genuinely contrarian to the dominant APR research direction." 485 }, 486 "fear_safety": { 487 "score": 1, 488 "justification": "Addresses security vulnerability fixing backlog at scale, which has clear security implications, but does not raise novel AI risk concerns." 489 }, 490 "drama_conflict": { 491 "score": 1, 492 "justification": "Challenges expensive agentic APR tools with a template approach, but presents WILLIAMT as complementary to CodeRover-S rather than a replacement." 493 }, 494 "demo_ability": { 495 "score": 2, 496 "justification": "The Mac Mini M4 local deployment with Gemma3:4b is concrete and reproducible in principle, though the code is not yet released." 497 }, 498 "brand_recognition": { 499 "score": 2, 500 "justification": "Co-authors from Google DeepMind; evaluates GPT-4o, Claude 3.5-Haiku/3.7-Sonnet, DeepSeek — all major recognizable LLM brands with active developer audiences." 
501 } 502 }, 503 "hn_data": { 504 "threads": [ 505 { 506 "hn_id": "45444062", 507 "title": "Machine Learnability as a Measure of Order in Aperiodic Sequences", 508 "points": 48, 509 "comments": 5, 510 "url": "https://news.ycombinator.com/item?id=45444062" 511 }, 512 { 513 "hn_id": "46697408", 514 "title": "WildCAT3D: Appearance-Aware Multi-View Diffusion in the Wild", 515 "points": 3, 516 "comments": 0, 517 "url": "https://news.ycombinator.com/item?id=46697408" 518 }, 519 { 520 "hn_id": "43401539", 521 "title": "CriteoPrivateAd: RealWorld Bidding Dataset to Design Private Advertising Systems", 522 "points": 2, 523 "comments": 1, 524 "url": "https://news.ycombinator.com/item?id=43401539" 525 }, 526 { 527 "hn_id": "43516923", 528 "title": "UniHOPE: A Unified Approach for Hand-Only and Hand-Object Pose Estimation", 529 "points": 2, 530 "comments": 0, 531 "url": "https://news.ycombinator.com/item?id=43516923" 532 }, 533 { 534 "hn_id": "43496516", 535 "title": "UniHOPE: A Unified Approach for Hand-Only and Hand-Object Pose Estimation", 536 "points": 2, 537 "comments": 0, 538 "url": "https://news.ycombinator.com/item?id=43496516" 539 }, 540 { 541 "hn_id": "36016970", 542 "title": "Visual Question Answering: Techniques and Common Trends in Recent Literature", 543 "points": 2, 544 "comments": 0, 545 "url": "https://news.ycombinator.com/item?id=36016970" 546 }, 547 { 548 "hn_id": "44686218", 549 "title": "The Heteronomy of Algorithms", 550 "points": 1, 551 "comments": 0, 552 "url": "https://news.ycombinator.com/item?id=44686218" 553 }, 554 { 555 "hn_id": "47380252", 556 "title": "Show HN: Karpathy's Autoresearch with Evolutionary Database", 557 "points": 1, 558 "comments": 0, 559 "url": "https://news.ycombinator.com/item?id=47380252" 560 }, 561 { 562 "hn_id": "40515506", 563 "title": "Evaluating AI-Generated Code for C++, Fortran, Go, Java, Julia, Matlab, etc.", 564 "points": 1, 565 "comments": 2, 566 "url": "https://news.ycombinator.com/item?id=40515506" 567 }, 568 { 569 
"hn_id": "43104988", 570 "title": "Aide: AI-Driven Exploration in the Space of Code (Arxiv)", 571 "points": 1, 572 "comments": 1, 573 "url": "https://news.ycombinator.com/item?id=43104988" 574 } 575 ], 576 "top_points": 48, 577 "total_points": 63, 578 "total_comments": 9 579 } 580 }