scan-v5.json (27811B)
1 { 2 "scan_version": 5, 3 "paper_type": "empirical", 4 "paper": { 5 "title": "Detecting and Correcting Hallucinations in LLM-Generated Code via Deterministic AST Analysis", 6 "authors": [ 7 "Dipin Khati", 8 "Daniel Rodriguez-Cardenas", 9 "Paul Pantzer", 10 "Denys Poshyvanyk" 11 ], 12 "year": 2026, 13 "venue": "FORGE '26 (IEEE/ACM Third International Conference on AI Foundation Models and Software Engineering)", 14 "arxiv_id": "2601.19106", 15 "doi": "10.1145/3793655.3793725" 16 }, 17 "checklist": { 18 "claims_and_evidence": { 19 "abstract_claims_supported": { 20 "applies": true, 21 "answer": true, 22 "justification": "Abstract claims are supported: KCHs are explained with examples, existing mitigations are discussed in §1, and empirical results match stated performance (100% precision, 87.6% recall, 77.0% fix rate).", 23 "source": "haiku" 24 }, 25 "causal_claims_justified": { 26 "applies": false, 27 "answer": false, 28 "justification": "The paper makes no causal claims about mechanisms ('X causes Y'); it demonstrates detection/correction works empirically but does not claim to explain why hallucinations occur or why the deterministic approach succeeds mechanistically.", 29 "source": "haiku" 30 }, 31 "generalization_bounded": { 32 "applies": true, 33 "answer": true, 34 "justification": "The paper explicitly bounds scope: 'limited to five Python libraries', 'single-file, function-level analysis', and 'error distribution may not reflect real-world prevalence', while noting potential extension to other languages with AST support.", 35 "source": "haiku" 36 }, 37 "alternative_explanations_discussed": { 38 "applies": true, 39 "answer": false, 40 "justification": "The paper compares against existing approaches (constrained decoding, LLM-in-the-loop, deletion-based repair) but does not discuss alternative explanations for why their results hold (e.g., whether high precision is due to dataset properties, or whether 100% is inflated by easy cases).", 41 "source": "haiku" 42 
}, 43 "proxy_outcome_distinction": { 44 "applies": true, 45 "answer": false, 46 "justification": "The paper claims to measure 'fix accuracy' as 'functionally correct, runnable code' but the evaluation is non-executing. No mechanism for validating that corrected code is actually correct is described (e.g., no ground truth comparison, no human review, no execution).", 47 "source": "haiku" 48 } 49 }, 50 "limitations_and_scope": { 51 "limitations_section_present": { 52 "applies": true, 53 "answer": true, 54 "justification": "Section 4 'Discussion and Future Work' includes a dedicated limitations paragraph acknowledging dataset size, library scope, and architectural constraints.", 55 "source": "haiku" 56 }, 57 "threats_to_validity_specific": { 58 "applies": true, 59 "answer": true, 60 "justification": "Specific threats are stated: '200-sample dataset is not exhaustive', 'Knowledge Base limited to five Python libraries', 'single-file analysis does not handle multi-module dataflows', and approach 'does not attempt to solve multi-line logical errors'.", 61 "source": "haiku" 62 }, 63 "scope_boundaries_stated": { 64 "applies": true, 65 "answer": true, 66 "justification": "Explicit scope boundaries: targets KCHs only (API + identifier conflicts), evaluated on Python snippets, limited to five libraries, single-file function-level analysis, and not addressing complex multi-line logical errors.", 67 "source": "haiku" 68 } 69 }, 70 "conflicts_of_interest": { 71 "funding_disclosed": { 72 "applies": true, 73 "answer": false, 74 "justification": "No funding source disclosed in abstract, body, or visible acknowledgments section.", 75 "source": "haiku" 76 }, 77 "affiliations_disclosed": { 78 "applies": true, 79 "answer": true, 80 "justification": "All four authors list affiliation with William & Mary, and no evaluated product is developed by the authors or institution, so conflict is minimal.", 81 "source": "haiku" 82 }, 83 "funder_independent_of_outcome": { 84 "applies": false, 85 
"answer": false, 86 "justification": "No funder disclosed; NA.", 87 "source": "haiku" 88 }, 89 "financial_interests_declared": { 90 "applies": true, 91 "answer": false, 92 "justification": "No competing interests statement or declaration of patents, equity, or consulting relationships provided.", 93 "source": "haiku" 94 } 95 }, 96 "scope_and_framing": { 97 "key_terms_defined": { 98 "applies": true, 99 "answer": true, 100 "justification": "Key terms are defined: KCHs as 'code that flat-out contradicts the established, factual knowledge of a programming language or its libraries', AST parsing, and 'Dynamic Knowledge Base' via introspection are explained.", 101 "source": "haiku" 102 }, 103 "intended_contribution_clear": { 104 "applies": true, 105 "answer": true, 106 "justification": "Contribution is explicit: a deterministic post-processing framework for detecting AND correcting (not just deleting) KCHs in LLM code, positioned against prevention, LLM-in-the-loop, and deletion approaches.", 107 "source": "haiku" 108 }, 109 "engagement_with_prior_work": { 110 "applies": true, 111 "answer": true, 112 "justification": "§1 and §5 systematically engage with prior work (taxonomy [11], KCH definition [6], prevention [8,10], LLM-in-the-loop [1,9], deletion [14], type-checkers [5]), positioning the deterministic correction approach as novel.", 113 "source": "haiku" 114 } 115 } 116 }, 117 "type_checklist": { 118 "empirical": { 119 "artifacts": { 120 "code_released": { 121 "applies": true, 122 "answer": true, 123 "justification": "Paper states 'All data, code, and experimental configurations are publicly available in our replication package [3]' linking to https://github.com/WM-SEMERU/Hallucinations-in-Code.", 124 "source": "haiku" 125 }, 126 "data_released": { 127 "applies": true, 128 "answer": true, 129 "justification": "The 200-sample evaluation dataset is stated to be in the replication package alongside code, enabling independent verification.", 130 "source": "haiku" 131 }, 
132 "environment_specified": { 133 "applies": true, 134 "answer": false, 135 "justification": "Python 3 is implied but no requirements.txt, Dockerfile, or dependency specifications provided. No virtual environment or package versions documented.", 136 "source": "haiku" 137 }, 138 "reproduction_instructions": { 139 "applies": true, 140 "answer": false, 141 "justification": "Paper provides high-level methodology but no step-by-step reproduction instructions. Replication package may contain these, but they are not included in the paper itself.", 142 "source": "haiku" 143 } 144 }, 145 "statistical_methodology": { 146 "applies": true, 147 "answer": false, 148 "justification": "No confidence intervals, error bars, or variance measures reported. Precision/recall/F1 are single point estimates without uncertainty quantification.", 149 "source": "haiku" 150 }, 151 "significance_tests": { 152 "applies": true, 153 "answer": false, 154 "justification": "No statistical significance tests, cross-validation, or bootstrapping reported. No p-values or hypothesis testing for comparative claims.", 155 "source": "haiku" 156 }, 157 "effect_sizes_reported": { 158 "applies": true, 159 "answer": true, 160 "justification": "Effect sizes provided: 100% precision, 87.6% recall, F1=0.934, 77% fix accuracy, with per-type and per-library breakdowns (Tables 3–4).", 161 "source": "haiku" 162 }, 163 "sample_size_justified": { 164 "applies": true, 165 "answer": false, 166 "justification": "Sample size (n=200 total, 161 hallucinated, 39 clean) is not justified. No power analysis or rationale provided for why 200 is adequate.", 167 "source": "haiku" 168 }, 169 "variance_reported": { 170 "applies": true, 171 "answer": false, 172 "justification": "Variance/std dev not reported. 
Only single point estimates; no repeated runs or error margins across samples or folds.", 173 "source": "haiku" 174 }, 175 "evaluation_design": { 176 "baselines_included": { 177 "applies": true, 178 "answer": false, 179 "justification": "No empirical baselines compared. PICARD, Synchromesh, LLM-in-the-loop, and Structural Trimming are discussed but not experimentally evaluated.", 180 "source": "haiku" 181 }, 182 "baselines_contemporary": { 183 "applies": false, 184 "answer": false, 185 "justification": "NA—no baselines included.", 186 "source": "haiku" 187 }, 188 "ablation_study": { 189 "applies": true, 190 "answer": false, 191 "justification": "No ablation study. The system has four components (AST parsing, KB construction, validation, correction) but no variant testing (e.g., KB vs no KB).", 192 "source": "haiku" 193 }, 194 "multiple_metrics": { 195 "applies": true, 196 "answer": true, 197 "justification": "Detection metrics (precision, recall, F1), correction accuracy, and per-category breakdowns (Tables 3–4 by type and library) provide multiple evaluation angles.", 198 "source": "haiku" 199 }, 200 "human_evaluation": { 201 "applies": true, 202 "answer": false, 203 "justification": "Dataset is manually curated, but no human evaluation of system outputs or corrected code is reported.", 204 "source": "haiku" 205 }, 206 "held_out_test_set": { 207 "applies": false, 208 "answer": false, 209 "justification": "Not a prediction task; no train/test split or held-out evaluation strategy described.", 210 "source": "haiku" 211 }, 212 "per_category_breakdown": { 213 "applies": true, 214 "answer": true, 215 "justification": "Table 3 breaks down by hallucination type (Missing Imports, Mis-typed API, Contextual Mismatches); Table 4 by library (numpy, pandas, matplotlib, json, requests).", 216 "source": "haiku" 217 }, 218 "failure_cases_discussed": { 219 "applies": true, 220 "answer": true, 221 "justification": "Manual analysis of 37 failed cases (20 false negatives, 17 failed 
corrections) is discussed, revealing matplotlib.pyplot struggles and pandas correction weakness (56.2% vs 97.9% for imports).", 222 "source": "haiku" 223 }, 224 "negative_results_reported": { 225 "applies": true, 226 "answer": true, 227 "justification": "Lower performance on contextual mismatches (33.3% detect, 0% correct) and pandas (56.2% correction) is transparently reported, along with discussion of limitations.", 228 "source": "haiku" 229 } 230 }, 231 "setup_transparency": { 232 "applies": true, 233 "answer": false, 234 "justification": "Dataset generation via 'GPT-5 with task-oriented instructions' is mentioned but no actual prompts, model version (snapshot), or hyperparameters (temperature, top-p) provided for reproducibility.", 235 "source": "haiku" 236 }, 237 "model_versions_specified": { 238 "applies": true, 239 "answer": false, 240 "justification": "'GPT-5' is named but no API version, snapshot date, or configuration parameters given.", 241 "source": "haiku" 242 }, 243 "prompts_provided": { 244 "applies": true, 245 "answer": false, 246 "justification": "No actual prompts or instructions provided for GPT-5 data generation; only high-level description 'task-oriented instructions for five target libraries'.", 247 "source": "haiku" 248 }, 249 "hyperparameters_reported": { 250 "applies": true, 251 "answer": false, 252 "justification": "No temperature, top-p, max_tokens, or other sampling parameters reported for GPT-5. 
No hyperparameters for the framework itself (O(n·m) complexity is noted but no tuning parameters).", 253 "source": "haiku" 254 }, 255 "scaffolding_described": { 256 "applies": false, 257 "answer": false, 258 "justification": "NA—the framework is deterministic static analysis, not an agent with scaffolding.", 259 "source": "haiku" 260 }, 261 "data_preprocessing_documented": { 262 "applies": true, 263 "answer": true, 264 "justification": "Dataset construction is documented: curated to contain 161 hallucinated samples in three categories (Mis-typed APIs, Missing imports, Contextual mismatches) and 39 clean samples from five libraries.", 265 "source": "haiku" 266 }, 267 "data_integrity": { 268 "raw_data_available": { 269 "applies": true, 270 "answer": true, 271 "justification": "Paper claims 'All data, code, and experimental configurations are publicly available in our replication package [3]' on GitHub.", 272 "source": "haiku" 273 }, 274 "data_collection_described": { 275 "applies": true, 276 "answer": true, 277 "justification": "Data collection via GPT-5 prompting is described; dataset composition (161 hallucinated, 39 clean) and categories are documented.", 278 "source": "haiku" 279 }, 280 "recruitment_methods_described": { 281 "applies": false, 282 "answer": false, 283 "justification": "NA—no human subjects; synthetic dataset from LLM prompting.", 284 "source": "haiku" 285 }, 286 "data_pipeline_documented": { 287 "applies": true, 288 "answer": true, 289 "justification": "Framework pipeline is well-documented in §2: Static Analysis → Dynamic KB → Deterministic Validation → Automated Correction, with each component explained.", 290 "source": "haiku" 291 } 292 }, 293 "contamination": { 294 "training_cutoff_stated": { 295 "applies": false, 296 "answer": false, 297 "justification": "NA—paper does not evaluate pre-trained models on benchmarks; it tests a deterministic tool on a synthetic dataset.", 298 "source": "haiku" 299 }, 300 "train_test_overlap_discussed": { 301 
"applies": false, 302 "answer": false, 303 "justification": "NA—same as above.", 304 "source": "haiku" 305 }, 306 "benchmark_contamination_addressed": { 307 "applies": false, 308 "answer": false, 309 "justification": "NA—same as above.", 310 "source": "haiku" 311 } 312 }, 313 "human_studies": { 314 "pre_registered": { 315 "applies": false, 316 "answer": false, 317 "justification": "NA—no human subjects.", 318 "source": "haiku" 319 }, 320 "irb_or_ethics_approval": { 321 "applies": false, 322 "answer": false, 323 "justification": "NA—no human subjects.", 324 "source": "haiku" 325 }, 326 "demographics_reported": { 327 "applies": false, 328 "answer": false, 329 "justification": "NA—no human subjects.", 330 "source": "haiku" 331 }, 332 "inclusion_exclusion_criteria": { 333 "applies": false, 334 "answer": false, 335 "justification": "NA—no human subjects.", 336 "source": "haiku" 337 }, 338 "randomization_described": { 339 "applies": false, 340 "answer": false, 341 "justification": "NA—no human subjects.", 342 "source": "haiku" 343 }, 344 "blinding_described": { 345 "applies": false, 346 "answer": false, 347 "justification": "NA—no human subjects.", 348 "source": "haiku" 349 }, 350 "attrition_reported": { 351 "applies": false, 352 "answer": false, 353 "justification": "NA—no human subjects.", 354 "source": "haiku" 355 } 356 }, 357 "cost_and_practicality": { 358 "inference_cost_reported": { 359 "applies": true, 360 "answer": true, 361 "justification": "Runtime is reported: 'end-to-end analysis of all 200 samples completed in under 0.2 seconds on a single laptop CPU', demonstrating practical efficiency.", 362 "source": "haiku" 363 }, 364 "compute_budget_stated": { 365 "applies": true, 366 "answer": false, 367 "justification": "No compute budget stated for dataset generation (GPT-5 API costs) or evaluation infrastructure.", 368 "source": "haiku" 369 } 370 } 371 } 372 }, 373 "claims": [ 374 { 375 "claim": "Large Language Models frequently produce Knowledge Conflicting 
Hallucinations (KCHs)—semantic errors like non-existent API parameters that evade linters and cause runtime failures.", 376 "evidence": "Examples given (pd.read_exel), cited prior work [11, 12, 6], but not quantified in this paper.", 377 "supported": "moderate" 378 }, 379 { 380 "claim": "Constrained decoding methods (PICARD, Synchromesh) fail to catch semantic errors because they only enforce syntactic validity.", 381 "evidence": "Discussed in §1 and §5; no empirical comparison provided.", 382 "supported": "weak" 383 }, 384 { 385 "claim": "A deterministic static-analysis framework using AST parsing and library introspection can detect KCHs with 100% precision (zero false positives).", 386 "evidence": "Table 1: 141 TP, 0 FP out of 200 samples, achieving 100% precision.", 387 "supported": "strong" 388 }, 389 { 390 "claim": "The framework achieves 87.6% recall in KCH detection, identifying 141 of 161 hallucinated samples.", 391 "evidence": "Table 1: 141 TP, 20 FN, F1=0.934.", 392 "supported": "strong" 393 }, 394 { 395 "claim": "The framework can automatically correct 77% of detected hallucinations, producing functionally correct code.", 396 "evidence": "Table 2: 124 of 161 hallucinated samples corrected (77.0%); since only 141 samples were detected, this is 124 of 141 detections (about 87.9%).
However, no validation method is described (code is not executed; no ground truth comparison stated).", 397 "supported": "weak" 398 }, 399 { 400 "claim": "Performance varies significantly by hallucination type: Missing Imports (97.9% detect, 97.9% correct), Mis-typed APIs (84.5% detect, 70.0% correct), Contextual Mismatches (33.3% detect, 0% correct).", 401 "evidence": "Table 3 provides detailed breakdown by type.", 402 "supported": "strong" 403 }, 404 { 405 "claim": "The deterministic approach is computationally efficient, analyzing all 200 samples in under 0.2 seconds on a laptop CPU.", 406 "evidence": "Stated in §2.5 and §4.", 407 "supported": "strong" 408 }, 409 { 410 "claim": "The framework is a viable alternative to non-deterministic LLM-in-the-loop repair.", 411 "evidence": "Discussed in §1 and §4 as a discussion point, but not empirically compared.", 412 "supported": "weak" 413 } 414 ], 415 "methodology_tags": [ 416 "benchmark-eval" 417 ], 418 "key_findings": "A deterministic, static-analysis framework leveraging Abstract Syntax Trees and library introspection via dynamic knowledge base construction can detect Knowledge Conflicting Hallucinations (KCHs) in LLM-generated Python code with 100% precision and 87.6% recall (F1=0.934), automatically correcting 77% of all hallucinated samples (124 of 161). Performance varies by error type: Missing Imports are highly recoverable (97.9% detect, 97.9% correct), Mis-typed APIs moderate (84.5% detect, 70.0% correct), and Contextual Mismatches poorly handled (33.3% detect, 0% correct), suggesting that semantic-intent errors remain intractable for simple string-matching approaches.
The framework runs efficiently in <0.2 seconds for 200 samples, but evaluation is limited to 200 manually-curated samples across five Python libraries, raising questions about real-world prevalence and generalizability.", 419 "red_flags": [ 420 { 421 "flag": "No empirical baseline comparison", 422 "detail": "Claims superiority over PICARD, Synchromesh, LLM-in-the-loop repair, and mypy but provides no direct experimental comparison. Comparisons are only qualitative discussion." 423 }, 424 { 425 "flag": "Small, manually curated dataset may not reflect real-world error distribution", 426 "detail": "200 samples (161 hallucinated, 39 clean) is acknowledged as potentially biased. Authors note 'error distribution may not reflect real-world prevalence'." 427 }, 428 { 429 "flag": "Correction verification method not stated", 430 "detail": "Paper claims 'fix accuracy' by measuring 'functionally correct, runnable code' but the approach is explicitly non-executing. No ground truth comparison, human review, or execution validation described." 431 }, 432 { 433 "flag": "Limited generalizability", 434 "detail": "Evaluation restricted to Python; Knowledge Base limited to five libraries (numpy, pandas, requests, matplotlib, json). Claim of generalizability to Java/TypeScript is speculative." 435 }, 436 { 437 "flag": "No confidence intervals or statistical testing", 438 "detail": "Single point estimates for precision, recall, F1 without uncertainty quantification, confidence intervals, or cross-validation." 439 }, 440 { 441 "flag": "GPT-5 dataset generation not reproducible", 442 "detail": "Prompts, model version (snapshot date), temperature, and hyperparameters for GPT-5 not provided. Cannot regenerate the evaluation dataset independently." 443 }, 444 { 445 "flag": "Contextual Mismatches nearly undetectable", 446 "detail": "Only 3 samples (1.5% of dataset); 33.3% detection, 0% correction. This critical category is under-represented and handled poorly." 
447 }, 448 { 449 "flag": "Pandas performance significantly lower", 450 "detail": "Pandas achieves only 56.2% correction accuracy vs 93.8% for numpy and 93.9% for requests, but no analysis of why or how to improve." 451 }, 452 { 453 "flag": "No human evaluation of corrections", 454 "detail": "Corrected code samples not reviewed by developers or automated validators to confirm functional correctness." 455 }, 456 { 457 "flag": "Missing environment and reproduction specifications", 458 "detail": "No requirements.txt, Dockerfile, or dependency versions provided. Replication package may exist but paper itself lacks these details." 459 } 460 ], 461 "cited_papers": [ 462 { 463 "title": "Exploring and Evaluating Hallucinations in LLM-Powered Code Generation", 464 "authors": "Liu, F., Liu, Y., Shi, L., et al.", 465 "arxiv_id": "2404.00971", 466 "year": 2024, 467 "relevance": "Defines KCH (Knowledge Conflicting Hallucinations) taxonomy and benchmarks; foundational reference for this paper's problem statement." 468 }, 469 { 470 "title": "Bugs in Large Language Models Generated Code: An Empirical Study", 471 "authors": "Tambon, F., Moradi Dakhel, A., et al.", 472 "arxiv_id": "2403.08937", 473 "year": 2024, 474 "relevance": "Early taxonomy of LLM code generation bugs; establishes prevalence of hallucinations in the field." 475 }, 476 { 477 "title": "Hallucination by Code Generation LLMs: Taxonomy, Benchmarks, Mitigation, and Challenges", 478 "authors": "Lee, Y., Song, J. Y., Kim, D., et al.", 479 "arxiv_id": "2504.20799", 480 "year": 2025, 481 "relevance": "Comprehensive survey of hallucination types and mitigation strategies; directly relevant to positioning this work." 
482 }, 483 { 484 "title": "The Impact of AI on Developer Productivity: Evidence from GitHub Copilot", 485 "authors": "Peng, S., Kalliamvakou, E., Cihon, P., Demirer, M.", 486 "arxiv_id": "2302.06590", 487 "year": 2023, 488 "relevance": "Establishes productivity gains from LLM code generation; motivates the need for hallucination mitigation." 489 }, 490 { 491 "title": "Synchromesh: Reliable code generation from pre-trained language models", 492 "authors": "Poesia, G., Polozov, O., Le, V., et al.", 493 "arxiv_id": "2201.11227", 494 "year": 2022, 495 "relevance": "Constrained decoding approach for code generation; example of prevention strategy that misses semantic errors." 496 }, 497 { 498 "title": "PICARD: Parsing Incrementally for Constrained Auto-Regressive Decoding from Language Models", 499 "authors": "Scholak, T., Schucher, N., Bahdanau, D.", 500 "year": 2021, 501 "relevance": "Foundational constrained decoding method for grammar enforcement; shown to miss KCHs." 502 }, 503 { 504 "title": "Static Analysis as a Feedback Loop: Enhancing LLM-Generated Code Beyond Correctness", 505 "authors": "Blyth, S., Licorish, S. A., Treude, C., Wagner, M.", 506 "arxiv_id": "2508.14419", 507 "year": 2025, 508 "relevance": "LLM-in-the-loop repair strategy; represents non-deterministic approach this paper positions against." 509 }, 510 { 511 "title": "Cutting the Root of Hallucination: Structural Trimming for Vulnerability Mitigation in Code LLMs", 512 "authors": "Zhang, Y.", 513 "year": 2025, 514 "relevance": "AST-based deletion approach for safety; represents deletion-based mitigation that this paper extends toward correction." 515 } 516 ], 517 "engagement_factors": { 518 "practical_relevance": { 519 "score": 2, 520 "justification": "Tool could be integrated into IDEs for real-time code-generation validation, directly useful for practitioners, but limited scope (5 libraries, single-file) reduces immediate applicability." 
521 }, 522 "surprise_contrarian": { 523 "score": 1, 524 "justification": "Using static analysis for code correctness is well-established (mypy, linters); applying it post-hoc to LLM hallucinations is incremental rather than conceptually novel." 525 }, 526 "fear_safety": { 527 "score": 2, 528 "justification": "Addresses a real safety concern (LLM-generated code causing runtime failures), positioning deterministic checking as a trust-building mechanism for AI-assisted development." 529 }, 530 "drama_conflict": { 531 "score": 0, 532 "justification": "Straightforward technical paper with no controversy, competing claims, or dramatic narrative elements." 533 }, 534 "demo_ability": { 535 "score": 1, 536 "justification": "Code is open-source on GitHub and can be demoed locally, but requires setup (library introspection); not immediately web-demoable or friction-free." 537 }, 538 "brand_recognition": { 539 "score": 1, 540 "justification": "William & Mary's SEMERU Lab is respected in software engineering research but not a top-tier AI lab; lead author Dipin Khati not widely known in the field." 541 } 542 }, 543 "hn_data": { 544 "threads": [ 545 { 546 "hn_id": "46885582", 547 "title": "Who's in Charge? Disempowerment Patterns in Real-World LLM Usage", 548 "points": 3, 549 "comments": 0, 550 "url": "https://news.ycombinator.com/item?id=46885582", 551 "created_at": "2026-02-04T13:28:17Z" 552 }, 553 { 554 "hn_id": "47119379", 555 "title": "Who's in Charge? Disempowerment Patterns in Real-World LLM Usage", 556 "points": 2, 557 "comments": 1, 558 "url": "https://news.ycombinator.com/item?id=47119379", 559 "created_at": "2026-02-23T08:01:55Z" 560 }, 561 { 562 "hn_id": "46811142", 563 "title": "Anthropic: Who's in Charge? 
Disempowerment Patterns in Real-World LLM Usage", 564 "points": 2, 565 "comments": 1, 566 "url": "https://news.ycombinator.com/item?id=46811142", 567 "created_at": "2026-01-29T15:04:00Z" 568 }, 569 { 570 "hn_id": "47477667", 571 "title": "TinyTorch: Building Machine Learning Systems from First Principles", 572 "points": 2, 573 "comments": 0, 574 "url": "https://news.ycombinator.com/item?id=47477667", 575 "created_at": "2026-03-22T14:03:42Z" 576 } 577 ], 578 "top_points": 3, 579 "total_points": 9, 580 "total_comments": 2 581 } 582 }