scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (30954B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "The Impact of Large Language Models on Open-source Innovation: Evidence from GitHub Copilot",
      6     "authors": [
      7       "Doron Yeverechyahu",
      8       "Raveesh Mayya",
      9       "Gal Oestreicher-Singer"
     10     ],
     11     "year": 2024,
     12     "venue": "International Conference on Interaction Sciences",
     13     "arxiv_id": "2409.08379",
     14     "doi": "10.2139/ssrn.4684662"
     15   },
     16   "checklist": {
     17     "claims_and_evidence": {
     18       "abstract_claims_supported": {
     19         "applies": true,
     20         "answer": true,
     21         "justification": "All major abstract claims are supported: 37.05% Python/54.43% Rust commit increases (Table 3), iterative > capability innovation gap (Tables 4–5), and the widening gap post-upgrade and in high-activity projects (Tables 6–7).",
     22         "source": "haiku"
     23       },
     24       "causal_claims_justified": {
     25         "applies": true,
     26         "answer": true,
     27         "justification": "The natural experiment design (Copilot selectively supporting Python/Rust but not R/Haskell) combined with DiD, PSM, and formal parallel trends testing (Figure 4) provides adequate basis for causal inference under standard identification assumptions.",
     28         "source": "haiku"
     29       },
     30       "generalization_bounded": {
     31         "applies": true,
     32         "answer": false,
     33         "justification": "Section 6.1 generalizes findings to 'other knowledge domains' such as customer service and document retrieval without empirical support from those settings; the limitations section offers caution but doesn't retract the extrapolation in the main discussion.",
     34         "source": "haiku"
     35       },
     36       "alternative_explanations_discussed": {
     37         "applies": true,
     38         "answer": false,
     39         "justification": "The paper attributes the iterative-capability gap to 'contextual information' and 'well-defined solution spaces' without seriously engaging with competing explanations such as ecosystem maturity differences between Python/Rust and R/Haskell, or Copilot's autocomplete UX inherently favoring small edits over new feature development.",
     40         "source": "haiku"
     41       },
     42       "proxy_outcome_distinction": {
     43         "applies": true,
     44         "answer": true,
     45         "justification": "The paper explicitly constructs and theoretically justifies operationalizations: commits with net new function definitions as capability innovation and maintenance commits as iterative innovation, explaining why each proxy captures its construct (Section 3.3).",
     46         "source": "haiku"
     47       }
     48     },
     49     "limitations_and_scope": {
     50       "limitations_section_present": {
     51         "applies": true,
     52         "answer": true,
     53         "justification": "Section 6.3 is a dedicated multi-paragraph limitations section discussing temporal scope, partial adoption, Copilot's limited interactivity compared to current LLMs, and generalization constraints.",
     54         "source": "haiku"
     55       },
     56       "threats_to_validity_specific": {
     57         "applies": true,
     58         "answer": true,
     59         "justification": "Specific threats named include: not all contributors used Copilot (noted as creating a lower-bound estimate), the narrow Oct 2021–Dec 2022 window, and that Copilot lacked modern chat capabilities—these are concrete rather than generic disclaimers.",
     60         "source": "haiku"
     61       },
     62       "scope_boundaries_stated": {
     63         "applies": true,
     64         "answer": true,
     65         "justification": "The authors explicitly state that findings reflect 'initial adoption patterns among early users rather than the full potential of current LLM capabilities' and that 'generalizing these findings to other innovation domains than open-source requires caution.'",
     66         "source": "haiku"
     67       }
     68     },
     69     "conflicts_of_interest": {
     70       "funding_disclosed": {
     71         "applies": true,
     72         "answer": true,
     73         "justification": "EU Horizon 2020 program (#759540) for Oestreicher-Singer and NYU Stern CGEB for Mayya are both acknowledged in the footnote on page 1.",
     74         "source": "haiku"
     75       },
     76       "affiliations_disclosed": {
     77         "applies": true,
     78         "answer": true,
     79         "justification": "All three authors disclose their institutional affiliations (Tel Aviv University and NYU Stern) on the title page; these are independent academic institutions with no direct tie to GitHub or OpenAI.",
     80         "source": "haiku"
     81       },
     82       "funder_independent_of_outcome": {
     83         "applies": true,
     84         "answer": true,
     85         "justification": "EU Horizon 2020 and NYU CGEB are independent of GitHub, Microsoft, and OpenAI and have no financial stake in whether GitHub Copilot is found to boost innovation.",
     86         "source": "haiku"
     87       },
     88       "financial_interests_declared": {
     89         "applies": true,
     90         "answer": false,
     91         "justification": "The paper contains no competing-interests or financial-interests declaration beyond the funding acknowledgments, and no statement that the authors hold no patents, equity, or consulting relationships relevant to the study.",
     92         "source": "haiku"
     93       }
     94     },
     95     "scope_and_framing": {
     96       "key_terms_defined": {
     97         "applies": true,
     98         "answer": true,
     99         "justification": "'Capability innovation' and 'iterative innovation' are formally defined (Section 2.2), positioned against prior constructs in Table 1, and operationalized via function addition analysis and LLM commit classification.",
    100         "source": "haiku"
    101       },
    102       "intended_contribution_clear": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "Section 1 explicitly states three contributions: causal evidence that LLMs boost collaborative innovation, evidence of differential impact on innovation types, and evidence on how the gap evolves with model improvement.",
    106         "source": "haiku"
    107       },
    108       "engagement_with_prior_work": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "Section 2 provides a structured literature review situating the work relative to organizational innovation theory (March 1991; Henderson & Clark 1990; Tushman & Anderson 1986) and recent empirical LLM-productivity work (Brynjolfsson et al. 2025; Noy & Zhang 2023; Song et al. 2024).",
    112         "source": "haiku"
    113       }
    114     }
    115   },
    116   "type_checklist": {
    117     "empirical": {
    118       "artifacts": {
    119         "code_released": {
    120           "applies": true,
    121           "answer": false,
    122           "justification": "No analysis code or function-detection parsers are released; Appendix C describes the logic in prose but no repository link or code release is mentioned.",
    123           "source": "haiku"
    124         },
    125         "data_released": {
    126           "applies": true,
    127           "answer": false,
    128           "justification": "The 1.1 million commit dataset collected via the GitHub API is not released; no data repository is linked and the paper does not mention plans to share the data.",
    129           "source": "haiku"
    130         },
    131         "environment_specified": {
    132           "applies": true,
    133           "answer": false,
    134           "justification": "Table B2 provides GPT-4o endpoint and parameters for the annotation task, but no requirements file, Docker image, or software environment is specified for the DiD analysis pipeline.",
    135           "source": "haiku"
    136         },
    137         "reproduction_instructions": {
    138           "applies": true,
    139           "answer": false,
    140           "justification": "The paper describes methodology in prose but provides no step-by-step instructions sufficient to reproduce the data collection, PSM, or regression analyses without significant guesswork.",
    141           "source": "haiku"
    142         }
    143       },
    144       "statistical_methodology": {
    145         "confidence_intervals_or_error_bars": {
    146           "applies": true,
    147           "answer": true,
    148           "justification": "Standard errors clustered by package are reported in parentheses for all regression tables; Figure 4 explicitly shows 99% confidence interval bars for parallel trends.",
    149           "source": "haiku"
    150         },
    151         "significance_tests": {
    152           "applies": true,
    153           "answer": true,
    154           "justification": "Statistical significance at p<0.001, p<0.01, and p<0.05 levels is reported throughout all regression tables; triple-difference tests (Tables A3, A5) formally test whether differences between innovation types are significant.",
    155           "source": "haiku"
    156         },
    157         "effect_sizes_reported": {
    158           "applies": true,
    159           "answer": true,
    160           "justification": "Percentage increases relative to pre-treatment means are consistently reported alongside absolute coefficients (e.g., 37.05% for Python, 54.43% for Rust) providing meaningful effect size context.",
    161           "source": "haiku"
    162         },
    163         "sample_size_justified": {
    164           "applies": true,
    165           "answer": false,
    166           "justification": "Sample sizes (1,187 Python/R pairs; 1,373 Rust/Haskell pairs) are determined by data availability and nearest-neighbor PSM rather than a power analysis or pre-specified sample size justification.",
    167           "source": "haiku"
    168         },
    169         "variance_reported": {
    170           "applies": true,
    171           "answer": true,
    172           "justification": "Standard errors are reported for all main estimates; Jackknife variance estimation is used for the Synthetic DiD models and explicitly noted in table footnotes.",
    173           "source": "haiku"
    174         }
    175       },
    176       "evaluation_design": {
    177         "baselines_included": {
    178           "applies": true,
    179           "answer": true,
    180           "justification": "R packages serve as the counterfactual baseline for Python, and Haskell packages for Rust; PSM-matched control groups are the explicit comparison condition throughout.",
    181           "source": "haiku"
    182         },
    183         "baselines_contemporary": {
    184           "applies": true,
    185           "answer": true,
    186           "justification": "Control packages are from the same time period (Oct 2019–Dec 2022) and matched on pre-treatment commit activity, making them contemporary and comparable.",
    187           "source": "haiku"
    188         },
    189         "ablation_study": {
    190           "applies": false,
    191           "answer": false,
    192           "justification": "This is an observational natural experiment study, not a system design with modular components; ablation is not applicable.",
    193           "source": "haiku"
    194         },
    195         "multiple_metrics": {
    196           "applies": true,
    197           "answer": true,
    198           "justification": "The study uses four complementary outcome measures: total commit count, commits with/without new function definitions, LLM-classified commit categories (code development vs. maintenance), and package version release counts.",
    199           "source": "haiku"
    200         },
    201         "human_evaluation": {
    202           "applies": true,
    203           "answer": true,
    204           "justification": "Three expert human annotators independently labeled 400 commits (200 Python, 200 R) as ground truth to benchmark LLM classifiers; Cohen's Kappa is reported (Table B1).",
    205           "source": "haiku"
    206         },
    207         "held_out_test_set": {
    208           "applies": false,
    209           "answer": false,
    210           "justification": "This is a causal inference study, not a prediction task; held-out test sets are not applicable.",
    211           "source": "haiku"
    212         },
    213         "per_category_breakdown": {
    214           "applies": true,
    215           "answer": true,
    216           "justification": "Results are broken down by innovation type (capability vs. iterative), language pair (Python/R; Rust/Haskell), project activity level (high vs. low), and pre/post model upgrade period.",
    217           "source": "haiku"
    218         },
    219         "failure_cases_discussed": {
    220           "applies": true,
    221           "answer": true,
    222           "justification": "Section 5.3.1 and Panel B of Table 6 show that for low-activity Rust/Haskell projects, baseline DiD effects are small and non-significant, constituting an explicit null-result subgroup discussion.",
    223           "source": "haiku"
    224         },
    225         "negative_results_reported": {
    226           "applies": true,
    227           "answer": true,
    228           "justification": "The paper reports that in low-activity Rust/Haskell projects the treatment effects are near zero and statistically insignificant (Panel B, Table 6), and the upgrade analysis shows some non-significant code development coefficients (Table 7, Panel A, columns 5–6).",
    229           "source": "haiku"
    230         }
    231       },
    232       "setup_transparency": {
    233         "model_versions_specified": {
    234           "applies": true,
    235           "answer": true,
    236           "justification": "Table B2 specifies the exact model version 'gpt-4o-2024-08-06' used for commit classification along with all inference parameters.",
    237           "source": "haiku"
    238         },
    239         "prompts_provided": {
    240           "applies": true,
    241           "answer": true,
    242           "justification": "Appendix B reproduces the full verbatim prompt used for LLM commit classification including category definitions, output format requirements, and examples.",
    243           "source": "haiku"
    244         },
    245         "hyperparameters_reported": {
    246           "applies": true,
    247           "answer": true,
    248           "justification": "Table B2 reports temperature=0.0, top_p=1.0, frequency_penalty=0.0, presence_penalty=0.0 for the GPT-4o annotation model.",
    249           "source": "haiku"
    250         },
    251         "scaffolding_described": {
    252           "applies": false,
    253           "answer": false,
    254           "justification": "This is an observational study evaluating an existing tool (Copilot); no agentic scaffolding is built or evaluated.",
    255           "source": "haiku"
    256         },
    257         "data_preprocessing_documented": {
    258           "applies": true,
    259           "answer": true,
    260           "justification": "Section 3.2 documents exclusion criteria (packages not updated in 2 years pre-launch), source repositories for each language, PSM matching on pre-treatment commit activity, and quarterly aggregation; Appendix C provides language-specific function detection algorithms.",
    261           "source": "haiku"
    262         }
    263       },
    264       "data_integrity": {
    265         "raw_data_available": {
    266           "applies": true,
    267           "answer": false,
    268           "justification": "The 1.1M+ commit dataset is not released; footnote 5 notes it required 4 months and 0.5 TB of GitHub API downloads, but no public data repository is provided.",
    269           "source": "haiku"
    270         },
    271         "data_collection_described": {
    272           "applies": true,
    273           "answer": true,
    274           "justification": "Section 3.2 describes GitHub's official API as the source, the top-2000 packages per language (with R using full CRAN), exclusion of inactive packages, and the resulting matched sample sizes.",
    275           "source": "haiku"
    276         },
    277         "recruitment_methods_described": {
    278           "applies": false,
    279           "answer": false,
    280           "justification": "Data is collected from public GitHub repositories programmatically; no participant recruitment is involved.",
    281           "source": "haiku"
    282         },
    283         "data_pipeline_documented": {
    284           "applies": true,
    285           "answer": true,
    286           "justification": "The full pipeline from GitHub API collection → inactive package exclusion → PSM matching → quarterly aggregation → DiD estimation is documented across Sections 3.2, 4.1, and Appendices B and C, though without executable code.",
    287           "source": "haiku"
    288         }
    289       },
    290       "contamination": {
    291         "training_cutoff_stated": {
    292           "applies": false,
    293           "answer": false,
    294           "justification": "The paper evaluates Copilot's impact on developer behavior, not model capabilities on benchmarks; contamination in the standard sense is not applicable.",
    295           "source": "haiku"
    296         },
    297         "train_test_overlap_discussed": {
    298           "applies": false,
    299           "answer": false,
    300           "justification": "Not applicable; the study is a causal field study of platform adoption, not a benchmark evaluation.",
    301           "source": "haiku"
    302         },
    303         "benchmark_contamination_addressed": {
    304           "applies": false,
    305           "answer": false,
    306           "justification": "No model capability benchmarking is performed; contamination is not a relevant threat to this study design.",
    307           "source": "haiku"
    308         }
    309       },
    310       "human_studies": {
    311         "pre_registered": {
    312           "applies": false,
    313           "answer": false,
    314           "justification": "No human subjects study; the annotation validation uses research assistants but is not a primary human subjects experiment requiring pre-registration.",
    315           "source": "haiku"
    316         },
    317         "irb_or_ethics_approval": {
    318           "applies": false,
    319           "answer": false,
    320           "justification": "No human participants in the main study; GitHub data is public.",
    321           "source": "haiku"
    322         },
    323         "demographics_reported": {
    324           "applies": false,
    325           "answer": false,
    326           "justification": "No human participants.",
    327           "source": "haiku"
    328         },
    329         "inclusion_exclusion_criteria": {
    330           "applies": false,
    331           "answer": false,
    332           "justification": "No human participants.",
    333           "source": "haiku"
    334         },
    335         "randomization_described": {
    336           "applies": false,
    337           "answer": false,
    338           "justification": "No human participants.",
    339           "source": "haiku"
    340         },
    341         "blinding_described": {
    342           "applies": false,
    343           "answer": false,
    344           "justification": "No human participants.",
    345           "source": "haiku"
    346         },
    347         "attrition_reported": {
    348           "applies": false,
    349           "answer": false,
    350           "justification": "No human participants.",
    351           "source": "haiku"
    352         }
    353       },
    354       "cost_and_practicality": {
    355         "inference_cost_reported": {
    356           "applies": true,
    357           "answer": false,
    358           "justification": "Token counts are reported (638M input, 60M output tokens for GPT-4o) but the actual dollar cost is not stated; the 4+ month API data collection effort is noted but likewise not given a dollar cost.",
    359           "source": "haiku"
    360         },
    361         "compute_budget_stated": {
    362           "applies": true,
    363           "answer": false,
    364           "justification": "No total computational budget is stated; the 0.5 TB GitHub download and 4-month collection period give a rough sense of scale but are not a formal compute budget.",
    365           "source": "haiku"
    366         }
    367       }
    368     }
    369   },
    370   "claims": [
    371     {
    372       "claim": "GitHub Copilot increased commit volume by 37.05% for Python packages relative to matched R packages, and by 54.43% for Rust relative to matched Haskell packages.",
    373       "evidence": "Table 3 TWFE estimates: 6.816 commits/quarter for Python/R (p<0.001) and 5.583 for Rust/Haskell (p<0.001), relative to pre-treatment means of 18.395 and 10.258 respectively.",
    374       "supported": "strong"
    375     },
    376     {
    377       "claim": "Copilot disproportionately increased iterative innovation (maintenance commits, no new functions) over capability innovation (code development, new function additions) in absolute terms.",
    378       "evidence": "Tables 4 and 5: non-function-adding commits increase 6.016 vs 0.800 for function-adding (Python/R); maintenance increases 2.299 vs code development 1.472; patterns replicate across Rust/Haskell and in SDiD models.",
    379       "supported": "strong"
    380     },
    381     {
    382       "claim": "The iterative-capability innovation gap is substantially wider in high-activity projects than low-activity projects.",
    383       "evidence": "Table 6 Panel A: in high-activity Python/R projects the gap between non-function-adding and function-adding commits is 8.679, versus 1.639 in low-activity projects; a similar pattern holds for Rust/Haskell, where effects are near zero in low-activity packages.",
    384       "supported": "strong"
    385     },
    386     {
    387       "claim": "The June 2022 GitHub Copilot model upgrade further widened the gap between iterative and capability innovation rather than narrowing it.",
    388       "evidence": "Table 7: post-upgrade incremental effect on non-function-adding commits (1.762, p<0.05) exceeds that on function-adding commits (0.345, p<0.05) for Python/R; similar direction for Rust/Haskell, though some coefficients are not statistically significant.",
    389       "supported": "moderate"
    390     },
    391     {
    392       "claim": "Copilot increased package version release frequency by 10–37% across language pairs.",
    393       "evidence": "Table A7: coefficient of 0.094 (10.04% increase) for Python/R and 0.069 (37.46% increase) for Rust/Haskell, significant at p<0.05 across all three specifications.",
    394       "supported": "strong"
    395     },
    396     {
    397       "claim": "GPT-4o achieves 'substantial' inter-rater agreement with human annotators (Cohen's Kappa 0.72) for commit classification.",
    398       "evidence": "Table B1: GPT-4o-2024-08-06 achieves 79.5% agreement rate and κ=0.72 compared to three expert human annotators on 400 sampled commits.",
    399       "supported": "strong"
    400     }
    401   ],
    402   "methodology_tags": [
    403     "observational",
    404     "natural-experiment",
    405     "did"
    406   ],
    407   "key_findings": "Using GitHub Copilot's selective October 2021 launch (supporting Python/Rust but not R/Haskell) as a natural experiment, this study finds that LLM coding assistance increased overall commit volume by 37–54% for supported languages. Critically, this boost was disproportionately concentrated in iterative innovation (maintenance commits, commits without new function definitions) rather than capability innovation (feature-introducing commits), with the ratio of iterative to capability gains approximately 7.5:1 in absolute terms for Python/R. The gap was amplified in high-activity projects and after the June 2022 model upgrade, suggesting that as LLMs improve contextual understanding, they increasingly benefit well-defined, context-rich tasks over open-ended creative exploration—with potential long-term implications for the balance of innovation in collaborative open-source communities.",
    408   "red_flags": [
    409     {
    410       "flag": "Commit count ≠ innovation quality",
    411       "detail": "The paper measures volume of commits as a proxy for innovation output but does not address whether more commits, particularly more maintenance commits, translate to meaningful innovation value or project health improvements."
    412     },
    413     {
    414       "flag": "Unknown Copilot adoption rate in treated group",
    415       "detail": "Not all Python/Rust contributors necessarily used Copilot; the paper acknowledges this dilutes effects toward zero (a lower bound), but the unknown adoption rate makes it impossible to estimate the effect on actual Copilot users."
    416     },
    417     {
    418       "flag": "Language ecosystem confound",
    419       "detail": "Python and Rust may have fundamentally different ratios of maintenance to new feature work relative to R and Haskell for reasons unrelated to Copilot (ecosystem maturity, use-case distributions), which PSM on commit activity alone may not fully address."
    420     },
    421     {
    422       "flag": "GPT-4o trained on the corpus",
    423       "detail": "GPT-4o was used to classify 1.1M commits and its training data almost certainly includes GitHub commit messages from these repositories; potential systematic classification bias from familiarity is not discussed."
    424     },
    425     {
    426       "flag": "Data not reproducible",
    427       "detail": "The dataset required 4 months and ~0.5 TB of GitHub API access to collect; no data release is provided, and GitHub's API rate limits and terms of service make independent replication practically infeasible."
    428     },
    429     {
    430       "flag": "Model upgrade confound",
    431       "detail": "The June 2022 Copilot upgrade coincided with broader adoption growth and potentially other platform changes; the paper treats it as a clean treatment but does not isolate the model quality improvement from concurrent adoption increases."
    432     }
    433   ],
    434   "cited_papers": [
    435     {
    436       "title": "Generative AI at Work",
    437       "relevance": "Key productivity benchmark study (Brynjolfsson, Li, Raymond 2025) that this paper extends from guided organizational to voluntary collaborative settings."
    438     },
    439     {
    440       "title": "Experimental evidence on the productivity effects of generative artificial intelligence",
    441       "relevance": "Noy and Zhang 2023 RCT on writing productivity; primary comparison for controlled vs. field study design."
    442     },
    443     {
    444       "title": "The impact of generative AI on collaborative open-source software development: Evidence from GitHub Copilot",
    445       "relevance": "Song et al. 2024 concurrent work on Copilot in open source; directly compared in literature review."
    446     },
    447     {
    448       "title": "Generative AI and the Nature of Work",
    449       "relevance": "Hoffmann et al. 2024 on open-source productivity; cited as not distinguishing capability vs. iterative innovation types."
    450     },
    451     {
    452       "title": "The impact of AI on developer productivity: Evidence from GitHub Copilot",
    453       "relevance": "Peng et al. 2023 RCT showing individual productivity gains; framed as a capability-task setting, in contrast to this paper's iterative-task findings."
    454     },
    455     {
    456       "title": "Synthetic Difference-in-Differences",
    457       "relevance": "Arkhangelsky et al. 2021 — the modern estimation framework used as the alternative identification strategy throughout."
    458     },
    459     {
    460       "title": "Generative artificial intelligence enhances individual creativity but reduces the collective diversity of novel content",
    461       "relevance": "Doshi and Houser 2024 on creative tasks; cited for the mixed evidence on LLMs in open-ended contexts, supporting the iterative-capability distinction."
    462     },
    463     {
    464       "title": "The consequences of generative AI for online knowledge communities",
    465       "relevance": "Burtch et al. 2024 — contrasting finding of declining knowledge exchange on UGC platforms vs. this paper's increased contributions in open source."
    466     },
    467     {
    468       "title": "Exploration and exploitation in organizational learning",
    469       "relevance": "March 1991 — foundational theory grounding the capability/iterative framework's relationship to exploration/exploitation."
    470     },
    471     {
    472       "title": "From Human Annotation to LLMs: SILICON Annotation Workflow for Management Research",
    473       "relevance": "Cheng, Mayya, Sedoc 2024 — the annotation guideline methodology this paper follows for LLM-based commit classification."
    474     }
    475   ],
    476   "engagement_factors": {
    477     "practical_relevance": {
    478       "score": 3,
    479       "justification": "Directly actionable for platform designers, open-source maintainers, and organizations adopting Copilot — quantifies real productivity effects and identifies which tasks benefit most."
    480     },
    481     "surprise_contrarian": {
    482       "score": 2,
    483       "justification": "The finding that LLMs boost maintenance far more than new feature development challenges the common narrative of AI as a 'creativity amplifier' and suggests a potential innovation-type imbalance risk."
    484     },
    485     "fear_safety": {
    486       "score": 1,
    487       "justification": "Raises mild concern about LLMs shifting open-source communities toward exploitation-focused activities, potentially reducing architectural innovation over time, but frames it as a policy challenge rather than an existential risk."
    488     },
    489     "drama_conflict": {
    490       "score": 1,
    491       "justification": "Mild tension with prior work showing LLMs help with creative tasks; no major controversy but the maintenance-dominates finding invites debate about long-term innovation health."
    492     },
    493     "demo_ability": {
    494       "score": 0,
    495       "justification": "Historical observational study of 2021–2022 data; nothing for readers to try or interact with."
    496     },
    497     "brand_recognition": {
    498       "score": 2,
    499       "justification": "GitHub Copilot is a widely recognized product; the GitHub/OpenAI branding makes the paper immediately accessible to a broad technical audience."
    500     }
    501   },
    502   "hn_data": {
    503     "threads": [
    504       {
    505         "hn_id": "40975320",
    506         "title": "Large models of what? Mistaking engineering achievements for linguistic agency",
    507         "points": 184,
    508         "comments": 156,
    509         "url": "https://news.ycombinator.com/item?id=40975320",
    510         "created_at": "2024-07-16T10:54:31Z"
    511       },
    512       {
    513         "hn_id": "27364777",
    514         "title": "Event-based backpropagation for exact gradients in spiking neural networks",
    515         "points": 119,
    516         "comments": 37,
    517         "url": "https://news.ycombinator.com/item?id=27364777",
    518         "created_at": "2021-06-02T04:17:14Z"
    519       },
    520       {
    521         "hn_id": "41352091",
    522         "title": "Realistic Synthetic UGC: A Scaffolding Approach to Generating Online Discussions",
    523         "points": 35,
    524         "comments": 6,
    525         "url": "https://news.ycombinator.com/item?id=41352091",
    526         "created_at": "2024-08-25T22:32:32Z"
    527       },
    528       {
    529         "hn_id": "24514218",
    530         "title": "EventProp: Backpropagation for Exact Gradients in Spiking Neural Networks",
    531         "points": 8,
    532         "comments": 1,
    533         "url": "https://news.ycombinator.com/item?id=24514218",
    534         "created_at": "2020-09-18T08:13:29Z"
    535       },
    536       {
    537         "hn_id": "44934611",
    538         "title": "Scientific and technological knowledge grows linearly over time",
    539         "points": 4,
    540         "comments": 0,
    541         "url": "https://news.ycombinator.com/item?id=44934611",
    542         "created_at": "2025-08-17T20:22:11Z"
    543       },
    544       {
    545         "hn_id": "40488641",
    546         "title": "Kolmogorov-Arnold Networks (KANs) for Time Series Analysis",
    547         "points": 3,
    548         "comments": 0,
    549         "url": "https://news.ycombinator.com/item?id=40488641",
    550         "created_at": "2024-05-27T07:54:04Z"
    551       },
    552       {
    553         "hn_id": "41566236",
    554         "title": "The Impact of LLMs on Open-Source Innovation: Evidence from GitHub Copilot",
    555         "points": 2,
    556         "comments": 0,
    557         "url": "https://news.ycombinator.com/item?id=41566236",
    558         "created_at": "2024-09-17T10:35:50Z"
    559       },
    560       {
    561         "hn_id": "39693481",
    562         "title": "Neural Exec: Learning Execution Triggers for Prompt Injection Attacks",
    563         "points": 1,
    564         "comments": 0,
    565         "url": "https://news.ycombinator.com/item?id=39693481",
    566         "created_at": "2024-03-13T16:22:05Z"
    567       },
    568       {
    569         "hn_id": "39469976",
    570         "title": "Seasons: Signal and Energy Aware Sensing on iNtermittent Systems",
    571         "points": 1,
    572         "comments": 0,
    573         "url": "https://news.ycombinator.com/item?id=39469976",
    574         "created_at": "2024-02-22T17:09:07Z"
    575       },
    576       {
    577         "hn_id": "25196609",
    578         "title": "GuessTheMusic: Song Identification from EEG Response",
    579         "points": 1,
    580         "comments": 0,
    581         "url": "https://news.ycombinator.com/item?id=25196609",
    582         "created_at": "2020-11-24T08:58:28Z"
    583       }
    584     ],
    585     "top_points": 184,
    586     "total_points": 358,
    587     "total_comments": 200
    588   }
    589 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs