scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (30040B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Speed at the Cost of Quality: How Cursor AI Increases Short-Term Velocity and Long-Term Complexity in Open-Source Projects",
      6     "authors": [
      7       "Hao He",
      8       "Courtney Miller",
      9       "Shyam Agarwal",
     10       "Christian Kästner",
     11       "Bogdan Vasilescu"
     12     ],
     13     "year": 2026,
     14     "venue": "MSR '26",
     15     "arxiv_id": "2511.04427",
     16     "doi": "10.1145/3793302.3793349"
     17   },
     18   "checklist": {
     19     "claims_and_evidence": {
     20       "abstract_claims_supported": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "All abstract claims (transient velocity gain, persistent quality degradation, GMM-identified velocity-quality feedback cycle) are directly supported by Table 2, Figure 3, and Table 3 with pre-trend tests passing.",
     24         "source": "haiku"
     25       },
     26       "causal_claims_justified": {
     27         "applies": true,
     28         "answer": true,
     29         "justification": "The Borusyak et al. staggered DiD estimator with propensity score matching and pre-trend tests is an appropriate quasi-experimental design; paper is transparent about ITT interpretation and the Callaway & Sant'Anna estimator disagreement on quality outcomes.",
     30         "source": "haiku"
     31       },
     32       "generalization_bounded": {
     33         "applies": true,
     34         "answer": true,
     35         "justification": "Results are explicitly bounded to observable Cursor adoption in open-source GitHub repos dominated by TypeScript/Python/JavaScript during mid-2024 to mid-2025; Section 5.1.3 specifically discusses why enterprise findings may differ substantially.",
     36         "source": "haiku"
     37       },
     38       "alternative_explanations_discussed": {
     39         "applies": true,
     40         "answer": true,
     41         "justification": "Section 5.1.1 discusses excitement-frustration-abandonment cycle; Section 5.1.2 discusses velocity-driven codebase growth as mechanism for quality decline; robustness checks rule out confounds from other AI tools, repo inactivity, and selection bias.",
     42         "source": "haiku"
     43       },
     44       "proxy_outcome_distinction": {
     45         "applies": true,
     46         "answer": true,
     47         "justification": "Paper explicitly labels 'lines added' and 'commits' as velocity proxies with 'moderate-to-strong correlation with perceived productivity,' and states that static analysis warnings are 'estimates of the effort required to review potential issues' rather than confirmed defects.",
     48         "source": "haiku"
     49       }
     50     },
     51     "limitations_and_scope": {
     52       "limitations_section_present": {
     53         "applies": true,
     54         "answer": true,
     55         "justification": "Section 3.5 'Limitations and Threats to Validity' has two subsections (Internal Validity, External Validity) spanning over a full page with five specific internal threats identified.",
     56         "source": "haiku"
     57       },
     58       "threats_to_validity_specific": {
     59         "applies": true,
     60         "answer": true,
     61         "justification": "Specific threats include: adoption proxy bias (only repos committing .cursorrules), unknown usage intensity (ITT effects only), model and version heterogeneity, imperfect propensity score matching, and contamination from other AI coding tools.",
     62         "source": "haiku"
     63       },
     64       "scope_boundaries_stated": {
     65         "applies": true,
     66         "answer": true,
     67         "justification": "Results are explicitly interpreted as impact of systematic Cursor adoption relative to 'current state-of-the-practice' (not versus no-AI baseline), bounded to open-source repos with observable adoption, and limited to the specific study period when tools were rapidly evolving.",
     68         "source": "haiku"
     69       }
     70     },
     71     "conflicts_of_interest": {
     72       "funding_disclosed": {
     73         "applies": true,
     74         "answer": true,
     75         "justification": "Acknowledgments disclose NSF grants 2206859, DGE2140739, 2317168, 2120323; research awards from Google and the Digital Infrastructure Fund; Google Cloud credits for BigQuery analysis.",
     76         "source": "haiku"
     77       },
     78       "affiliations_disclosed": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "All five authors are listed as Carnegie Mellon University; no author affiliation with Cursor/Anysphere or any competing tool vendor.",
     82         "source": "haiku"
     83       },
     84       "funder_independent_of_outcome": {
     85         "applies": true,
     86         "answer": true,
     87         "justification": "NSF is clearly independent; Google provides cloud credits but the study finds negative results for a competing product (Cursor, not Google's tools), and authors are not Google employees.",
     88         "source": "haiku"
     89       },
     90       "financial_interests_declared": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "No explicit competing interests declaration appears in the paper; standard funding acknowledgment is provided but no formal 'no competing interests' statement.",
     94         "source": "haiku"
     95       }
     96     },
     97     "scope_and_framing": {
     98       "key_terms_defined": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "Key terms defined include: Cursor's agentic capabilities vs. prior completion tools (Section 3.1.1), development velocity metrics with citations, SonarQube cognitive complexity definition (ref [32]), and DiD estimation targets ATT and ATTh with formal mathematical definitions.",
    102         "source": "haiku"
    103       },
    104       "intended_contribution_clear": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "'Our contribution is two-fold': (1) the first project-level DiD analysis of productivity gains from a modern agentic coding assistant; (2) the first comprehensive analysis of the code quality impact of adopting an LLM agent assistant.",
    108         "source": "haiku"
    109       },
    110       "engagement_with_prior_work": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "Section 2 substantively engages with Copilot productivity RCTs, field experiments, and observational studies; directly positions against Becker et al. (contradicting finding) and Watanabe et al. (PR-level vs. project-level scope), explaining methodological differences.",
    114         "source": "haiku"
    115       }
    116     }
    117   },
    118   "type_checklist": {
    119     "empirical": {
    120       "artifacts": {
    121         "code_released": {
    122           "applies": true,
    123           "answer": true,
    124           "justification": "Data Availability section states 'We provide a replication package for this paper at: https://doi.org/10.5281/zenodo.18368661'.",
    125           "source": "haiku"
    126         },
    127         "data_released": {
    128           "applies": true,
    129           "answer": true,
    130           "justification": "A replication package is provided via a Zenodo DOI; the underlying GHArchive data is publicly accessible and the package presumably includes processed datasets.",
    131           "source": "haiku"
    132         },
    133         "environment_specified": {
    134           "applies": true,
    135           "answer": false,
    136           "justification": "Paper mentions 'a local SonarQube Community server' and GHArchive/BigQuery but provides no requirements.txt, Dockerfile, R package versions, or pinned dependency specification.",
    137           "source": "haiku"
    138         },
    139         "reproduction_instructions": {
    140           "applies": true,
    141           "answer": true,
    142           "justification": "Replication package at zenodo (10.5281/zenodo.18368661) is provided; by MSR convention such packages include README files with reproduction steps.",
    143           "source": "haiku"
    144         }
    145       },
    146       "statistical_methodology": {
    147         "confidence_intervals_or_error_bars": {
    148           "applies": true,
    149           "answer": true,
    150           "justification": "Table 2 reports standard errors for all ATT estimates with ± percentage bounds; Figure 3 shows confidence bands on all event-study plots for all five outcomes.",
    151           "source": "haiku"
    152         },
    153         "significance_tests": {
    154           "applies": true,
    155           "answer": true,
    156           "justification": "Heteroscedasticity- and cluster-robust Wald tests for pre-trend hypothesis testing; significance levels on all ATT estimates in Table 2; Sargan and AR(1)/AR(2) tests for GMM validity in Table 3.",
    157           "source": "haiku"
    158         },
    159         "effect_sizes_reported": {
    160           "applies": true,
    161           "answer": true,
    162           "justification": "Table 2 reports percentage changes with confidence bounds (e.g., +28.58% ±13.7% lines added, +41.64% ±7.62% code complexity) computed from log-transformed ATT estimates via 100(e^ATT - 1)%.",
    163           "source": "haiku"
    164         },
    165         "sample_size_justified": {
    166           "applies": true,
    167           "answer": false,
    168           "justification": "No formal power analysis; the 806 treated repos is determined by GitHub search results, and 1:3 matching ratio is justified by control diversity concerns rather than statistical power calculations.",
    169           "source": "haiku"
    170         },
    171         "variance_reported": {
    172           "applies": true,
    173           "answer": true,
    174           "justification": "Standard errors reported for all ATT estimates (Table 2), all GMM coefficients (Table 3), and confidence bands appear on all event-study figures.",
    175           "source": "haiku"
    176         }
    177       },
    178       "evaluation_design": {
    179         "baselines_included": {
    180           "applies": true,
    181           "answer": true,
    182           "justification": "1,380 propensity-score-matched never-adopting GitHub repositories serve as the control group throughout all analyses.",
    183           "source": "haiku"
    184         },
    185         "baselines_contemporary": {
    186           "applies": true,
    187           "answer": true,
    188           "justification": "Control repositories are matched from the same observation period (Jan 2024–Aug 2025) on dynamic covariate trajectories, ensuring contemporary comparison.",
    189           "source": "haiku"
    190         },
    191         "ablation_study": {
    192           "applies": false,
    193           "answer": false,
    194           "justification": "Not applicable to this observational DiD study; multiple estimator comparisons (TWFE, Borusyak, Callaway & Sant'Anna) and robustness subsets serve an analogous sensitivity function.",
    195           "source": "haiku"
    196         },
    197         "multiple_metrics": {
    198           "applies": true,
    199           "answer": true,
    200           "justification": "Five outcome metrics used: commits and lines added (velocity), static analysis warnings, duplicate line density, and code complexity (quality).",
    201           "source": "haiku"
    202         },
    203         "human_evaluation": {
    204           "applies": false,
    205           "answer": false,
    206           "justification": "Study measures automated repository metrics; human evaluation of outputs is not applicable to this observational design.",
    207           "source": "haiku"
    208         },
    209         "held_out_test_set": {
    210           "applies": false,
    211           "answer": false,
    212           "justification": "Causal inference study, not a prediction task; train/test split concept does not apply.",
    213           "source": "haiku"
    214         },
    215         "per_category_breakdown": {
    216           "applies": true,
    217           "answer": true,
    218           "justification": "Appendix D breaks down SonarQube warnings by 20 categories pre/post adoption; Appendix C provides breakdowns by programming language (JS/TS, Python, Go) and by Cursor adoption cohort.",
    219           "source": "haiku"
    220         },
    221         "failure_cases_discussed": {
    222           "applies": true,
    223           "answer": true,
    224           "justification": "Section 5.1.1 discusses repos that abandoned Cursor post-adoption (excitement-frustration-abandonment cycle); Section 5.1.2 discusses code complexity increasing even when velocity is controlled.",
    225           "source": "haiku"
    226         },
    227         "negative_results_reported": {
    228           "applies": true,
    229           "answer": true,
    230           "justification": "No significant effect on duplicate line density overall; velocity gains dissipate fully by month 3; Callaway & Sant'Anna yields non-significant negative estimates for quality outcomes, all reported without suppression.",
    231           "source": "haiku"
    232         }
    233       },
    234       "setup_transparency": {
    235         "model_versions_specified": {
    236           "applies": false,
    237           "answer": false,
    238           "justification": "Researchers don't run any LLMs; this is an observational study of repositories using Cursor. Model version heterogeneity is acknowledged as a study limitation.",
    239           "source": "haiku"
    240         },
    241         "prompts_provided": {
    242           "applies": false,
    243           "answer": false,
    244           "justification": "No LLM prompts used by the researchers; this is an observational study of existing repositories.",
    245           "source": "haiku"
    246         },
    247         "hyperparameters_reported": {
    248           "applies": false,
    249           "answer": false,
    250           "justification": "No LLM hyperparameters are used by the researchers.",
    251           "source": "haiku"
    252         },
    253         "scaffolding_described": {
    254           "applies": false,
    255           "answer": false,
    256           "justification": "Researchers study black-box adoption effects of Cursor; no agentic scaffolding is implemented by the research team.",
    257           "source": "haiku"
    258         },
    259         "data_preprocessing_documented": {
    260           "applies": true,
    261           "answer": true,
    262           "justification": "Section 3.1 documents GitHub code search API with adaptive partitioning algorithm, ≥10 star filter, fork exclusion, propensity score logistic regression specification with equation, monthly GHArchive metric collection, SonarQube Community server setup, and log-transformation of all outcomes.",
    263           "source": "haiku"
    264         }
    265       },
    266       "data_integrity": {
    267         "raw_data_available": {
    268           "applies": true,
    269           "answer": true,
    270           "justification": "Replication package on Zenodo (10.5281/zenodo.18368661) is provided; the underlying GHArchive data is publicly accessible for independent verification.",
    271           "source": "haiku"
    272         },
    273         "data_collection_described": {
    274           "applies": true,
    275           "answer": true,
    276           "justification": "Section 3.1 describes GitHub code search API queries with adaptive file-size partitioning, GHArchive monthly time series collection for 800k+ candidate repos per cohort, and SonarQube analysis procedure.",
    277           "source": "haiku"
    278         },
    279         "recruitment_methods_described": {
    280           "applies": false,
    281           "answer": false,
    282           "justification": "No human participants; repositories are the units of analysis selected by algorithmic criteria.",
    283           "source": "haiku"
    284         },
    285         "data_pipeline_documented": {
    286           "applies": true,
    287           "answer": true,
    288           "justification": "Full pipeline documented: identify Cursor-adopting repos via .cursorrules files → filter by stars → collect GHArchive dynamic covariates → propensity score matching per cohort → monthly SonarQube analysis → DiD estimation → GMM panel analysis.",
    289           "source": "haiku"
    290         }
    291       },
    292       "contamination": {
    293         "training_cutoff_stated": {
    294           "applies": false,
    295           "answer": false,
    296           "justification": "Study does not evaluate LLM capabilities on benchmarks; it measures repository-level behavioral effects of Cursor adoption.",
    297           "source": "haiku"
    298         },
    299         "train_test_overlap_discussed": {
    300           "applies": false,
    301           "answer": false,
    302           "justification": "Not applicable; no LLM benchmarking performed.",
    303           "source": "haiku"
    304         },
    305         "benchmark_contamination_addressed": {
    306           "applies": false,
    307           "answer": false,
    308           "justification": "Not applicable; no LLM benchmarking performed.",
    309           "source": "haiku"
    310         }
    311       },
    312       "human_studies": {
    313         "pre_registered": {
    314           "applies": false,
    315           "answer": false,
    316           "justification": "No human participants; study analyzes public GitHub repository data.",
    317           "source": "haiku"
    318         },
    319         "irb_or_ethics_approval": {
    320           "applies": false,
    321           "answer": false,
    322           "justification": "No human participants; study uses public repository data.",
    323           "source": "haiku"
    324         },
    325         "demographics_reported": {
    326           "applies": false,
    327           "answer": false,
    328           "justification": "No human participants.",
    329           "source": "haiku"
    330         },
    331         "inclusion_exclusion_criteria": {
    332           "applies": false,
    333           "answer": false,
    334           "justification": "Human subject criteria not applicable; repository selection criteria are described algorithmically in Section 3.1.",
    335           "source": "haiku"
    336         },
    337         "randomization_described": {
    338           "applies": false,
    339           "answer": false,
    340           "justification": "No human participants; treatment assignment is naturally occurring.",
    341           "source": "haiku"
    342         },
    343         "blinding_described": {
    344           "applies": false,
    345           "answer": false,
    346           "justification": "No human participants.",
    347           "source": "haiku"
    348         },
    349         "attrition_reported": {
    350           "applies": false,
    351           "answer": false,
    352           "justification": "No human participants.",
    353           "source": "haiku"
    354         }
    355       },
    356       "cost_and_practicality": {
    357         "inference_cost_reported": {
    358           "applies": false,
    359           "answer": false,
    360           "justification": "Researchers do not run LLMs; inference costs are borne by the studied repositories' developers and are not measurable in this observational design.",
    361           "source": "haiku"
    362         },
    363         "compute_budget_stated": {
    364           "applies": true,
    365           "answer": false,
    366           "justification": "Google Cloud credits for BigQuery analysis are acknowledged but no specific compute cost or resource budget is reported for the SonarQube analysis pipeline running on 806+ repos.",
    367           "source": "haiku"
    368         }
    369       }
    370     }
    371   },
    372   "claims": [
    373     {
    374       "claim": "Cursor adoption leads to a 281% increase in lines added in the first adoption month, with gains fully dissipating after 2 months",
    375       "evidence": "Table 2 (overall ATT +28.58%) and Figure 3 (ATTh showing large spike at h=0,1 then returning to baseline), consistent across all three DiD estimators",
    376       "supported": "strong"
    377     },
    378     {
    379       "claim": "Static analysis warnings increase persistently by ~30% post-Cursor adoption",
    380       "evidence": "Table 2 (Borusyak: +30.26%), Figure 3 (sustained effect); BUT Callaway & Sant'Anna yields -10.49% non-significant (Table 6, Appendix B), a substantive divergence the paper attributes to small cohort sizes",
    381       "supported": "moderate"
    382     },
    383     {
    384       "claim": "Code complexity increases persistently by ~41% post-Cursor adoption",
    385       "evidence": "Table 2 (Borusyak: +41.64%), Figure 3; Callaway & Sant'Anna yields -3.80% non-significant (Table 6), same estimator divergence applies",
    386       "supported": "moderate"
    387     },
    388     {
    389       "claim": "Accumulated technical debt subsequently reduces future development velocity, creating a self-reinforcing cycle",
    390       "evidence": "Table 3 GMM estimates: code complexity → lines added coefficient -0.718 (p<0.001), static warnings → lines added -0.588 (p<0.001); instruments validated by Sargan p>0.05 and AR(2) p>0.05",
    391       "supported": "moderate"
    392     },
    393     {
    394       "claim": "Cursor adoption causes inherently more complex code beyond what is explained by codebase size growth",
    395       "evidence": "Table 3 GMM model for lines added → code complexity shows Cursor coefficient 0.086 (p<0.001) even controlling for lines of code; interpreted as ~9% baseline complexity increase attributable to Cursor itself",
    396       "supported": "moderate"
    397     },
    398     {
    399       "claim": "Quality degradation effects are amplified, not attenuated, in repositories with more intensive Cursor usage",
    400       "evidence": "Figure 4 Row 1: High Contributor Adoption and Cursor Configuration Changes subsets both show stronger quality effects than the full ITT sample",
    401       "supported": "strong"
    402     }
    403   ],
    404   "methodology_tags": [
    405     "observational"
    406   ],
    407   "key_findings": "A staggered difference-in-differences study of 806 Cursor-adopting open-source GitHub repositories finds that Cursor adoption produces substantial but transient velocity gains (281% increase in lines added in month 1, dissipating fully by month 3) alongside persistent technical debt accumulation (+30% static analysis warnings, +41% code complexity per Borusyak et al. estimator). Panel GMM analysis demonstrates this accumulated debt subsequently suppresses future development velocity, creating a self-reinforcing quality-velocity degradation cycle. Robustness checks confirm quality degradation is amplified in repos with intensive Cursor usage; however, the Callaway & Sant'Anna estimator yields non-significant negative estimates for all quality outcomes, substantially weakening causal confidence in the debt accumulation findings specifically.",
    408   "red_flags": [
    409     {
    410       "flag": "Estimator disagreement on primary quality claims",
    411       "detail": "Callaway & Sant'Anna yields -10.49% (non-significant) for static analysis warnings and -3.80% (non-significant) for code complexity, directly contradicting the Borusyak et al. estimates of +30.26% and +41.64%. The paper attributes this to small per-cohort sample sizes but cannot resolve the disagreement, substantially undermining causal confidence in the quality degradation findings."
    412     },
    413     {
    414       "flag": "Table 2 commits significance inconsistency",
    415       "detail": "Table 2 marks commits ATT=0.0260 with *** (p<0.001) despite SE=0.0429 (t-stat ~0.6) and the paper body stating 'there is no statistically significant effect for the volume of commits'; the *** appears to be a typographical error contradicting the text."
    416     },
    417     {
    418       "flag": "Adoption proxy validity",
    419       "detail": "Treatment is identified only through committed .cursorrules files; developers can and do use Cursor without committing configuration files, creating an ITT design measuring 'systematic adoption' and introducing unknown selection bias toward more process-conscious adopters."
    420     },
    421     {
    422       "flag": "Lines added as AI-era velocity metric",
    423       "detail": "Large increases in lines added may reflect AI-generated boilerplate, scaffolding, or verbose refactoring rather than meaningful feature development, making this proxy especially unreliable precisely in the AI-assisted context being studied — the paper does not address this circularity."
    424     },
    425     {
    426       "flag": "SonarQube metrics unvalidated for AI-generated code",
    427       "detail": "Paper acknowledges 'complexity metrics were designed for human-written code; whether they appropriately penalize AI-generated patterns that are mechanically verifiable yet syntactically complex remains an open question,' undermining the interpretation of the code complexity outcome."
    428     },
    429     {
    430       "flag": "Warning breakdown analysis is non-causal convenience sample",
    431       "detail": "Appendix D (20-category SonarQube breakdown) is explicitly described as a 'convenience sample' due to architectural pipeline limitations preventing precise per-version tracking, and the paper cautions it cannot be used for causal inference — yet it is cited in support of the main narrative."
    432     }
    433   ],
    434   "cited_papers": [
    435     {
    436       "title": "The Impact of AI on Developer Productivity: Evidence from GitHub Copilot",
    437       "relevance": "Primary prior RCT showing 56% task completion speedup from Copilot; key baseline for productivity claims"
    438     },
    439     {
    440       "title": "Measuring the Impact of Early-2025 AI on Experienced Open-Source Developer Productivity",
    441       "relevance": "Directly contrasted: controlled experiment showing Cursor does NOT help experienced OSS developers; complementary finding"
    442     },
    443     {
    444       "title": "The Impact of Large Language Models on Open-source Innovation: Evidence from GitHub Copilot",
    445       "relevance": "Prior observational DiD estimating 17.82% release increase from Copilot; direct methodological predecessor"
    446     },
    447     {
    448       "title": "The Impact of Generative AI on Collaborative Open-Source Software Development: Evidence from GitHub Copilot",
    449       "relevance": "Similar observational design finding 6.5% project-level productivity increase; provides comparison estimate"
    450     },
    451     {
    452       "title": "On the use of agentic coding: An empirical study of pull requests on GitHub",
    453       "relevance": "Studies Claude Code PR acceptance (83.8%) at PR level; this paper explicitly extends to longitudinal project-level effects"
    454     },
    455     {
    456       "title": "Revisiting event-study designs: Robust and efficient estimation",
    457       "relevance": "Methodological foundation: the Borusyak et al. imputation DiD estimator used as primary causal identification strategy"
    458     },
    459     {
    460       "title": "Asleep at the Keyboard? Assessing the Security of GitHub Copilot's Code Contributions",
    461       "relevance": "Benchmark study establishing Copilot security vulnerability concerns; prior work motivating quality dimension analysis"
    462     },
    463     {
    464       "title": "The effects of generative AI on high skilled work: Evidence from three field experiments with software developers",
    465       "relevance": "Field experiments at Microsoft/Accenture/Cisco finding 22-36% productivity increase; enterprise baseline for contrast with open-source findings"
    466     }
    467   ],
    468   "engagement_factors": {
    469     "practical_relevance": {
    470       "score": 3,
    471       "justification": "Directly addresses whether Cursor is worth adopting for development teams, with actionable findings about technical debt accumulation requiring quality-assurance process changes."
    472     },
    473     "surprise_contrarian": {
    474       "score": 3,
    475       "justification": "Empirically challenges the '10x productivity' narrative with evidence of transient gains reversing to baseline plus persistent complexity debt, directly contradicting widespread practitioner enthusiasm."
    476     },
    477     "fear_safety": {
    478       "score": 1,
    479       "justification": "Security warnings modestly increase (+1.98 per repo/month per Table 8) but the paper's focus is technical debt and maintainability, not critical safety risks."
    480     },
    481     "drama_conflict": {
    482       "score": 2,
    483       "justification": "Targets a popular, well-funded product with negative longitudinal findings; internal estimator disagreement creates unresolved methodological tension the paper cannot fully explain."
    484     },
    485     "demo_ability": {
    486       "score": 1,
    487       "justification": "Observational econometric study with no demo artifact; readers cannot readily experience or replicate the findings themselves."
    488     },
    489     "brand_recognition": {
    490       "score": 3,
    491       "justification": "Studies Cursor (most popular AI IDE by adoption metrics cited), authored by CMU team with strong SE credentials (Kästner, Vasilescu), published at MSR '26."
    492     }
    493   },
    494   "hn_data": {
    495     "threads": [
    496       {
    497         "hn_id": "47401734",
    498         "title": "Speed at the cost of quality: Study of use of Cursor AI in open source projects (2025)",
    499         "points": 147,
    500         "comments": 80,
    501         "url": "https://news.ycombinator.com/item?id=47401734",
    502         "created_at": "2026-03-16T17:07:37Z"
    503       },
    504       {
    505         "hn_id": "38283398",
    506         "title": "API-Driven Program Synthesis for Testing Static Typing Implementations",
    507         "points": 35,
    508         "comments": 1,
    509         "url": "https://news.ycombinator.com/item?id=38283398",
    510         "created_at": "2023-11-15T22:19:08Z"
    511       },
    512       {
    513         "hn_id": "45968758",
    514         "title": "Does AI-Assisted Coding Deliver? A Study of Cursor's Impact on Software Projects",
    515         "points": 14,
    516         "comments": 2,
    517         "url": "https://news.ycombinator.com/item?id=45968758",
    518         "created_at": "2025-11-18T16:50:19Z"
    519       },
    520       {
    521         "hn_id": "46730534",
    522         "title": "Does AI-Assisted Coding Deliver? A Study of Cursor on Software Projects",
    523         "points": 2,
    524         "comments": 0,
    525         "url": "https://news.ycombinator.com/item?id=46730534",
    526         "created_at": "2026-01-23T09:54:11Z"
    527       },
    528       {
    529         "hn_id": "46658985",
    530         "title": "Does AI-Assisted Coding Deliver? A Study of Cursor's Impact on Software Projects",
    531         "points": 2,
    532         "comments": 0,
    533         "url": "https://news.ycombinator.com/item?id=46658985",
    534         "created_at": "2026-01-17T15:53:22Z"
    535       },
    536       {
    537         "hn_id": "45998822",
    538         "title": "Does AI-Assisted Coding Deliver? A Difference-in-Differences Study",
    539         "points": 2,
    540         "comments": 0,
    541         "url": "https://news.ycombinator.com/item?id=45998822",
    542         "created_at": "2025-11-20T22:36:21Z"
    543       },
    544       {
    545         "hn_id": "45951387",
    546         "title": "Does AI-Assisted Coding Deliver? A Study of Cursor's Impact on Software Projects",
    547         "points": 2,
    548         "comments": 0,
    549         "url": "https://news.ycombinator.com/item?id=45951387",
    550         "created_at": "2025-11-17T06:57:28Z"
    551       },
    552       {
    553         "hn_id": "42127507",
    554         "title": "UniGAD: Unifying Multi-Level Graph Anomaly Detection",
    555         "points": 2,
    556         "comments": 0,
    557         "url": "https://news.ycombinator.com/item?id=42127507",
    558         "created_at": "2024-11-13T16:32:30Z"
    559       },
    560       {
    561         "hn_id": "46180812",
    562         "title": "Does AI-Assisted Coding Deliver? A Difference-in-Differences Study",
    563         "points": 1,
    564         "comments": 0,
    565         "url": "https://news.ycombinator.com/item?id=46180812",
    566         "created_at": "2025-12-07T10:54:26Z"
    567       },
    568       {
    569         "hn_id": "46070691",
    570         "title": "A Difference-in-Differences Study of Cursor's Impact on Software Projects",
    571         "points": 1,
    572         "comments": 0,
    573         "url": "https://news.ycombinator.com/item?id=46070691",
    574         "created_at": "2025-11-27T16:21:41Z"
    575       }
    576     ],
    577     "top_points": 147,
    578     "total_points": 208,
    579     "total_comments": 83
    580   }
    581 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs