scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (27266B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Are Coding Agents Generating Over-Mocked Tests? An Empirical Study",
      6     "authors": [
      7       "Andre Hora",
      8       "Romain Robbes"
      9     ],
     10     "year": 2026,
     11     "venue": "MSR '26",
     12     "arxiv_id": "2602.00409",
     13     "doi": "10.1145/3793302.3793362"
     14   },
     15   "checklist": {
     16     "claims_and_evidence": {
     17       "abstract_claims_supported": {
     18         "applies": true,
     19         "answer": true,
     20         "justification": "All quantitative claims in the abstract (60%, 23% vs 13%, 68%, 36% vs 26%, 95% mock type concentration) are directly backed by contingency tables and statistical tests in Section 3.",
     21         "source": "haiku"
     22       },
     23       "causal_claims_justified": {
     24         "applies": false,
     25         "answer": false,
     26         "justification": "The paper makes observational association claims ('more likely to') rather than causal claims; the study design is a mining study without intervention and the authors consistently use correlational language throughout.",
     27         "source": "haiku"
     28       },
     29       "generalization_bounded": {
     30         "applies": true,
     31         "answer": true,
     32         "justification": "Section 5 explicitly states 'our findings cannot be directly generalized to repositories written in other languages or using other agents,' bounding scope to Python, JavaScript, and TypeScript in 2025.",
     33         "source": "haiku"
     34       },
     35       "alternative_explanations_discussed": {
     36         "applies": true,
     37         "answer": false,
     38         "justification": "The paper does not discuss key alternative explanations for why agents mock more — e.g., selection effects (agent-adopting repos may have more complex code requiring more mocking) or developer-preference confounds; only the 'easier to generate automatically' hypothesis is briefly proposed without evaluation.",
     39         "source": "haiku"
     40       },
     41       "proxy_outcome_distinction": {
     42         "applies": true,
     43         "answer": true,
     44         "justification": "The paper clearly distinguishes between what is measured (presence of mock identifiers in test commit diffs, validated at 94% precision) and what is claimed (mocking frequency tendencies of coding agents), and does not conflate commit counts with test quality.",
     45         "source": "haiku"
     46       }
     47     },
     48     "limitations_and_scope": {
     49       "limitations_section_present": {
     50         "applies": true,
     51         "answer": true,
     52         "justification": "Section 5 'Threats to Validity' provides a dedicated limitations discussion covering detection precision, agent commit attribution, and generalization.",
     53         "source": "haiku"
     54       },
     55       "threats_to_validity_specific": {
     56         "applies": true,
     57         "answer": true,
     58         "justification": "Specific threats are quantified: 94% precision for mock detection (manually inspected 100 commits across 10 repositories), 100% precision for agent commit detection (500 manually inspected commits), and handling of Co-Authored-By variant casing.",
     59         "source": "haiku"
     60       },
     61       "scope_boundaries_stated": {
     62         "applies": true,
     63         "answer": true,
     64         "justification": "The paper explicitly bounds scope to three languages, three specific coding agents, commits from 2025, and repositories meeting stated criteria (≥100 commits, ≥5,000 non-blank LOC, not forks, recently active).",
     65         "source": "haiku"
     66       }
     67     },
     68     "conflicts_of_interest": {
     69       "funding_disclosed": {
     70         "applies": true,
     71         "answer": true,
     72         "justification": "Acknowledgments disclose funding from CNPq grants (408817/2024-0 and 403304/2025-3), CAPES, FAPEMIG, INES.IA, and the French State/IdEx université de Bordeaux.",
     73         "source": "haiku"
     74       },
     75       "affiliations_disclosed": {
     76         "applies": true,
     77         "answer": true,
     78         "justification": "Author affiliations are clearly stated: Hora at UFMG (Brazil) and Robbes at Univ. Bordeaux, CNRS, Bordeaux INP, LaBRI (France).",
     79         "source": "haiku"
     80       },
     81       "funder_independent_of_outcome": {
     82         "applies": true,
     83         "answer": true,
     84         "justification": "Funding comes from government and academic agencies (CNPq, CAPES, French State) with no affiliation to the coding agent companies (Anthropic, GitHub, Cursor) whose products are studied.",
     85         "source": "haiku"
     86       },
     87       "financial_interests_declared": {
     88         "applies": true,
     89         "answer": false,
     90         "justification": "No competing interests statement or declaration of financial interests (patents, equity, consulting) is included anywhere in the paper.",
     91         "source": "haiku"
     92       }
     93     },
     94     "scope_and_framing": {
     95       "key_terms_defined": {
     96         "applies": true,
     97         "answer": true,
     98         "justification": "Key terms are explicitly defined: 'coding agents' (Section 2.1.1 — autonomous tools that invoke external tools, execute code, and author commits) and 'test doubles/mocks' (Section 2.6 — Meszaros taxonomy: dummy, stub, spy, mock, fake); 'agent commits', 'test commits', and 'mock commits' are operationally defined.",
     99         "source": "haiku"
    100       },
    101       "intended_contribution_clear": {
    102         "applies": true,
    103         "answer": true,
    104         "justification": "Contributions are explicitly stated: '(1) the first empirical study to analyze agent-generated tests in real-world software systems; and (2) multiple actionable implications for practitioners and researchers.'",
    105         "source": "haiku"
    106       },
    107       "engagement_with_prior_work": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "Section 6 engages substantively with prior work on coding agents (Becker et al., Kumar et al., Bouzenia & Pradel), LLM-generated test quality (Alshahwan et al., Ouédraogo et al.), and mocking practices (Spadini et al., Qin 2025), positioning this study as the first to examine mocking in agent-generated code at scale in the wild.",
    111         "source": "haiku"
    112       }
    113     }
    114   },
    115   "type_checklist": {
    116     "empirical": {
    117       "artifacts": {
    118         "code_released": {
    119           "applies": true,
    120           "answer": true,
    121           "justification": "Section 2.7 explicitly states 'Our scripts and dataset are publicly available at: https://doi.org/10.5281/zenodo.17427638.'",
    122           "source": "haiku"
    123         },
    124         "data_released": {
    125           "applies": true,
    126           "answer": true,
    127           "justification": "The dataset (commits and repository metadata) is publicly available on Zenodo at the stated DOI.",
    128           "source": "haiku"
    129         },
    130         "environment_specified": {
    131           "applies": true,
    132           "answer": false,
    133           "justification": "The paper mentions using PyDriller and GitEvo but provides no requirements.txt, Dockerfile, or specific version numbers for any dependency.",
    134           "source": "haiku"
    135         },
    136         "reproduction_instructions": {
    137           "applies": true,
    138           "answer": false,
    139           "justification": "The paper provides a Zenodo link for scripts but includes no step-by-step reproduction instructions; reproducing the pipeline would require inferring the full workflow from the methodology description.",
    140           "source": "haiku"
    141         }
    142       },
    143       "statistical_methodology": {
    144         "confidence_intervals_or_error_bars": {
    145           "applies": true,
    146           "answer": false,
    147           "justification": "No confidence intervals or error bars are reported for any headline percentages (23%, 36%, etc.); only Chi-squared statistics, p-values, and Cliff's delta are provided.",
    148           "source": "haiku"
    149         },
    150         "significance_tests": {
    151           "applies": true,
    152           "answer": true,
    153           "justification": "Chi-squared tests of independence are applied for commit-level analyses in RQ1 and RQ2; paired Wilcoxon tests (a non-parametric choice, with normality checked via Shapiro-Wilk and D'Agostino tests) are used for repository-level comparisons.",
    154           "source": "haiku"
    155         },
    156         "effect_sizes_reported": {
    157           "applies": true,
    158           "answer": true,
    159           "justification": "Cliff's delta effect sizes are reported for both repository-level comparisons: negligible for lower-agentic-activity repositories and small (0.252) for higher-agentic-activity repositories.",
    160           "source": "haiku"
    161         },
    162         "sample_size_justified": {
    163           "applies": true,
    164           "answer": false,
    165           "justification": "No power analysis or principled sample size justification is provided; the sample of 2,168 repositories emerges from SEART selection criteria rather than any prospective sizing calculation.",
    166           "source": "haiku"
    167         },
    168         "variance_reported": {
    169           "applies": true,
    170           "answer": false,
    171           "justification": "Table 10 reports medians for repository-level mock commit ratios but provides no standard deviations, interquartile ranges, or other spread measures for any result.",
    172           "source": "haiku"
    173         }
    174       },
    175       "evaluation_design": {
    176         "baselines_included": {
    177           "applies": true,
    178           "answer": true,
    179           "justification": "Non-agent commits serve as the direct baseline throughout all three RQs, with explicit agent vs. non-agent proportions in every contingency table.",
    180           "source": "haiku"
    181         },
    182         "baselines_contemporary": {
    183           "applies": true,
    184           "answer": true,
    185           "justification": "Non-agent commits are drawn from the same repositories and same time period (2025) as agent commits, making comparisons directly contemporary.",
    186           "source": "haiku"
    187         },
    188         "ablation_study": {
    189           "applies": false,
    190           "answer": false,
    191           "justification": "This is an observational mining study, not a system design paper; ablation analysis is not applicable.",
    192           "source": "haiku"
    193         },
    194         "multiple_metrics": {
    195           "applies": true,
    196           "answer": true,
    197           "justification": "The paper uses commit-level ratios, repository-level proportions, Chi-squared statistics with standardized residuals, Wilcoxon p-values, Cliff's delta, and mock type distribution across all five test double categories.",
    198           "source": "haiku"
    199         },
    200         "human_evaluation": {
    201           "applies": true,
    202           "answer": true,
    203           "justification": "Authors manually inspected 500 agent commits to validate classifier precision (100%) and 100 randomly selected mock commits across 10 repositories to validate mock detection precision (94%).",
    204           "source": "haiku"
    205         },
    206         "held_out_test_set": {
    207           "applies": false,
    208           "answer": false,
    209           "justification": "This is an observational mining study, not a prediction task; held-out test sets are not applicable.",
    210           "source": "haiku"
    211         },
    212         "per_category_breakdown": {
    213           "applies": true,
    214           "answer": true,
    215           "justification": "Results are broken down by programming language (Python vs JS/TS) in Tables 5, 8, and 10, and by individual coding agent (Claude, Copilot, Cursor) in Tables 5 and 8.",
    216           "source": "haiku"
    217         },
    218         "failure_cases_discussed": {
    219           "applies": true,
    220           "answer": false,
    221           "justification": "The browser-use example where agents added mocks despite explicit configuration to the contrary is a descriptive observation, not a systematic discussion of failure modes or when the methodology breaks down.",
    222           "source": "haiku"
    223         },
    224         "negative_results_reported": {
    225           "applies": true,
    226           "answer": true,
    227           "justification": "The paper reports null results: no notable language difference in mock rates (Python 37% vs JS/TS 35%), and negligible Cliff's delta for lower-agentic-activity repositories despite a statistically significant Wilcoxon result.",
    228           "source": "haiku"
    229         }
    230       },
    231       "setup_transparency": {
    232         "model_versions_specified": {
    233           "applies": false,
    234           "answer": false,
    235           "justification": "This is a mining study that does not run LLM inference; the specific versions of Claude Code, Copilot, and Cursor active during studied commits cannot be determined from commit metadata and are not reported.",
    236           "source": "haiku"
    237         },
    238         "prompts_provided": {
    239           "applies": false,
    240           "answer": false,
    241           "justification": "No LLMs are invoked in the authors' analysis pipeline; the study mines existing commit data rather than querying models.",
    242           "source": "haiku"
    243         },
    244         "hyperparameters_reported": {
    245           "applies": false,
    246           "answer": false,
    247           "justification": "No models are run by the authors; hyperparameters are not applicable to a repository mining study.",
    248           "source": "haiku"
    249         },
    250         "scaffolding_described": {
    251           "applies": false,
    252           "answer": false,
    253           "justification": "The authors do not deploy agentic scaffolding; they analyze traces left by existing coding agents in real repositories.",
    254           "source": "haiku"
    255         },
    256         "data_preprocessing_documented": {
    257           "applies": true,
    258           "answer": true,
    259           "justification": "Preprocessing is thoroughly documented: SEART selection criteria (Section 2.2), agent file detection patterns (Table 1), commit author/co-author matching logic (Section 2.4), test file patterns (Table 2), mock identifier detection rules (Section 2.6.1), and mock commit classification (Section 2.6.2) are all fully specified.",
    260           "source": "haiku"
    261         }
    262       },
    263       "data_integrity": {
    264         "raw_data_available": {
    265           "applies": true,
    266           "answer": true,
    267           "justification": "The dataset is publicly available at Zenodo (doi.org/10.5281/zenodo.17427638) as explicitly stated in Section 2.7.",
    268           "source": "haiku"
    269         },
    270         "data_collection_described": {
    271           "applies": true,
    272           "answer": true,
    273           "justification": "Data collection is thoroughly described: SEART tool and selection criteria, filtering from 114,098 to 2,168 repositories, cloning for agent file detection, and commit metadata parsing for all three classification steps.",
    274           "source": "haiku"
    275         },
    276         "recruitment_methods_described": {
    277           "applies": false,
    278           "answer": false,
    279           "justification": "No human participants; this is a repository mining study using automated collection from GitHub via the SEART tool.",
    280           "source": "haiku"
    281         },
    282         "data_pipeline_documented": {
    283           "applies": true,
    284           "answer": true,
    285           "justification": "The full pipeline is documented across Sections 2.2–2.7: SEART selection → language/agent filter → agent commit detection → test commit detection → mock commit detection → RQ analysis, including the tools used (PyDriller, GitEvo).",
    286           "source": "haiku"
    287         }
    288       },
    289       "contamination": {
    290         "training_cutoff_stated": {
    291           "applies": false,
    292           "answer": false,
    293           "justification": "This is not a benchmark evaluation of model capabilities; no models are evaluated on test sets.",
    294           "source": "haiku"
    295         },
    296         "train_test_overlap_discussed": {
    297           "applies": false,
    298           "answer": false,
    299           "justification": "Not applicable; this is a mining study, not a model capability evaluation.",
    300           "source": "haiku"
    301         },
    302         "benchmark_contamination_addressed": {
    303           "applies": false,
    304           "answer": false,
    305           "justification": "Not applicable; no benchmarks are used for model evaluation.",
    306           "source": "haiku"
    307         }
    308       },
    309       "human_studies": {
    310         "pre_registered": {
    311           "applies": false,
    312           "answer": false,
    313           "justification": "No human participants.",
    314           "source": "haiku"
    315         },
    316         "irb_or_ethics_approval": {
    317           "applies": false,
    318           "answer": false,
    319           "justification": "No human participants.",
    320           "source": "haiku"
    321         },
    322         "demographics_reported": {
    323           "applies": false,
    324           "answer": false,
    325           "justification": "No human participants.",
    326           "source": "haiku"
    327         },
    328         "inclusion_exclusion_criteria": {
    329           "applies": false,
    330           "answer": false,
    331           "justification": "No human participants; inclusion/exclusion criteria apply to repositories and are fully documented in Section 2.2.",
    332           "source": "haiku"
    333         },
    334         "randomization_described": {
    335           "applies": false,
    336           "answer": false,
    337           "justification": "No human participants.",
    338           "source": "haiku"
    339         },
    340         "blinding_described": {
    341           "applies": false,
    342           "answer": false,
    343           "justification": "No human participants.",
    344           "source": "haiku"
    345         },
    346         "attrition_reported": {
    347           "applies": false,
    348           "answer": false,
    349           "justification": "No human participants.",
    350           "source": "haiku"
    351         }
    352       },
    353       "cost_and_practicality": {
    354         "inference_cost_reported": {
    355           "applies": false,
    356           "answer": false,
    357           "justification": "No LLM inference is performed by the authors; this is a repository mining study.",
    358           "source": "haiku"
    359         },
    360         "compute_budget_stated": {
    361           "applies": true,
    362           "answer": false,
    363           "justification": "The paper does not report the computational cost of cloning and analyzing 2,168 repositories and 1.2 million commits, which is non-trivial.",
    364           "source": "haiku"
    365         }
    366       }
    367     }
    368   },
    369   "claims": [
    370     {
    371       "claim": "23% of commits made by coding agents add or modify test files, compared with 13% for non-agents",
    372       "evidence": "Table 4: 11,035/48,563 agent commits are test commits vs 158,326/1,206,315 non-agent commits; Chi-squared = 3,683.06, p < 0.001, standardized residual = 55.35",
    373       "supported": "strong"
    374     },
    375     {
    376       "claim": "60% of repositories with agent activity also contain agent test activity",
    377       "evidence": "Table 4 and Section 3.1: 729 out of 1,219 repositories with agent commits also contain agent test commits",
    378       "supported": "strong"
    379     },
    380     {
    381       "claim": "36% of test commits made by coding agents add mocks, compared with 26% for non-agents",
    382       "evidence": "Table 7: 3,934/11,035 agent test commits are mock commits vs 40,966/158,326 non-agent test commits; Chi-squared = 505.5, p < 0.001",
    383       "supported": "strong"
    384     },
    385     {
    386       "claim": "In repositories with higher agentic activity (≥50 agent commits), agents have a significantly higher mock ratio (36%) than non-agents (28%) with small effect size",
    387       "evidence": "Table 10b: Wilcoxon p < 0.001, Cliff's delta = 0.252 across 179 repositories; lower-agentic-activity repos show negligible effect despite statistical significance",
    388       "supported": "moderate"
    389     },
    390     {
    391       "claim": "Coding agents predominantly use the 'mock' type (95%) while non-agents use a wider variety — fake (57%), spy (51%), mock (91%)",
    392       "evidence": "Figure 5: Distribution of mock types across 496 repositories with agent mock activity; agents are concentrated on the generic mock type while non-agents show broader distribution",
    393       "supported": "strong"
    394     },
    395     {
    396       "claim": "Repositories created in 2025 show a higher share of agent test commits (17%) and mock commits (19%) compared to the full dataset (7% and 9%)",
    397       "evidence": "Tables 6 and 9: For 2025-created repos, 4,526/26,654 test commits are agent commits (17%) and 1,529/7,855 mock commits are agent commits (19%)",
    398       "supported": "strong"
    399     },
    400     {
    401       "claim": "Mock-related instructions in agent configuration files are far less common than test instructions, suggesting a guidance gap",
    402       "evidence": "Table 12: GitHub Code Search finds 13k CLAUDE.md files with 'mock' vs 102k with 'test' out of 112k total; causal link between guidance and behavior not established",
    403       "supported": "weak"
    404     }
    405   ],
    406   "methodology_tags": [
    407     "observational",
    408     "case-study"
    409   ],
    410   "key_findings": "Coding agents are significantly more likely to modify test files (23% vs 13% of commits) and add mocks to those tests (36% vs 26%) than non-agent contributors, with both differences statistically significant (p < 0.001) and the mock difference confirmed in a paired within-repository analysis (Cliff's delta = 0.252, small). Agents show markedly less diversity in test double types, relying almost exclusively on the generic 'mock' type (95%) compared to non-agents who also commonly use 'fake' (57%) and 'spy' (51%). The proportion of agent-generated tests and mocks is growing rapidly, accounting for 17–19% of recently created repositories' test/mock commits vs 7–9% overall. The paper finds that mock guidance in agent configuration files (e.g., CLAUDE.md) is uncommon, and agents occasionally add mocks even in repositories that explicitly prohibit it, suggesting configuration-based guidance has limited enforcement.",
    411   "red_flags": [
    412     {
    413       "flag": "Title implies quality judgment not demonstrated",
    414       "detail": "The paper establishes that agents mock more frequently but cannot demonstrate this constitutes 'over-mocking' — no assessment of mock appropriateness, test effectiveness, bug-detection rates, or maintenance cost is included; the normative claim in the title exceeds the observational evidence."
    415     },
    416     {
    417       "flag": "Selection confound not fully addressed",
    418       "detail": "Repositories adopting coding agents may systematically differ in type (newer projects, higher complexity, specific domains) creating selection effects that independently explain higher mocking rates; the paired within-repository analysis partially mitigates this but developer-preference confounds remain (agent-adopting developers may already favor mocking)."
    419     },
    420     {
    421       "flag": "Agent versions not tracked",
    422       "detail": "Specific versions of Claude Code, Copilot, and Cursor active during the studied commits are not identified; since model updates change agent behavior rapidly, findings may not reflect current or future agent behavior."
    423     },
    424     {
    425       "flag": "No confidence intervals on main estimates",
    426       "detail": "All headline percentages (23%, 36%, 95%, etc.) are reported as point estimates without confidence intervals, making precision of the key comparative claims unassessable."
    427     },
    428     {
    429       "flag": "Unknown recall of mock detection method",
    430       "detail": "The identifier-based mock detection is validated only for precision (94%) but not recall; unknown false-negative rate could systematically bias the agent vs. non-agent comparison if agents use different naming conventions than the patterns searched."
    431     }
    432   ],
    433   "cited_papers": [
    434     {
    435       "title": "Promises, Perils, and (Timely) Heuristics for Mining Coding Agent Activity",
    436       "relevance": "Foundational companion paper by same authors establishing the methodology for detecting agent commits via co-authorship metadata in real repositories — directly enables this study"
    437     },
    438     {
    439       "title": "Agentic Much? Adoption of Coding Agents on GitHub",
    440       "relevance": "Companion paper (under submission) measuring overall adoption rates of coding agents on GitHub, providing broader context for this study's scope and agent selection rationale"
    441     },
    442     {
    443       "title": "Measuring the Impact of Early-2025 AI on Experienced Open-Source Developer Productivity",
    444       "relevance": "Becker et al. controlled experiment finding 19% task completion time increase despite 20% perceived productivity gain — key context for evaluating coding agent real-world effectiveness"
    445     },
    446     {
    447       "title": "To Mock or Not to Mock: Divergence in Mocking Practices Between LLM and Developers",
    448       "relevance": "Direct predecessor: Qin 2025 compared GPT-4o mock decisions vs developers in a single system, finding LLMs generate more mocks; this paper scales that finding to real-world agent commits across thousands of repositories"
    449     },
    450     {
    451       "title": "Mock objects for testing Java systems: Why and how developers use them, and how they evolve",
    452       "relevance": "Spadini et al. foundational empirical study of human mocking practices in Java; establishes baseline understanding for comparison with agent behavior"
    453     },
    454     {
    455       "title": "Use of test doubles in Android testing: An in-depth investigation",
    456       "relevance": "Fazzini et al. study whose identifier-based mock detection methodology is directly adapted by this paper for detecting test doubles in commits"
    457     },
    458     {
    459       "title": "Understanding Software Engineering Agents: A Study of Thought-Action-Result Trajectories",
    460       "relevance": "Bouzenia & Pradel study of agent interaction logs from SWE-bench; related characterization of coding agent action patterns in software engineering tasks"
    461     },
    462     {
    463       "title": "The Rise of AI Teammates in Software Engineering: How Autonomous Coding Agents Are Reshaping Software Engineering",
    464       "relevance": "Li et al. survey providing context on the broader adoption and capabilities of coding agents used to motivate the scope of this study"
    465     }
    466   ],
    467   "engagement_factors": {
    468     "practical_relevance": {
    469       "score": 3,
    470       "justification": "Directly actionable for anyone using Claude Code, Copilot, or Cursor — the recommendation to add mock guidance to CLAUDE.md configuration files is immediately applicable and the finding applies to millions of developers."
    471     },
    472     "surprise_contrarian": {
    473       "score": 2,
    474       "justification": "Finding that agents mock at 36% vs 26% and concentrate almost exclusively on the generic 'mock' type (95% vs 91%/57%/51% for non-agents) is a concrete, counterintuitive result about agent behavior that challenges assumptions of quality parity."
    475     },
    476     "fear_safety": {
    477       "score": 1,
    478       "justification": "Tests with excessive mocking may mask integration bugs and allow code to drift from mock contracts, with software reliability implications, but no direct safety or security concerns are raised."
    479     },
    480     "drama_conflict": {
    481       "score": 1,
    482       "justification": "Mild controversy around AI-generated code quality; Kent Beck's LinkedIn quote adds human interest color but the paper is primarily technical without major conflict angles."
    483     },
    484     "demo_ability": {
    485       "score": 1,
    486       "justification": "Scripts and dataset are available on Zenodo, but reproducing requires cloning thousands of GitHub repositories and running analysis scripts — not a quick demo, though practitioners can immediately apply configuration file guidance."
    487     },
    488     "brand_recognition": {
    489       "score": 2,
    490       "justification": "Directly studies Claude Code, GitHub Copilot, and Cursor with data from Microsoft/VS Code, home-assistant/core, and Apache repositories — high brand recognition among software engineering practitioners."
    491     }
    492   },
    493   "hn_data": {
    494     "threads": [],
    495     "top_points": 0,
    496     "total_points": 0,
    497     "total_comments": 0
    498   }
    499 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs