scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (30898B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Intuition to Evidence: Measuring AI's True Impact on Developer Productivity",
      6     "authors": [
      7       "Anand Kumar",
      8       "Vishal Khare",
      9       "Deepak Sharma",
     10       "Satyam Kumar",
     11       "Vijay Saini",
     12       "Anshul Yadav",
     13       "Sachendra Jain",
     14       "Ankit Rana",
     15       "Pratham Verma",
     16       "Vaibhav Meena",
     17       "Avinash Edubilli"
     18     ],
     19     "year": 2025,
     20     "venue": "arXiv",
     21     "arxiv_id": "2509.19708",
     22     "doi": null
     23   },
     24   "checklist": {
     25     "claims_and_evidence": {
     26       "abstract_claims_supported": {
     27         "applies": true,
     28         "answer": true,
     29         "justification": "All abstract claims (31.8% cycle time reduction, 85% satisfaction for code review, 93% desire to continue, 61% code volume increase for top adopters) are substantiated in the results sections with matching statistics.",
     30         "source": "haiku"
     31       },
     32       "causal_claims_justified": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "The paper makes explicit causal claims ('causal attribution of productivity gains to AI tool utilization', 'productivity benefits are directly tied to engagement intensity') but the quasi-experimental design cannot rule out self-selection — high adopters likely self-selected due to pre-existing motivation or skill, and the within-subjects comparison conflates adoption effects with temporal/organizational changes.",
     36         "source": "haiku"
     37       },
     38       "generalization_bounded": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "The title 'Measuring AI's True Impact on Developer Productivity' and broad conclusions about AI coding tools providing 'substantial value' extend far beyond a single-organization deployment of one in-house tool, despite a limitations section acknowledging single-org scope.",
     42         "source": "haiku"
     43       },
     44       "alternative_explanations_discussed": {
     45         "applies": true,
     46         "answer": true,
     47         "justification": "Section 4.6 explicitly discusses Hawthorne effects, selection bias, maturation effects, temporal trends, and motivation differences as alternative explanations, with specific controls attempted for each.",
     48         "source": "haiku"
     49       },
     50       "proxy_outcome_distinction": {
     51         "applies": true,
     52         "answer": false,
     53         "justification": "The paper equates 'lines of code shipped' with 'developer productivity' without discussing whether LOC is a valid proxy; a 61% LOC increase is presented as a 61% productivity increase with no caveats about code quality or business value of the additional code.",
     54         "source": "haiku"
     55       }
     56     },
     57     "limitations_and_scope": {
     58       "limitations_section_present": {
     59         "applies": true,
     60         "answer": true,
     61         "justification": "Section 7.2 'Limitations' is a dedicated subsection listing four specific limitations: single organization, in-house tool differences vs public tools, 1-year observation period, and Hawthorne effects.",
     62         "source": "haiku"
     63       },
     64       "threats_to_validity_specific": {
     65         "applies": true,
     66         "answer": true,
     67         "justification": "Section 4.6 discusses specific threats including selection bias (addressed via propensity score matching), Hawthorne effects (mitigated by not informing participants), maturation effects (controlled via extended baseline), and external validity bounded to similar organizational structures.",
     68         "source": "haiku"
     69       },
     70       "scope_boundaries_stated": {
     71         "applies": true,
     72         "answer": true,
     73         "justification": "The limitations explicitly state 'single-organisation study may limit generalisability' and 'our in-house AI system may have characteristics that differ from publicly available tools', with scope bounded to 'similar organizational structures, team sizes, and technology stacks'.",
     74         "source": "haiku"
     75       }
     76     },
     77     "conflicts_of_interest": {
     78       "funding_disclosed": {
     79         "applies": true,
     80         "answer": false,
     81         "justification": "No funding source is disclosed anywhere in the paper; there is no acknowledgment or funding statement.",
     82         "source": "haiku"
     83       },
     84       "affiliations_disclosed": {
     85         "applies": true,
     86         "answer": true,
     87         "justification": "All authors' email addresses end in @1mg.com, clearly identifying them as employees of the company whose in-house tool (DeputyDev) is being evaluated.",
     88         "source": "haiku"
     89       },
     90       "funder_independent_of_outcome": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "The authors are employees of the organization that built and deployed DeputyDev; the employer has a direct financial interest in demonstrating positive ROI for its own tool.",
     94         "source": "haiku"
     95       },
     96       "financial_interests_declared": {
     97         "applies": true,
     98         "answer": false,
     99         "justification": "No competing interests statement, patent disclosures, or equity declarations are present anywhere in the paper.",
    100         "source": "haiku"
    101       }
    102     },
    103     "scope_and_framing": {
    104       "key_terms_defined": {
    105         "applies": true,
    106         "answer": false,
    107         "justification": "'Productivity' is used throughout without formal definition; the paper operationalizes it as LOC shipped and PR cycle time but never explicitly defines the term or acknowledges these as imperfect proxies for actual software development value.",
    108         "source": "haiku"
    109       },
    110       "intended_contribution_clear": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "The paper explicitly positions itself as 'the first comprehensive study of large-scale, real-world deployment of an AI-assisted development environment in a production setting' with four specific research questions stated in the introduction.",
    114         "source": "haiku"
    115       },
    116       "engagement_with_prior_work": {
    117         "applies": true,
    118         "answer": true,
    119         "justification": "Section 2 contains seven subsections engaging with prior work on AI-assisted code generation, automated code review, industry adoption, benchmarks, and recent empirical studies, positioning this work as addressing the gap in real-world longitudinal deployment studies.",
    120         "source": "haiku"
    121       }
    122     }
    123   },
    124   "type_checklist": {
    125     "empirical": {
    126       "artifacts": {
    127         "code_released": {
    128           "applies": true,
    129           "answer": true,
    130           "justification": "Section 8 links to four GitHub repositories (tata1mg/deputydev-*) containing the DeputyDev implementation; however, no statistical analysis code for the study's productivity metrics is released.",
    131           "source": "haiku"
    132         },
    133         "data_released": {
    134           "applies": true,
    135           "answer": false,
    136           "justification": "No dataset is released; Section 7.3.2 states 'We plan to release anonymised versions of our dataset' as future work, and a promise of future release does not count as released data.",
    137           "source": "haiku"
    138         },
    139         "environment_specified": {
    140           "applies": true,
    141           "answer": false,
    142           "justification": "No requirements.txt, Dockerfile, or dependency specifications are provided for reproducing the analysis or running the tool in a standardized environment.",
    143           "source": "haiku"
    144         },
    145         "reproduction_instructions": {
    146           "applies": true,
    147           "answer": false,
    148           "justification": "No step-by-step reproduction instructions are provided for the productivity analysis; the study involves proprietary organizational data from a private enterprise deployment.",
    149           "source": "haiku"
    150         }
    151       },
    152       "statistical_methodology": {
    153         "confidence_intervals_or_error_bars": {
    154           "applies": true,
    155           "answer": true,
    156           "justification": "Standard deviations are reported for cycle time and review time metrics (e.g., 'mean cycle time of 150.5h (±13.1h)' and '99.6h (±23.7h)'), providing spread information for the main quantitative results.",
    157           "source": "haiku"
    158         },
    159         "significance_tests": {
    160           "applies": true,
    161           "answer": true,
    162           "justification": "Statistical significance tests are reported for main comparative claims: p=0.0018 for cycle time reduction, p=0.0076 for review time reduction, and p<0.001 for code productivity increase in high adoption cohort.",
    163           "source": "haiku"
    164         },
    165         "effect_sizes_reported": {
    166           "applies": true,
    167           "answer": true,
    168           "justification": "Cohen's d is reported for code productivity comparisons (d=1.42 for high adoption cohort, d=-0.31 for low adoption cohort), and percentage improvements with baseline context are provided for all main results.",
    169           "source": "haiku"
    170         },
    171         "sample_size_justified": {
    172           "applies": true,
    173           "answer": false,
    174           "justification": "The 300-engineer study population and 30-engineer cohort sizes are described but no power analysis or formal sample size justification is provided.",
    175           "source": "haiku"
    176         },
    177         "variance_reported": {
    178           "applies": true,
    179           "answer": true,
    180           "justification": "Standard deviations are reported for cycle time and review time metrics; however variance is not consistently reported across all metrics (e.g., code volume figures lack spread measures).",
    181           "source": "haiku"
    182         }
    183       },
    184       "evaluation_design": {
    185         "baselines_included": {
    186           "applies": true,
    187           "answer": true,
    188           "justification": "Within-subjects baseline (Sep 2024 - Feb 2025 pre-deployment period) and between-subjects baseline (low adoption cohort n=30) are both used as comparison points.",
    189           "source": "haiku"
    190         },
    191         "baselines_contemporary": {
    192           "applies": true,
    193           "answer": true,
    194           "justification": "The baseline is the same engineers' pre-deployment performance in the same organizational context, which is the most contemporary and ecologically valid comparison possible for a deployment study.",
    195           "source": "haiku"
    196         },
    197         "ablation_study": {
    198           "applies": false,
    199           "answer": false,
    200           "justification": "This is an observational deployment study of a production system, not a component evaluation; ablation studies are not applicable to this study design.",
    201           "source": "haiku"
    202         },
    203         "multiple_metrics": {
    204           "applies": true,
    205           "answer": true,
    206           "justification": "Multiple metrics are reported: PR cycle time, review time, lines of code shipped, AI acceptance rates, NPS score (34), satisfaction surveys (85%/93%), usage distribution, and ROI cost analysis.",
    207           "source": "haiku"
    208         },
    209         "human_evaluation": {
    210           "applies": true,
    211           "answer": true,
    212           "justification": "Surveys of 228 developers (76% response rate) and NPS surveys of 125 participants directly evaluate developer satisfaction and perceived quality of system outputs.",
    213           "source": "haiku"
    214         },
    215         "held_out_test_set": {
    216           "applies": false,
    217           "answer": false,
    218           "justification": "This is an observational deployment study, not a prediction task; held-out test sets are not applicable.",
    219           "source": "haiku"
    220         },
    221         "per_category_breakdown": {
    222           "applies": true,
    223           "answer": true,
    224           "justification": "Results are broken down by experience level (SDE1/SDE2/SDE3 in Table 3), adoption cohort (high/moderate/low), tool type (code review vs generation), usage category (UI 25.2%, bug fixing 21.8%, etc.), and engineering role (backend/frontend/mobile/QA).",
    225           "source": "haiku"
    226         },
    227         "failure_cases_discussed": {
    228           "applies": true,
    229           "answer": true,
    230           "justification": "Section 6.3 'What Didn't Work' explicitly discusses automatic acceptance failures, over-automation resistance, and one-size-fits-all limitations; the low-adoption cohort's 11.4% decline in code shipped is also prominently reported.",
    231           "source": "haiku"
    232         },
    233         "negative_results_reported": {
    234           "applies": true,
    235           "answer": true,
    236           "justification": "The low adoption cohort showed a statistically non-significant 11.4% decline in code shipped (p=0.08); early acceptance rates started at 4.7%; and infrastructure stability issues limiting code generation adoption are reported.",
    237           "source": "haiku"
    238         }
    239       },
    240       "setup_transparency": {
    241         "model_versions_specified": {
    242           "applies": true,
    243           "answer": true,
    244           "justification": "The PR review system is described as using 'Claude Sonnet 3.7 [45] and 4.0 [46] models' with specific version references; AWS Bedrock is identified as the primary provider (81% of LLM costs).",
    245           "source": "haiku"
    246         },
    247         "prompts_provided": {
    248           "applies": true,
    249           "answer": false,
    250           "justification": "No prompts or system instructions for the six specialized review agents are provided; only their focus areas and available tools are listed in Table 1.",
    251           "source": "haiku"
    252         },
    253         "hyperparameters_reported": {
    254           "applies": true,
    255           "answer": false,
    256           "justification": "No hyperparameters such as temperature, top-p, or context window settings are reported for any of the AI models used in the system.",
    257           "source": "haiku"
    258         },
    259         "scaffolding_described": {
    260           "applies": true,
    261           "answer": true,
    262           "justification": "The multi-agent PR review architecture (six parallel agents with specific tools: File Reader, Path Searcher, Grep, Planner Tool; comment blending engine) and the VSCode extension with Weaviate vector DB chunking are described in Sections 3.1 and 3.2.",
    263           "source": "haiku"
    264         },
    265         "data_preprocessing_documented": {
    266           "applies": true,
    267           "answer": true,
    268           "justification": "Section 4.5 describes the automated data pipeline: Bitbucket/GitHub webhooks for PR metrics, direct DeputyDev instrumentation for generation metrics, and standardized productivity metric calculations using 'consistent algorithms across all measurement periods'.",
    269           "source": "haiku"
    270         }
    271       },
    272       "data_integrity": {
    273         "raw_data_available": {
    274           "applies": true,
    275           "answer": false,
    276           "justification": "No raw data is available; Section 7.3.2 promises future release of anonymized data but provides nothing currently.",
    277           "source": "haiku"
    278         },
    279         "data_collection_described": {
    280           "applies": true,
    281           "answer": true,
    282           "justification": "Section 4.5 describes automated instrumentation via version control webhooks (Bitbucket/GitHub), direct tool instrumentation for generation metrics, and multi-source validation triangulating quantitative metrics, surveys (228 developers), and qualitative interviews (125 engineers).",
    283           "source": "haiku"
    284         },
    285         "recruitment_methods_described": {
    286           "applies": true,
    287           "answer": false,
    288           "justification": "No explicit recruitment criteria or inclusion/exclusion criteria are stated for the 300-engineer population; it is unclear whether this represents all engineers at the organization or a selected subset.",
    289           "source": "haiku"
    290         },
    291         "data_pipeline_documented": {
    292           "applies": true,
    293           "answer": true,
    294           "justification": "Section 4.5 documents the full pipeline from webhook capture through DeputyDev analytics storage to analysis, including the specific metrics extracted at each stage (commit frequency, LOC, PR metrics, suggestion acceptance rates).",
    295           "source": "haiku"
    296         }
    297       },
    298       "contamination": {
    299         "training_cutoff_stated": {
    300           "applies": false,
    301           "answer": false,
    302           "justification": "This is an observational deployment study measuring developer productivity metrics in production; it does not evaluate model capabilities on standard benchmarks, making training cutoff contamination not applicable.",
    303           "source": "haiku"
    304         },
    305         "train_test_overlap_discussed": {
    306           "applies": false,
    307           "answer": false,
    308           "justification": "Not applicable — the paper does not evaluate model performance on any benchmark dataset.",
    309           "source": "haiku"
    310         },
    311         "benchmark_contamination_addressed": {
    312           "applies": false,
    313           "answer": false,
    314           "justification": "Not applicable — the study does not evaluate model capabilities on any standard benchmark.",
    315           "source": "haiku"
    316         }
    317       },
    318       "human_studies": {
    319         "pre_registered": {
    320           "applies": true,
    321           "answer": false,
    322           "justification": "No pre-registration of the study is mentioned anywhere in the paper.",
    323           "source": "haiku"
    324         },
    325         "irb_or_ethics_approval": {
    326           "applies": true,
    327           "answer": false,
    328           "justification": "No IRB or ethics approval is mentioned; notably, the paper states participants 'were not informed that they were under observation' as a Hawthorne effect mitigation, raising ethical concerns that are not addressed.",
    329           "source": "haiku"
    330         },
    331         "demographics_reported": {
    332           "applies": true,
    333           "answer": true,
    334           "justification": "Survey participants' role breakdown is reported (backend 64%, frontend 17%, Android/iOS 14%, QA 5%) and engineers are classified by seniority level (SDE1/SDE2/SDE3) throughout the analysis.",
    335           "source": "haiku"
    336         },
    337         "inclusion_exclusion_criteria": {
    338           "applies": true,
    339           "answer": false,
    340           "justification": "No explicit inclusion or exclusion criteria are stated for the 300-engineer population; it is unclear whether this represents all engineers or a selected subset of the organization.",
    341           "source": "haiku"
    342         },
    343         "randomization_described": {
    344           "applies": false,
    345           "answer": false,
    346           "justification": "The study explicitly uses a quasi-experimental design without randomization, justified by operational constraints; randomization was not part of the design and is therefore not applicable.",
    347           "source": "haiku"
    348         },
    349         "blinding_described": {
    350           "applies": true,
    351           "answer": false,
    352           "justification": "Participants were not told they were being observed (a form of concealment), but no formal blinding of evaluators or analysts is described, and the intervention was not blinded.",
    353           "source": "haiku"
    354         },
    355         "attrition_reported": {
    356           "applies": true,
    357           "answer": false,
    358           "justification": "Engineer attrition over the 1-year study period is not reported; survey non-response (72/300 = 24%) is implicit but not analyzed for differential dropout patterns.",
    359           "source": "haiku"
    360         }
    361       },
    362       "cost_and_practicality": {
    363         "inference_cost_reported": {
    364           "applies": true,
    365           "answer": true,
    366           "justification": "Table 5 provides monthly LLM API costs by provider (Bedrock, OpenAI, Vertex AI) totaling $46,833 over 5 months, with per-engineer monthly cost of $30-34 explicitly calculated.",
    367           "source": "haiku"
    368         },
    369         "compute_budget_stated": {
    370           "applies": true,
    371           "answer": true,
    372           "justification": "5-month total operational cost of $46,833 is reported with annualized projection of ~$112,000, representing '1-2% additional cost to typical engineering costs' for 300 engineers.",
    373           "source": "haiku"
    374         }
    375       }
    376     }
    377   },
    378   "claims": [
    379     {
    380       "claim": "AI-assisted development reduced overall PR review cycle time by 31.8% (cycle time 33.8%, review time 29.8%) with statistical significance",
    381       "evidence": "Cohort 1 baseline mean cycle time 150.5h (±13.1h) vs Cohort 2 99.6h (±23.7h); p=0.0018 for cycle time, p=0.0076 for review time",
    382       "supported": "moderate"
    383     },
    384     {
    385       "claim": "Top 30 DeputyDev users achieved a 61% increase in shipped code volume post-adoption",
    386       "evidence": "High adoption cohort (n=30): 168,676 → 272,191 LOC shipped; p<0.001, Cohen's d=1.42",
    387       "supported": "weak"
    388     },
    389     {
    390       "claim": "Junior engineers (SDE1) benefit most from AI assistance with 77% productivity increase vs ~45% for mid/senior engineers",
    391       "evidence": "Table 3: SDE1 80,492 → 142,354 LOC (77%), SDE2/SDE3 ~45% increase within the high adoption cohort",
    392       "supported": "weak"
    393     },
    394     {
    395       "claim": "Developer satisfaction was high: 85% wanted code review to continue and 93% planned to keep DeputyDev in their workflow",
    396       "evidence": "Survey of 228 engineers (76% response rate): 194/228 (85%) positive on code review continuance, 93% plan to continue overall",
    397       "supported": "moderate"
    398     },
    399     {
    400       "claim": "DeputyDev provides positive ROI at $30-34/engineer/month representing 1-2% additional engineering cost overhead",
    401       "evidence": "5-month total cost $46,833 documented in Table 5; compared against 31.8% cycle time reduction and 61% code volume increase for high adopters",
    402       "supported": "weak"
    403     },
    404     {
    405       "claim": "Low adopters showed an 11.4% decline in shipped code volume, demonstrating that benefits require active engagement",
    406       "evidence": "Low adoption cohort (n=30): 253,332 → 224,282 LOC; p=0.08, Cohen's d=-0.31; only 200 AI-generated lines merged",
    407       "supported": "moderate"
    408     }
    409   ],
    410   "methodology_tags": [
    411     "observational",
    412     "case-study"
    413   ],
    414   "key_findings": "This large-scale enterprise deployment study of DeputyDev (an in-house multi-agent code review + VSCode code generation platform using Claude Sonnet) across 300 engineers over 1 year found statistically significant reductions in PR review cycle time (31.8%, p<0.01) and a 61% increase in code volume for the top 10% of adopters (Cohen's d=1.42). Developer satisfaction was high (85% for code review, 93% wanting to continue), and the system scaled to 376,943 AI interactions/month at $30-34/engineer/month. The key conditional finding is that adoption intensity determines outcomes: low adopters showed an 11.4% decline in code shipped, and junior engineers showed the largest gains (77%) versus senior engineers (45%), suggesting AI tools amplify existing workflows rather than uniformly improving productivity.",
    415   "red_flags": [
    416     {
    417       "flag": "Self-evaluation: authors evaluate own tool",
    418       "detail": "All 11 authors are 1mg.com employees evaluating DeputyDev, their own in-house platform, with no independent third-party validation of the productivity findings."
    419     },
    420     {
    421       "flag": "LOC conflated with productivity",
    422       "detail": "Lines of code shipped is the primary productivity metric; a 61% LOC increase is labeled a '61% productivity increase' without acknowledging LOC can reflect AI-generated boilerplate, reduced quality, or technical debt rather than genuine value."
    423     },
    424     {
    425       "flag": "Self-selection bias in adoption cohorts",
    426       "detail": "High adoption cohort self-selected into heavy tool use; baseline matching and propensity scores cannot eliminate the likelihood that more motivated or capable engineers adopted the tool more, confounding the productivity comparison."
    427     },
    428     {
    429       "flag": "Title overclaims generalizability",
    430       "detail": "Deploying one in-house tool at one Indian healthcare company does not measure 'AI's True Impact on Developer Productivity'; the title makes a universal claim the study cannot support."
    431     },
    432     {
    433       "flag": "Participants unaware of observation — no ethics review",
    434       "detail": "The paper explicitly states participants 'were not informed that they were under observation', but no IRB or ethics approval is mentioned, raising concerns about informed consent for a covert observational study."
    435     },
    436     {
    437       "flag": "No comparison to alternative AI tools",
    438       "detail": "Results are presented without any benchmarking against GitHub Copilot, Cursor, or other publicly available tools, making it impossible to evaluate whether DeputyDev outperforms alternatives."
    439     }
    440   ],
    441   "cited_papers": [
    442     {
    443       "title": "The impact of AI on developer productivity: Evidence from GitHub Copilot",
    444       "relevance": "Key prior work (55.8% faster task completion in controlled study) that this paper contextualizes and claims to extend with real-world longitudinal observational evidence"
    445     },
    446     {
    447       "title": "DeputyDev - AI Powered Developer Assistant: Breaking the Code Review Logjam through Contextual AI to Boost Developer Productivity",
    448       "relevance": "Prior publication from overlapping authors specifically on the code review component, reporting 23.09% PR duration reduction in a double-controlled A/B experiment with 200+ engineers"
    449     },
    450     {
    451       "title": "Measuring the impact of early-2025 AI on experienced open-source developer productivity",
    452       "relevance": "Contemporaneous study finding 19% increase in completion time with AI tools — a contrasting negative result to this paper's positive productivity claims"
    453     },
    454     {
    455       "title": "SWE-bench: Can language models resolve real-world GitHub issues?",
    456       "relevance": "Benchmark-based evaluation paradigm that this paper explicitly contrasts its real-world observational approach against"
    457     },
    458     {
    459       "title": "AI-based Code Review in Practice: A Survey of the Landscape and Directions",
    460       "relevance": "Surveys 16 AI code review tools across 178 repositories, providing broader empirical context for automated code review effectiveness and comment quality"
    461     },
    462     {
    463       "title": "On the use of ChatGPT for code review: Do developers like reviews by ChatGPT?",
    464       "relevance": "Found 30.7% negative developer reactions to ChatGPT code reviews — relevant contrast to this paper's unusually high 85% satisfaction rate"
    465     },
    466     {
    467       "title": "AI-assisted assessment of coding practices in modern code review",
    468       "relevance": "Demonstrates feasibility of end-to-end AI code review with high user acceptance across four programming languages, providing comparison baseline for multi-agent approaches"
    469     }
    470   ],
    471   "engagement_factors": {
    472     "practical_relevance": {
    473       "score": 3,
    474       "justification": "Provides detailed deployment costs (Table 5), adoption curves, per-role breakdowns, and ROI analysis directly applicable to engineering managers evaluating AI tool investments."
    475     },
    476     "surprise_contrarian": {
    477       "score": 1,
    478       "justification": "Confirms the prevailing view that AI tools improve productivity; the finding that low adopters showed a productivity decline is mildly contrarian to 'rising tide' assumptions."
    479     },
    480     "fear_safety": {
    481       "score": 0,
    482       "justification": "No AI safety or risk concerns are raised; the paper is uniformly positive about enterprise AI tool deployment."
    483     },
    484     "drama_conflict": {
    485       "score": 1,
    486       "justification": "The stark divergence between high and low adoption cohorts (61% gain vs 11% decline) creates narrative tension around adoption intensity as the determinative variable."
    487     },
    488     "demo_ability": {
    489       "score": 2,
    490       "justification": "GitHub repositories for the DeputyDev tool are linked in Section 8, allowing practitioners to inspect or potentially adapt the multi-agent review system architecture."
    491     },
    492     "brand_recognition": {
    493       "score": 1,
    494       "justification": "1mg is a recognizable Indian healthcare e-commerce company but not a major AI lab; use of Claude Sonnet 3.7/4.0 and AWS Bedrock adds modest brand recognition."
    495     }
    496   },
    497   "hn_data": {
    498     "threads": [
    499       {
    500         "hn_id": "45114579",
    501         "title": "The wall confronting large language models",
    502         "points": 172,
    503         "comments": 200,
    504         "url": "https://news.ycombinator.com/item?id=45114579",
    505         "created_at": "2025-09-03T11:40:41Z"
    506       },
    507       {
    508         "hn_id": "43890313",
    509         "title": "Your ViT Is Secretly an Image Segmentation Model",
    510         "points": 10,
    511         "comments": 0,
    512         "url": "https://news.ycombinator.com/item?id=43890313",
    513         "created_at": "2025-05-04T22:54:49Z"
    514       },
    515       {
    516         "hn_id": "44407745",
    517         "title": "The Unreasonable Effectiveness of Mathematical Experiments",
    518         "points": 8,
    519         "comments": 0,
    520         "url": "https://news.ycombinator.com/item?id=44407745",
    521         "created_at": "2025-06-28T20:07:21Z"
    522       },
    523       {
    524         "hn_id": "43889722",
    525         "title": "Mega Mass Assembly with JWST: The MIRI EGS Galaxy and AGN Survey",
    526         "points": 6,
    527         "comments": 0,
    528         "url": "https://news.ycombinator.com/item?id=43889722",
    529         "created_at": "2025-05-04T21:26:16Z"
    530       },
    531       {
    532         "hn_id": "44808368",
    533         "title": "The wall confronting large language models",
    534         "points": 5,
    535         "comments": 1,
    536         "url": "https://news.ycombinator.com/item?id=44808368",
    537         "created_at": "2025-08-06T06:21:39Z"
    538       },
    539       {
    540         "hn_id": "46203378",
    541         "title": "Are most sentences unique? An empirical examination of Chomskyan claims",
    542         "points": 3,
    543         "comments": 0,
    544         "url": "https://news.ycombinator.com/item?id=46203378",
    545         "created_at": "2025-12-09T10:25:50Z"
    546       },
    547       {
    548         "hn_id": "44304578",
    549         "title": "Serving Large Language Models on Huawei CloudMatrix384",
    550         "points": 3,
    551         "comments": 0,
    552         "url": "https://news.ycombinator.com/item?id=44304578",
    553         "created_at": "2025-06-17T22:18:43Z"
    554       },
    555       {
    556         "hn_id": "43318708",
    557         "title": "MAML: Towards a Faster Web in Developing Regions",
    558         "points": 2,
    559         "comments": 2,
    560         "url": "https://news.ycombinator.com/item?id=43318708",
    561         "created_at": "2025-03-10T10:03:48Z"
    562       },
    563       {
    564         "hn_id": "46445614",
    565         "title": "Mechanical non-reciprocity programmed by shear jamming in soft composite solids",
    566         "points": 2,
    567         "comments": 0,
    568         "url": "https://news.ycombinator.com/item?id=46445614",
    569         "created_at": "2025-12-31T16:32:15Z"
    570       },
    571       {
    572         "hn_id": "46205110",
    573         "title": "Not Minds, but Signs: Reframing LLMs Through Semiotics [pdf]",
    574         "points": 1,
    575         "comments": 0,
    576         "url": "https://news.ycombinator.com/item?id=46205110",
    577         "created_at": "2025-12-09T14:13:43Z"
    578       }
    579     ],
    580     "top_points": 172,
    581     "total_points": 212,
    582     "total_comments": 203
    583   }
    584 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs