scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (21666B)
      1 {
      2   "paper": {
      3     "title": "Canaries in the Coal Mine? Six Facts about the Recent Employment Effects of Artificial Intelligence",
      4     "authors": ["Erik Brynjolfsson", "Bharat Chandar", "Ruyu Chen"],
      5     "year": 2025,
      6     "venue": "Stanford Digital Economy Lab Working Paper"
      7   },
      8   "checklist": {
      9     "artifacts": {
     10       "code_released": {
     11         "applies": true,
     12         "answer": false,
     13         "justification": "No code repository or archive is mentioned in the paper."
     14       },
     15       "data_released": {
     16         "applies": true,
     17         "answer": false,
     18         "justification": "The ADP payroll data is proprietary administrative data. The authors note they 'are grateful to ADP for access to the data' but do not release it."
     19       },
     20       "environment_specified": {
     21         "applies": true,
     22         "answer": false,
     23         "justification": "No environment specifications, software versions, or dependency information is provided."
     24       },
     25       "reproduction_instructions": {
     26         "applies": true,
     27         "answer": false,
     28         "justification": "No reproduction instructions or scripts are provided. The data is proprietary and no replication package is offered."
     29       }
     30     },
     31     "statistical_methodology": {
     32       "confidence_intervals_or_error_bars": {
     33         "applies": true,
     34         "answer": true,
     35         "justification": "Figure 4 shows Poisson event study coefficients with confidence intervals (shaded bands). The regression results include standard errors clustered by firm (Section 4.4)."
     36       },
     37       "significance_tests": {
     38         "applies": true,
     39         "answer": true,
     40         "justification": "The Poisson regression results in Section 4.4 report statistically significant effects for young workers and note that estimates for other age groups are 'not statistically significant.' Standard errors are clustered by firm."
     41       },
     42       "effect_sizes_reported": {
     43         "applies": true,
     44         "answer": true,
     45         "justification": "Effect sizes are reported with baseline context: '15 log-point decline in relative employment' (Section 4.4), '16% relative employment declines' (abstract), '6% decline... compared to a 6-9% increase for older workers' (Section 4.2)."
     46       },
     47       "sample_size_justified": {
     48         "applies": true,
     49         "answer": false,
     50         "justification": "The sample size is described (3.5-5 million workers per month) but there is no formal power analysis or justification for why this sample is sufficient for the specific age-occupation cells being analyzed."
     51       },
     52       "variance_reported": {
     53         "applies": true,
     54         "answer": false,
     55         "justification": "The paper does not report variance or standard deviation across runs or bootstrapped samples. Results appear to be single estimates from the full dataset; the only spread measures are the firm-clustered standard errors and confidence bands on the regression estimates."
     56       }
     57     },
     58     "evaluation_design": {
     59       "baselines_included": {
     60         "applies": true,
     61         "answer": true,
     62         "justification": "The paper uses less AI-exposed occupations as baselines (quintile 1 is the reference group) and compares young workers against older age groups as controls."
     63       },
     64       "baselines_contemporary": {
     65         "applies": true,
     66         "answer": true,
     67         "justification": "Comparisons are made against contemporaneous employment trends in less-exposed occupations and older workers, using the same time period (2021-2025)."
     68       },
     69       "ablation_study": {
     70         "applies": true,
     71         "answer": true,
     72         "justification": "Section 4.6 presents extensive robustness checks that function as ablations: excluding tech occupations, separating teleworkable vs non-teleworkable jobs, excluding tech firms, splitting by college share, controlling for interest rate exposure."
     73       },
     74       "multiple_metrics": {
     75         "applies": true,
     76         "answer": true,
     77         "justification": "The paper examines both employment counts and compensation (Section 4.5), and uses multiple AI exposure measures (the GPT-4-based beta exposure measure from Eloundou et al. and the Anthropic Economic Index usage/automation/augmentation measures)."
     78       },
     79       "human_evaluation": {
     80         "applies": false,
     81         "answer": false,
     82         "justification": "This is an observational labor economics study using administrative data. Human evaluation of outputs is not applicable."
     83       },
     84       "held_out_test_set": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "This is an observational study, not a predictive modeling exercise. Held-out test sets are not applicable."
     88       },
     89       "per_category_breakdown": {
     90         "applies": true,
     91         "answer": true,
     92         "justification": "Results are broken down by age group (6 groups), AI exposure quintile (5 groups), occupation type, automation vs augmentation, teleworkability, college share, and gender (Figures 1-5, A12-A28)."
     93       },
     94       "failure_cases_discussed": {
     95         "applies": true,
     96         "answer": true,
     97         "justification": "The paper discusses where the pattern does NOT hold: augmentative AI occupations show employment growth, not decline (Fact 3). Compensation shows little divergence (Fact 5). The pattern is weaker for high college-share occupations."
     98       },
     99       "negative_results_reported": {
    100         "applies": true,
    101         "answer": true,
    102         "justification": "Fact 5 reports that compensation does not show the same pattern as employment. Fact 3 shows augmentation quintiles do not exhibit employment declines. Denmark results from Humlum and Vestergaard (2025) showing minimal effects are cited."
    103       }
    104     },
    105     "claims_and_evidence": {
    106       "abstract_claims_supported": {
    107         "applies": true,
    108         "answer": true,
    109         "justification": "The abstract claims (16% relative decline for 22-25 year olds, adjustments via employment not compensation, automation vs augmentation distinction, robustness to excluding tech firms) are all supported by the corresponding figures and regression results in Sections 4.1-4.6."
    110       },
    111       "causal_claims_justified": {
    112         "applies": true,
    113         "answer": false,
    114         "justification": "The paper uses language like 'consistent with the hypothesis that generative AI has begun to affect entry-level employment' and explicitly cautions that 'the facts we document may in part be influenced by factors other than generative AI.' However, the title framing and presentation strongly suggest causation while the identification strategy (exposure quintiles as treatment) cannot rule out confounds beyond firm-time shocks. The authors acknowledge this limitation."
    115       },
    116       "generalization_bounded": {
    117         "applies": true,
    118         "answer": true,
    119         "justification": "The paper is careful to note that ADP data 'does not exactly match the distribution of firms across the broader US economy' (Section 3.1), discusses differences from Denmark results, and frames findings as 'facts' rather than universal conclusions. They explicitly state 'our main estimates may be influenced by factors other than generative AI.'"
    120       },
    121       "alternative_explanations_discussed": {
    122         "applies": true,
    123         "answer": true,
    124         "justification": "Section 4.6 systematically addresses alternative explanations: tech hiring slowdown, remote work/outsourcing, COVID education effects, interest rate exposure, COVID stimulus, and post-pandemic overhiring correction. Each is tested with robustness checks."
    125       }
    126     },
    127     "setup_transparency": {
    128       "model_versions_specified": {
    129         "applies": false,
    130         "answer": false,
    131         "justification": "This paper does not use or evaluate AI models. It studies labor market outcomes using administrative payroll data."
    132       },
    133       "prompts_provided": {
    134         "applies": false,
    135         "answer": false,
    136         "justification": "No prompting is used in this study."
    137       },
    138       "hyperparameters_reported": {
    139         "applies": true,
    140         "answer": true,
    141         "justification": "The Poisson regression specification is fully described in Equation 4.1 with fixed effects structure. Sample restrictions are documented: firms with at least 10 workers per age group per period, at least 100 total observations per quintile. Standard errors clustered by firm."
    142       },
    143       "scaffolding_described": {
    144         "applies": false,
    145         "answer": false,
    146         "justification": "No agentic scaffolding is used in this study."
    147       },
    148       "data_preprocessing_documented": {
    149         "applies": true,
    150         "answer": true,
    151         "justification": "Section 3.1 documents preprocessing: restricting to positive earnings, excluding part-time, under age 70, balanced firm panel (Jan 2021-Sep 2025), excluding workers without job titles (~30%), mapping job titles to SOC codes. Section 3.2 describes the crosswalk for merging exposure measures."
    152       }
    153     },
    154     "limitations_and_scope": {
    155       "limitations_section_present": {
    156         "applies": true,
    157         "answer": false,
    158         "justification": "There is no dedicated limitations or threats-to-validity section. Caveats are scattered throughout (e.g., end of Section 4.1, conclusion) but there is no substantive dedicated discussion."
    159       },
    160       "threats_to_validity_specific": {
    161         "applies": true,
    162         "answer": true,
    163         "justification": "The paper discusses specific threats throughout: ADP sample non-representativeness (Section 3.1, citing Cajner et al. 2018), post-pandemic overhiring correction (Section 4.4), COVID education effects (Section 4.6), interest rate exposure correlation (Section 4.6), tech industry slowdown (Section 4.6)."
    164       },
    165       "scope_boundaries_stated": {
    166         "applies": true,
    167         "answer": true,
    168         "justification": "The paper explicitly states 'the facts we document may in part be influenced by factors other than generative AI' and calls for 'better firm-level AI adoption data' for causal identification. They acknowledge their data represents ADP clients, not the full US economy."
    169       }
    170     },
    171     "data_integrity": {
    172       "raw_data_available": {
    173         "applies": true,
    174         "answer": false,
    175         "justification": "The ADP payroll data is proprietary and not available for independent verification."
    176       },
    177       "data_collection_described": {
    178         "applies": true,
    179         "answer": true,
    180         "justification": "Section 3.1 describes the data source (ADP payroll records), coverage (25+ million US workers), time period (monthly through September 2025), and what is observed (job titles, earnings, age). The AI exposure measures are from published sources (Eloundou et al. 2024, Handa et al. 2025)."
    181       },
    182       "recruitment_methods_described": {
    183         "applies": false,
    184         "answer": false,
    185         "justification": "No human participants were recruited. This is an analysis of administrative payroll records."
    186       },
    187       "data_pipeline_documented": {
    188         "applies": true,
    189         "answer": true,
    190         "justification": "The pipeline from raw ADP data to analysis sample is documented in Section 3.1: restrictions on earnings, employment type, age, firm balance, job title availability. The merge with AI exposure measures via SOC crosswalk is described in Section 3.2."
    191       }
    192     },
    193     "conflicts_of_interest": {
    194       "funding_disclosed": {
    195         "applies": true,
    196         "answer": true,
    197         "justification": "Acknowledgments state: 'We are grateful to ADP for access to the data and the Stanford Digital Economy Lab for financial support.'"
    198       },
    199       "affiliations_disclosed": {
    200         "applies": true,
    201         "answer": true,
    202         "justification": "All three authors are affiliated with Stanford University. Brynjolfsson is also affiliated with NBER. Affiliations are clearly listed."
    203       },
    204       "funder_independent_of_outcome": {
    205         "applies": true,
    206         "answer": false,
    207         "justification": "ADP, a payroll processing company, provided the data and is acknowledged in the paper. ADP could have an interest in research that highlights the value of employment tracking data. The Stanford Digital Economy Lab, which funded the work, is directed by Brynjolfsson, one of the authors."
    208       },
    209       "financial_interests_declared": {
    210         "applies": true,
    211         "answer": false,
    212         "justification": "No competing interests or financial interests statement is present in the paper."
    213       }
    214     },
    215     "contamination": {
    216       "training_cutoff_stated": {
    217         "applies": false,
    218         "answer": false,
    219         "justification": "This paper does not evaluate a pre-trained model on any benchmark. It is an observational labor economics study."
    220       },
    221       "train_test_overlap_discussed": {
    222         "applies": false,
    223         "answer": false,
    224         "justification": "No model evaluation on benchmarks; contamination is not applicable."
    225       },
    226       "benchmark_contamination_addressed": {
    227         "applies": false,
    228         "answer": false,
    229         "justification": "No model evaluation on benchmarks; contamination is not applicable."
    230       }
    231     },
    232     "human_studies": {
    233       "pre_registered": {
    234         "applies": false,
    235         "answer": false,
    236         "justification": "This is an observational study of administrative payroll data, not a human subjects study."
    237       },
    238       "irb_or_ethics_approval": {
    239         "applies": false,
    240         "answer": false,
    241         "justification": "No human participants recruited; administrative data analysis."
    242       },
    243       "demographics_reported": {
    244         "applies": false,
    245         "answer": false,
    246         "justification": "No human participants; administrative data analysis."
    247       },
    248       "inclusion_exclusion_criteria": {
    249         "applies": false,
    250         "answer": false,
    251         "justification": "No human participants; administrative data analysis."
    252       },
    253       "randomization_described": {
    254         "applies": false,
    255         "answer": false,
    256         "justification": "No human participants; observational study."
    257       },
    258       "blinding_described": {
    259         "applies": false,
    260         "answer": false,
    261         "justification": "No human participants; observational study."
    262       },
    263       "attrition_reported": {
    264         "applies": false,
    265         "answer": false,
    266         "justification": "No human participants; administrative data analysis."
    267       }
    268     },
    269     "cost_and_practicality": {
    270       "inference_cost_reported": {
    271         "applies": false,
    272         "answer": false,
    273         "justification": "This is an observational labor economics study, not a method/system paper. Cost reporting is not applicable."
    274       },
    275       "compute_budget_stated": {
    276         "applies": false,
    277         "answer": false,
    278         "justification": "This is an observational labor economics study. Compute budget is not applicable."
    279       }
    280     }
    281   },
    282   "claims": [
    283     {
    284       "claim": "Early-career workers (ages 22-25) in AI-exposed occupations experienced 16% relative employment declines after controlling for firm-level shocks.",
    285       "evidence": "Poisson event study regression with firm-time and firm-quintile fixed effects shows 15 log-point decline for highest exposure quintiles vs lowest, statistically significant (Section 4.4, Figure 4).",
    286       "supported": "strong"
    287     },
    288     {
    289       "claim": "Employment for experienced workers in AI-exposed occupations remained stable or grew.",
    290       "evidence": "Figure 2 and Figure 4 show estimates for older age groups are much smaller in magnitude and not statistically significant.",
    291       "supported": "strong"
    292     },
    293     {
    294       "claim": "Entry-level employment declined in automative AI applications but not augmentative ones.",
    295       "evidence": "Figure 3 using Anthropic Economic Index data shows Panel B (automation) has declining employment for young workers in highest quintile, while Panel C (augmentation) shows no such pattern (Section 4.3).",
    296       "supported": "moderate"
    297     },
    298     {
    299       "claim": "Labor market adjustments occur primarily through employment rather than compensation.",
    300       "evidence": "Figure 5 shows little difference in annual salary trends by age or exposure quintile, in contrast to employment patterns (Section 4.5).",
    301       "supported": "moderate"
    302     },
    303     {
    304       "claim": "Results are robust to excluding technology occupations and firms, controlling for remote work, education composition, and interest rate exposure.",
    305       "evidence": "Section 4.6 presents robustness checks in Figures A12-A28 showing consistent patterns across alternative sample constructions.",
    306       "supported": "strong"
    307     }
    308   ],
    309   "methodology_tags": ["observational"],
    310   "key_findings": "Using ADP payroll data covering millions of US workers monthly through September 2025, early-career workers (ages 22-25) in AI-exposed occupations experienced a 16% relative employment decline after late 2022, while experienced workers' employment remained stable. The decline is concentrated in occupations where AI automates rather than augments work. Adjustments manifest in employment quantities rather than compensation, and results are robust to excluding tech occupations, controlling for remote work, and other alternative explanations.",
    311   "red_flags": [
    312     {
    313       "flag": "Proprietary data prevents replication",
    314       "detail": "The core ADP payroll dataset is proprietary and not available for independent verification. While this is common in labor economics, it means the results cannot be independently replicated."
    315     },
    316     {
    317       "flag": "Potential conflict of interest with Anthropic data",
    318       "detail": "Fact 3 (automation vs augmentation) relies on the Anthropic Economic Index, which classifies AI usage in Claude conversations. The classification of tasks as 'automative' vs 'augmentative' is done by Claude itself, creating a potential circularity and a possible alignment with Anthropic's preferred narrative that AI augments rather than replaces workers."
    319     },
    320     {
    321       "flag": "Causal language despite observational design",
    322       "detail": "While the authors appropriately caveat their findings, the paper's framing (title, structure as 'six facts') and prominent media coverage invite causal interpretation. The identification strategy cannot rule out all confounds, as the authors themselves acknowledge."
    323     }
    324   ],
    325   "cited_papers": [
    326     {
    327       "title": "GPTs are GPTs: Labor market impact potential of LLMs",
    328       "authors": ["Tyna Eloundou", "Sam Manning", "Pamela Mishkin", "Daniel Rock"],
    329       "year": 2024,
    330       "relevance": "Core AI exposure measure used throughout the paper; foundational work on estimating occupational AI exposure."
    331     },
    332     {
    333       "title": "Which Economic Tasks are Performed with AI? Evidence from Millions of Claude Conversations",
    334       "authors": ["Kunal Handa", "Alex Tamkin"],
    335       "year": 2025,
    336       "arxiv_id": "2503.04761",
    337       "relevance": "Provides the Anthropic Economic Index used for automation vs augmentation classification of AI usage by occupation."
    338     },
    339     {
    340       "title": "Generative AI at Work",
    341       "authors": ["Erik Brynjolfsson", "Danielle Li", "Lindsey Raymond"],
    342       "year": 2025,
    343       "relevance": "Firm-level evidence on generative AI effects on worker productivity, published in QJE."
    344     },
    345     {
    346       "title": "The Short-Term Effects of Generative Artificial Intelligence on Employment: Evidence from an Online Labor Market",
    347       "authors": ["Xiang Hui", "Oren Reshef", "Luofeng Zhou"],
    348       "year": 2023,
    349       "relevance": "Evidence of AI employment effects on freelance platforms, complementary to this paper's economy-wide analysis."
    350     },
    351     {
    352       "title": "Who is AI replacing? The impact of generative AI on online freelancing platforms",
    353       "authors": ["Ozge Demirci", "Jonas Hannane", "Xinrong Zhu"],
    354       "year": 2025,
    355       "relevance": "Further evidence on AI displacing freelance workers, particularly in writing and coding tasks."
    356     },
    357     {
    358       "title": "Artificial Intelligence and the Labor Market",
    359       "authors": ["Menaka Hampole", "Dimitris Papanikolaou", "Lawrence D. W. Schmidt", "Bryan Seegmiller"],
    360       "year": 2025,
    361       "relevance": "Finds limited overall AI employment impacts using job postings and LinkedIn data, with firm-level demand growth offsetting occupation-level declines."
    362     },
    363     {
    364       "title": "The Rapid Adoption of Generative AI",
    365       "authors": ["Alexander Bick", "Adam Blandin", "David J. Deming"],
    366       "year": 2024,
    367       "relevance": "Documents the pace of generative AI adoption among US workers, providing context for the timing of employment effects."
    368     },
    369     {
    370       "title": "Navigating the Jagged Technological Frontier: Field Experimental Evidence of the Effects of AI on Knowledge Worker Productivity and Quality",
    371       "authors": ["Fabrizio Dell'Acqua", "Edward McFowland III", "Ethan R. Mollick"],
    372       "year": 2023,
    373       "relevance": "RCT evidence on AI effects on knowledge worker productivity, relevant to understanding heterogeneous task-level impacts."
    374     },
    375     {
    376       "title": "Large Language Models, Small Labor Market Effects",
    377       "authors": ["Anders Humlum", "Emilie Vestergaard"],
    378       "year": 2025,
    379       "relevance": "Contrasting finding of minimal AI employment effects in Denmark, important for assessing generalizability."
    380     },
    381     {
    382       "title": "The Impact of AI on Developer Productivity: Evidence from GitHub Copilot",
    383       "authors": ["Sida Peng", "Eirini Kalliamvakou", "Peter Cihon", "Mert Demirer"],
    384       "year": 2023,
    385       "arxiv_id": "2302.06590",
    386       "relevance": "Evidence on AI coding tool productivity effects, relevant to understanding why software developer employment may be affected."
    387     }
    388   ]
    389 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs