scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (20999B)
      1 {
      2   "paper": {
      3     "title": "Collaborating with GenAI: Incentives and Replacements",
      4     "authors": ["Boaz Taitler", "Omer Ben-Porat"],
      5     "year": 2025,
      6     "venue": "arXiv",
      7     "arxiv_id": "2508.20213",
      8     "doi": "10.48550/arXiv.2508.20213"
      9   },
     10   "checklist": {
     11     "artifacts": {
     12       "code_released": {
     13         "applies": true,
     14         "answer": false,
     15         "justification": "No code repository or link is provided in the paper. The simulations described in Section 6 are not accompanied by released code."
     16       },
     17       "data_released": {
     18         "applies": true,
     19         "answer": false,
     20         "justification": "The simulation data (10,000 generated instances) is not released. No data download links or supplementary data files are provided."
     21       },
     22       "environment_specified": {
     23         "applies": true,
     24         "answer": false,
     25         "justification": "The paper mentions hardware ('intel Core i7-9700k CPU and 16 GB RAM') in Section 6 but provides no software environment details, library versions, or dependency specifications."
     26       },
     27       "reproduction_instructions": {
     28         "applies": true,
     29         "answer": false,
     30         "justification": "No reproduction instructions are provided. The simulation setup is described at a high level in Section 6 (instance generation procedure, best-response dynamics with tolerance 10^-8), but there are no step-by-step instructions or scripts."
     31       }
     32     },
     33     "statistical_methodology": {
     34       "confidence_intervals_or_error_bars": {
     35         "applies": true,
     36         "answer": false,
     37         "justification": "The simulation results in Section 6 report empirical frequencies and distributions over 10,000 instances but provide no confidence intervals or error bars on these estimates."
     38       },
     39       "significance_tests": {
     40         "applies": false,
     41         "answer": false,
     42         "justification": "The paper is primarily theoretical with proofs. The simulations illustrate theoretical findings rather than making comparative statistical claims that would require significance tests."
     43       },
     44       "effect_sizes_reported": {
     45         "applies": false,
     46         "answer": false,
     47         "justification": "The paper presents theoretical results (propositions, theorems) and illustrative simulations. Effect sizes in the statistical sense are not applicable to formal proofs or frequency distributions of game-theoretic outcomes."
     48       },
     49       "sample_size_justified": {
     50         "applies": true,
     51         "answer": false,
     52         "justification": "The simulation uses 10,000 generated instances (Section 6) but provides no justification for why this number was chosen or whether it is sufficient for the claims being illustrated."
     53       },
     54       "variance_reported": {
     55         "applies": true,
     56         "answer": false,
     57         "justification": "Simulation results are presented as aggregate empirical frequency distributions (Figures 1 and 2) without reporting variance, standard deviation, or spread measures across runs or subsets."
     58       }
     59     },
     60     "evaluation_design": {
     61       "baselines_included": {
     62         "applies": false,
     63         "answer": false,
     64         "justification": "This is a theoretical paper introducing a new game-theoretic model. There is no prior model of the same problem to serve as a baseline comparison. The paper compares GenAI vs. no-GenAI settings within its own framework."
     65       },
     66       "baselines_contemporary": {
     67         "applies": false,
     68         "answer": false,
     69         "justification": "Not applicable for the same reason as baselines_included — this is a novel theoretical framework, not a benchmark evaluation."
     70       },
     71       "ablation_study": {
     72         "applies": false,
     73         "answer": false,
     74         "justification": "The paper is a theoretical model with proofs. There is no system with components to ablate. The comparison of GenAI vs. no-GenAI settings (Section 3) is part of the core analysis, not an ablation."
     75       },
     76       "multiple_metrics": {
     77         "applies": false,
     78         "answer": false,
     79         "justification": "This is a theoretical paper proving properties of a game-theoretic model. Evaluation metrics in the empirical sense do not apply."
     80       },
     81       "human_evaluation": {
     82         "applies": false,
     83         "answer": false,
     84         "justification": "This is a theoretical paper with mathematical proofs and simulations. Human evaluation of outputs is not relevant."
     85       },
     86       "held_out_test_set": {
     87         "applies": false,
     88         "answer": false,
     89         "justification": "No datasets or test sets are used. The paper presents theoretical analysis and simulations of a game-theoretic model."
     90       },
     91       "per_category_breakdown": {
     92         "applies": true,
     93         "answer": true,
     94         "justification": "Figure 1b breaks down optimal coalition size by the total player share P, providing a granular view across different parameter ranges rather than just overall aggregates."
     95       },
     96       "failure_cases_discussed": {
     97         "applies": true,
     98         "answer": true,
     99         "justification": "Section 5.1 explicitly discusses instability of optimal coalitions (Proposition 5.2), showing cases where the optimal coalition is not stable. The myopic removal dynamics analysis (Section 6.1) shows ~80% of instances degrade to the empty coalition."
    100       },
    101       "negative_results_reported": {
    102         "applies": true,
    103         "answer": true,
    104         "justification": "The paper reports several negative findings: GenAI can lead to zero effort even when nearly ineffective (Proposition 3.6), optimal coalitions are often unstable (Proposition 5.2), and myopic removal dynamics typically lead to empty coalitions (~80% of instances, Section 6.1)."
    105       }
    106     },
    107     "claims_and_evidence": {
    108       "abstract_claims_supported": {
    109         "applies": true,
    110         "answer": true,
     111         "justification": "The abstract claims are supported by formal proofs: GenAI leading to no effort (Propositions 3.5 and 3.6), NP-completeness of the optimization problem (Theorem 4.2), efficient algorithm for linear cases (Theorem 4.4), low-value workers being critical (Proposition 5.3), and cascade effects (Section 5.3 and simulations in Section 6)."
    112       },
    113       "causal_claims_justified": {
    114         "applies": false,
    115         "answer": false,
    116         "justification": "The paper makes formal mathematical claims about equilibrium properties of a game-theoretic model, not empirical causal claims. The 'causes' are within the model's assumptions, established through proofs."
    117       },
    118       "generalization_bounded": {
    119         "applies": true,
    120         "answer": true,
    121         "justification": "The paper is explicit about its theoretical nature and limitations. Section 7 states 'Our paper focuses on a theoretical perspective that is naturally limited' and discusses specific assumptions (exogenous portions, free GenAI access). The model's scope is clearly bounded to the MSB game framework."
    122       },
    123       "alternative_explanations_discussed": {
    124         "applies": false,
    125         "answer": false,
    126         "justification": "This is a theoretical paper presenting mathematical proofs within a formal model. Alternative explanations in the empirical sense do not apply — the results follow logically from the model's axioms."
    127       }
    128     },
    129     "setup_transparency": {
    130       "model_versions_specified": {
    131         "applies": false,
    132         "answer": false,
    133         "justification": "No LLM or AI model is used in the experiments. The paper is a theoretical game-theory study with numerical simulations."
    134       },
    135       "prompts_provided": {
    136         "applies": false,
    137         "answer": false,
    138         "justification": "No prompting is used. This is a theoretical paper with mathematical simulations, not an LLM-based study."
    139       },
    140       "hyperparameters_reported": {
    141         "applies": true,
    142         "answer": true,
    143         "justification": "Section 6 reports simulation hyperparameters: 12 players per instance, coefficients sampled uniformly from [0,1], tolerance of 10^-8 for best-response dynamics, 10,000 instances generated. The shared benefit function form (product) and contribution/cost function forms are fully specified."
    144       },
    145       "scaffolding_described": {
    146         "applies": false,
    147         "answer": false,
    148         "justification": "No agentic scaffolding is used. This is a theoretical game-theory paper."
    149       },
    150       "data_preprocessing_documented": {
    151         "applies": true,
    152         "answer": true,
    153         "justification": "Section 6 fully describes the instance generation procedure: how contribution functions, cost functions, shared benefit functions, and player portions are constructed from sampled parameters. The pipeline from parameter sampling to equilibrium computation is documented."
    154       }
    155     },
    156     "limitations_and_scope": {
    157       "limitations_section_present": {
    158         "applies": true,
    159         "answer": true,
    160         "justification": "Section 7 ('Discussion and Future Work') discusses limitations including the assumption of exogenous portions, the lack of contract design analysis, and the theoretical nature of the work ('Our paper focuses on a theoretical perspective that is naturally limited')."
    161       },
    162       "threats_to_validity_specific": {
    163         "applies": true,
    164         "answer": true,
    165         "justification": "Section 7 identifies specific limitations: exogenously given portions (theta_i) which may not hold in all scenarios, the assumption that GenAI is free, and the lack of diverse interaction forms (competition, substitutable relationships). These are specific to this study's design choices."
    166       },
    167       "scope_boundaries_stated": {
    168         "applies": true,
    169         "answer": true,
     170         "justification": "Section 7 explicitly identifies what was NOT studied: varying portions to reach better equilibria, contract design where players must outperform GenAI, and competition or substitutable relationships. Section 7 also states that the paper 'focuses on a theoretical perspective that is naturally limited.'"
    171       }
    172     },
    173     "data_integrity": {
    174       "raw_data_available": {
    175         "applies": true,
    176         "answer": false,
    177         "justification": "The 10,000 simulation instances and their results are not made available for independent verification. Only aggregate figures are shown."
    178       },
    179       "data_collection_described": {
    180         "applies": true,
    181         "answer": true,
    182         "justification": "Section 6 ('Generating instances') describes in detail how simulation data was generated: 12 players, product shared benefit function, contribution functions with sqrt(alpha_i*e_i + beta_i*g_i), linear costs, and parameter sampling from U[0,1]."
    183       },
    184       "recruitment_methods_described": {
    185         "applies": false,
    186         "answer": false,
    187         "justification": "No human participants. This is a theoretical paper with computational simulations."
    188       },
    189       "data_pipeline_documented": {
    190         "applies": true,
    191         "answer": true,
    192         "justification": "Section 6 documents the full pipeline: instance generation (parameter sampling) → brute-force coalition search over all 2^12 subsets → best-response dynamics with 10^-8 tolerance for each coalition → aggregation into frequency distributions. Hardware and total runtime (15 hours) are also reported."
    193       }
    194     },
    195     "conflicts_of_interest": {
    196       "funding_disclosed": {
    197         "applies": true,
    198         "answer": true,
    199         "justification": "The Acknowledgements section states: 'This research was supported by the Israel Science Foundation (ISF; Grant No. 3079/24).'"
    200       },
    201       "affiliations_disclosed": {
    202         "applies": true,
    203         "answer": true,
    204         "justification": "Both authors are clearly affiliated with Technion—Israel Institute of Technology, with email addresses provided on the first page."
    205       },
    206       "funder_independent_of_outcome": {
    207         "applies": true,
    208         "answer": true,
    209         "justification": "The Israel Science Foundation is a public research funding agency with no commercial stake in the results of this game-theoretic study about GenAI incentives."
    210       },
    211       "financial_interests_declared": {
    212         "applies": true,
    213         "answer": false,
    214         "justification": "No competing interests or financial interests statement is present in the paper."
    215       }
    216     },
    217     "contamination": {
    218       "training_cutoff_stated": {
    219         "applies": false,
    220         "answer": false,
    221         "justification": "The paper does not evaluate any pre-trained model's capability on a benchmark. It is a theoretical game-theory paper with numerical simulations."
    222       },
    223       "train_test_overlap_discussed": {
    224         "applies": false,
    225         "answer": false,
    226         "justification": "No pre-trained model is evaluated on any benchmark. Not applicable."
    227       },
    228       "benchmark_contamination_addressed": {
    229         "applies": false,
    230         "answer": false,
    231         "justification": "No benchmark evaluation is performed. Not applicable."
    232       }
    233     },
    234     "human_studies": {
    235       "pre_registered": {
    236         "applies": false,
    237         "answer": false,
    238         "justification": "No human participants. This is a theoretical paper with computational simulations."
    239       },
    240       "irb_or_ethics_approval": {
    241         "applies": false,
    242         "answer": false,
    243         "justification": "No human participants. Not applicable."
    244       },
    245       "demographics_reported": {
    246         "applies": false,
    247         "answer": false,
    248         "justification": "No human participants. Not applicable."
    249       },
    250       "inclusion_exclusion_criteria": {
    251         "applies": false,
    252         "answer": false,
    253         "justification": "No human participants. Not applicable."
    254       },
    255       "randomization_described": {
    256         "applies": false,
    257         "answer": false,
    258         "justification": "No human participants. Not applicable."
    259       },
    260       "blinding_described": {
    261         "applies": false,
    262         "answer": false,
    263         "justification": "No human participants. Not applicable."
    264       },
    265       "attrition_reported": {
    266         "applies": false,
    267         "answer": false,
    268         "justification": "No human participants. Not applicable."
    269       }
    270     },
    271     "cost_and_practicality": {
    272       "inference_cost_reported": {
    273         "applies": false,
    274         "answer": false,
    275         "justification": "This is a theoretical paper. There is no proposed method with inference costs. The simulations are illustrative, not a deployable system."
    276       },
    277       "compute_budget_stated": {
    278         "applies": true,
    279         "answer": true,
    280         "justification": "Section 6 reports: 'We used a standard PC with intel Core i7-9700k CPU and 16 GB RAM for running the simulations. The entire execution took roughly 15 hours.'"
    281       }
    282     }
    283   },
    284   "claims": [
    285     {
    286       "claim": "GenAI can lead workers to exert no effort in equilibrium, even when GenAI's effectiveness is negligible.",
    287       "evidence": "Proposition 3.6 (Section 3.2) proves that for any epsilon > 0, there exists a game where GenAI changes contribution by at most epsilon but the dominant equilibrium shifts from full effort (e_i=1) to zero effort (e_i=0). Proof uses a constructed 2-player game instance.",
    288       "supported": "strong"
    289     },
    290     {
    291       "claim": "The manager's problem of finding the optimal coalition is NP-complete.",
    292       "evidence": "Theorem 4.2 (Section 4.1) with full proof in Appendix B.1, using a reduction from the clique problem. The construction maps vertices to players and edges to shared benefit terms.",
    293       "supported": "strong"
    294     },
    295     {
    296       "claim": "An efficient O(N/epsilon^2) algorithm exists for the special case of (almost-)linear shared benefit functions.",
    297       "evidence": "Theorem 4.4 (Section 4.2) with Algorithm 1 in Appendix B.2. The reduction to a knapsack problem is shown, and correctness/runtime are formally proven.",
    298       "supported": "strong"
    299     },
    300     {
    301       "claim": "Workers with near-zero marginal contribution can be pivotal for sustaining overall output; their removal triggers cascades.",
    302       "evidence": "Proposition 5.3 (Section 5.2) proves for any epsilon > 0, there exists an instance where a player's marginal contribution is less than epsilon yet is in the optimal coalition. Section 5.3 and Proposition 5.4 show decreasing VSR values trigger cascading exclusions.",
    303       "supported": "strong"
    304     },
    305     {
    306       "claim": "Optimal coalitions tend to be either large or small (rarely medium-sized), and ~80% degrade to empty coalitions under myopic removal dynamics.",
     307       "evidence": "Simulations over 10,000 instances (Section 6.1, Figures 1 and 2). Coalitions of 10-12 players appear ~70% of the time, while medium coalitions (1-8) are rare. Under myopic removal dynamics, ~80% converge to empty coalitions.",
    308       "supported": "moderate"
    309     }
    310   ],
    311   "methodology_tags": ["theoretical"],
    312   "key_findings": "This paper presents a game-theoretic model (Managed Shared Benefit game) showing that GenAI access can paradoxically reduce worker effort to zero in equilibrium, even when GenAI provides negligible performance improvement. The manager's optimal team selection problem is NP-complete in general, with an efficient algorithm for linear cases. Simulations over 10,000 instances demonstrate that optimal coalitions are bimodal (large or empty) and highly unstable under myopic removal dynamics, with cascading exclusions triggered by removing even low-contribution workers.",
    313   "red_flags": [
    314     {
    315       "flag": "Simulations only illustrate theoretical claims",
    316       "detail": "The simulations in Section 6 use a specific parametric form (multiplicative shared benefit, specific contribution/cost functions with uniformly sampled coefficients). The results may not generalize beyond this narrow functional class, though the paper does not claim they do."
    317     },
    318     {
    319       "flag": "No code or data released for simulation verification",
     320       "detail": "The 10,000-instance simulation study is not reproducible without access to code. Although Section 6 describes the instance generation procedure and the 10^-8 convergence tolerance, the actual implementation of the instance generation and best-response dynamics cannot be independently verified."
    321     }
    322   ],
    323   "cited_papers": [
    324     {
    325       "title": "Measuring the impact of early-2025 AI on experienced open-source developer productivity",
    326       "authors": ["J. Becker", "N. Rush", "E. Barnes", "D. Rein"],
    327       "year": 2025,
    328       "arxiv_id": "2507.09089",
    329       "relevance": "Empirical study of AI's impact on developer productivity, directly relevant to the survey's core topic of AI-augmented software engineering."
    330     },
    331     {
    332       "title": "The impact of AI on developer productivity: Evidence from GitHub Copilot",
    333       "authors": ["S. Peng", "E. Kalliamvakou", "P. Cihon", "M. Demirer"],
    334       "year": 2023,
    335       "arxiv_id": "2302.06590",
    336       "relevance": "Landmark study on AI-assisted programming productivity, directly relevant to evaluating claims about GenAI's effect on developer output."
    337     },
    338     {
    339       "title": "Strategic foundation models",
    340       "authors": ["D. Goktas", "A. Greenwald", "T. Osogami", "R. Patel", "K. Leyton-Brown"],
    341       "year": 2025,
    342       "relevance": "Examines strategic aspects of foundation models including delegation and deferring to AI, relevant to understanding AI agent safety and alignment."
    343     },
    344     {
    345       "title": "Social choice for AI alignment: Dealing with diverse human feedback",
    346       "authors": ["V. Conitzer", "R. Freedman", "J. Heitzig"],
    347       "year": 2024,
    348       "doi": "10.48550/ARXIV.2404.10271",
    349       "relevance": "Addresses AI alignment through social choice theory, relevant to the survey's coverage of AI safety and alignment research."
    350     },
    351     {
    352       "title": "Braess's paradox of generative AI",
    353       "authors": ["B. Taitler", "O. Ben-Porat"],
    354       "year": 2025,
    355       "relevance": "Game-theoretic analysis showing GenAI can paradoxically reduce outcomes, directly relevant to understanding unintended consequences of AI adoption."
    356     },
    357     {
    358       "title": "Competition and diversity in generative AI",
    359       "authors": ["M. Raghavan"],
    360       "year": 2024,
    361       "arxiv_id": "2412.08610",
    362       "relevance": "Examines competitive dynamics of generative AI systems, relevant to understanding market and ecological effects of AI tools."
    363     },
    364     {
    365       "title": "Fine-tuning games: Bargaining and adaptation for general-purpose models",
    366       "authors": ["B. Laufer", "J. Kleinberg", "H. Heidari"],
    367       "year": 2024,
    368       "relevance": "Game-theoretic framework for fine-tuning foundation models, relevant to understanding strategic interactions around AI model development."
    369     },
    370     {
    371       "title": "A no free lunch theorem for human-AI collaboration",
    372       "authors": ["K. Peng", "N. Garg", "J. Kleinberg"],
    373       "year": 2025,
    374       "relevance": "Theoretical result on fundamental limits of human-AI collaboration, directly relevant to the survey's assessment of AI-augmented work claims."
    375     }
    376   ]
    377 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs