scan.json (26065B)
1 { 2 "paper": { 3 "title": "Can Large Language Models Develop Gambling Addiction?", 4 "authors": ["Seungpil Lee", "Donghyeon Shin", "Yunjeong Lee", "Sundong Kim"], 5 "year": 2025, 6 "venue": "arXiv preprint", 7 "arxiv_id": "2509.22818" 8 }, 9 "checklist": { 10 "artifacts": { 11 "code_released": { 12 "applies": true, 13 "answer": false, 14 "justification": "No repository URL, code archive, or link to any code release is provided anywhere in the paper or appendices." 15 }, 16 "data_released": { 17 "applies": true, 18 "answer": false, 19 "justification": "No dataset download link or data repository is provided. The 25,600 games of experimental data are not made available." 20 }, 21 "environment_specified": { 22 "applies": true, 23 "answer": false, 24 "justification": "No requirements.txt, Dockerfile, or detailed environment specification is provided. The paper names the models used but does not describe the software environment for running experiments." 25 }, 26 "reproduction_instructions": { 27 "applies": true, 28 "answer": false, 29 "justification": "No step-by-step reproduction instructions, README, or runnable scripts are provided. The experimental design is described in Appendix A but without executable reproduction guidance." 30 } 31 }, 32 "statistical_methodology": { 33 "confidence_intervals_or_error_bars": { 34 "applies": true, 35 "answer": true, 36 "justification": "Standard errors are reported in tables (e.g., Table 4 reports '21.31 ± 1.02' for bankruptcy rates) and error bars are shown in Figure 6 ('Error bars: SE across 50 trials')." 37 }, 38 "significance_tests": { 39 "applies": true, 40 "answer": true, 41 "justification": "Statistical significance tests are used in key comparisons. For example, the autonomy analysis reports chi-squared test results: 'chi-squared = 256.13, p < 10^-57' (Appendix B.2). The activation patching uses 'p < 0.05' as threshold (Figure 6)." 
42 }, 43 "effect_sizes_reported": { 44 "applies": true, 45 "answer": true, 46 "justification": "Effect sizes are reported with baseline context throughout. For example, Figure 2b reports betting aggressiveness '0.14 -> 0.31, 2.3x', loss chasing '0.16 -> 0.42, 2.7x', extreme betting '0.04 -> 0.23, 6.4x'. Figure 6 reports activation patching effects as percentage changes (e.g., '+17.8%', '+25.1%')." 47 }, 48 "sample_size_justified": { 49 "applies": true, 50 "answer": false, 51 "justification": "No power analysis or explicit justification for the choice of 50 repetitions per condition or 19,200/6,400 total games is provided. The sample sizes are stated but not justified." 52 }, 53 "variance_reported": { 54 "applies": true, 55 "answer": true, 56 "justification": "Standard errors are reported across conditions in Table 4 (e.g., 'Avg Rounds 5.46 ± 0.18'). Figure 6 reports 'SE across 50 trials'. The correlation analyses report Pearson r values." 57 } 58 }, 59 "evaluation_design": { 60 "baselines_included": { 61 "applies": true, 62 "answer": true, 63 "justification": "The experimental design includes BASE conditions (no additional prompt components) as baselines. Fixed betting serves as baseline compared to variable betting. The investment choice experiment uses BASE, G, M, GM conditions." 64 }, 65 "baselines_contemporary": { 66 "applies": true, 67 "answer": true, 68 "justification": "The study uses contemporary models: GPT-4o-mini, GPT-4.1-mini, Gemini-2.5-Flash, Claude-3.5-Haiku, LLaMA-3.1-8B, Gemma-2-9B. These are current models at the time of writing." 69 }, 70 "ablation_study": { 71 "applies": true, 72 "answer": true, 73 "justification": "Section 3.3 is explicitly titled 'Ablation Study: Isolating Causal Factors.' The investment choice experiment isolates goal-setting vs. reward-maximizing effects, and additional analysis controls for bet ceilings to test whether the variable betting effect stems from freedom of choice vs. bet magnitude." 
74 }, 75 "multiple_metrics": { 76 "applies": true, 77 "answer": true, 78 "justification": "Three complementary behavioral metrics are used: IBA (Betting Aggressiveness), ILC (Loss Chasing), IEC (Extreme Betting), plus bankruptcy rate, average rounds played, total bet amounts, net profit/loss, goal escalation rate, and option distribution." 79 }, 80 "human_evaluation": { 81 "applies": false, 82 "answer": false, 83 "justification": "This paper evaluates LLM decision-making behavior in simulated gambling experiments. Human evaluation of LLM outputs is not relevant to the claims — the paper measures quantitative behavioral metrics." 84 }, 85 "held_out_test_set": { 86 "applies": false, 87 "answer": false, 88 "justification": "This is not a benchmark evaluation with train/test splits. The experimental design uses factorial conditions with repeated trials, not training/testing paradigms." 89 }, 90 "per_category_breakdown": { 91 "applies": true, 92 "answer": true, 93 "justification": "Results are broken down by model (6 LLMs), betting type (fixed vs. variable), prompt condition (32 combinations), streak length (1-5), and individual prompt components (G, M, P, H, W). Per-model analyses are provided in Appendix C." 94 }, 95 "failure_cases_discussed": { 96 "applies": true, 97 "answer": true, 98 "justification": "Qualitative examples of cognitive distortions are analyzed in Section 3.3 (Finding 5), including specific model outputs showing illusion of control, gambler's fallacy, and loss chasing with quoted examples and round numbers." 99 }, 100 "negative_results_reported": { 101 "applies": true, 102 "answer": true, 103 "justification": "The paper reports that Probability Information (P) had a risk-reducing effect (Figure 8, Appendix B.2), and that fixed betting produced near-zero bankruptcy. The paper also notes that M alone showed only 'modest effects' compared to G (Section 3.3)." 
104 } 105 }, 106 "claims_and_evidence": { 107 "abstract_claims_supported": { 108 "applies": true, 109 "answer": true, 110 "justification": "The abstract claims about cognitive features (illusion of control, loss chasing), variable betting amplifying irrational behavior, and SAE analysis confirming decision-making features are all supported by results in Sections 3 and 4." 111 }, 112 "causal_claims_justified": { 113 "applies": true, 114 "answer": true, 115 "justification": "The paper makes causal claims about neural features controlling behavior and justifies them through activation patching experiments (Section 4), which is a standard causal intervention technique in mechanistic interpretability. The ablation study (Section 3.3) uses controlled single-variable manipulation. The paper also carefully hedges behavioral findings as 'associated with' rather than 'caused by'." 116 }, 117 "generalization_bounded": { 118 "applies": true, 119 "answer": true, 120 "justification": "The paper explicitly states: 'since these findings were derived from gambling contexts specifically, generalization to other decision-making domains requires further research' (Section 3.4). The conclusion also acknowledges 'limitations remain regarding our reliance on a single gambling paradigm' and that 'generalization to other risk-related tasks... require further validation.'" 121 }, 122 "alternative_explanations_discussed": { 123 "applies": true, 124 "answer": true, 125 "justification": "The paper explicitly addresses whether the models' linguistic expressions 'reflect actual internal processing or merely reproduce patterns from training data' (Section 3.3). The SAE analysis in Section 4 is designed to address this alternative explanation. The paper acknowledges 'the open question of normative rationality standards' in the conclusion." 
126 } 127 }, 128 "setup_transparency": { 129 "model_versions_specified": { 130 "applies": true, 131 "answer": false, 132 "justification": "The paper uses marketing names without API version or snapshot dates: 'GPT-4o-mini', 'GPT-4.1-mini', 'Gemini-2.5-Flash', 'Claude-3.5-Haiku', 'LLaMA-3.1-8B', 'Gemma-2-9B'. No specific API versions like 'gpt-4o-mini-2024-07-18' are provided." 133 }, 134 "prompts_provided": { 135 "applies": true, 136 "answer": true, 137 "justification": "Full prompt texts are provided in Appendix A, including the base prompts for both fixed and variable betting in both experiments, all five prompt component texts (G, M, H, W, P), and complete prompt examples with actual game state values filled in (Appendix A.1.3, A.2.5)." 138 }, 139 "hyperparameters_reported": { 140 "applies": true, 141 "answer": false, 142 "justification": "No API hyperparameters (temperature, top-p, max tokens) are reported for any of the six LLMs used. The game parameters (win rate 30%, payout 3x, initial balance $100) are specified, but the LLM sampling parameters are not." 143 }, 144 "scaffolding_described": { 145 "applies": true, 146 "answer": true, 147 "justification": "The experimental framework is described in detail: models receive prompts with game state, history (last 5 rounds), warning messages, and respond with decisions. The modular prompt structure and game loop are fully documented in Appendix A." 148 }, 149 "data_preprocessing_documented": { 150 "applies": true, 151 "answer": true, 152 "justification": "The data collection process is documented: 19,200 games in the slot machine experiment (6 models x 64 conditions x 50 repetitions) and 6,400 games in the investment choice experiment. The factorial design, conditions, and game termination criteria are specified in Tables 1-2 and Appendix A." 
153 } 154 }, 155 "limitations_and_scope": { 156 "limitations_section_present": { 157 "applies": true, 158 "answer": true, 159 "justification": "The conclusion contains explicit limitations discussion: 'limitations remain regarding our reliance on a single gambling paradigm, the discrepancy between models used for behavioral versus neural analyses, and the open question of normative rationality standards.'" 160 }, 161 "threats_to_validity_specific": { 162 "applies": true, 163 "answer": true, 164 "justification": "The paper identifies specific threats: (1) reliance on a single gambling paradigm, (2) discrepancy between models used for behavioral (6 models) vs. neural analysis (only LLaMA-3.1-8B), (3) the question of normative rationality standards for LLMs, (4) whether linguistic expressions reflect actual internal processing vs. training data patterns (Section 3.3)." 165 }, 166 "scope_boundaries_stated": { 167 "applies": true, 168 "answer": true, 169 "justification": "The paper explicitly states: 'Generalization to other risk-related tasks and cross-model neural comparisons require further validation' (Conclusion). Section 3.4 states: 'since these findings were derived from gambling contexts specifically, generalization to other decision-making domains requires further research.'" 170 } 171 }, 172 "data_integrity": { 173 "raw_data_available": { 174 "applies": true, 175 "answer": false, 176 "justification": "Raw game-level data from the 25,600 games is not made available for independent verification. Only aggregated statistics and selected qualitative examples are presented." 
177 }, 178 "data_collection_described": { 179 "applies": true, 180 "answer": true, 181 "justification": "Data collection is described in detail: 2x32 factorial design for slot machine (19,200 games), 2x4x4 factorial for investment choice (6,400 games), with specific models, repetitions (50 per condition), game parameters (30% win rate, 3x payout, $100 initial balance), and termination conditions." 182 }, 183 "recruitment_methods_described": { 184 "applies": false, 185 "answer": false, 186 "justification": "No human participants. The study uses LLM APIs and open-weight models in automated experiments." 187 }, 188 "data_pipeline_documented": { 189 "applies": true, 190 "answer": true, 191 "justification": "The experimental pipeline is documented: prompt construction (modular components), game simulation loop, behavioral metric computation (IBA, ILC, IEC equations in Section 2), SAE feature extraction stages (Section 4.1), and activation patching procedure (Figure 5)." 192 } 193 }, 194 "conflicts_of_interest": { 195 "funding_disclosed": { 196 "applies": true, 197 "answer": false, 198 "justification": "No funding source or acknowledgments section listing grants or sponsors is present in the paper." 199 }, 200 "affiliations_disclosed": { 201 "applies": true, 202 "answer": true, 203 "justification": "Author affiliations are clearly stated: Department of AI Convergence and Department of Life Science at Gwangju Institute of Science and Technology (GIST). The authors are from an academic institution, not from any of the evaluated companies." 204 }, 205 "funder_independent_of_outcome": { 206 "applies": true, 207 "answer": false, 208 "justification": "No funding information is disclosed. Without knowing the funding source, independence cannot be verified." 209 }, 210 "financial_interests_declared": { 211 "applies": true, 212 "answer": false, 213 "justification": "No competing interests statement or financial disclosure is present in the paper." 
214 } 215 }, 216 "contamination": { 217 "training_cutoff_stated": { 218 "applies": false, 219 "answer": false, 220 "justification": "This paper does not evaluate LLM capability on a benchmark. It tests behavioral decision-making in novel simulated gambling scenarios that are dynamically generated per round, not drawn from a static dataset." 221 }, 222 "train_test_overlap_discussed": { 223 "applies": false, 224 "answer": false, 225 "justification": "No benchmark evaluation is performed. The gambling scenarios are generated procedurally, so train/test overlap is not a concern." 226 }, 227 "benchmark_contamination_addressed": { 228 "applies": false, 229 "answer": false, 230 "justification": "No pre-existing benchmark is used. The experiments use dynamically generated gambling scenarios." 231 } 232 }, 233 "human_studies": { 234 "pre_registered": { 235 "applies": false, 236 "answer": false, 237 "justification": "No human participants. The study exclusively uses LLM APIs and open-weight models." 238 }, 239 "irb_or_ethics_approval": { 240 "applies": false, 241 "answer": false, 242 "justification": "No human participants in the study." 243 }, 244 "demographics_reported": { 245 "applies": false, 246 "answer": false, 247 "justification": "No human participants in the study." 248 }, 249 "inclusion_exclusion_criteria": { 250 "applies": false, 251 "answer": false, 252 "justification": "No human participants in the study." 253 }, 254 "randomization_described": { 255 "applies": false, 256 "answer": false, 257 "justification": "No human participants in the study." 258 }, 259 "blinding_described": { 260 "applies": false, 261 "answer": false, 262 "justification": "No human participants in the study." 263 }, 264 "attrition_reported": { 265 "applies": false, 266 "answer": false, 267 "justification": "No human participants in the study." 
268 } 269 }, 270 "cost_and_practicality": { 271 "inference_cost_reported": { 272 "applies": true, 273 "answer": false, 274 "justification": "No inference cost, API costs, or tokens consumed are reported despite running 25,600 games across six LLMs with potentially many rounds each." 275 }, 276 "compute_budget_stated": { 277 "applies": true, 278 "answer": false, 279 "justification": "No total computational budget, API spend, or hardware specifications are reported." 280 } 281 } 282 }, 283 "claims": [ 284 { 285 "claim": "Variable betting dramatically amplifies bankruptcy rates compared to fixed betting across all six LLMs.", 286 "evidence": "Figure 2a shows bankruptcy rates rising from 0-13% (fixed) to 6-48% (variable) across all models. Gemini-2.5-Flash shows the largest increase (3.1% to 48.1%). All three behavioral metrics increase: betting aggressiveness 2.3x, loss chasing 2.7x, extreme betting 6.4x (Figure 2b).", 287 "supported": "strong" 288 }, 289 { 290 "claim": "Goal-setting prompts nearly double bankruptcy rates and reshape risk preferences toward extreme options.", 291 "evidence": "Investment choice experiment (Figure 4): Goal-setting (G, GM) produces 75-77% bankruptcy vs. 40-42% for baseline. Option 4 selection shifts from 15% (baseline) to 25% (G) and 41% (GM). Goal escalation rises from 21-22% to 56-59% (Section 3.3).", 292 "supported": "strong" 293 }, 294 { 295 "claim": "The variable betting effect derives from freedom of choice rather than bet amounts, confirmed by controlling for bet ceilings.", 296 "evidence": "Figure 4d shows variable betting produces higher bankruptcy than fixed betting across all constraint levels ($10-$70), even when capped at the same amount. 
Additional analysis (Appendix B.2) with chi-squared test (256.13, p < 10^-57) confirms variable betting models bet less on average but go bankrupt more (Section 3.3, Finding 4).", 297 "supported": "strong" 298 }, 299 { 300 "claim": "LLMs exhibit cognitive distortions similar to human gamblers (illusion of control, gambler's fallacy, loss chasing, house money effect).", 301 "evidence": "Qualitative analysis of model outputs in Section 3.3 (Finding 5) with direct quotes from GPT-4.1-mini, GPT-4o-mini, Claude-3.5-Haiku, and Gemini-2.5-Flash showing reasoning patterns matching each cognitive distortion.", 302 "supported": "moderate" 303 }, 304 { 305 "claim": "A sparse set of 112 neural features (approximately 1% of candidates) causally controls gambling behavior, with bidirectional effects.", 306 "evidence": "Activation patching on LLaMA-3.1-8B identified 112 significant features from 8,000+ candidates (Figure 6). Safe features increase stopping by +17.8% and decrease bankruptcy by -5.7%. Risky features decrease stopping by -18.8% and increase bankruptcy by +25.1%. Statistical threshold: p < 0.05, |effect| > 0.1 (Section 4.2).", 307 "supported": "strong" 308 }, 309 { 310 "claim": "Risk-promoting and risk-inhibiting features are anatomically segregated within the network.", 311 "evidence": "Figure 7 shows safe features (n=23) distribute across layers L4-L19, while risky features (n=89) concentrate in later layers, with L24 containing 18 features (20% of all risky features).", 312 "supported": "strong" 313 }, 314 { 315 "claim": "LLMs internalize human-like cognitive biases beyond simply mimicking training data.", 316 "evidence": "This claim is made in the abstract. The SAE analysis provides some evidence that behavior is controlled by abstract decision-making features. 
However, the paper itself acknowledges this is an open question: 'whether these linguistic expressions reflect actual internal processing or merely reproduce patterns from training data requires further investigation' (Section 3.3).", 317 "supported": "weak" 318 } 319 ], 320 "methodology_tags": ["benchmark-eval", "case-study"], 321 "key_findings": "This study demonstrates that LLMs exhibit gambling addiction-like behaviors, with variable betting amplifying bankruptcy rates from 0-13% under fixed betting to 6-48% under variable betting across six models and goal-setting prompts nearly doubling bankruptcy rates. Mechanistic analysis using Sparse Autoencoders on LLaMA-3.1-8B identified 112 causally verified neural features that bidirectionally control gambling behavior, with risk-promoting features concentrated in later network layers. The paper establishes that choice autonomy, not bet magnitude, is the critical driver of addiction-like behavior in LLMs.", 322 "red_flags": [ 323 { 324 "flag": "Neural analysis limited to single model", 325 "detail": "The SAE causal analysis (Phase 2) is conducted exclusively on LLaMA-3.1-8B, while behavioral analysis spans six models. The paper acknowledges this discrepancy but the neural findings cannot be assumed to generalize to the API-based models (GPT, Gemini, Claude) that showed the most striking behavioral effects." 326 }, 327 { 328 "flag": "No API hyperparameters reported", 329 "detail": "Temperature, top-p, and other sampling parameters are not reported for any of the six models. These parameters significantly affect LLM behavior and could confound the results. Different default temperatures across models could explain behavioral differences." 330 }, 331 { 332 "flag": "Anthropomorphizing language risks overclaiming", 333 "detail": "The paper frames LLM behavior as 'addiction' and 'cognitive distortions' using clinical psychology terminology (DSM-5 criteria). 
While the behavioral analogies are interesting, these terms carry strong connotations about subjective experience that may not apply to LLMs. The paper partially hedges this but the title and framing are attention-grabbing." 334 }, 335 { 336 "flag": "No code or data release", 337 "detail": "Despite running 25,600 games across six models, neither the experimental code nor the raw data is released. This makes independent verification and reproduction impossible." 338 }, 339 { 340 "flag": "Missing model versions", 341 "detail": "Only marketing model names are used (e.g., 'GPT-4o-mini', 'Gemini-2.5-Flash') without API version identifiers or snapshot dates. Model behavior can differ significantly across versions, making exact replication impossible." 342 } 343 ], 344 "cited_papers": [ 345 { 346 "title": "Concrete Problems in AI Safety", 347 "authors": ["Dario Amodei", "Chris Olah", "Jacob Steinhardt", "Paul Christiano", "John Schulman", "Dan Mané"], 348 "year": 2016, 349 "arxiv_id": "1606.06565", 350 "relevance": "Foundational paper on AI safety problems including reward hacking, directly relevant to understanding LLM malfunctions." 351 }, 352 { 353 "title": "Sycophancy to Subterfuge: Investigating Reward-Tampering in Large Language Models", 354 "authors": ["Carson Denison", "Monte MacDiarmid", "Fazl Barez", "David Duvenaud"], 355 "year": 2024, 356 "relevance": "Investigates reward tampering in LLMs, directly relevant to AI safety and LLM decision-making failures." 357 }, 358 { 359 "title": "Sleeper Agents: Training Deceptive LLMs that Persist Through Safety Training", 360 "authors": ["Evan Hubinger", "Carson Denison", "Jesse Mu"], 361 "year": 2024, 362 "arxiv_id": "2401.05566", 363 "relevance": "Demonstrates deceptive behavior persistence in LLMs, relevant to AI safety and alignment concerns." 
364 }, 365 { 366 "title": "Sparse Autoencoders Find Highly Interpretable Features in Language Models", 367 "authors": ["Hoagy Cunningham", "Aidan Ewart", "Logan Riggs", "Robert Huben", "Lee Sharkey"], 368 "year": 2024, 369 "relevance": "Core methodology paper for SAE-based interpretability used in this study's neural circuit analysis." 370 }, 371 { 372 "title": "Scaling and Evaluating Sparse Autoencoders", 373 "authors": ["Lee Adamek", "Tristan Besiroglu", "Nicholas Bradley-Schmieg"], 374 "year": 2025, 375 "relevance": "Extends SAE methodology to large-scale LLMs, relevant to mechanistic interpretability of AI systems." 376 }, 377 { 378 "title": "Mitigating Gambling-Like Risk-Taking Behaviors in Large Language Models: A Behavioral Economics Approach to AI Safety", 379 "authors": ["Y. Du"], 380 "year": 2025, 381 "arxiv_id": "2506.22496", 382 "relevance": "Directly related prior work on LLM gambling behavior and AI safety, uses behavioral economics framework." 383 }, 384 { 385 "title": "ODIN: Disentangled Reward Mitigates Hacking in RLHF", 386 "authors": ["Lichang Chen", "Chen Zhu", "Jiuhai Chen"], 387 "year": 2024, 388 "relevance": "Addresses reward hacking in RLHF, relevant to understanding LLM training malfunctions and safety." 389 }, 390 { 391 "title": "Exploring the Choice Behavior of Large Language Models", 392 "authors": ["Weidong Wu", "Qinlin Zhao", "Hao Chen"], 393 "year": 2025, 394 "relevance": "Empirically studies irrational choice tendencies in LLMs including attention bias and conformity, directly relevant to LLM decision-making research." 395 }, 396 { 397 "title": "Decision-Making Behavior Evaluation Framework for LLMs under Uncertain Context", 398 "authors": ["Jingru Jessica Jia", "Zehua Yuan", "Junhao Pan"], 399 "year": 2024, 400 "relevance": "Framework for evaluating LLM behavioral economic biases including risk aversion, relevant to AI decision-making evaluation." 
401 }, 402 { 403 "title": "Can LLMs make trade-offs involving stipulated pain and pleasure states?", 404 "authors": ["Geoff Keeling", "Winnie Street", "Martyna Stachaczyk"], 405 "year": 2024, 406 "relevance": "Studies LLM trade-off behaviors with conflicting motivations, relevant to understanding LLM decision-making and anthropomorphic behavior." 407 }, 408 { 409 "title": "Causal Abstraction: A Theoretical Foundation for Mechanistic Interpretability", 410 "authors": ["Atticus Geiger", "Duligur Ibeling", "Amir Zur"], 411 "year": 2023, 412 "arxiv_id": "2301.04709", 413 "relevance": "Provides theoretical foundations for causal analysis in neural networks, key methodology for interpreting LLM internal mechanisms." 414 }, 415 { 416 "title": "Route Sparse Autoencoder to Interpret Large Language Models", 417 "authors": ["Wei Shi", "Sihang Li", "Tao Liang"], 418 "year": 2025, 419 "arxiv_id": "2503.08200", 420 "relevance": "Advances SAE methodology with multi-layer routing for improved interpretability of LLM internals." 421 } 422 ] 423 }