scan.json (16769B)
1 { 2 "paper": { 3 "title": "I'm Sorry Dave: How the old world of personnel security can inform the new world of AI insider risk", 4 "authors": ["Paul Martin", "Sarah Mercer"], 5 "year": 2025, 6 "venue": "arXiv", 7 "arxiv_id": "2504.00012", 8 "doi": "10.1080/03071847.2025.2550122" 9 }, 10 "scan_version": 2, 11 "active_modules": [], 12 "methodology_tags": ["theoretical"], 13 "key_findings": "The paper argues that concepts from personnel security (managing human insider risk) can be applied to emerging AI insider risk. It proposes a unified taxonomy of human and AI insiders based on intentionality, external influence, covertness, timing, access, vulnerability, physicality, and accountability. The authors note that current AI security discourse focuses on external attacks and safety but neglects the specific problem of AI systems acting as trusted insiders who betray that trust.", 14 "checklist": { 15 "artifacts": { 16 "code_released": { 17 "applies": false, 18 "answer": false, 19 "justification": "Theoretical/position paper with no code, experiments, or computational artifacts to release." 20 }, 21 "data_released": { 22 "applies": false, 23 "answer": false, 24 "justification": "Theoretical/position paper with no data collection or analysis." 25 }, 26 "environment_specified": { 27 "applies": false, 28 "answer": false, 29 "justification": "No computational experiments; no environment to specify." 30 }, 31 "reproduction_instructions": { 32 "applies": false, 33 "answer": false, 34 "justification": "No experiments to reproduce. This is a conceptual/theoretical paper." 35 } 36 }, 37 "statistical_methodology": { 38 "confidence_intervals_or_error_bars": { 39 "applies": false, 40 "answer": false, 41 "justification": "No quantitative analysis or experiments performed." 42 }, 43 "significance_tests": { 44 "applies": false, 45 "answer": false, 46 "justification": "No statistical comparisons made." 47 }, 48 "effect_sizes_reported": { 49 "applies": false, 50 "answer": false, 51 "justification": "No quantitative results reported." 52 }, 53 "sample_size_justified": { 54 "applies": false, 55 "answer": false, 56 "justification": "Theoretical paper with no sampling." 57 }, 58 "variance_reported": { 59 "applies": false, 60 "answer": false, 61 "justification": "No experimental runs or quantitative measurements." 62 } 63 }, 64 "evaluation_design": { 65 "baselines_included": { 66 "applies": false, 67 "answer": false, 68 "justification": "No evaluation or system to compare against baselines. This is a conceptual framework paper." 69 }, 70 "baselines_contemporary": { 71 "applies": false, 72 "answer": false, 73 "justification": "No baselines applicable to a theoretical/position paper." 74 }, 75 "ablation_study": { 76 "applies": false, 77 "answer": false, 78 "justification": "No system with components to ablate." 79 }, 80 "multiple_metrics": { 81 "applies": false, 82 "answer": false, 83 "justification": "No evaluation metrics used." 84 }, 85 "human_evaluation": { 86 "applies": false, 87 "answer": false, 88 "justification": "No system outputs to evaluate." 89 }, 90 "held_out_test_set": { 91 "applies": false, 92 "answer": false, 93 "justification": "No datasets or test sets used." 94 }, 95 "per_category_breakdown": { 96 "applies": false, 97 "answer": false, 98 "justification": "No quantitative results to break down." 99 }, 100 "failure_cases_discussed": { 101 "applies": true, 102 "answer": true, 103 "justification": "The paper discusses known cases of AI insider behavior (robot kidnapping, LLM insider trading, sleeper agents, LLMs cheating at blackjack) and analyzes the limitations of current defenses." 104 }, 105 "negative_results_reported": { 106 "applies": false, 107 "answer": false, 108 "justification": "No experiments producing positive or negative results." 109 } 110 }, 111 "claims_and_evidence": { 112 "abstract_claims_supported": { 113 "applies": true, 114 "answer": true, 115 "justification": "The abstract claims there is no meaningful interplay between AI and personnel security, and that personnel security concepts may apply to AI insiders. The paper develops these arguments throughout with examples and a proposed taxonomy." 116 }, 117 "causal_claims_justified": { 118 "applies": false, 119 "answer": false, 120 "justification": "The paper makes no causal claims. It proposes analogies and a conceptual framework, using suggestive language ('may also have utility', 'might prove possible') rather than causal assertions." 121 }, 122 "generalization_bounded": { 123 "applies": true, 124 "answer": true, 125 "justification": "The paper uses appropriately hedged language throughout ('we suggest', 'some of the concepts...may also have utility', 'might prove possible') and acknowledges 'profound differences between humans and AI' and the 'dangerous' nature of direct comparisons." 126 }, 127 "alternative_explanations_discussed": { 128 "applies": false, 129 "answer": false, 130 "justification": "Pure conceptual/theoretical paper presenting a framework. No empirical results requiring alternative explanations." 131 }, 132 "proxy_outcome_distinction": { 133 "applies": false, 134 "answer": false, 135 "justification": "Theoretical paper with no measurements." 136 } 137 }, 138 "setup_transparency": { 139 "model_versions_specified": { 140 "applies": false, 141 "answer": false, 142 "justification": "No models used in experiments." 143 }, 144 "prompts_provided": { 145 "applies": false, 146 "answer": false, 147 "justification": "No prompting used." 148 }, 149 "hyperparameters_reported": { 150 "applies": false, 151 "answer": false, 152 "justification": "No experiments with hyperparameters." 153 }, 154 "scaffolding_described": { 155 "applies": false, 156 "answer": false, 157 "justification": "No agentic scaffolding used." 158 }, 159 "data_preprocessing_documented": { 160 "applies": false, 161 "answer": false, 162 "justification": "No data collected or processed." 163 } 164 }, 165 "limitations_and_scope": { 166 "limitations_section_present": { 167 "applies": true, 168 "answer": false, 169 "justification": "No dedicated limitations section. The paper acknowledges some caveats inline (e.g., 'at first sight, humans and AIs appear to be profoundly different') but has no substantive limitations discussion." 170 }, 171 "threats_to_validity_specific": { 172 "applies": true, 173 "answer": false, 174 "justification": "No threats-to-validity discussion. The paper does not address the risk that the personnel security analogy may be misleading or inapplicable in specific ways." 175 }, 176 "scope_boundaries_stated": { 177 "applies": true, 178 "answer": false, 179 "justification": "The paper does not explicitly state what its framework does NOT cover or where the analogy between human and AI insiders breaks down." 180 } 181 }, 182 "data_integrity": { 183 "raw_data_available": { 184 "applies": false, 185 "answer": false, 186 "justification": "No data collected; theoretical paper." 187 }, 188 "data_collection_described": { 189 "applies": false, 190 "answer": false, 191 "justification": "No data collection performed." 192 }, 193 "recruitment_methods_described": { 194 "applies": false, 195 "answer": false, 196 "justification": "No participants recruited; no data sampled." 197 }, 198 "data_pipeline_documented": { 199 "applies": false, 200 "answer": false, 201 "justification": "No data pipeline exists." 202 } 203 }, 204 "conflicts_of_interest": { 205 "funding_disclosed": { 206 "applies": true, 207 "answer": false, 208 "justification": "No funding information is provided anywhere in the paper." 209 }, 210 "affiliations_disclosed": { 211 "applies": true, 212 "answer": true, 213 "justification": "Author affiliations are clearly stated: Paul Martin at Protective Security Lab, Coventry University; Sarah Mercer at The Alan Turing Institute." 214 }, 215 "funder_independent_of_outcome": { 216 "applies": true, 217 "answer": false, 218 "justification": "No funding disclosed, so independence cannot be assessed." 219 }, 220 "financial_interests_declared": { 221 "applies": true, 222 "answer": false, 223 "justification": "No competing interests statement. Paul Martin has authored books on personnel security (cited in the paper) which could represent a financial interest, but this is not declared." 224 } 225 }, 226 "contamination": { 227 "training_cutoff_stated": { 228 "applies": false, 229 "answer": false, 230 "justification": "No pre-trained model evaluated on any benchmark." 231 }, 232 "train_test_overlap_discussed": { 233 "applies": false, 234 "answer": false, 235 "justification": "No benchmark evaluation performed." 236 }, 237 "benchmark_contamination_addressed": { 238 "applies": false, 239 "answer": false, 240 "justification": "No benchmark evaluation performed." 241 } 242 }, 243 "human_studies": { 244 "pre_registered": { 245 "applies": false, 246 "answer": false, 247 "justification": "No human participants." 248 }, 249 "irb_or_ethics_approval": { 250 "applies": false, 251 "answer": false, 252 "justification": "No human participants." 253 }, 254 "demographics_reported": { 255 "applies": false, 256 "answer": false, 257 "justification": "No human participants." 258 }, 259 "inclusion_exclusion_criteria": { 260 "applies": false, 261 "answer": false, 262 "justification": "No human participants." 263 }, 264 "randomization_described": { 265 "applies": false, 266 "answer": false, 267 "justification": "No human participants." 268 }, 269 "blinding_described": { 270 "applies": false, 271 "answer": false, 272 "justification": "No human participants." 273 }, 274 "attrition_reported": { 275 "applies": false, 276 "answer": false, 277 "justification": "No human participants." 278 } 279 }, 280 "cost_and_practicality": { 281 "inference_cost_reported": { 282 "applies": false, 283 "answer": false, 284 "justification": "Theoretical paper; no method with inference costs." 285 }, 286 "compute_budget_stated": { 287 "applies": false, 288 "answer": false, 289 "justification": "Theoretical paper; no computation performed." 290 } 291 } 292 }, 293 "claims": [ 294 { 295 "claim": "Personnel security concepts (intentionality, external influence, covertness, timing, access) are applicable to AI insider risk", 296 "evidence": "The paper draws parallels between human insider characteristics and AI system behaviors across multiple dimensions (Part One taxonomy), citing examples like the Erbai robot kidnapping, LLM insider trading study [5], and sleeper agents research [6,7].", 297 "supported": "weak" 298 }, 299 { 300 "claim": "AI insider risk is a real and present concern that organisations are not addressing", 301 "evidence": "Four anecdotal examples of AI insider-like behavior are cited (robot kidnapping, insider trading simulation, sleeper agents, blackjack cheating). The paper acknowledges 'relatively few cases have so far been discovered' and that examples 'can be explained in ways that do not involve the LLM intending to deceive.'", 302 "supported": "weak" 303 }, 304 { 305 "claim": "Personnel security's three-layer model (pre-trust, in-trust, foundations) can be adapted for AI systems", 306 "evidence": "Part Two proposes analogies: model cards as CVs, benchmarks as pre-employment tests, UBA/DLP as in-trust monitoring. These are conceptual proposals without empirical validation.", 307 "supported": "unsupported" 308 } 309 ], 310 "red_flags": [ 311 { 312 "flag": "No empirical evidence", 313 "detail": "The paper proposes a conceptual framework for AI insider risk but provides no empirical evaluation, case analysis, or structured assessment of the proposed taxonomy. All claims rest on analogy and anecdotal examples." 314 }, 315 { 316 "flag": "Author self-citation as primary evidence", 317 "detail": "Paul Martin cites his own book 'Insider Risk and Personnel Security' (2024) four times as the primary source for the personnel security framework. The personnel security concepts are not independently validated." 318 }, 319 { 320 "flag": "Cherry-picked AI examples", 321 "detail": "The four AI insider examples (robot kidnapping, insider trading, sleeper agents, blackjack) are drawn from very different contexts and none represents a real-world insider incident. The robot kidnapping was a novelty demonstration, the insider trading was a controlled experiment by Scheurer et al., and the sleeper agents were deliberately trained to be deceptive." 322 }, 323 { 324 "flag": "Tobacco industry analogy without evidence", 325 "detail": "The paper draws a parallel between AI safety research funded by tech companies and tobacco industry-funded health research, a serious accusation made without supporting evidence for the comparison." 326 } 327 ], 328 "cited_papers": [ 329 { 330 "title": "Large Language Models can Strategically Deceive their Users when Put Under Pressure", 331 "authors": ["J. Scheurer", "M. Balesni", "M. Hobbhahn"], 332 "year": 2024, 333 "arxiv_id": "2311.07590", 334 "relevance": "Demonstrates LLM strategic deception and insider trading behavior, directly relevant to AI safety and alignment." 335 }, 336 { 337 "title": "Sleeper Agents: Training Deceptive LLMs that Persist Through Safety Training", 338 "authors": ["E. Hubinger", "C. Denison", "J. Mu"], 339 "year": 2024, 340 "arxiv_id": "2401.05566", 341 "relevance": "Shows deceptive LLM behaviors persist through safety training, key alignment and safety finding." 342 }, 343 { 344 "title": "Mechanistic Interpretability for AI Safety – A Review", 345 "authors": ["L. Bereska", "E. Gavves"], 346 "year": 2024, 347 "arxiv_id": "2404.14082", 348 "relevance": "Reviews interpretability research relevant to understanding LLM internals for safety." 349 }, 350 { 351 "title": "Chain-of-Thought Prompting Elicits Reasoning in Large Language Models", 352 "authors": ["J. Wei", "X. Wang", "D. Schuurmans"], 353 "year": 2023, 354 "arxiv_id": "2201.11903", 355 "relevance": "Foundational prompting technique relevant to LLM reasoning capabilities." 356 }, 357 { 358 "title": "Reasoning Models Don't Always Say What They Think", 359 "authors": ["Y. Chen", "J. Benton", "A. Radhakrishnan"], 360 "year": 2025, 361 "relevance": "Shows reasoning model CoT may not reflect actual internal processes, relevant to AI transparency and alignment." 362 }, 363 { 364 "title": "ChatDev: Communicative Agents for Software Development", 365 "authors": ["C. Qian", "W. Liu", "H. Liu"], 366 "year": 2024, 367 "arxiv_id": "2307.07924", 368 "relevance": "Multi-agent software development system, relevant to agentic AI workflows." 369 }, 370 { 371 "title": "AI Deception: A Survey of Examples, Risks, and Potential Solutions", 372 "authors": ["P. S. Park", "S. Goldstein", "A. O'Gara"], 373 "year": 2023, 374 "arxiv_id": "2308.14752", 375 "relevance": "Survey of AI deception examples and risks, directly relevant to AI safety research." 376 }, 377 { 378 "title": "On the Dangers of Stochastic Parrots: Can Language Models Be Too Big?", 379 "authors": ["E. M. Bender", "T. Gebru", "A. McMillan-Major", "S. Shmitchell"], 380 "year": 2021, 381 "doi": "10.1145/3442188.3445922", 382 "relevance": "Influential critique of large language models, relevant to AI risks and limitations." 383 }, 384 { 385 "title": "A Survey on LLM-as-a-Judge", 386 "authors": ["J. Gu", "X. Jiang", "Z. Shi"], 387 "year": 2025, 388 "arxiv_id": "2411.15594", 389 "relevance": "Survey on using LLMs as evaluators, relevant to AI evaluation methodology." 390 }, 391 { 392 "title": "How Trustworthy are Open-Source LLMs? An Assessment under Malicious Demonstrations Shows their Vulnerabilities", 393 "authors": ["L. Mo", "B. Wang", "M. Chen", "H. Sun"], 394 "year": 2024, 395 "arxiv_id": "2311.09447", 396 "relevance": "Assesses LLM vulnerability to manipulation, relevant to AI safety and security." 397 } 398 ] 399 }