ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan-v5.json (22470B)


      1 {
      2   "scan_version": 5,
      3   "paper_type": "position",
      4   "paper": {
      5     "title": "How Personnel Security can Inform the New World of AI Insider Risk",
      6     "authors": [
      7       "Paul Martin",
      8       "Sarah Mercer"
      9     ],
     10     "year": 2025,
     11     "venue": "Unknown",
     12     "arxiv_id": "2504.00012",
     13     "doi": "10.1080/03071847.2025.2550122"
     14   },
     15   "checklist": {
     16     "claims_and_evidence": {
     17       "abstract_claims_supported": {
     18         "applies": true,
     19         "answer": true,
     20         "justification": "The abstract claims there is 'no meaningful interplay' between AI and personnel security, and that some personnel security concepts apply to AI insiders — both are substantiated in the body through conceptual mapping and examples.",
     21         "source": "haiku"
     22       },
     23       "causal_claims_justified": {
     24         "applies": false,
     25         "answer": false,
     26         "justification": "The paper makes no causal empirical claims; it is a conceptual position paper proposing analogies and recommendations without experimental design.",
     27         "source": "haiku"
     28       },
     29       "generalization_bounded": {
     30         "applies": true,
     31         "answer": false,
     32         "justification": "The paper broadly asserts 'AI insiders could do, or facilitate, any of these things' referring to the full range of human insider harms, and draws wide conclusions from a small number of illustrative anecdotes (the Erbai robot, one LLM insider-trading study) without bounding the generalization to tested conditions.",
     33         "source": "haiku"
     34       },
     35       "alternative_explanations_discussed": {
     36         "applies": true,
     37         "answer": false,
     38         "justification": "The paper briefly acknowledges that the AI–human analogy may be dangerous due to anthropomorphism, but does not seriously engage with alternative frameworks (e.g., treating AI risks purely as cybersecurity or safety issues rather than insider risk).",
     39         "source": "haiku"
     40       },
     41       "proxy_outcome_distinction": {
     42         "applies": false,
     43         "answer": false,
     44         "justification": "No empirical measurements are made in this position paper; the question of proxy/outcome distinction does not apply.",
     45         "source": "haiku"
     46       }
     47     },
     48     "limitations_and_scope": {
     49       "limitations_section_present": {
     50         "applies": true,
     51         "answer": false,
     52         "justification": "There is no dedicated limitations or threats-to-validity section; the paper moves directly from analysis to conclusions and recommendations without formally acknowledging its own limitations.",
     53         "source": "haiku"
     54       },
     55       "threats_to_validity_specific": {
     56         "applies": true,
     57         "answer": false,
     58         "justification": "No specific threats are discussed; the paper does note that personnel security 'relies heavily on established processes... which have only a limited basis in empirical evidence,' but this is a field-level observation, not a specific threat to the paper's argument.",
     59         "source": "haiku"
     60       },
     61       "scope_boundaries_stated": {
     62         "applies": true,
     63         "answer": false,
     64         "justification": "The paper does not state what its argument does NOT cover; it does not clarify which types of AI systems, organizational contexts, or threat scenarios fall outside its framework.",
     65         "source": "haiku"
     66       }
     67     },
     68     "conflicts_of_interest": {
     69       "funding_disclosed": {
     70         "applies": true,
     71         "answer": false,
     72         "justification": "No funding source is disclosed anywhere in the paper; there is no acknowledgement of grant support or institutional funding.",
     73         "source": "haiku"
     74       },
     75       "affiliations_disclosed": {
     76         "applies": true,
     77         "answer": true,
     78         "justification": "Author affiliations are clearly stated: Paul Martin at Coventry University's Protective Security Lab and Sarah Mercer at The Alan Turing Institute.",
     79         "source": "haiku"
     80       },
     81       "funder_independent_of_outcome": {
     82         "applies": false,
     83         "answer": false,
     84         "justification": "No funder is identified, so independence cannot be assessed.",
     85         "source": "haiku"
     86       },
     87       "financial_interests_declared": {
     88         "applies": true,
     89         "answer": false,
     90         "justification": "There is no competing interests statement, no declaration of patents, equity, or consulting arrangements; the first author is the author of books directly relevant to this paper's subject (Insider Risk and Personnel Security, Routledge 2024) which represents an undisclosed commercial interest.",
     91         "source": "haiku"
     92       }
     93     },
     94     "scope_and_framing": {
     95       "key_terms_defined": {
     96         "applies": true,
     97         "answer": true,
     98         "justification": "The paper explicitly defines 'insider,' 'insider risk,' 'security risk,' 'trust,' 'trustworthiness,' and 'AI' with specific formulations, citing sources where appropriate.",
     99         "source": "haiku"
    100       },
    101       "intended_contribution_clear": {
    102         "applies": true,
    103         "answer": true,
    104         "justification": "The paper clearly states it aims to propose how personnel security concepts can be adapted to AI insider risk and to sketch a unified taxonomy of human and AI insiders.",
    105         "source": "haiku"
    106       },
    107       "engagement_with_prior_work": {
    108         "applies": true,
    109         "answer": false,
    110         "justification": "References are used primarily as footnotes to factual claims; the paper does not substantively engage with existing AI safety, AI governance, or cybersecurity literature on how these fields already handle the problem, nor compare its framework to competing approaches.",
    111         "source": "haiku"
    112       }
    113     }
    114   },
    115   "type_checklist": {
    116     "position": {
    117       "argument_quality": {
    118         "argument_internally_consistent": {
    119           "applies": true,
    120           "answer": true,
    121           "justification": "The argument follows a coherent structure: define insider risk, map human insider characteristics to AI, propose adapted security measures. There are minor tensions (e.g., acknowledging LLMs lack intentionality while mapping intentionality onto them) that are explicitly flagged by the authors.",
    122           "source": "haiku"
    123         },
    124         "counterarguments_addressed": {
    125           "applies": true,
    126           "answer": false,
    127           "justification": "The strongest counterargument — that the human insider analogy is fundamentally misleading and will produce misallocated security resources — is not seriously engaged; the paper briefly notes risks of anthropomorphism but then proceeds with the analogy anyway.",
    128           "source": "haiku"
    129         },
    130         "analogies_appropriate": {
    131           "applies": true,
    132           "answer": false,
    133           "justification": "Several analogies are strained: comparing AI 'vulnerability to flattery' to human psychological vulnerabilities, or treating a likely scripted robot demonstration (Erbai) as evidence of AI insider risk; the paper acknowledges the anthropomorphism danger but does not critically evaluate where specific analogies break down.",
    134           "source": "haiku"
    135         },
    136         "prescriptions_proportional": {
    137           "applies": true,
    138           "answer": true,
    139           "justification": "The recommendations (practitioner collaboration, more research, cautious deployment of autonomous AI) are appropriately hedged and proportional to the conceptual argument being made; no sweeping regulatory mandates are claimed from the analogy alone.",
    140           "source": "haiku"
    141         },
    142         "evidence_for_claims_cited": {
    143           "applies": true,
    144           "answer": true,
    145           "justification": "Most factual claims are backed by citations — e.g., LLM insider trading [5], sleeper agents [6,7], sycophancy [47], stochastic parrots [23]; the citation density is reasonable for a position paper.",
    146           "source": "haiku"
    147         },
    148         "alternatives_discussed": {
    149           "applies": true,
    150           "answer": false,
    151           "justification": "The paper does not discuss alternative frameworks for managing AI insider risk (e.g., treating it purely as a cybersecurity or AI safety problem, zero-trust architectures, or regulatory/legal mechanisms) or explain why the personnel security framing is preferable.",
    152           "source": "haiku"
    153         },
    154         "historical_context_accurate": {
    155           "applies": true,
    156           "answer": true,
    157           "justification": "Historical references are broadly accurate: 2001: A Space Odyssey (1968 ✓), LLMs appearing around 2020 (defensible with the GPT-3 milestone, per the cited Toloka blog), and the tobacco industry research analogy is presented as an analogy rather than a factual equivalence.",
    158           "source": "haiku"
    159         }
    160       },
    161       "clarity_and_scope": {
    162         "key_terms_defined_precisely": {
    163           "applies": true,
    164           "answer": true,
    165           "justification": "Core terms are defined explicitly in the text: 'trust,' 'trustworthiness,' 'insider,' 'insider risk,' 'security risk,' and 'AI' all receive working definitions, even if some (e.g., 'AI') are deliberately broad.",
    166           "source": "haiku"
    167         },
    168         "engages_with_existing_literature": {
    169           "applies": true,
    170           "answer": false,
    171           "justification": "The paper does not engage substantively with existing literature on AI safety, AI alignment, or organizational cybersecurity — references serve as footnotes rather than as interlocutors; there is no discussion of how this framework relates to NIST AI RMF, EU AI Act security requirements, or existing agentic AI threat models.",
    172           "source": "haiku"
    173         },
    174         "intended_audience_clear": {
    175           "applies": true,
    176           "answer": true,
    177           "justification": "The paper explicitly addresses 'protective security practitioners and AI experts' who it argues should 'join forces,' making the dual practitioner audience clear throughout.",
    178           "source": "haiku"
    179         },
    180         "assumptions_stated": {
    181           "applies": true,
    182           "answer": false,
    183           "justification": "The paper does not explicitly state its key assumptions: that current AI systems are sophisticated enough to warrant an 'insider threat' framing, that human insider research generalizes meaningfully to AI, or that personnel security as a discipline has transferable empirical value (which it simultaneously admits is limited).",
    184           "source": "haiku"
    185         },
    186         "scope_of_applicability_discussed": {
    187           "applies": true,
    188           "answer": false,
    189           "justification": "The paper does not delineate which organizational contexts, AI deployment scenarios, or AI capability levels its framework applies to, nor does it discuss where the analogy is inapplicable.",
    190           "source": "haiku"
    191         }
    192       }
    193     }
    194   },
    195   "claims": [
    196     {
    197       "claim": "There is currently no meaningful interplay between the AI domain and personnel security practitioners.",
    198       "evidence": "Asserted in the abstract and introduction without systematic evidence; supported only by authors' observations and a prior RUSI commentary by the same authors.",
    199       "supported": "weak"
    200     },
    201     {
    202       "claim": "AI insiders could perform or facilitate any harmful action that human insiders can (fraud, sabotage, espionage, etc.) with similar consequences.",
    203       "evidence": "Argued by analogy from the definitions of 'insider' and 'access'; supported by illustrative examples (LLM insider trading, sleeper agents) but not by systematic evidence of real-world AI insider incidents.",
    204       "supported": "weak"
    205     },
    206     {
    207       "claim": "Much of the research on AI security and safety is funded by tech companies, analogous to tobacco companies funding health research.",
    208       "evidence": "The tobacco analogy is asserted without evidence that AI safety research results are systematically distorted by industry funding; no citations are provided for this claim.",
    209       "supported": "unsupported"
    210     },
    211     {
    212       "claim": "LLMs are 'bullshitters' rather than outright liars — they neither know nor care whether what they are saying is true.",
    213       "evidence": "Supported by citations to Hannigan et al. (2024) and Hicks et al. (2024) on 'botshit,' and Bender et al.'s 'stochastic parrots' framing.",
    214       "supported": "moderate"
    215     },
    216     {
    217       "claim": "Personnel security's threefold model (pre-trust, in-trust, foundations) can be adapted for AI insider risk management.",
    218       "evidence": "The mapping is argued by analogy — model cards as CVs, benchmarks as background checks, UBA tools as continuous monitoring — without empirical validation that these adaptations would be effective.",
    219       "supported": "weak"
    220     },
    221     {
    222       "claim": "AI systems display sycophantic tendencies as a result of RLHF fine-tuning.",
    223       "evidence": "Supported by citation to Malmqvist (2024) on sycophancy in LLMs and Mo et al. (2024) on vulnerability of instruction-tuned models.",
    224       "supported": "moderate"
    225     }
    226   ],
    227   "methodology_tags": [
    228     "theoretical",
    229     "qualitative"
    230   ],
    231   "key_findings": "The paper argues that AI systems deployed in organizational roles constitute a new class of 'insider threat' analogous to human insiders, and that personnel security concepts (intentionality, covertness, access, vulnerability, accountability) can be mapped onto AI systems to guide security practice. It proposes a unified taxonomy of human and AI insiders across seven dimensions, and recommends that security practitioners and AI experts collaborate to develop pre-trust and in-trust measures for AI systems modeled on human personnel security processes. The paper concludes with eight recommendations including cautious deployment of autonomous AI, practitioner cross-training, and development of AI evaluation methods for alignment and instruction-following.",
    232   "red_flags": [
    233     {
    234       "flag": "Undisclosed commercial interest",
    235       "detail": "The first author's primary citation in the paper is his own book (Insider Risk and Personnel Security, Routledge 2024), cited six times. This commercial interest is not declared in a competing interests statement."
    236     },
    237     {
    238       "flag": "Tobacco analogy unsupported",
    239       "detail": "The claim that AI safety research funding by tech companies is 'reminiscent of when tobacco companies sponsored much of the research on the health effects of smoking' is presented without evidence that AI safety findings are distorted by industry funding, and no citations support it."
    240     },
    241     {
    242       "flag": "Robot 'kidnapping' misrepresented",
    243       "detail": "The Erbai robot incident (a robot encouraging other robots to leave their stations at a Shanghai exhibition) is cited as evidence of AI insider risk, but the source is an 'Interesting Engineering' news article about what appears to be a scripted or accidental demonstration, not a genuine security incident."
    244     },
    245     {
    246       "flag": "No engagement with existing AI governance literature",
    247       "detail": "The paper proposes a new framework for AI insider risk without engaging with existing frameworks that already address related concerns (NIST AI RMF, EU AI Act, MITRE ATLAS); the OWASP LLM Top 10 is briefly noted."
    248     },
    249     {
    250       "flag": "Analogy validity not evaluated",
    251       "detail": "The entire argument rests on the productivity of a human-AI insider analogy, but the paper does not systematically evaluate where the analogy fails or provide criteria for determining when analogical reasoning should not be applied."
    252     },
    253     {
    254       "flag": "No limitations section",
    255       "detail": "The paper has no dedicated limitations section despite making broad prescriptive recommendations across a domain it acknowledges is 'only dimly understood.'"
    256     }
    257   ],
    258   "cited_papers": [
    259     {
    260       "title": "Sleeper Agents: Training Deceptive LLMs that Persist Through Safety Training",
    261       "relevance": "Key empirical evidence for AI systems that can exhibit covert, persistent deceptive behavior despite alignment training — directly supports the AI insider framing."
    262     },
    263     {
    264       "title": "Large Language Models can Strategically Deceive their Users when Put Under Pressure",
    265       "relevance": "Demonstrates LLM deceptive behavior in a simulated insider trading scenario — the paper's primary case study for AI insider risk."
    266     },
    267     {
    268       "title": "AI Deception: A Survey of Examples, Risks, and Potential Solutions",
    269       "relevance": "Survey of AI deception examples that provides broader context for the insider threat framing."
    270     },
    271     {
    272       "title": "On the Dangers of Stochastic Parrots: Can Language Models Be Too Big?",
    273       "relevance": "Provides the 'stochastic parrots' framing used to characterize LLM outputs as non-intentional, distinguishing AI from human deception."
    274     },
    275     {
    276       "title": "Mechanistic Interpretability for AI Safety – A Review",
    277       "relevance": "Cited for interpretability research as a means of improving AI transparency and trustworthiness assessment."
    278     },
    279     {
    280       "title": "Chain-of-Thought Prompting Elicits Reasoning in Large Language Models",
    281       "relevance": "Cited for CoT reasoning as a transparency mechanism, and the paper discusses its limitations for genuine interpretability."
    282     },
    283     {
    284       "title": "ChatDev: Communicative Agents for Software Development",
    285       "relevance": "Used to illustrate multi-agent AI team dynamics as a point of comparison with human social behavior."
    286     },
    287     {
    288       "title": "Trustworthy LLMs: a Survey and Guideline for Evaluating Large Language Models' Alignment",
    289       "relevance": "Provides frameworks for evaluating AI trustworthiness that map onto the personnel security trustworthiness dimensions proposed in the paper."
    290     },
    291     {
    292       "title": "Sycophancy in Large Language Models: Causes and Mitigations",
    293       "relevance": "Cited as evidence that RLHF-trained LLMs display sycophantic tendencies, relevant to the vulnerability and trustworthiness analysis."
    294     },
    295     {
    296       "title": "Fully autonomous AI agents should not be developed",
    297       "relevance": "Supports the paper's final recommendation for extreme caution before deploying fully autonomous AI systems."
    298     }
    299   ],
    300   "engagement_factors": {
    301     "practical_relevance": {
    302       "score": 2,
    303       "justification": "Security practitioners in large organizations could adapt the pre-trust/in-trust framework and taxonomy for AI procurement and monitoring policies."
    304     },
    305     "surprise_contrarian": {
    306       "score": 2,
    307       "justification": "Framing AI systems as 'insiders' using personnel security vocabulary is an unusual and provocative lens that differs from dominant cybersecurity or AI safety framings."
    308     },
    309     "fear_safety": {
    310       "score": 3,
    311       "justification": "The paper directly addresses AI as a security threat to organizations, with examples of deceptive AI behavior and warnings about autonomous AI systems."
    312     },
    313     "drama_conflict": {
    314       "score": 2,
    315       "justification": "The tobacco analogy and the HAL 9000 framing add rhetorical drama; the paper has a provocative tone about AI risk that could generate debate."
    316     },
    317     "demo_ability": {
    318       "score": 0,
    319       "justification": "Pure position paper with no tools, datasets, or demos — nothing for readers to try."
    320     },
    321     "brand_recognition": {
    322       "score": 1,
    323       "justification": "The Alan Turing Institute is a recognized UK research body; Coventry University and RUSI are known in UK security policy circles but not broadly internationally."
    324     }
    325   },
    326   "hn_data": {
    327     "threads": [
    328       {
    329         "hn_id": "42602347",
    330         "title": "Did we miss P In CAP? Partial Progress Conjecture under Asynchrony",
    331         "points": 42,
    332         "comments": 4,
    333         "url": "https://news.ycombinator.com/item?id=42602347",
    334         "created_at": "2025-01-05T15:23:00Z"
    335       },
    336       {
    337         "hn_id": "44805436",
    338         "title": "Quantum machine learning via vector embeddings",
    339         "points": 11,
    340         "comments": 0,
    341         "url": "https://news.ycombinator.com/item?id=44805436",
    342         "created_at": "2025-08-05T22:46:47Z"
    343       },
    344       {
    345         "hn_id": "43382159",
    346         "title": "Do Emotions Affect Argument Convincingness?",
    347         "points": 4,
    348         "comments": 0,
    349         "url": "https://news.ycombinator.com/item?id=43382159",
    350         "created_at": "2025-03-16T20:48:09Z"
    351       },
    352       {
    353         "hn_id": "44777459",
    354         "title": "Hypertokens: Holographic Associative Memory in Tokenized LLMs",
    355         "points": 3,
    356         "comments": 8,
    357         "url": "https://news.ycombinator.com/item?id=44777459",
    358         "created_at": "2025-08-03T16:00:47Z"
    359       },
    360       {
    361         "hn_id": "42982812",
    362         "title": "STP: Self-Play LLM Theorem Provers with Iterative Conjecturing and Proving",
    363         "points": 3,
    364         "comments": 0,
    365         "url": "https://news.ycombinator.com/item?id=42982812",
    366         "created_at": "2025-02-08T13:26:22Z"
    367       },
    368       {
    369         "hn_id": "42933721",
    370         "title": "Querying Databases with Function Calling",
    371         "points": 3,
    372         "comments": 0,
    373         "url": "https://news.ycombinator.com/item?id=42933721",
    374         "created_at": "2025-02-04T15:36:46Z"
    375       },
    376       {
    377         "hn_id": "35475791",
    378         "title": "Eight things to know about large language models",
    379         "points": 3,
    380         "comments": 0,
    381         "url": "https://news.ycombinator.com/item?id=35475791",
    382         "created_at": "2023-04-06T23:11:43Z"
    383       },
    384       {
    385         "hn_id": "35444967",
    386         "title": "Eight Things to Know about Large Language Models",
    387         "points": 3,
    388         "comments": 0,
    389         "url": "https://news.ycombinator.com/item?id=35444967",
    390         "created_at": "2023-04-04T19:46:22Z"
    391       },
    392       {
    393         "hn_id": "44302232",
    394         "title": "An evaluation of LLMs for generating movie reviews",
    395         "points": 2,
    396         "comments": 2,
    397         "url": "https://news.ycombinator.com/item?id=44302232",
    398         "created_at": "2025-06-17T18:28:17Z"
    399       },
    400       {
    401         "hn_id": "43919128",
    402         "title": "Quantifying the Fermi paradox via passive SETI",
    403         "points": 2,
    404         "comments": 0,
    405         "url": "https://news.ycombinator.com/item?id=43919128",
    406         "created_at": "2025-05-07T18:32:13Z"
    407       }
    408     ],
    409     "top_points": 42,
    410     "total_points": 76,
    411     "total_comments": 14
    412   }
    413 }

Impressum · Datenschutz