ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (22357B)


      1 {
      2   "paper": {
      3     "title": "LLM Harms: A Taxonomy and Discussion",
      4     "authors": ["Kevin Chen", "Saleh Afroogh", "Abhejay Murali", "David Atkinson", "Amit Dhurandhar", "Junfeng Jiao"],
      5     "year": 2025,
      6     "venue": "arXiv.org",
      7     "arxiv_id": "2512.05929",
      8     "doi": "10.48550/arXiv.2512.05929"
      9   },
     10   "scan_version": 2,
     11   "active_modules": ["survey_methodology"],
     12   "checklist": {
     13     "artifacts": {
     14       "code_released": {
     15         "applies": true,
     16         "answer": false,
     17         "justification": "No code repository, analysis scripts, or data archive is mentioned anywhere in the paper."
     18       },
     19       "data_released": {
     20         "applies": true,
     21         "answer": false,
     22         "justification": "The coded corpus of 200 papers and extraction sheets are not released. No download link or supplementary data provided."
     23       },
     24       "environment_specified": {
     25         "applies": true,
     26         "answer": false,
     27         "justification": "No computational environment or tooling details are provided for reproducing the systematic review analysis."
     28       },
     29       "reproduction_instructions": {
     30         "applies": true,
     31         "answer": false,
     32         "justification": "No step-by-step instructions for reproducing the review. While the methodology section describes the search strategy, there are no scripts or detailed replication guides."
     33       }
     34     },
     35     "statistical_methodology": {
     36       "confidence_intervals_or_error_bars": {
     37         "applies": false,
     38         "answer": false,
     39         "justification": "This is a systematic review/taxonomy paper that does not run experiments producing quantitative results requiring confidence intervals."
     40       },
     41       "significance_tests": {
     42         "applies": false,
     43         "answer": false,
     44         "justification": "No statistical comparisons are made; the paper synthesizes literature qualitatively."
     45       },
     46       "effect_sizes_reported": {
     47         "applies": false,
     48         "answer": false,
     49         "justification": "No experiments are conducted; the paper reports findings from reviewed literature, not its own effect sizes."
     50       },
     51       "sample_size_justified": {
     52         "applies": false,
     53         "answer": false,
     54         "justification": "No experiment with samples; the corpus size of 200 papers is a product of the search/screening process, not a sample size requiring power analysis."
     55       },
     56       "variance_reported": {
     57         "applies": false,
     58         "answer": false,
     59         "justification": "No experimental runs are performed; this is a literature review."
     60       }
     61     },
     62     "evaluation_design": {
     63       "baselines_included": {
     64         "applies": true,
     65         "answer": false,
     66         "justification": "The paper does not compare its taxonomy against prior harm taxonomies (e.g., Weidinger et al.) in a structured way, though it mentions them in related work."
     67       },
     68       "baselines_contemporary": {
     69         "applies": true,
     70         "answer": false,
     71         "justification": "No structured comparison against contemporary taxonomies or frameworks is provided."
     72       },
     73       "ablation_study": {
     74         "applies": false,
     75         "answer": false,
     76         "justification": "A taxonomy paper has no system components to ablate."
     77       },
     78       "multiple_metrics": {
     79         "applies": false,
     80         "answer": false,
     81         "justification": "No experimental evaluation with metrics is conducted."
     82       },
     83       "human_evaluation": {
     84         "applies": false,
     85         "answer": false,
     86         "justification": "No system outputs to evaluate; this is a literature synthesis."
     87       },
     88       "held_out_test_set": {
     89         "applies": false,
     90         "answer": false,
     91         "justification": "No experimental evaluation requiring train/test splits."
     92       },
     93       "per_category_breakdown": {
     94         "applies": true,
     95         "answer": true,
     96         "justification": "The taxonomy is organized into 5 major harm categories with 15 subcategories, and Figure 2 shows the aggregate distribution of publication categories by harm cluster. Section 6.2 reports approximate proportions (~60% direct output, ~50% misuse, ~30% societal, ~25% downstream)."
     97       },
     98       "failure_cases_discussed": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "Section VI (Discussion) details where current mitigations fail: 'toxic-speech filters still miss 7% of non-English slurs', governance is 'fragmentary', and safety progress is 'reactive, patching symptoms faster than root causes'."
    102       },
    103       "negative_results_reported": {
    104         "applies": true,
    105         "answer": true,
    106         "justification": "The paper reports negative findings about the state of the field: mitigations are insufficient, governance is fragmented, harms are persistent despite improvements, and certain harm categories are under-researched."
    107       }
    108     },
    109     "claims_and_evidence": {
    110       "abstract_claims_supported": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "The abstract claims five categories of harms and proposes mitigation strategies, which are delivered in Sections IV-V. The claims are qualitative and supported by the literature synthesis."
    114       },
    115       "causal_claims_justified": {
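    116         "applies": true,
    117         "answer": false,
    118         "justification": "Most causal statements are synthesized from the reviewed literature, but Section 7.1 asserts causal chains between harm clusters (e.g., 'prompt-injection exploits can resurrect pre-training privacy leaks') on conceptual argument alone, with no empirical evidence of the linkages."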
    119       },
    120       "generalization_bounded": {
    121         "applies": true,
    122         "answer": true,
    123         "justification": "The paper explicitly bounds scope: 'text-based LLMs ≥ 7B parameters; vision-language hybrids and small domain-specific models are out of scope' and covers publications from Jan 2021 to Jun 2025."
    124       },
    125       "alternative_explanations_discussed": {
    126         "applies": true,
    127         "answer": false,
    128         "justification": "The paper does not discuss alternative interpretations of its synthesis findings or consider whether its taxonomy structure might miss important dimensions. No alternative framings of the harm landscape are considered."
    129       },
    130       "proxy_outcome_distinction": {
    131         "applies": false,
    132         "answer": false,
    133         "justification": "This is a taxonomy/survey paper with no measurements of its own; no proxy-outcome gap exists."
    134       }
    135     },
    136     "setup_transparency": {
    137       "model_versions_specified": {
    138         "applies": false,
    139         "answer": false,
    140         "justification": "The paper does not use any AI models as part of its methodology (it reviews papers about models)."
    141       },
    142       "prompts_provided": {
    143         "applies": false,
    144         "answer": false,
    145         "justification": "No prompting is used in the methodology."
    146       },
    147       "hyperparameters_reported": {
    148         "applies": false,
    149         "answer": false,
    150         "justification": "No model experiments are conducted."
    151       },
    152       "scaffolding_described": {
    153         "applies": false,
    154         "answer": false,
    155         "justification": "No agentic scaffolding is used."
    156       },
    157       "data_preprocessing_documented": {
    158         "applies": true,
    159         "answer": true,
    160         "justification": "Section III describes the PRISMA flow: 1,986 records plus 24 seed papers, screened down to a final corpus of 200 papers. Table 2 shows search phrases per harm cluster. Figure 1 provides the PRISMA flow diagram. Filtering criteria and saturation rules are described."
    161       }
    162     },
    163     "limitations_and_scope": {
    164       "limitations_section_present": {
    165         "applies": true,
    166         "answer": false,
    167         "justification": "There is no dedicated limitations section. The conclusion discusses future directions but does not systematically address the study's own limitations."
    168       },
    169       "threats_to_validity_specific": {
    170         "applies": true,
    171         "answer": false,
    172         "justification": "No specific threats to validity of this review are discussed. No mention of search completeness limitations, coding reliability, or potential reviewer bias."
    173       },
    174       "scope_boundaries_stated": {
    175         "applies": true,
    176         "answer": true,
    177         "justification": "The paper states: 'text-based LLMs ≥ 7B parameters; vision-language hybrids and small domain-specific models are out of scope' and bounds the temporal scope to Jan 2021-Jun 2025."
    178       }
    179     },
    180     "data_integrity": {
    181       "raw_data_available": {
    182         "applies": true,
    183         "answer": false,
    184         "justification": "The coded dataset of 200 papers with harm type, severity, prevalence, and mitigation codes is not released for verification."
    185       },
    186       "data_collection_described": {
    187         "applies": true,
    188         "answer": true,
    189         "justification": "Section III describes multi-database search across 9 databases, the date range (Jan 2021-Jun 2025), the anchor-plus-topic query design, the saturation rule, and coding process including a pilot on 10 papers."
    190       },
    191       "recruitment_methods_described": {
    192         "applies": true,
    193         "answer": false,
    194         "justification": "The paper mentions 'ten expert interviews with safety engineers and policymakers' but provides no details on how these experts were recruited, selected, or what their backgrounds were."
    195       },
    196       "data_pipeline_documented": {
    197         "applies": true,
    198         "answer": true,
    199         "justification": "Figure 1 provides the PRISMA flow diagram showing the pipeline from 1,986 records to a final corpus of 200 papers. Section III describes the coding process, saturation rules, and query design."
    200       }
    201     },
    202     "conflicts_of_interest": {
    203       "funding_disclosed": {
    204         "applies": true,
    205         "answer": true,
    206         "justification": "Acknowledgements state: 'This research is funded by the NSF grants 2125858, 2236305 and UT-Good Systems Grand Challenge.'"
    207       },
    208       "affiliations_disclosed": {
    209         "applies": true,
    210         "answer": true,
    211         "justification": "Author affiliations are listed: University of Texas at Austin (Urban Information Lab, McCombs School of Business) and IBM Research."
    212       },
    213       "funder_independent_of_outcome": {
    214         "applies": true,
    215         "answer": true,
    216         "justification": "NSF and UT-Good Systems Grand Challenge are academic funding sources with no financial stake in the taxonomy's particular findings about LLM harms."
    217       },
    218       "financial_interests_declared": {
    219         "applies": true,
    220         "answer": true,
    221         "justification": "The paper includes: 'Conflict of interest: The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.' Note: one author is from IBM Research, which develops AI products, but this is disclosed via affiliation."
    222       }
    223     },
    224     "contamination": {
    225       "training_cutoff_stated": {
    226         "applies": false,
    227         "answer": false,
    228         "justification": "This is a systematic review that does not evaluate any pre-trained model on benchmarks."
    229       },
    230       "train_test_overlap_discussed": {
    231         "applies": false,
    232         "answer": false,
    233         "justification": "No model evaluation is conducted."
    234       },
    235       "benchmark_contamination_addressed": {
    236         "applies": false,
    237         "answer": false,
    238         "justification": "No benchmark evaluation is conducted."
    239       }
    240     },
    241     "human_studies": {
    242       "pre_registered": {
    243         "applies": true,
    244         "answer": true,
    245         "justification": "The paper states: 'The protocol was registered prospectively on the Open Science Framework using the Generic Systematic-Review template, giving it a permanent DOI and time-stamping any later changes.'"
    246       },
    247       "irb_or_ethics_approval": {
    248         "applies": true,
    249         "answer": false,
    250         "justification": "The paper states 'Institutional Review Board Statement: Not applicable' and 'Informed Consent Statement: Not applicable,' despite conducting ten expert interviews which typically require ethics review."
    251       },
    252       "demographics_reported": {
    253         "applies": true,
    254         "answer": false,
    255         "justification": "The ten expert interviewees are described only as 'safety engineers and policymakers' with no demographics, experience levels, or institutional affiliations reported."
    256       },
    257       "inclusion_exclusion_criteria": {
    258         "applies": true,
    259         "answer": false,
    260         "justification": "No inclusion/exclusion criteria for the expert interview participants are provided."
    261       },
    262       "randomization_described": {
    263         "applies": false,
    264         "answer": false,
    265         "justification": "Expert interviews are not an experimental study requiring randomization."
    266       },
    267       "blinding_described": {
    268         "applies": false,
    269         "answer": false,
    270         "justification": "Expert interviews are not an experimental study requiring blinding."
    271       },
    272       "attrition_reported": {
    273         "applies": true,
    274         "answer": false,
    275         "justification": "No information on how many experts were approached vs. how many participated, or whether any dropped out."
    276       }
    277     },
    278     "cost_and_practicality": {
    279       "inference_cost_reported": {
    280         "applies": false,
    281         "answer": false,
    282         "justification": "This is a survey paper; no computational method with inference costs is proposed."
    283       },
    284       "compute_budget_stated": {
    285         "applies": false,
    286         "answer": false,
    287         "justification": "This is a survey paper with no significant computation."
    288       }
    289     },
    290     "survey_methodology": {
    291       "prisma_or_structured_protocol": {
    292         "applies": true,
    293         "answer": true,
    294         "justification": "Section III states: 'We drafted the review plan in line with the PRISMA-2020 checklist.' The protocol was registered on OSF. Figure 1 provides the PRISMA flow diagram. Table 2 shows the keyword matrix."
    295       },
    296       "quality_assessment_of_sources": {
    297         "applies": true,
    298         "answer": false,
    299         "justification": "No quality scoring rubric or risk-of-bias assessment is applied to the 200 included studies. The paper codes studies for 'harm type, severity, prevalence, and mitigation claims' but does not assess their methodological quality. All papers are treated equally regardless of rigor."
    300       },
    301       "publication_bias_discussed": {
    302         "applies": true,
    303         "answer": false,
    304         "justification": "No discussion of publication bias. No funnel plots, no acknowledgment that published harm studies may skew toward dramatic findings, no tests for publication bias."
    305       }
    306     }
    307   },
    308   "claims": [
    309     {
    310       "claim": "LLM harms fall into five lifecycle-ordered categories: pre-deployment, direct output, misuse/malicious application, societal/systemic, and downstream application.",
    311       "evidence": "Section IV presents the full taxonomy with 5 categories and 15 subcategories, supported by synthesis of 200 reviewed papers.",
    312       "supported": "moderate"
    313     },
    314     {
    315       "claim": "Harm clusters layer rather than replace one another: privacy-violating corpus data seeds output bias, which adversaries weaponize via jailbreaks.",
    316       "evidence": "Section VI Discussion and Section 7.1 Harm Interdependencies describe these chains conceptually, but no quantitative evidence of causal linkages is provided.",
    317       "supported": "weak"
    318     },
    319     {
    320       "claim": "Red-teaming plus constitutional fine-tuning cuts jailbreak success by ~40% on Llama 3-8B without crippling utility.",
    321       "evidence": "Section 5.1 cites this finding from Anthropic's constitutional AI replication work (references [204], [205]).",
    322       "supported": "moderate"
    323     },
    324     {
    325       "claim": "About 60% of reviewed papers focus on direct output problems, ~50% on malicious uses, ~30% on societal impacts, and ~25% on downstream harms.",
    326       "evidence": "Section 6.2 states these approximate proportions. Figure 2 shows aggregate distribution, though exact counts are not provided.",
    327       "supported": "weak"
    328     },
    329     {
    330       "claim": "Governance levers are fragmentary, with the EU AI Act imposing transparency duties while the U.S. relies on voluntary guidance.",
    331       "evidence": "Section 5.2 and Section VI describe the regulatory landscape with specific references to EU AI Act provisions and NIST RMF.",
    332       "supported": "moderate"
    333     }
    334   ],
    335   "methodology_tags": ["meta-analysis", "qualitative"],
    336   "key_findings": "This systematic review of 200 papers proposes a five-category lifecycle taxonomy of LLM harms: pre-deployment, direct output, misuse, societal/systemic, and downstream application. The paper finds that harm clusters are interdependent and layered, mitigation efficacy is uneven and domain-specific, and governance frameworks remain fragmentary. It identifies multi-agent ecosystems as an emerging harm category and calls for compute governance, dynamic auditing, and cross-disciplinary collaboration.",
    337   "red_flags": [
    338     {
    339       "flag": "No quality assessment of included studies",
    340       "detail": "The survey reviews 200 papers but applies no quality scoring or risk-of-bias assessment. All studies are treated equally regardless of methodological rigor, laundering weak results alongside strong ones."
    341     },
    342     {
    343       "flag": "Expert interviews minimally described",
    344       "detail": "Ten expert interviews with 'safety engineers and policymakers' are mentioned but never described in detail: no interview protocol, no recruitment method, no demographics, no quotes or coded themes are presented. It is unclear how interview findings influenced the taxonomy."
    345     },
    346     {
    347       "flag": "Claims outrun evidence on harm interdependencies",
    348       "detail": "Section 7.1 claims harm clusters are causally linked (e.g., 'prompt-injection exploits can resurrect pre-training privacy leaks') but provides no empirical evidence of these causal chains beyond conceptual argument."
    349     },
    350     {
    351       "flag": "No limitations section",
    352       "detail": "Despite being a systematic review, the paper lacks a dedicated limitations section discussing search completeness, inter-coder reliability, potential biases in the review process, or coverage gaps."
    353     },
    354     {
    355       "flag": "AI tool used without transparency about extent",
    356       "detail": "Acknowledgements note 'the use AI-powered tools, such as OpenAI's applications, for assistance in editing and brainstorming' but do not specify which sections or to what extent AI was used in the analysis or writing."
    357     },
    358     {
    359       "flag": "Approximate statistics without rigorous counts",
    360       "detail": "Section 6.2 reports 'about 60%' and '~50%' of papers in various categories without providing exact counts, suggesting these may be rough estimates rather than systematic coding results."
    361     }
    362   ],
    363   "cited_papers": [
    364     {
    365       "title": "Ethical and social risks of harm from Language Models",
    366       "authors": ["L. Weidinger"],
    367       "year": 2021,
    368       "arxiv_id": "2112.04359",
    369       "relevance": "Foundational LLM harm taxonomy covering discrimination/toxicity, information hazards, misuse, HCI harms, and environmental impacts — key prior framework this paper builds on."
    370     },
    371     {
    372       "title": "Training language models to follow instructions with human feedback",
    373       "authors": ["L. Ouyang"],
    374       "year": 2022,
    375       "arxiv_id": "2203.02155",
    376       "relevance": "InstructGPT paper establishing the RLHF recipe for alignment, a core mitigation technique discussed in the survey."
    377     },
    378     {
    379       "title": "Universal and Transferable Adversarial Attacks on Aligned Language Models",
    380       "authors": ["A. Zou", "Z. Wang", "N. Carlini"],
    381       "year": 2023,
    382       "arxiv_id": "2307.15043",
    383       "relevance": "Demonstrates universal jailbreak suffixes bypassing safety filters in >80% of trials, key adversarial attack result."
    384     },
    385     {
    386       "title": "A Survey on Hallucination in Large Language Models: Principles, Taxonomy, Challenges, and Open Questions",
    387       "authors": ["L. Huang"],
    388       "year": 2024,
    389       "doi": "10.1145/3703155",
    390       "relevance": "Comprehensive hallucination survey cataloguing error types and rates up to 23% in open-ended QA."
    391     },
    392     {
    393       "title": "Bias and Fairness in Large Language Models: A Survey",
    394       "authors": ["I. O. Gallegos"],
    395       "year": 2023,
    396       "doi": "10.1162/coli_a_00524",
    397       "relevance": "Survey showing stereotype benchmarks still surface gender, race, and disability prejudice in GPT-4-class models."
    398     },
    399     {
    400       "title": "Taxonomy of Risks posed by Language Models",
    401       "authors": ["L. Weidinger"],
    402       "year": 2022,
    403       "doi": "10.1145/3531146.3533088",
    404       "relevance": "Earlier ACM taxonomy of language model risks that this paper extends with a lifecycle-aware framework."
    405     },
    406     {
    407       "title": "Scaling Laws for Neural Language Models",
    408       "authors": ["J. Kaplan"],
    409       "year": 2020,
    410       "arxiv_id": "2001.08361",
    411       "relevance": "Foundational scaling laws paper showing power-law improvements with parameters/data/compute."
    412     },
    413     {
    414       "title": "Training Compute-Optimal Large Language Models",
    415       "authors": ["J. Hoffmann"],
    416       "year": 2022,
    417       "arxiv_id": "2203.15556",
    418       "relevance": "Chinchilla paper revising scaling laws for compute-optimal training."
    419     },
    420     {
    421       "title": "Datasheets for Datasets",
    422       "authors": ["T. Gebru"],
    423       "year": 2018,
    424       "doi": "10.1145/3458723",
    425       "relevance": "Foundational work on dataset documentation and provenance disclosure for AI accountability."
    426     },
    427     {
    428       "title": "Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks",
    429       "authors": ["P. Lewis"],
    430       "year": 2020,
    431       "arxiv_id": "2005.11401",
    432       "relevance": "Seminal RAG paper coupling parametric models with retrieval for factuality improvement."
    433     },
    434     {
    435       "title": "Why Do Multi-Agent LLM Systems Fail?",
    436       "authors": ["M. Cemri"],
    437       "year": 2025,
    438       "arxiv_id": "2503.13657",
    439       "relevance": "Directly relevant to multi-agent failure modes discussed in the emerging harm categories section."
    440     },
    441     {
    442       "title": "Risk Taxonomy, Mitigation, and Assessment Benchmarks of Large Language Model Systems",
    443       "authors": ["T. Cui"],
    444       "year": 2024,
    445       "arxiv_id": "2401.05778",
    446       "relevance": "Complementary LLM risk taxonomy with mitigation benchmarks."
    447     }
    448   ]
    449 }

Impressum · Datenschutz