scan.json (26226B)
1 { 2 "paper": { 3 "title": "Epistemic Alignment: A Mediating Framework for User-LLM Knowledge Delivery", 4 "authors": ["Nicholas Clark", "Hua Shen", "Bill Howe", "Tanushree Mitra"], 5 "year": 2025, 6 "venue": "arXiv.org", 7 "arxiv_id": "2504.01205", 8 "doi": "10.48550/arXiv.2504.01205" 9 }, 10 "scan_version": 3, 11 "active_modules": [], 12 "methodology_tags": ["qualitative", "theoretical"], 13 "key_findings": "The paper proposes ten epistemic challenges across three dimensions (epistemic responsibility, epistemic personalization, testimonial reliability) derived from philosophical epistemology. Thematic analysis of 128 Reddit custom instructions shows 92.1% address at least one challenge and 80.3% address multiple, despite no standardized vocabulary. Content analysis of OpenAI and Anthropic policies reveals both providers acknowledge epistemic challenges in documentation but lack structured interface mechanisms for users to specify citation standards, uncertainty expression, or perspective balance.", 14 "checklist": { 15 "artifacts": { 16 "code_released": { 17 "applies": true, 18 "answer": false, 19 "justification": "No code repository or archive is provided. The paper uses GPT-4o-mini and GPT-4o for analysis but does not release any analysis scripts." 20 }, 21 "data_released": { 22 "applies": true, 23 "answer": false, 24 "justification": "The 128 custom instructions dataset extracted from Reddit is not released. Appendix B provides only examples (Table 2), not the full dataset." 25 }, 26 "environment_specified": { 27 "applies": true, 28 "answer": false, 29 "justification": "No environment specification is provided. The paper mentions using GPT-4o-mini and GPT-4o but provides no software versions, API details, or reproducibility setup." 30 }, 31 "reproduction_instructions": { 32 "applies": true, 33 "answer": false, 34 "justification": "No reproduction instructions are provided. 
The Reddit API query parameters are listed in Table 1 and prompts are in Appendix A, but there are no step-by-step instructions for reproducing the full analysis pipeline." 35 } 36 }, 37 "statistical_methodology": { 38 "confidence_intervals_or_error_bars": { 39 "applies": true, 40 "answer": false, 41 "justification": "The paper reports point estimates (92.1%, 80.3%, κ = 0.8875) without any confidence intervals or error bars." 42 }, 43 "significance_tests": { 44 "applies": true, 45 "answer": false, 46 "justification": "No statistical significance tests are used. Prevalence rates and inter-rater reliability are reported as descriptive statistics only." 47 }, 48 "effect_sizes_reported": { 49 "applies": true, 50 "answer": false, 51 "justification": "No effect sizes are reported. The Cohen's Kappa (κ = 0.8875) serves as an agreement measure, but no effect sizes are provided for the main findings about challenge prevalence." 52 }, 53 "sample_size_justified": { 54 "applies": true, 55 "answer": false, 56 "justification": "The sample of 128 custom instructions is reported without justification for why this size is sufficient. No power analysis or saturation analysis is discussed." 57 }, 58 "variance_reported": { 59 "applies": true, 60 "answer": false, 61 "justification": "No variance or spread measures are reported for any of the quantitative results." 62 } 63 }, 64 "evaluation_design": { 65 "baselines_included": { 66 "applies": true, 67 "answer": false, 68 "justification": "No baseline frameworks or alternative taxonomies are systematically compared against. The related work section discusses relevant literature but does not benchmark the proposed framework against prior frameworks for coverage or completeness." 69 }, 70 "baselines_contemporary": { 71 "applies": true, 72 "answer": false, 73 "justification": "No baselines are included, so contemporariness cannot be assessed." 
74 }, 75 "ablation_study": { 76 "applies": false, 77 "answer": false, 78 "justification": "The contribution is a conceptual framework with ten challenges. There is no multi-component system to ablate." 79 }, 80 "multiple_metrics": { 81 "applies": true, 82 "answer": false, 83 "justification": "Apart from the aggregate prevalence percentages (92.1%, 80.3%), the only quantitative metric reported is Cohen's Kappa for inter-rater reliability (κ = 0.8875). No additional metrics assess framework validity, coverage, or usefulness." 84 }, 85 "human_evaluation": { 86 "applies": true, 87 "answer": true, 88 "justification": "Two human experts independently validated the GPT-4o challenge labels, achieving inter-rater reliability of κ = 0.8875 (Section 5). This constitutes human evaluation of the annotation quality." 89 }, 90 "held_out_test_set": { 91 "applies": false, 92 "answer": false, 93 "justification": "No train/test paradigm is used. The paper performs qualitative thematic analysis, not predictive modeling." 94 }, 95 "per_category_breakdown": { 96 "applies": true, 97 "answer": false, 98 "justification": "While the paper discusses each of the ten challenges qualitatively and provides example instructions in Table 2, no per-challenge quantitative prevalence rates are reported. The reader cannot determine which challenges are most or least frequently addressed." 99 }, 100 "failure_cases_discussed": { 101 "applies": true, 102 "answer": false, 103 "justification": "No failure cases of the framework are discussed. The paper does not address custom instructions that did not fit any challenge or cases where the framework's categories were ambiguous or overlapping." 104 }, 105 "negative_results_reported": { 106 "applies": true, 107 "answer": false, 108 "justification": "No negative results are reported. Every aspect of the analysis supports the framework. There is no discussion of challenges that were rarely observed or areas where the framework proved inadequate."
109 } 110 }, 111 "claims_and_evidence": { 112 "abstract_claims_supported": { 113 "applies": true, 114 "answer": true, 115 "justification": "The abstract claims the framework identifies ten challenges, that thematic analysis finds users develop workarounds for each, and that providers fail to establish adequate mechanisms. Sections 4, 5, and 6 support these claims with the framework definition, thematic analysis results, and provider content analysis respectively." 116 }, 117 "causal_claims_justified": { 118 "applies": false, 119 "answer": false, 120 "justification": "The paper makes descriptive and evaluative claims about the state of epistemic alignment. It does not make causal claims about what causes epistemic misalignment or what interventions would improve it." 121 }, 122 "generalization_bounded": { 123 "applies": true, 124 "answer": false, 125 "justification": "The framework is presented as general ('for AI developers' and 'for users') but the empirical validation is limited to Reddit custom instructions from 4 subreddits and two model providers (OpenAI, Anthropic). The paper does not bound its generalizations to these specific populations and platforms." 126 }, 127 "alternative_explanations_discussed": { 128 "applies": true, 129 "answer": false, 130 "justification": "No alternative explanations are discussed. For example, custom instructions addressing the ten challenges could reflect social copying rather than independent user needs. The paper does not consider whether its framework imposes categories that may not reflect actual user mental models." 131 }, 132 "proxy_outcome_distinction": { 133 "applies": true, 134 "answer": false, 135 "justification": "The paper uses Reddit custom instructions as a proxy for user epistemic preferences broadly but does not acknowledge the gap between Reddit power users who share prompting strategies and typical LLM users. 
The framework is presented as addressing general user needs but validated only on a self-selected online community." 136 } 137 }, 138 "setup_transparency": { 139 "model_versions_specified": { 140 "applies": true, 141 "answer": false, 142 "justification": "The paper uses 'GPT-4o-mini' and 'GPT-4o' without specifying snapshot dates or API versions. These models change behavior across versions." 143 }, 144 "prompts_provided": { 145 "applies": true, 146 "answer": true, 147 "justification": "Full prompt text is provided in Appendix A: Prompt 1 for custom instruction extraction and Prompt 2 for identifying epistemic challenges, including complete instructions and output format." 148 }, 149 "hyperparameters_reported": { 150 "applies": true, 151 "answer": false, 152 "justification": "The paper mentions 'zero-shot prompting with GPT-4o-mini' but reports no temperature, top-p, or other generation parameters for either GPT-4o-mini or GPT-4o." 153 }, 154 "scaffolding_described": { 155 "applies": false, 156 "answer": false, 157 "justification": "No agentic scaffolding is used. The analysis pipeline consists of straightforward API calls for classification." 158 }, 159 "data_preprocessing_documented": { 160 "applies": true, 161 "answer": true, 162 "justification": "Section 5 and Table 1 document the data pipeline: Reddit API query with specific search terms and subreddits, filtering to top-level comments >100 characters, GPT-4o-mini classification to identify custom instructions (yielding 128), followed by GPT-4o analysis and human validation." 163 } 164 }, 165 "limitations_and_scope": { 166 "limitations_section_present": { 167 "applies": true, 168 "answer": false, 169 "justification": "There is no dedicated limitations section. Section 7 is titled 'Discussion & Conclusion' and discusses contributions and proposed interface designs but does not substantively address limitations of the work." 
170 }, 171 "threats_to_validity_specific": { 172 "applies": true, 173 "answer": false, 174 "justification": "No specific threats to validity are discussed. The paper does not address the representativeness of Reddit data, the circular use of LLMs to study LLM interaction patterns, or the subjectivity in framework construction." 175 }, 176 "scope_boundaries_stated": { 177 "applies": true, 178 "answer": false, 179 "justification": "No explicit scope boundaries are stated. The paper does not identify what settings, user populations, or platform types the framework does NOT apply to." 180 } 181 }, 182 "data_integrity": { 183 "raw_data_available": { 184 "applies": true, 185 "answer": false, 186 "justification": "The raw Reddit custom instructions dataset is not released. Only selected examples appear in Table 2." 187 }, 188 "data_collection_described": { 189 "applies": true, 190 "answer": true, 191 "justification": "Table 1 provides detailed Reddit API query parameters including search query, keyword filters, subreddits (ChatGPT, ChatGPTPro, ClaudeAI, OpenAI), time frame (past 2 years), and comment/instruction length filters." 192 }, 193 "recruitment_methods_described": { 194 "applies": true, 195 "answer": true, 196 "justification": "The data sourcing is described: posts from four specific subreddits matching search terms, filtered to top-level comments >100 characters, then GPT-4o-mini classified to identify actual custom instructions. The sourcing method is clear even if selection bias is not discussed." 197 }, 198 "data_pipeline_documented": { 199 "applies": true, 200 "answer": false, 201 "justification": "The pipeline stages are described (Reddit query → comment extraction → GPT-4o-mini filter → GPT-4o analysis → human validation) but intermediate counts are missing. The paper does not report how many total posts were retrieved, how many comments were extracted before the 100-character filter, or how many passed that filter before GPT-4o-mini classification." 
202 } 203 }, 204 "conflicts_of_interest": { 205 "funding_disclosed": { 206 "applies": true, 207 "answer": false, 208 "justification": "No funding source is disclosed. There is no acknowledgments section or funding statement in the paper." 209 }, 210 "affiliations_disclosed": { 211 "applies": true, 212 "answer": true, 213 "justification": "All authors are disclosed as affiliated with the University of Washington. They are evaluating OpenAI and Anthropic products as external academics, not as employees of either company." 214 }, 215 "funder_independent_of_outcome": { 216 "applies": true, 217 "answer": false, 218 "justification": "No funding is disclosed, so independence of funder from outcome cannot be verified." 219 }, 220 "financial_interests_declared": { 221 "applies": true, 222 "answer": false, 223 "justification": "No competing interests or financial interests statement is included in the paper." 224 } 225 }, 226 "contamination": { 227 "training_cutoff_stated": { 228 "applies": false, 229 "answer": false, 230 "justification": "The paper does not evaluate a pre-trained model's capability on any benchmark. GPT-4o-mini and GPT-4o are used as classification tools, not as subjects of evaluation." 231 }, 232 "train_test_overlap_discussed": { 233 "applies": false, 234 "answer": false, 235 "justification": "No benchmark evaluation is performed. The models are used as annotation tools, not tested for capability." 236 }, 237 "benchmark_contamination_addressed": { 238 "applies": false, 239 "answer": false, 240 "justification": "No benchmark evaluation is performed." 241 } 242 }, 243 "human_studies": { 244 "pre_registered": { 245 "applies": false, 246 "answer": false, 247 "justification": "No human participants. The paper analyzes publicly posted Reddit content and has two expert annotators for validation, but does not conduct a human subjects study." 
248 }, 249 "irb_or_ethics_approval": { 250 "applies": false, 251 "answer": false, 252 "justification": "No human participants in the traditional sense. The paper mines public Reddit data." 253 }, 254 "demographics_reported": { 255 "applies": false, 256 "answer": false, 257 "justification": "No human participants. Reddit users whose posts were analyzed are anonymous and not treated as study subjects." 258 }, 259 "inclusion_exclusion_criteria": { 260 "applies": false, 261 "answer": false, 262 "justification": "No human participants. Data selection criteria are covered under data_integrity." 263 }, 264 "randomization_described": { 265 "applies": false, 266 "answer": false, 267 "justification": "No human subjects study. No experimental conditions requiring randomization." 268 }, 269 "blinding_described": { 270 "applies": false, 271 "answer": false, 272 "justification": "No human subjects study." 273 }, 274 "attrition_reported": { 275 "applies": false, 276 "answer": false, 277 "justification": "No human participants." 278 } 279 }, 280 "cost_and_practicality": { 281 "inference_cost_reported": { 282 "applies": false, 283 "answer": false, 284 "justification": "This is primarily a theoretical/qualitative paper proposing a framework. The GPT-4o usage is for annotation, not a proposed method. Cost reporting is not relevant." 285 }, 286 "compute_budget_stated": { 287 "applies": false, 288 "answer": false, 289 "justification": "Primarily theoretical/qualitative work. Compute budget is not relevant." 290 } 291 } 292 }, 293 "engagement_factors": { 294 "practical_relevance": { 295 "score": 1, 296 "justification": "The framework provides conceptual vocabulary but no tools or techniques a practitioner could directly implement." 297 }, 298 "surprise_contrarian": { 299 "score": 1, 300 "justification": "The finding that users develop elaborate workarounds is somewhat known in the prompt engineering community; the formalization into epistemic dimensions is new but not surprising." 
301 }, 302 "fear_safety": { 303 "score": 0, 304 "justification": "No AI safety, security, or risk concerns are raised beyond general knowledge delivery quality." 305 }, 306 "drama_conflict": { 307 "score": 1, 308 "justification": "Evaluates OpenAI and Anthropic as failing to provide adequate epistemic customization mechanisms, mild critical angle." 309 }, 310 "demo_ability": { 311 "score": 0, 312 "justification": "No code, demo, or tool is provided." 313 }, 314 "brand_recognition": { 315 "score": 2, 316 "justification": "Directly evaluates ChatGPT and Claude products from OpenAI and Anthropic, well-known brands, though the authors are from UW (not a famous AI lab)." 317 } 318 }, 319 "claims": [ 320 { 321 "claim": "92.1% of analyzed custom instructions address at least one epistemic challenge, and 80.3% address multiple challenges.", 322 "evidence": "Section 5 reports these percentages from thematic analysis of 128 custom instructions extracted from Reddit, with GPT-4o classification validated by two human experts (κ = 0.8875).", 323 "supported": "moderate" 324 }, 325 { 326 "claim": "All ten epistemic challenges in the framework are independently addressed by users through custom instructions and prompting strategies.", 327 "evidence": "Section 5 and Table 2 (Appendix B) provide examples of custom instructions corresponding to each of the ten challenges, showing 'independent emergence of solutions to all ten challenges across diverse user instructions.'", 328 "supported": "moderate" 329 }, 330 { 331 "claim": "Both OpenAI and Anthropic acknowledge epistemic challenges in documentation but fail to provide structured interface mechanisms for specifying epistemic preferences.", 332 "evidence": "Sections 6.1 and 6.2 present content analysis of provider documentation. For OpenAI: 'while it mentions reliable sources, it lacks detailed mechanisms for citation verification.' 
For Anthropic: 'the interface still lacks dimension-specific controls for specifying citation standards.'", 333 "supported": "moderate" 334 }, 335 { 336 "claim": "Users develop three prominent folk theories of model behavior: 'Suppressing Default Behavior,' 'Expert Persona,' and 'Parameter Configuration.'", 337 "evidence": "Section 5 describes these three folk theories with specific examples from the custom instructions dataset (e.g., 'Avoid any language constructs that could be interpreted as expressing remorse' for suppression).", 338 "supported": "moderate" 339 } 340 ], 341 "red_flags": [ 342 { 343 "flag": "Non-representative sample", 344 "detail": "The 128 custom instructions come from 4 Reddit subreddits (r/ChatGPT, r/ChatGPTPro, r/OpenAI, r/ClaudeAI). Reddit power users who share prompting strategies are not representative of typical LLM users. The paper generalizes to 'users' broadly without acknowledging this selection bias." 345 }, 346 { 347 "flag": "Circular LLM-assisted coding", 348 "detail": "GPT-4o-mini was used to extract custom instructions from comments, and GPT-4o was used to classify which epistemic challenges each instruction addresses. Using LLMs to classify data about LLM interaction patterns introduces a circular dependency — the model may impose its own understanding of epistemic concepts rather than capturing user intent." 349 }, 350 { 351 "flag": "No limitations section", 352 "detail": "The paper has no dedicated limitations section despite significant methodological choices that could affect validity: small sample, non-representative population, LLM-assisted coding, evaluation of only two providers, and reliance on publicly documented policies rather than actual system behavior." 353 }, 354 { 355 "flag": "Missing intermediate pipeline counts", 356 "detail": "The data collection pipeline does not report how many Reddit posts were retrieved, how many comments were extracted, or how many passed the 100-character filter.
Only the final count (128 custom instructions) is reported, making it impossible to assess the selectivity of each filtering stage." 357 }, 358 { 359 "flag": "Framework validated by its own creators", 360 "detail": "The ten-challenge framework was created by the authors and then validated by the same authors finding instances in Reddit data. There is no independent evaluation of whether the framework categories are comprehensive, non-overlapping, or meaningful to actual users." 361 } 362 ], 363 "cited_papers": [ 364 { 365 "title": "Survey of Hallucination in Natural Language Generation", 366 "authors": ["Ziwei Ji", "Nayeon Lee", "Rita Frieske", "Tiezheng Yu", "Dan Su", "Yan Xu", "Etsuko Ishii", "Yejin Bang", "Delong Chen", "Ho Shu Chan", "Wenliang Dai", "Andrea Madotto", "Pascale Fung"], 367 "year": 2022, 368 "arxiv_id": "2202.03629", 369 "doi": "10.1145/3571730", 370 "relevance": "Foundational survey on LLM hallucination, directly relevant to AI safety and reliability research." 371 }, 372 { 373 "title": "Towards Understanding Sycophancy in Language Models", 374 "authors": ["Mrinank Sharma", "Meg Tong", "Tomasz Korbak", "David Duvenaud", "Amanda Askell", "Samuel R. Bowman"], 375 "year": 2023, 376 "arxiv_id": "2310.13548", 377 "relevance": "Studies LLM sycophancy behavior (deference to user misinformation), a key AI alignment concern." 378 }, 379 { 380 "title": "The Instruction Hierarchy: Training LLMs to Prioritize Privileged Instructions", 381 "authors": ["Eric Wallace", "Kai Xiao", "Reimar Leike", "Lilian Weng", "Johannes Heidecke", "Alex Beutel"], 382 "year": 2024, 383 "arxiv_id": "2404.13208", 384 "relevance": "Addresses LLM instruction following and prioritization, relevant to prompt injection and safety." 
385 }, 386 { 387 "title": "Towards Bidirectional Human-AI Alignment: A Systematic Review for Clarifications, Framework, and Future Directions", 388 "authors": ["Hua Shen", "Tiffany Knearem", "Reshmi Ghosh"], 389 "year": 2024, 390 "arxiv_id": "2406.09264", 391 "relevance": "Systematic review of human-AI alignment approaches, directly relevant to LLM alignment methodology." 392 }, 393 { 394 "title": "Chain-of-Thought Prompting Elicits Reasoning in Large Language Models", 395 "authors": ["Jason Wei", "Xuezhi Wang", "Dale Schuurmans", "Maarten Bosma", "Brian Ichter", "Fei Xia", "Ed Chi", "Quoc Le", "Denny Zhou"], 396 "year": 2022, 397 "arxiv_id": "2201.11903", 398 "relevance": "Foundational prompting technique paper, central to LLM capability and prompting research." 399 }, 400 { 401 "title": "Can Large Language Models Faithfully Express Their Intrinsic Uncertainty in Words?", 402 "authors": ["Gal Yona", "Roee Aharoni", "Mor Geva"], 403 "year": 2024, 404 "arxiv_id": "2405.16908", 405 "relevance": "Studies LLM uncertainty expression capability, relevant to AI reliability and calibration." 406 }, 407 { 408 "title": "Fine-grained Hallucination Detection and Editing for Language Models", 409 "authors": ["Abhika Mishra", "Akari Asai", "Vidhisha Balachandran", "Yizhong Wang", "Graham Neubig", "Yulia Tsvetkov", "Hannaneh Hajishirzi"], 410 "year": 2024, 411 "arxiv_id": "2401.06855", 412 "relevance": "Addresses hallucination detection and mitigation in LLMs, key AI safety research." 413 }, 414 { 415 "title": "LLMs Know More Than They Show: On the Intrinsic Representation of LLM Hallucinations", 416 "authors": ["Hadas Orgad", "Michael Toker", "Zorik Gekhman", "Roi Reichart", "Idan Szpektor", "Hadas Kotek", "Yonatan Belinkov"], 417 "year": 2024, 418 "arxiv_id": "2410.02707", 419 "relevance": "Studies internal representations of hallucinations in LLMs, relevant to understanding model reliability." 
420 }, 421 { 422 "title": "The Art of Defending: A Systematic Evaluation and Analysis of LLM Defense Strategies on Safety and Over-Defensiveness", 423 "authors": ["Neeraj Varshney", "Pavel Dolin", "Agastya Seth", "Chitta Baral"], 424 "year": 2023, 425 "arxiv_id": "2401.00287", 426 "relevance": "Evaluates LLM defense strategies including over-abstention, directly relevant to AI safety and alignment." 427 }, 428 { 429 "title": "Knowledge Conflicts for LLMs: A Survey", 430 "authors": ["Rongwu Xu", "Zehan Qi", "Zhijiang Guo", "Cunxiang Wang", "Hongru Wang", "Yue Zhang", "Wei Xu"], 431 "year": 2024, 432 "arxiv_id": "2403.08319", 433 "relevance": "Surveys knowledge conflicts in LLMs, relevant to understanding LLM reliability and knowledge delivery." 434 }, 435 { 436 "title": "Enabling Large Language Models to Generate Text with Citations", 437 "authors": ["Tianyu Gao", "Howard Yen", "Jiatong Yu", "Danqi Chen"], 438 "year": 2023, 439 "arxiv_id": "2305.14627", 440 "relevance": "Addresses citation generation in LLMs, relevant to verification and reliability of AI outputs." 441 }, 442 { 443 "title": "The Art of Saying No: Contextual Noncompliance in Language Models", 444 "authors": ["Faeze Brahman", "Sachin Kumar", "Vidhisha Balachandran"], 445 "year": 2024, 446 "arxiv_id": "2407.12043", 447 "relevance": "Studies LLM refusal behavior and contextual noncompliance, relevant to AI safety alignment." 448 }, 449 { 450 "title": "A Roadmap to Pluralistic Alignment", 451 "authors": ["Taylor Sorensen", "Jared Moore", "Jillian Fisher", "Mitchell Gordon"], 452 "year": 2024, 453 "arxiv_id": "2402.05070", 454 "relevance": "Proposes framework for pluralistic AI alignment accommodating diverse perspectives, directly relevant to alignment methodology." 
455 }, 456 { 457 "title": "SAFETY-TUNED LLAMAS: Lessons from Improving the Safety of Large Language Models that Follow Instructions", 458 "authors": ["Federico Bianchi", "Mirac Suzgun", "Giuseppe Attanasio", "Paul Röttger", "Dan Jurafsky", "Tatsunori Hashimoto", "James Zou"], 459 "year": 2024, 460 "relevance": "Studies safety tuning of instruction-following LLMs, relevant to alignment and safety research." 461 } 462 ] 463 }