ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan-v4.json (19877B)


      1 {
      2   "scan_version": 4,
      3   "paper_type": "position",
      4   "paper": {
      5     "title": "Epistemic Alignment: A Mediating Framework for User-LLM Knowledge Delivery",
      6     "authors": [
      7       "Nicholas Clark",
      8       "Hua Shen",
      9       "Bill Howe",
     10       "Tanushree Mitra"
     11     ],
     12     "year": 2025,
     13     "venue": "arXiv.org",
     14     "arxiv_id": "2504.01205",
     15     "doi": "10.48550/arXiv.2504.01205"
     16   },
     17   "checklist": {
     18     "claims_and_evidence": {
     19       "abstract_claims_supported": {
     20         "applies": true,
     21         "answer": true,
     22         "justification": "The abstract claims the framework identifies ten challenges, that thematic analysis finds users develop workarounds for each, and that providers fail to establish adequate mechanisms. Sections 4, 5, and 6 support these claims with the framework definition, thematic analysis results, and provider content analysis respectively.",
     23         "source": "opus"
     24       },
     25       "causal_claims_justified": {
     26         "applies": false,
     27         "answer": false,
     28         "justification": "The paper makes descriptive and evaluative claims about the state of epistemic alignment. It does not make causal claims about what causes epistemic misalignment or what interventions would improve it.",
     29         "source": "opus"
     30       },
     31       "generalization_bounded": {
     32         "applies": true,
     33         "answer": false,
     34         "justification": "The framework is presented as general ('for AI developers' and 'for users') but the empirical validation is limited to Reddit custom instructions from 4 subreddits and two model providers (OpenAI, Anthropic). The paper does not bound its generalizations to these specific populations and platforms.",
     35         "source": "opus"
     36       },
     37       "alternative_explanations_discussed": {
     38         "applies": true,
     39         "answer": false,
     40         "justification": "No alternative explanations are discussed. For example, custom instructions addressing the ten challenges could reflect social copying rather than independent user needs. The paper does not consider whether its framework imposes categories that may not reflect actual user mental models.",
     41         "source": "opus"
     42       },
     43       "proxy_outcome_distinction": {
     44         "applies": true,
     45         "answer": false,
     46         "justification": "The paper uses Reddit custom instructions as a proxy for user epistemic preferences broadly but does not acknowledge the gap between Reddit power users who share prompting strategies and typical LLM users. The framework is presented as addressing general user needs but validated only on a self-selected online community.",
     47         "source": "opus"
     48       }
     49     },
     50     "limitations_and_scope": {
     51       "limitations_section_present": {
     52         "applies": true,
     53         "answer": false,
     54         "justification": "There is no dedicated limitations section. Section 7 is titled 'Discussion & Conclusion' and discusses contributions and proposed interface designs but does not substantively address limitations of the work.",
     55         "source": "opus"
     56       },
     57       "threats_to_validity_specific": {
     58         "applies": true,
     59         "answer": false,
     60         "justification": "No specific threats to validity are discussed. The paper does not address the representativeness of Reddit data, the circular use of LLMs to study LLM interaction patterns, or the subjectivity in framework construction.",
     61         "source": "opus"
     62       },
     63       "scope_boundaries_stated": {
     64         "applies": true,
     65         "answer": false,
     66         "justification": "No explicit scope boundaries are stated. The paper does not identify what settings, user populations, or platform types the framework does NOT apply to.",
     67         "source": "opus"
     68       }
     69     },
     70     "conflicts_of_interest": {
     71       "funding_disclosed": {
     72         "applies": true,
     73         "answer": false,
     74         "justification": "No funding source is disclosed. There is no acknowledgments section or funding statement in the paper.",
     75         "source": "opus"
     76       },
     77       "affiliations_disclosed": {
     78         "applies": true,
     79         "answer": true,
     80         "justification": "All authors are disclosed as affiliated with the University of Washington. They are evaluating OpenAI and Anthropic products as external academics, not as employees of either company.",
     81         "source": "opus"
     82       },
     83       "funder_independent_of_outcome": {
     84         "applies": true,
     85         "answer": false,
     86         "justification": "No funding is disclosed, so independence of funder from outcome cannot be verified.",
     87         "source": "opus"
     88       },
     89       "financial_interests_declared": {
     90         "applies": true,
     91         "answer": false,
     92         "justification": "No competing interests or financial interests statement is included in the paper.",
     93         "source": "opus"
     94       }
     95     },
     96     "scope_and_framing": {
     97       "key_terms_defined": {
     98         "applies": true,
     99         "answer": true,
    100         "justification": "Core terms — 'epistemic alignment,' 'epistemic profile,' 'epistemic responsibility,' 'epistemic personalization,' 'testimonial reliability' — are formally defined in Section 3, including mathematical notation for profile components.",
    101         "source": "haiku"
    102       },
    103       "intended_contribution_clear": {
    104         "applies": true,
    105         "answer": true,
    106         "justification": "Section 1 explicitly enumerates four contributions: framework introduction, thematic analysis validation, assessment of current systems, and interface feature recommendations.",
    107         "source": "haiku"
    108       },
    109       "engagement_with_prior_work": {
    110         "applies": true,
    111         "answer": true,
    112         "justification": "Section 2 engages substantively with epistemology literature (Hookway, de Ridder, Goldman, Chinn & Rinehart) and LLM work on hallucination, sycophancy, and uncertainty, showing how the framework builds on these traditions rather than merely listing citations.",
    113         "source": "haiku"
    114       }
    115     }
    116   },
    117   "type_checklist": {
    118     "position": {
    119       "argument_quality": {
    120         "argument_internally_consistent": {
    121           "applies": true,
    122           "answer": true,
    123           "justification": "The argument flows coherently: diverse user epistemic needs exist → current interfaces lack structure → framework identifies the challenges → analysis confirms the framework captures real user concerns → interface redesign is prescribed.",
    124           "source": "haiku"
    125         },
    126         "counterarguments_addressed": {
    127           "applies": true,
    128           "answer": false,
    129           "justification": "The paper does not engage with the strongest counterarguments — e.g., that natural language interfaces may be adequate, that structured controls could worsen UX, or that capability improvements make interface redesign unnecessary.",
    130           "source": "haiku"
    131         },
    132         "analogies_appropriate": {
    133           "applies": true,
    134           "answer": true,
    135           "justification": "The mapping of philosophical epistemology concepts (inquiry, meta-cognitive tasks, testimonial reliability) to LLM interaction challenges is the paper's central move and is executed consistently; the Type I/II error analogy for error vs. ignorance is reasonable if slightly forced.",
    136           "source": "haiku"
    137         },
    138         "prescriptions_proportional": {
    139           "applies": true,
    140           "answer": false,
    141           "justification": "The paper prescribes a detailed four-component interface paradigm based on 128 Reddit instructions and informal content analysis of two companies' documentation — the specificity of prescriptions substantially outpaces the thin evidence base.",
    142           "source": "haiku"
    143         },
    144         "evidence_for_claims_cited": {
    145           "applies": true,
    146           "answer": true,
    147           "justification": "Factual claims about hallucination, sycophancy, abstention, and citation trust are consistently supported with citations to prior empirical work (e.g., Ding et al. 2025, Sharma et al. 2023, Varshney et al. 2023).",
    148           "source": "haiku"
    149         },
    150         "alternatives_discussed": {
    151           "applies": true,
    152           "answer": false,
    153           "justification": "The paper proposes one framework and one interface redesign without discussing alternative approaches (e.g., capability-centric solutions, training-based preference learning) or explaining why the proposed approach is preferred.",
    154           "source": "haiku"
    155         },
    156         "historical_context_accurate": {
    157           "applies": true,
    158           "answer": true,
    159           "justification": "References to Wikipedia's neutral point of view policy, the history of epistemology, and LLM development appear accurate and appropriately cited.",
    160           "source": "haiku"
    161         }
    162       },
    163       "clarity_and_scope": {
    164         "key_terms_defined_precisely": {
    165           "applies": true,
    166           "answer": true,
    167           "justification": "The paper provides formal mathematical definitions of the epistemic profile in Section 3 and precise verbal definitions of all ten framework challenges, going beyond casual usage.",
    168           "source": "haiku"
    169         },
    170         "engages_with_existing_literature": {
    171           "applies": true,
    172           "answer": true,
    173           "justification": "The paper builds substantively on de Ridder's meta-cognitive tasks, Chinn & Rinehart's AIR framework, Goldman's epistemic values, and Sorensen et al.'s pluralism work, demonstrating genuine engagement rather than superficial citation.",
    174           "source": "haiku"
    175         },
    176         "intended_audience_clear": {
    177           "applies": true,
    178           "answer": false,
    179           "justification": "The intended audience is not explicitly stated; the conclusion briefly mentions 'AI developers' and 'users' but the paper's mix of philosophical, empirical, and design-oriented content targets an ambiguous audience.",
    180           "source": "haiku"
    181         },
    182         "assumptions_stated": {
    183           "applies": true,
    184           "answer": false,
    185           "justification": "Key assumptions — that philosophical epistemology maps onto LLM interaction, that Reddit users represent general users, that provider documentation reflects actual model behavior — are never explicitly stated as assumptions the argument depends on.",
    186           "source": "haiku"
    187         },
    188         "scope_of_applicability_discussed": {
    189           "applies": true,
    190           "answer": false,
    191           "justification": "The paper focuses on two providers with brief rationale (56% enterprise share) but does not discuss where the framework would not apply, what types of users or contexts it excludes, or the limits of its applicability.",
    192           "source": "haiku"
    193         }
    194       }
    195     }
    196   },
    197   "claims": [
    198     {
    199       "claim": "Users cannot effectively specify epistemic preferences to LLMs via current natural language interfaces",
    200       "evidence": "Thematic analysis of 128 Reddit custom instructions showing users develop elaborate workarounds in the absence of structured specification mechanisms",
    201       "supported": "moderate"
    202     },
    203     {
    204       "claim": "The framework's ten challenges are validated empirically — 92.1% of custom instructions addressed at least one challenge",
    205       "evidence": "GPT-4o coding of 128 Reddit custom instructions validated by two human raters (κ=0.8875)",
    206       "supported": "weak"
    207     },
    208     {
    209       "claim": "OpenAI and Anthropic have partially addressed epistemic challenges in documentation but lack structured user-facing mechanisms",
    210       "evidence": "Content analysis of model cards, changelogs, and blog posts for both providers coded by two expert annotators",
    211       "supported": "moderate"
    212     },
    213     {
    214       "claim": "Neither platform provides verification tools to confirm whether user epistemic preferences were followed in a response",
    215       "evidence": "Sections 6.1 and 6.2 identify absence of such features in ChatGPT and Claude interfaces based on product documentation review",
    216       "supported": "moderate"
    217     },
    218     {
    219       "claim": "Users independently developed 'folk theories' (Suppressing Default Behavior, Expert Persona, Parameter Configuration) to address epistemic challenges",
    220       "evidence": "Identified as common patterns across the 128 Reddit instructions, described qualitatively with examples",
    221       "supported": "weak"
    222     }
    223   ],
    224   "methodology_tags": [
    225     "qualitative",
    226     "case-study",
    227     "theoretical"
    228   ],
    229   "key_findings": "The paper proposes the Epistemic Alignment Framework — ten challenges in human-LLM knowledge transmission across epistemic responsibility, personalization, and testimonial reliability dimensions — grounded in philosophical epistemology. A thematic analysis of 128 Reddit custom instructions found users independently addressing all ten challenges via ad-hoc folk theories, providing face validity for the framework. Content analysis of OpenAI and Anthropic documentation reveals both providers acknowledge these challenges but lack structured interface mechanisms for users to specify or verify epistemic preferences. The authors prescribe a four-component interface redesign including structured preference controls, transparency annotations, adaptive personalization, and contextual guidance.",
    230   "red_flags": [
    231     {
    232       "flag": "Non-representative sample",
    233       "detail": "128 custom instructions from LLM enthusiast subreddits are self-selected and not representative of typical LLM users; the paper generalizes to 'users' broadly without bounding this claim."
    234     },
    235     {
    236       "flag": "Circular validation",
    237       "detail": "The framework was constructed first, then used to code user instructions, and the presence of instructions matching all ten challenges is treated as validation — the framework could be retrofitted to almost any set of user customization behaviors."
    238     },
    239     {
    240       "flag": "No limitations section",
    241       "detail": "The paper has no dedicated limitations or threats-to-validity section despite significant methodological concerns about sample representativeness, content analysis subjectivity, and two-provider scope."
    242     },
    243     {
    244       "flag": "Funding not disclosed",
    245       "detail": "No funding acknowledgment or competing interests statement appears anywhere in the paper."
    246     },
    247     {
    248       "flag": "Prescriptions exceed evidence",
    249       "detail": "The four-component interface redesign proposal is presented as concrete guidance based on thin, non-representative evidence (128 Reddit posts and informal content analysis of corporate documentation)."
    250     }
    251   ],
    252   "cited_papers": [
    253     {
    254       "title": "A Roadmap to Pluralistic Alignment",
    255       "relevance": "Foundation for the pluralism/range-of-viewpoints dimension of the framework"
    256     },
    257     {
    258       "title": "Towards Understanding Sycophancy in Language Models",
    259       "relevance": "Key empirical basis for the sycophancy challenge in the framework"
    260     },
    261     {
    262       "title": "Towards Bidirectional Human-AI Alignment: A Systematic Review for Clarifications, Framework, and Future Directions",
    263       "relevance": "Directly frames the epistemic alignment problem as bidirectional; co-authored by paper's second author"
    264     },
    265     {
    266       "title": "Enabling Large Language Models to Generate Text with Citations",
    267       "relevance": "Background for the citation and reference verification challenge"
    268     },
    269     {
    270       "title": "Citations and Trust in LLM Generated Responses",
    271       "relevance": "Key evidence that citations increase user trust even when randomly generated, motivating the citation verification challenge"
    272     },
    273     {
    274       "title": "Online Illusions of Understanding",
    275       "relevance": "Foundational to the framework; de Ridder's five meta-cognitive tasks provide the organizing structure for the ten challenges"
    276     },
    277     {
    278       "title": "Can Large Language Models Faithfully Express Their Intrinsic Uncertainty in Words?",
    279       "relevance": "Background on uncertainty expression, directly relevant to the hedging language and testimonial reliability dimensions"
    280     },
    281     {
    282       "title": "Personalization of Large Language Models: A Survey",
    283       "relevance": "Background survey on LLM personalization relevant to the epistemic personalization dimension"
    284     }
    285   ],
    286   "engagement_factors": {
    287     "practical_relevance": {
    288       "score": 1,
    289       "justification": "The framework provides conceptual vocabulary but no tools or techniques a practitioner could directly implement."
    290     },
    291     "surprise_contrarian": {
    292       "score": 1,
    293       "justification": "The finding that users develop elaborate workarounds is somewhat known in the prompt engineering community; the formalization into epistemic dimensions is new but not surprising."
    294     },
    295     "fear_safety": {
    296       "score": 0,
    297       "justification": "No AI safety, security, or risk concerns are raised beyond general knowledge delivery quality."
    298     },
    299     "drama_conflict": {
    300       "score": 1,
    301       "justification": "Evaluates OpenAI and Anthropic as failing to provide adequate epistemic customization mechanisms — a mild critical angle."
    302     },
    303     "demo_ability": {
    304       "score": 0,
    305       "justification": "No code, demo, or tool is provided."
    306     },
    307     "brand_recognition": {
    308       "score": 2,
    309       "justification": "Directly evaluates ChatGPT and Claude products from OpenAI and Anthropic, well-known brands, though the authors are from UW (not a famous AI lab)."
    310     }
    311   },
    312   "hn_data": {
    313     "threads": [
    314       {
    315         "hn_id": "47634936",
    316         "title": "Reasoning models encode tool choices before they start reasoning",
    317         "points": 3,
    318         "comments": 0,
    319         "url": "https://news.ycombinator.com/item?id=47634936"
    320       },
    321       {
    322         "hn_id": "45116073",
    323         "title": "Towards Agentic OS: An LLM Agent Framework for Linux Schedulers",
    324         "points": 3,
    325         "comments": 0,
    326         "url": "https://news.ycombinator.com/item?id=45116073"
    327       },
    328       {
    329         "hn_id": "43729852",
    330         "title": "MageSQL: Enhancing In-Context Learning for Text-to-SQL Applications with LLMs",
    331         "points": 2,
    332         "comments": 0,
    333         "url": "https://news.ycombinator.com/item?id=43729852"
    334       },
    335       {
    336         "hn_id": "42724278",
    337         "title": "Abundant Water from Early Supernovae at Cosmic Dawn",
    338         "points": 2,
    339         "comments": 0,
    340         "url": "https://news.ycombinator.com/item?id=42724278"
    341       },
    342       {
    343         "hn_id": "44756018",
    344         "title": "Ask HN: Is manually discovering and configuring MCP servers the only way?",
    345         "points": 1,
    346         "comments": 3,
    347         "url": "https://news.ycombinator.com/item?id=44756018"
    348       },
    349       {
    350         "hn_id": "47622971",
    351         "title": "When a reasoning LLM chooses, which comes first: thought or decision?",
    352         "points": 1,
    353         "comments": 0,
    354         "url": "https://news.ycombinator.com/item?id=47622971"
    355       },
    356       {
    357         "hn_id": "44605627",
    358         "title": "Long-Sequence Memory with Temporal Kernels and Dense Hopfield Functionals",
    359         "points": 1,
    360         "comments": 0,
    361         "url": "https://news.ycombinator.com/item?id=44605627"
    362       },
    363       {
    364         "hn_id": "44276405",
    365         "title": "Relic: Evaluating Compositional Instruction Following via Language Recognition",
    366         "points": 1,
    367         "comments": 0,
    368         "url": "https://news.ycombinator.com/item?id=44276405"
    369       },
    370       {
    371         "hn_id": "43358918",
    372         "title": "The Countable Reals (2024)",
    373         "points": 1,
    374         "comments": 0,
    375         "url": "https://news.ycombinator.com/item?id=43358918"
    376       },
    377       {
    378         "hn_id": "40220945",
    379         "title": "Search for gravitationally lensed interstellar transmissions",
    380         "points": 1,
    381         "comments": 0,
    382         "url": "https://news.ycombinator.com/item?id=40220945"
    383       }
    384     ],
    385     "top_points": 3,
    386     "total_points": 16,
    387     "total_comments": 3
    388   }
    389 }

Impressum · Datenschutz