ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (20323B)


      1 {
      2   "paper": {
      3     "title": "The Next Paradigm Is User-Centric Agent, Not Platform-Centric Service",
      4     "authors": [
      5       "Luankang Zhang",
      6       "Hang Lv",
      7       "Qiushi Pan",
      8       "Kefen Wang",
      9       "Yonghao Huang",
     10       "Xinrui Miao",
     11       "Yin Xu",
     12       "Wei Guo",
     13       "Yong Liu",
     14       "Hao Wang",
     15       "Enhong Chen"
     16     ],
     17     "year": 2026,
     18     "venue": "arXiv",
     19     "arxiv_id": "2602.15682"
     20   },
     21   "scan_version": 2,
     22   "active_modules": [],
     23   "methodology_tags": ["theoretical"],
     24   "key_findings": "This position paper argues that digital services should shift from platform-centric to user-centric agent architectures. The authors identify three structural bottlenecks of platform-centric services—fragmented context, limited execution boundaries, and misaligned incentives—and propose a device-cloud collaborative pipeline where an on-device agent controls user data and goals while the cloud provides external services. The paper discusses governance challenges including agent provider neutrality and platform-agent coordination but provides no empirical evidence or prototype implementation.",
     25   "checklist": {
     26     "artifacts": {
     27       "code_released": {
     28         "applies": true,
     29         "answer": false,
     30         "justification": "No code, prototype, or repository is released. The paper proposes an architecture conceptually but provides no implementation."
     31       },
     32       "data_released": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "No dataset or structured analysis data is released. The paper is a conceptual argument with no collected data."
     36       },
     37       "environment_specified": {
     38         "applies": false,
     39         "answer": false,
     40         "justification": "Theoretical/position paper with no experiments to reproduce."
     41       },
     42       "reproduction_instructions": {
     43         "applies": false,
     44         "answer": false,
     45         "justification": "Theoretical/position paper with no experiments or implementation to reproduce."
     46       }
     47     },
     48     "statistical_methodology": {
     49       "confidence_intervals_or_error_bars": {
     50         "applies": false,
     51         "answer": false,
     52         "justification": "Theoretical/position paper with no quantitative results."
     53       },
     54       "significance_tests": {
     55         "applies": false,
     56         "answer": false,
     57         "justification": "No comparative empirical claims requiring significance tests."
     58       },
     59       "effect_sizes_reported": {
     60         "applies": false,
     61         "answer": false,
     62         "justification": "No quantitative experiments or measurements."
     63       },
     64       "sample_size_justified": {
     65         "applies": false,
     66         "answer": false,
     67         "justification": "Theoretical paper with no samples or experiments."
     68       },
     69       "variance_reported": {
     70         "applies": false,
     71         "answer": false,
     72         "justification": "No experimental runs to report variance for."
     73       }
     74     },
     75     "evaluation_design": {
     76       "baselines_included": {
     77         "applies": false,
     78         "answer": false,
     79         "justification": "No empirical evaluation is conducted. The paper contrasts paradigms conceptually but does not evaluate any system."
     80       },
     81       "baselines_contemporary": {
     82         "applies": false,
     83         "answer": false,
     84         "justification": "No empirical evaluation to compare baselines."
     85       },
     86       "ablation_study": {
     87         "applies": false,
     88         "answer": false,
     89         "justification": "No system or components to ablate."
     90       },
     91       "multiple_metrics": {
     92         "applies": false,
     93         "answer": false,
     94         "justification": "No evaluation or metrics."
     95       },
     96       "human_evaluation": {
     97         "applies": false,
     98         "answer": false,
     99         "justification": "No system outputs to evaluate."
    100       },
    101       "held_out_test_set": {
    102         "applies": false,
    103         "answer": false,
    104         "justification": "No evaluation on any dataset."
    105       },
    106       "per_category_breakdown": {
    107         "applies": false,
    108         "answer": false,
    109         "justification": "No quantitative results to break down."
    110       },
    111       "failure_cases_discussed": {
    112         "applies": false,
    113         "answer": false,
    114         "justification": "No system or experiments to produce failure cases."
    115       },
    116       "negative_results_reported": {
    117         "applies": false,
    118         "answer": false,
    119         "justification": "No experiments conducted."
    120       }
    121     },
    122     "claims_and_evidence": {
    123       "abstract_claims_supported": {
    124         "applies": true,
    125         "answer": true,
    126         "justification": "The abstract's claims are qualitative arguments (platform-centric models conflict with user interests, user-centric agents are feasible) that are developed through conceptual reasoning in the paper body (Sections 2-5). The claims are argumentative rather than empirical, and the paper does provide the reasoning it promises."
    127       },
    128       "causal_claims_justified": {
    129         "applies": true,
    130         "answer": false,
    131         "justification": "The paper uses causal language throughout: platform-centric design 'results in conflicts,' user-centric agents 'resolve' fragmented context, better models 'become increasingly proficient at reinforcing the walled garden effect' (Section 3). These are causal claims supported only by logical argument, not empirical evidence or causal inference methodology."
    132       },
    133       "generalization_bounded": {
    134         "applies": true,
    135         "answer": false,
    136         "justification": "The title claims 'The Next Paradigm' and the paper argues broadly about 'the future of digital services.' These claims are unbounded—no specific domains, geographies, or service types are excluded. The paper doesn't acknowledge settings where platform-centric optimization may genuinely serve users (e.g., spam filtering, safety systems)."
    137       },
    138       "alternative_explanations_discussed": {
    139         "applies": true,
    140         "answer": true,
    141         "justification": "Section 6 ('Alternative Views') discusses two competing paradigms: system-level context-driven agents (e.g., Apple Intelligence) and service-level execution-driven agents (e.g., Meituan, Didi). The paper provides specific rebuttals to each alternative, making this a substantive discussion of competing explanations."
    142       },
    143       "proxy_outcome_distinction": {
    144         "applies": false,
    145         "answer": false,
    146         "justification": "Theoretical paper with no measurements or proxies."
    147       }
    148     },
    149     "setup_transparency": {
    150       "model_versions_specified": {
    151         "applies": false,
    152         "answer": false,
    153         "justification": "No models used in experiments."
    154       },
    155       "prompts_provided": {
    156         "applies": false,
    157         "answer": false,
    158         "justification": "No prompting used."
    159       },
    160       "hyperparameters_reported": {
    161         "applies": false,
    162         "answer": false,
    163         "justification": "No experiments requiring hyperparameters."
    164       },
    165       "scaffolding_described": {
    166         "applies": false,
    167         "answer": false,
    168         "justification": "No agentic scaffolding implemented or evaluated."
    169       },
    170       "data_preprocessing_documented": {
    171         "applies": false,
    172         "answer": false,
    173         "justification": "No data collected or processed."
    174       }
    175     },
    176     "limitations_and_scope": {
    177       "limitations_section_present": {
    178         "applies": true,
    179         "answer": false,
    180         "justification": "There is no dedicated limitations section. Section 6 discusses 'Alternative Views', but this is a comparison of competing paradigms rather than a candid assessment of the proposed approach's own limitations. Section 7 (Conclusion) is six sentences long and contains no limitations discussion."
    181       },
    182       "threats_to_validity_specific": {
    183         "applies": true,
    184         "answer": false,
    185         "justification": "No threats to validity are discussed. The paper does not acknowledge specific weaknesses in its argument such as the feasibility gap between the proposed architecture and current technology, the cold-start problem for personal agents, or scenarios where platform optimization serves users better than individual agents."
    186       },
    187       "scope_boundaries_stated": {
    188         "applies": true,
    189         "answer": false,
    190         "justification": "The paper makes sweeping claims about 'the next paradigm' for all digital services without stating what its arguments do NOT cover. No specific boundaries on domain, geography, user type, or service category are stated."
    191       }
    192     },
    193     "data_integrity": {
    194       "raw_data_available": {
    195         "applies": false,
    196         "answer": false,
    197         "justification": "Theoretical paper with no data collected."
    198       },
    199       "data_collection_described": {
    200         "applies": false,
    201         "answer": false,
    202         "justification": "No data collection performed."
    203       },
    204       "recruitment_methods_described": {
    205         "applies": false,
    206         "answer": false,
    207         "justification": "No participants and no standard benchmark used."
    208       },
    209       "data_pipeline_documented": {
    210         "applies": false,
    211         "answer": false,
    212         "justification": "No data pipeline exists."
    213       }
    214     },
    215     "conflicts_of_interest": {
    216       "funding_disclosed": {
    217         "applies": true,
    218         "answer": false,
    219         "justification": "No funding section or acknowledgments listing funding sources. The paper notes affiliations with Huawei Technologies but does not disclose whether Huawei funded this work."
    220       },
    221       "affiliations_disclosed": {
    222         "applies": true,
    223         "answer": true,
    224         "justification": "Author affiliations are clearly listed: University of Science and Technology of China and Huawei Technologies, Shenzhen."
    225       },
    226       "funder_independent_of_outcome": {
    227         "applies": true,
    228         "answer": false,
    229         "justification": "No funder is disclosed, so independence from the outcome cannot be established. Huawei Technologies is listed as an affiliation for two authors (Wei Guo, Yong Liu), and as a device manufacturer it has a direct commercial interest in on-device intelligence—the exact paradigm this paper advocates. The affiliated organization is therefore not independent of the paper's conclusions."
    230       },
    231       "financial_interests_declared": {
    232         "applies": true,
    233         "answer": false,
    234         "justification": "No competing interests statement is present anywhere in the paper."
    235       }
    236     },
    237     "contamination": {
    238       "training_cutoff_stated": {
    239         "applies": false,
    240         "answer": false,
    241         "justification": "No pre-trained model evaluated on any benchmark."
    242       },
    243       "train_test_overlap_discussed": {
    244         "applies": false,
    245         "answer": false,
    246         "justification": "No pre-trained model evaluated on any benchmark."
    247       },
    248       "benchmark_contamination_addressed": {
    249         "applies": false,
    250         "answer": false,
    251         "justification": "No benchmark evaluation performed."
    252       }
    253     },
    254     "human_studies": {
    255       "pre_registered": {
    256         "applies": false,
    257         "answer": false,
    258         "justification": "No human participants."
    259       },
    260       "irb_or_ethics_approval": {
    261         "applies": false,
    262         "answer": false,
    263         "justification": "No human participants."
    264       },
    265       "demographics_reported": {
    266         "applies": false,
    267         "answer": false,
    268         "justification": "No human participants."
    269       },
    270       "inclusion_exclusion_criteria": {
    271         "applies": false,
    272         "answer": false,
    273         "justification": "No human participants."
    274       },
    275       "randomization_described": {
    276         "applies": false,
    277         "answer": false,
    278         "justification": "No human participants."
    279       },
    280       "blinding_described": {
    281         "applies": false,
    282         "answer": false,
    283         "justification": "No human participants."
    284       },
    285       "attrition_reported": {
    286         "applies": false,
    287         "answer": false,
    288         "justification": "No human participants."
    289       }
    290     },
    291     "cost_and_practicality": {
    292       "inference_cost_reported": {
    293         "applies": false,
    294         "answer": false,
    295         "justification": "Theoretical/position paper with no method to cost."
    296       },
    297       "compute_budget_stated": {
    298         "applies": false,
    299         "answer": false,
    300         "justification": "Theoretical/position paper with no computation performed."
    301       }
    302     }
    303   },
    304   "claims": [
    305     {
    306       "claim": "Platform-centric services prioritize platform objectives (engagement, retention, conversion) over user welfare, creating fundamental conflicts with user interests.",
    307       "evidence": "Argued conceptually in Section 1 and Section 3 with Figure 1 illustrating the conflict (a student wanting to study being pushed video content for ad revenue). No empirical evidence provided.",
    308       "supported": "weak"
    309     },
    310     {
    311       "claim": "Improvements in platform service quality do not necessarily translate to genuine user benefit; a more capable model becomes a stronger amplifier of its (platform-aligned) objective.",
    312       "evidence": "Section 3 argues this conceptually: 'A platform-centric model cannot simultaneously function as a user's trusted agent and a system optimized for platform profit. A more capable model is not a more benevolent model.' Supported by logical argument only.",
    313       "supported": "weak"
    314     },
    315     {
    316       "claim": "User-centric agents can resolve three structural bottlenecks: fragmented context (Section 3.1), limited execution boundaries (Section 3.2), and misaligned incentives (Section 3.3).",
    317       "evidence": "Each bottleneck is argued conceptually with reference to existing literature. Section 3.1 argues device-side context aggregation solves fragmentation. Section 3.2 argues cross-service orchestration extends execution. Section 3.3 argues user-controlled objectives resolve misalignment. No prototype or user study validates these claims.",
    318       "supported": "weak"
    319     },
    320     {
    321       "claim": "A device-cloud collaborative pipeline is a practical architecture for implementing user-centric intelligence.",
    322       "evidence": "Section 4 proposes the architecture with three stages (Perception & Memory, Intent Decomposition & Planning, Execution & Evolution) illustrated in Figure 3. The pipeline is described conceptually and diagrammatically but is not implemented, tested, or compared with alternatives.",
    323       "supported": "unsupported"
    324     },
    325     {
    326       "claim": "Advancements in LLMs and on-device intelligence make user-centric agents now feasible.",
    327       "evidence": "End of Section 3 cites LLM-as-Rec (Liu et al. 2025e) and MobileLLM (Liu et al. 2024b) as evidence that understanding and execution can be co-located on-device. No integration or feasibility study is provided.",
    328       "supported": "weak"
    329     }
    330   ],
    331   "red_flags": [
    332     {
    333       "flag": "Undisclosed commercial conflict of interest",
    334       "detail": "Two authors are affiliated with Huawei Technologies, a major device manufacturer with a direct commercial interest in on-device intelligence—the paradigm this paper advocates. The affiliation itself is listed, but the paper does not acknowledge it as a potential conflict of interest and includes no competing interests statement."
    335     },
    336     {
    337       "flag": "Claims significantly outrun evidence",
    338       "detail": "The paper declares 'The Next Paradigm' in its title and makes sweeping claims about the future of all digital services, but provides no empirical evidence, prototype implementation, user study, simulation, or comparative analysis. Every claim is supported only by logical argument and selective citation."
    339     },
    340     {
    341       "flag": "No limitations section",
    342       "detail": "For a position paper making broad claims about the future of digital services, there is no discussion of limitations, scenarios where the proposed paradigm might fail, or conditions under which platform-centric approaches might be preferable."
    343     },
    344     {
    345       "flag": "Selective framing of platform-centric services",
    346       "detail": "The paper frames all platform-centric optimization as adversarial to users without acknowledging cases where platform goals align with user welfare (spam filtering, content moderation, safety systems, fraud detection) or where coordination benefits from centralization."
    347     },
    348     {
    349       "flag": "Proposed architecture hand-waves critical challenges",
    350       "detail": "The device-cloud pipeline (Section 4) is presented as a practical architecture, but critical technical challenges (on-device LLM capability limits, latency constraints, cross-platform API standardization, user adoption) are mentioned at most in passing without concrete solutions."
    351     }
    352   ],
    353   "cited_papers": [
    354     {
    355       "title": "GPT-4 technical report",
    356       "authors": ["Josh Achiam", "Steven Adler", "Sandhini Agarwal"],
    357       "year": 2023,
    358       "arxiv_id": "2303.08774",
    359       "relevance": "Foundational LLM capabilities paper relevant to the survey's coverage of LLM-based tools and agents."
    360     },
    361     {
    362       "title": "MemGPT: Towards LLMs as operating systems",
    363       "authors": ["Charles Packer", "Sarah Wooders", "Kevin Lin"],
    364       "year": 2024,
    365       "arxiv_id": "2310.08560",
    366       "relevance": "Proposes LLM memory management architecture relevant to agentic AI system design."
    367     },
    368     {
    369       "title": "FrugalGPT: How to use large language models while reducing cost and improving performance",
    370       "authors": ["Lingjiao Chen", "Matei Zaharia", "James Zou"],
    371       "year": 2023,
    372       "arxiv_id": "2305.05176",
    373       "relevance": "Addresses cost-efficiency of LLM inference, a practical concern in agentic AI deployment."
    374     },
    375     {
    376       "title": "MobileLLM: Optimizing sub-billion parameter language models for on-device use cases",
    377       "authors": ["Zechun Liu"],
    378       "year": 2024,
    379       "relevance": "On-device LLM optimization directly relevant to feasibility of user-centric on-device agents."
    380     },
    381     {
    382       "title": "Tree search for language model agents",
    383       "authors": ["Jing Yu Koh", "Stephen McAleer", "Daniel Fried", "Ruslan Salakhutdinov"],
    384       "year": 2025,
    385       "arxiv_id": "2407.01476",
    386       "relevance": "Agent planning via tree search relevant to the survey's coverage of agentic AI reasoning."
    387     },
    388     {
    389       "title": "GuardAgent: Safeguard LLM agents by a guard agent via knowledge-enabled reasoning",
    390       "authors": ["Zhen Xiang"],
    391       "year": 2025,
    392       "arxiv_id": "2406.09187",
    393       "relevance": "Agent safety and guardrails relevant to the survey's coverage of LLM agent safety."
    394     },
    395     {
    396       "title": "AgentSpec: Customizable runtime enforcement for safe and reliable LLM agents",
    397       "authors": ["Haoyu Wang", "Christopher M. Poskitt", "Jun Sun"],
    398       "year": 2025,
    399       "arxiv_id": "2503.18666",
    400       "relevance": "Runtime safety enforcement for LLM agents relevant to agentic AI safety."
    401     },
    402     {
    403       "title": "ReWOO: Decoupling reasoning from observations for efficient augmented language models",
    404       "authors": ["Binfeng Xu"],
    405       "year": 2023,
    406       "arxiv_id": "2305.18323",
    407       "relevance": "Efficient agentic reasoning architecture relevant to LLM agent design."
    408     },
    409     {
    410       "title": "Build agent advocates, not platform agents",
    411       "authors": ["Sayash Kapoor", "Noam Kolt", "Seth Lazar"],
    412       "year": 2025,
    413       "arxiv_id": "2505.04345",
    414       "relevance": "Directly argues for user-controlled AI agents over platform-controlled ones, relevant to AI agent governance."
    415     },
    416     {
    417       "title": "iAgent: LLM agent as a shield between user and recommender systems",
    418       "authors": ["Wujiang Xu"],
    419       "year": 2025,
    420       "arxiv_id": "2502.14662",
    421       "relevance": "Proposes LLM agent as user-side intermediary with recommender systems, directly relevant to user-centric AI agents."
    422     },
    423     {
    424       "title": "AGRAIL: A lifelong agent guardrail with effective and adaptive safety detection",
    425       "authors": ["Weidi Luo"],
    426       "year": 2025,
    427       "arxiv_id": "2502.11448",
    428       "relevance": "Lifelong safety guardrails for LLM agents relevant to agentic AI safety and governance."
    429     },
    430     {
    431       "title": "PlanGen: A multi-agent framework for generating planning and reasoning trajectories for complex problem solving",
    432       "authors": ["Mihir Parmar"],
    433       "year": 2025,
    434       "arxiv_id": "2502.16111",
    435       "relevance": "Multi-agent planning framework relevant to LLM agent architecture design."
    436     }
    437   ]
    438 }

Impressum · Datenschutz