ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (17214B)


      1 {
      2   "paper": {
      3     "title": "A Survey on Agentic Service Ecosystems: Measurement, Analysis, and Optimization",
      4     "authors": ["Xuwen Zhang", "Xiao Xue", "Xia Xie", "Qun Ma", "Xiangning Yu", "Deyu Zhou", "Yifan Wang", "Ming Zhang"],
      5     "year": 2025,
      6     "venue": "arXiv",
      7     "arxiv_id": "2508.07343",
      8     "doi": "10.48550/arXiv.2508.07343"
      9   },
     10   "scan_version": 2,
     11   "active_modules": ["survey_methodology"],
     12   "methodology_tags": ["meta-analysis"],
     13   "key_findings": "This paper proposes a three-step framework (measurement, analysis, optimization) for understanding swarm intelligence emergence in agentic service ecosystems. It categorizes service ecosystem evolution into bio-inspired, socially-inspired, and brain-inspired intelligence phases. The survey organizes existing work across spatiotemporal, structural, and functional analysis dimensions, and discusses direct (incentive-based) vs indirect (adaptive) optimization strategies. A ride-sharing case study illustrates the framework but provides no new empirical data.",
     14   "checklist": {
     15     "artifacts": {
     16       "code_released": {
     17         "applies": true,
     18         "answer": false,
     19         "justification": "No code or analysis scripts are released. No repository URL is mentioned anywhere in the paper."
     20       },
     21       "data_released": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "No dataset, corpus of surveyed papers, or extracted data is released."
     25       },
     26       "environment_specified": {
     27         "applies": false,
     28         "answer": false,
     29         "justification": "This is a purely narrative survey with no computational experiments or environment to specify."
     30       },
     31       "reproduction_instructions": {
     32         "applies": true,
     33         "answer": false,
     34         "justification": "No reproduction instructions provided. The survey does not describe a reproducible search or review methodology."
     35       }
     36     },
     37     "statistical_methodology": {
     38       "confidence_intervals_or_error_bars": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "This is a narrative survey with no quantitative experiments or meta-analysis aggregation."
     42       },
     43       "significance_tests": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No statistical tests are performed; the paper is a narrative literature review."
     47       },
     48       "effect_sizes_reported": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No quantitative analysis or effect sizes; purely narrative survey."
     52       },
     53       "sample_size_justified": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No quantitative experiments conducted."
     57       },
     58       "variance_reported": {
     59         "applies": false,
     60         "answer": false,
     61         "justification": "No experimental runs or quantitative results to report variance for."
     62       }
     63     },
     64     "evaluation_design": {
     65       "baselines_included": {
     66         "applies": true,
     67         "answer": false,
     68         "justification": "The survey does not compare itself against prior surveys on swarm intelligence or service ecosystems. No systematic comparison with related survey papers."
     69       },
     70       "baselines_contemporary": {
     71         "applies": false,
     72         "answer": false,
     73         "justification": "No baselines are included, so contemporaneity cannot be assessed."
     74       },
     75       "ablation_study": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "No system or method is proposed that could be ablated."
     79       },
     80       "multiple_metrics": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "No experimental evaluation is conducted."
     84       },
     85       "human_evaluation": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "No evaluation of system outputs is performed."
     89       },
     90       "held_out_test_set": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "No experiments requiring train/test splits."
     94       },
     95       "per_category_breakdown": {
     96         "applies": true,
     97         "answer": true,
     98         "justification": "The survey organizes literature across multiple dimensions: measurement (linear vs nonlinear), analysis (spatiotemporal, structural, functional), and optimization (direct vs indirect), providing structured per-category discussion."
     99       },
    100       "failure_cases_discussed": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "The paper discusses limitations of existing methods throughout, e.g., limitations of linear metrics (Section 4.1), challenges of traditional analytical methods (Section 5.1), and the collaborative optimization dilemma (Section 6.1)."
    104       },
    105       "negative_results_reported": {
    106         "applies": true,
    107         "answer": true,
    108         "justification": "The paper identifies unresolved challenges and limitations of current approaches in each section, noting where existing methods fail (e.g., traditional linear metrics failing to capture emergent behaviors)."
    109       }
    110     },
    111     "claims_and_evidence": {
    112       "abstract_claims_supported": {
    113         "applies": true,
    114         "answer": true,
    115         "justification": "The abstract claims to propose a framework and review existing technologies. The paper does present the three-step framework (Sections 3-6) and reviews relevant literature. The claims are descriptive rather than empirical."
    116       },
    117       "causal_claims_justified": {
    118         "applies": false,
    119         "answer": false,
    120         "justification": "The paper makes no causal claims; it proposes a conceptual framework and reviews literature without claiming causal relationships from its own analysis."
    121       },
    122       "generalization_bounded": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "The title and abstract claim broad applicability ('Agentic Service Ecosystems') but the concrete case study (Section 7) covers only ride-sharing. The paper does not bound its framework's applicability to specific domains or acknowledge where it may not apply."
    126       },
    127       "alternative_explanations_discussed": {
    128         "applies": false,
    129         "answer": false,
    130         "justification": "The paper presents no empirical results; it is a conceptual survey/taxonomy."
    131       },
    132       "proxy_outcome_distinction": {
    133         "applies": false,
    134         "answer": false,
    135         "justification": "No measurements or proxies are used; this is a theoretical survey."
    136       }
    137     },
    138     "setup_transparency": {
    139       "model_versions_specified": {
    140         "applies": false,
    141         "answer": false,
    142         "justification": "No models are used or evaluated in this survey."
    143       },
    144       "prompts_provided": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No prompting is used."
    148       },
    149       "hyperparameters_reported": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "No experiments are conducted."
    153       },
    154       "scaffolding_described": {
    155         "applies": false,
    156         "answer": false,
    157         "justification": "No agentic scaffolding is used."
    158       },
    159       "data_preprocessing_documented": {
    160         "applies": true,
    161         "answer": false,
    162         "justification": "The paper does not describe how surveyed papers were selected, searched for, or filtered. No search queries, databases, inclusion/exclusion criteria, or PRISMA-style pipeline is documented."
    163       }
    164     },
    165     "limitations_and_scope": {
    166       "limitations_section_present": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "There is no dedicated limitations section. The conclusion (Section 8) briefly mentions future research directions but does not discuss limitations of the survey itself."
    170       },
    171       "threats_to_validity_specific": {
    172         "applies": true,
    173         "answer": false,
    174         "justification": "No threats to validity are discussed. The paper does not acknowledge potential biases in its literature selection or framework construction."
    175       },
    176       "scope_boundaries_stated": {
    177         "applies": true,
    178         "answer": false,
    179         "justification": "The paper does not explicitly state what its framework does NOT cover or where boundaries lie. The scope appears unbounded — it discusses everything from ant colonies to ride-sharing to LLMs without clear delimitation."
    180       }
    181     },
    182     "data_integrity": {
    183       "raw_data_available": {
    184         "applies": true,
    185         "answer": false,
    186         "justification": "No list of surveyed papers, search results, or underlying data is made available."
    187       },
    188       "data_collection_described": {
    189         "applies": true,
    190         "answer": false,
    191         "justification": "The paper does not describe how the 123 references were found. No search strategy, databases queried, or time period is stated."
    192       },
    193       "recruitment_methods_described": {
    194         "applies": false,
    195         "answer": false,
    196         "justification": "No human participants are involved, so there is no recruitment to describe; the survey's data sources are published papers, analogous to standard benchmark sources."
    197       },
    198       "data_pipeline_documented": {
    199         "applies": true,
    200         "answer": false,
    201         "justification": "No pipeline from literature search to final paper selection is described. The reader cannot tell how or why these particular 123 references were chosen."
    202       }
    203     },
    204     "conflicts_of_interest": {
    205       "funding_disclosed": {
    206         "applies": true,
    207         "answer": false,
    208         "justification": "No funding or acknowledgments section is present in the paper."
    209       },
    210       "affiliations_disclosed": {
    211         "applies": true,
    212         "answer": true,
    213         "justification": "Author affiliations are clearly listed: Tianjin University, Hainan University, Shandong University, University of Exeter."
    214       },
    215       "funder_independent_of_outcome": {
    216         "applies": true,
    217         "answer": false,
    218         "justification": "No funding is disclosed, so independence cannot be assessed."
    219       },
    220       "financial_interests_declared": {
    221         "applies": true,
    222         "answer": false,
    223         "justification": "No competing interests or financial interests statement is present in the paper."
    224       }
    225     },
    226     "contamination": {
    227       "training_cutoff_stated": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "This survey does not evaluate any pre-trained model on a benchmark."
    231       },
    232       "train_test_overlap_discussed": {
    233         "applies": false,
    234         "answer": false,
    235         "justification": "No model evaluation is performed."
    236       },
    237       "benchmark_contamination_addressed": {
    238         "applies": false,
    239         "answer": false,
    240         "justification": "No model evaluation is performed."
    241       }
    242     },
    243     "human_studies": {
    244       "pre_registered": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants in this survey."
    248       },
    249       "irb_or_ethics_approval": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants."
    253       },
    254       "demographics_reported": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants."
    258       },
    259       "inclusion_exclusion_criteria": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants."
    263       },
    264       "randomization_described": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants."
    268       },
    269       "blinding_described": {
    270         "applies": false,
    271         "answer": false,
    272         "justification": "No human participants."
    273       },
    274       "attrition_reported": {
    275         "applies": false,
    276         "answer": false,
    277         "justification": "No human participants."
    278       }
    279     },
    280     "cost_and_practicality": {
    281       "inference_cost_reported": {
    282         "applies": false,
    283         "answer": false,
    284         "justification": "This is a survey paper with no method to cost."
    285       },
    286       "compute_budget_stated": {
    287         "applies": false,
    288         "answer": false,
    289         "justification": "This is a survey paper with no computation."
    290       }
    291     },
    292     "survey_methodology": {
    293       "prisma_or_structured_protocol": {
    294         "applies": true,
    295         "answer": false,
    296         "justification": "No PRISMA diagram, structured search protocol, or reproducible search strategy is described. The paper does not explain how literature was identified, screened, or selected."
    297       },
    298       "quality_assessment_of_sources": {
    299         "applies": true,
    300         "answer": false,
    301         "justification": "The survey does not assess the quality of the papers it cites. All sources are treated equally regardless of methodology, venue, or rigor."
    302       },
    303       "publication_bias_discussed": {
    304         "applies": true,
    305         "answer": false,
    306         "justification": "No discussion of publication bias, positive-result skew, or representativeness of the surveyed literature."
    307       }
    308     }
    309   },
    310   "claims": [
    311     {
    312       "claim": "Traditional linear measurement methods are inadequate for capturing the complexity of modern service ecosystems driven by swarm intelligence.",
    313       "evidence": "Section 4 discusses how linear metrics (accuracy, recall, response time) fail to capture emergent behaviors and nonlinear interactions, citing multiple sources [39-44].",
    314       "supported": "moderate"
    315     },
    316     {
    317       "claim": "The proposed three-step framework (measurement, analysis, optimization) reveals cyclical mechanisms that foster swarm intelligence emergence.",
    318       "evidence": "The framework is described conceptually in Section 3 and elaborated in Sections 4-6, but no empirical validation of the framework itself is provided.",
    319       "supported": "weak"
    320     },
    321     {
    322       "claim": "Service ecosystems evolve through three stages of intelligence: bio-inspired, socially-inspired, and brain-inspired.",
    323       "evidence": "Section 2 describes the three stages with references to historical developments (Beni 1989, Chai 2017, Li 2017), but the stage model is asserted rather than empirically validated.",
    324       "supported": "weak"
    325     }
    326   ],
    327   "red_flags": [
    328     {
    329       "flag": "No systematic review methodology",
    330       "detail": "The paper is titled a 'survey' but follows no structured review protocol (PRISMA, snowballing, etc.). There are no search queries, database selections, inclusion/exclusion criteria, or paper counts at each screening stage. The reader cannot assess the comprehensiveness or bias of the literature coverage."
    331     },
    332     {
    333       "flag": "No quality assessment of sources",
    334       "detail": "All 123 cited papers are treated equally without any assessment of their methodological quality. This risks laundering weak results into the narrative."
    335     },
    336     {
    337       "flag": "Framework asserted without validation",
    338       "detail": "The measurement-analysis-optimization framework is proposed as a contribution but is not validated empirically. The ride-sharing case study (Section 7) is a narrative illustration, not a test of the framework."
    339     },
    340     {
    341       "flag": "Heavy self-citation",
    342       "detail": "At least 17 of the 123 references are to the same research group (Xue X and colleagues from Tianjin University), including refs [1], [3], [5], [12], [25], [44], [49], [61], [63], [72], [74], [75], [77], [78], [82], [96], [97]. This concentration raises concerns about the breadth of the survey's coverage."
    343     },
    344     {
    345       "flag": "Scope mismatch with title",
    346       "detail": "The title mentions 'Agentic Service Ecosystems' suggesting LLM-era AI agents, but the paper primarily covers traditional swarm intelligence, service computing, and ecosystem services literature. The connection to modern agentic AI systems is superficial."
    347     }
    348   ],
    349   "cited_papers": [
    350     {
    351       "title": "Are emergent abilities of large language models a mirage?",
    352       "authors": ["R. Schaeffer", "B. Miranda", "S. Koyejo"],
    353       "year": 2023,
    354       "relevance": "Directly relevant to understanding emergence claims in LLM capabilities, applicable to evaluating overclaiming in AI research."
    355     },
    356     {
    357       "title": "Crowd intelligence in AI 2.0 era",
    358       "authors": ["W. Li", "W. Wu", "H. Wang"],
    359       "year": 2017,
    360       "relevance": "Foundational work on crowd intelligence frameworks that relates to multi-agent AI system design."
    361     },
    362     {
    363       "title": "Swarm intelligence: past, present and future",
    364       "authors": ["X.S. Yang", "S. Deb", "Y.X. Zhao"],
    365       "year": 2018,
    366       "relevance": "Overview of swarm intelligence optimization techniques relevant to understanding multi-agent AI systems."
    367     },
    368     {
    369       "title": "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
    370       "authors": ["Q. Wu"],
    371       "year": 2023,
    372       "relevance": "Multi-agent LLM framework directly relevant to agentic AI software engineering research."
    373     },
    374     {
    375       "title": "Intelligent computing: the latest advances, challenges, and future",
    376       "authors": ["S. Zhu", "T. Yu", "T. Xu"],
    377       "year": 2023,
    378       "relevance": "Survey of intelligent computing advances relevant to AI capability assessment."
    379     }
    380   ]
    381 }

Impressum · Datenschutz