scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (20210B)
      1 {
      2   "paper": {
      3     "title": "Multi-Agent Collaboration Mechanisms: A Survey of LLMs",
      4     "authors": [
      5       "Khanh-Tung Tran",
      6       "Dung Dao",
      7       "Minh-Duong Nguyen",
      8       "Quoc-Viet Pham",
      9       "Barry O'Sullivan",
     10       "Hoang D. Nguyen"
     11     ],
     12     "year": 2025,
     13     "venue": "arXiv",
     14     "arxiv_id": "2501.06322"
     15   },
     16   "scan_version": 2,
     17   "active_modules": ["survey_methodology"],
     18   "methodology_tags": ["meta-analysis"],
     19   "key_findings": "This survey proposes a framework for characterizing LLM-based multi-agent collaboration along five dimensions: actors, types (cooperation/competition/coopetition), structures (centralized/decentralized/hierarchical), strategies (rule-based/role-based/model-based), and coordination mechanisms. The review covers applications across 5G/6G networks, NLG/QA, and social/cultural domains. The authors find that cooperation is the dominant collaboration type, role-based strategies are most common, and that effective collaboration channel design is critical — poorly designed MAS can underperform single-agent systems with strong prompts.",
     20   "checklist": {
     21     "artifacts": {
     22       "code_released": {
     23         "applies": true,
     24         "answer": false,
     25         "justification": "No code repository or analysis scripts are mentioned or linked in the paper."
     26       },
     27       "data_released": {
     28         "applies": true,
     29         "answer": false,
     30         "justification": "No dataset of surveyed papers, extracted data, or supplementary materials is released."
     31       },
     32       "environment_specified": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "No environment or dependency specifications are provided. As a survey, analysis tools/scripts could have been documented but were not."
     36       },
     37       "reproduction_instructions": {
     38         "applies": true,
     39         "answer": false,
     40         "justification": "No instructions for reproducing the survey methodology or replicating the paper selection process are provided."
     41       }
     42     },
     43     "statistical_methodology": {
     44       "confidence_intervals_or_error_bars": {
     45         "applies": false,
     46         "answer": false,
     47         "justification": "This is a survey paper that does not run experiments or report quantitative results requiring uncertainty measures."
     48       },
     49       "significance_tests": {
     50         "applies": false,
     51         "answer": false,
     52         "justification": "Survey paper with no statistical comparisons."
     53       },
     54       "effect_sizes_reported": {
     55         "applies": false,
     56         "answer": false,
     57         "justification": "Survey paper with no experiments."
     58       },
     59       "sample_size_justified": {
     60         "applies": false,
     61         "answer": false,
     62         "justification": "Survey paper; no experimental sample sizes."
     63       },
     64       "variance_reported": {
     65         "applies": false,
     66         "answer": false,
     67         "justification": "Survey paper with no experimental runs."
     68       }
     69     },
     70     "evaluation_design": {
     71       "baselines_included": {
     72         "applies": true,
     73         "answer": true,
     74         "justification": "Table 1 compares the current survey against 8 prior surveys across four dimensions (focus on MAS, review of collaborative aspects, proposed framework, review of applications), showing how this work addresses gaps in prior surveys."
     75       },
     76       "baselines_contemporary": {
     77         "applies": true,
     78         "answer": true,
     79         "justification": "The comparison surveys in Table 1 are from 2023-2024, which are contemporary to this 2025 paper."
     80       },
     81       "ablation_study": {
     82         "applies": false,
     83         "answer": false,
     84         "justification": "Survey paper — no system components to ablate."
     85       },
     86       "multiple_metrics": {
     87         "applies": false,
     88         "answer": false,
     89         "justification": "Survey paper with no quantitative evaluation metrics."
     90       },
     91       "human_evaluation": {
     92         "applies": false,
     93         "answer": false,
     94         "justification": "Survey paper — human evaluation of outputs is not relevant."
     95       },
     96       "held_out_test_set": {
     97         "applies": false,
     98         "answer": false,
     99         "justification": "Survey paper — no test set."
    100       },
    101       "per_category_breakdown": {
    102         "applies": true,
    103         "answer": true,
    104         "justification": "The survey organizes findings by collaboration type (Section 4.2), strategy (Section 4.3), communication structure (Section 4.4), and coordination architecture (Section 4.5), with summary tables for each category (Tables 2-5)."
    105       },
    106       "failure_cases_discussed": {
    107         "applies": true,
    108         "answer": true,
    109         "justification": "In Section 4.6 and throughout, the paper discusses failure modes: cascading hallucinations, suboptimal MAS designs underperforming single agents (ref [128]), agents sending messages to themselves (ref [91]), and infinite conversation loops."
    110       },
    111       "negative_results_reported": {
    112         "applies": true,
    113         "answer": true,
    114         "justification": "The paper notes that MAS can be overtaken by single-agent counterparts with strong prompts (ref [128]), and that LLMs still have significant limitations in opponent modeling and team collaboration (Section 4.2.2)."
    115       }
    116     },
    117     "claims_and_evidence": {
    118       "abstract_claims_supported": {
    119         "applies": true,
    120         "answer": true,
    121         "justification": "The abstract claims to provide an 'extensive survey of the collaborative aspect of MASs' and 'an extensible framework' — both are delivered through the five-dimension framework (Sections 3-4) and application review (Section 5)."
    122       },
    123       "causal_claims_justified": {
    124         "applies": false,
    125         "answer": false,
    126         "justification": "The paper is a survey/taxonomy that makes no causal claims. It describes and categorizes existing work without claiming causal relationships."
    127       },
    128       "generalization_bounded": {
    129         "applies": true,
    130         "answer": false,
    131         "justification": "The title claims to survey 'Multi-Agent Collaboration Mechanisms' broadly, but the paper does not explicitly bound what was excluded from the review or state the scope boundaries of its search. No systematic search criteria or inclusion/exclusion methodology is described."
    132       },
    133       "alternative_explanations_discussed": {
    134         "applies": false,
    135         "answer": false,
    136         "justification": "This is a survey/taxonomy paper presenting no empirical results of its own; alternative explanations are not applicable."
    137       },
    138       "proxy_outcome_distinction": {
    139         "applies": false,
    140         "answer": false,
    141         "justification": "This is a survey paper with no measurements or proxy outcomes."
    142       }
    143     },
    144     "setup_transparency": {
    145       "model_versions_specified": {
    146         "applies": false,
    147         "answer": false,
    148         "justification": "Survey paper — does not use any models."
    149       },
    150       "prompts_provided": {
    151         "applies": false,
    152         "answer": false,
    153         "justification": "Survey paper — no prompting is used."
    154       },
    155       "hyperparameters_reported": {
    156         "applies": false,
    157         "answer": false,
    158         "justification": "Survey paper — no experiments with hyperparameters."
    159       },
    160       "scaffolding_described": {
    161         "applies": false,
    162         "answer": false,
    163         "justification": "Survey paper — no agentic scaffolding is used."
    164       },
    165       "data_preprocessing_documented": {
    166         "applies": true,
    167         "answer": false,
    168         "justification": "The paper does not describe its paper selection pipeline. There is no description of search queries, databases searched, filtering criteria, or number of papers at each screening stage. Section 4.1 mentions 'systematically analyzing existing research' but provides no methodology details."
    169       }
    170     },
    171     "limitations_and_scope": {
    172       "limitations_section_present": {
    173         "applies": true,
    174         "answer": false,
    175         "justification": "There is no dedicated limitations section. Section 6 ('Open Problems & Discussion') discusses challenges for the field but does not address limitations of the survey itself."
    176       },
    177       "threats_to_validity_specific": {
    178         "applies": true,
    179         "answer": false,
    180         "justification": "No threats to validity of the survey methodology are discussed. The paper does not acknowledge potential biases in its paper selection or coverage gaps."
    181       },
    182       "scope_boundaries_stated": {
    183         "applies": true,
    184         "answer": false,
    185         "justification": "The paper does not explicitly state what was excluded from the review scope or what the results do NOT show. The search methodology, inclusion/exclusion criteria, and scope boundaries are never defined."
    186       }
    187     },
    188     "data_integrity": {
    189       "raw_data_available": {
    190         "applies": true,
    191         "answer": false,
    192         "justification": "No list of all surveyed papers, search results, or extracted data is made available for verification."
    193       },
    194       "data_collection_described": {
    195         "applies": true,
    196         "answer": false,
    197         "justification": "The paper does not describe how surveyed papers were collected. No search queries, databases, date ranges, or collection procedures are specified."
    198       },
    199       "recruitment_methods_described": {
    200         "applies": false,
    201         "answer": false,
    202         "justification": "No human participants; the data source is published literature rather than a standard benchmark. The paper-selection process itself is not described, but that gap is covered by data_collection_described."
    203       },
    204       "data_pipeline_documented": {
    205         "applies": true,
    206         "answer": false,
    207         "justification": "No documentation of the pipeline from initial paper discovery to final inclusion in the survey. The reader cannot determine how papers were found, filtered, or categorized."
    208       }
    209     },
    210     "conflicts_of_interest": {
    211       "funding_disclosed": {
    212         "applies": true,
    213         "answer": true,
    214         "justification": "Acknowledgments section states: 'This research work has emanated from research conducted with financial support from Science Foundation Ireland under Grant 12/RC/2289-P2 and 18/CRT/6223.'"
    215       },
    216       "affiliations_disclosed": {
    217         "applies": true,
    218         "answer": true,
    219         "justification": "All author affiliations are listed: University College Cork, Pusan National University, and Trinity College Dublin."
    220       },
    221       "funder_independent_of_outcome": {
    222         "applies": true,
    223         "answer": true,
    224         "justification": "Science Foundation Ireland is a government funding agency with no stake in the outcomes of this survey."
    225       },
    226       "financial_interests_declared": {
    227         "applies": true,
    228         "answer": false,
    229         "justification": "No competing interests or financial interests statement is provided in the paper."
    230       }
    231     },
    232     "contamination": {
    233       "training_cutoff_stated": {
    234         "applies": false,
    235         "answer": false,
    236         "justification": "Survey paper — does not evaluate any pre-trained model on benchmarks."
    237       },
    238       "train_test_overlap_discussed": {
    239         "applies": false,
    240         "answer": false,
    241         "justification": "Survey paper — no model evaluation on benchmarks."
    242       },
    243       "benchmark_contamination_addressed": {
    244         "applies": false,
    245         "answer": false,
    246         "justification": "Survey paper — no model evaluation on benchmarks."
    247       }
    248     },
    249     "human_studies": {
    250       "pre_registered": {
    251         "applies": false,
    252         "answer": false,
    253         "justification": "No human participants in this survey paper."
    254       },
    255       "irb_or_ethics_approval": {
    256         "applies": false,
    257         "answer": false,
    258         "justification": "No human participants."
    259       },
    260       "demographics_reported": {
    261         "applies": false,
    262         "answer": false,
    263         "justification": "No human participants."
    264       },
    265       "inclusion_exclusion_criteria": {
    266         "applies": false,
    267         "answer": false,
    268         "justification": "No human participants."
    269       },
    270       "randomization_described": {
    271         "applies": false,
    272         "answer": false,
    273         "justification": "No human participants."
    274       },
    275       "blinding_described": {
    276         "applies": false,
    277         "answer": false,
    278         "justification": "No human participants."
    279       },
    280       "attrition_reported": {
    281         "applies": false,
    282         "answer": false,
    283         "justification": "No human participants."
    284       }
    285     },
    286     "cost_and_practicality": {
    287       "inference_cost_reported": {
    288         "applies": false,
    289         "answer": false,
    290         "justification": "Survey paper — no method with inference costs."
    291       },
    292       "compute_budget_stated": {
    293         "applies": false,
    294         "answer": false,
    295         "justification": "Survey paper — no computational experiments."
    296       }
    297     },
    298     "survey_methodology": {
    299       "prisma_or_structured_protocol": {
    300         "applies": true,
    301         "answer": false,
    302         "justification": "No PRISMA flow diagram, structured search protocol, or systematic methodology is described. The paper states it 'systematically analyzes existing research' (Section 4.1) but provides no search strategy, database list, query terms, or paper counts at screening stages."
    303       },
    304       "quality_assessment_of_sources": {
    305         "applies": true,
    306         "answer": false,
    307         "justification": "The survey does not assess the methodological quality of cited papers. All surveyed works are treated equally regardless of their rigor. Tables summarize advantages/disadvantages of approaches but do not evaluate the quality of the underlying evidence."
    308       },
    309       "publication_bias_discussed": {
    310         "applies": true,
    311         "answer": false,
    312         "justification": "No discussion of publication bias. The survey does not consider whether its sources skew toward positive results or whether negative findings about MAS approaches are underrepresented."
    313       }
    314     }
    315   },
    316   "claims": [
    317     {
    318       "claim": "LLM-based MASs enable groups of intelligent agents to coordinate and solve complex tasks collectively, transitioning from isolated models to collaboration-centric approaches.",
    319       "evidence": "Supported by review of multiple frameworks (AutoGen, MetaGPT, AgentVerse, CAMEL) in Sections 4-5, with specific examples of cooperative, competitive, and coopetitive systems.",
    320       "supported": "moderate"
    321     },
    322     {
    323       "claim": "MAS with suboptimal collaboration channel design can be overtaken by single-agent counterparts with strong prompts.",
    324       "evidence": "Cites [128] (Wang et al. 2024) which showed single agents with strong prompts outperform poorly designed MAS on reasoning tasks (Sections 4.2.2 and 4.6).",
    325       "supported": "moderate"
    326     },
    327     {
    328       "claim": "The proposed five-dimension framework (actors, types, structures, strategies, coordination) provides a comprehensive characterization of LLM-based MAS collaboration.",
    329       "evidence": "The framework is presented in Sections 3-4 with Tables 1-5 categorizing existing work along these dimensions. Coverage appears broad but completeness cannot be verified without a systematic search methodology.",
    330       "supported": "weak"
    331     },
    332     {
    333       "claim": "Cooperation is the primary collaboration type in LLM-based MAS, with competition and coopetition being less explored.",
    334       "evidence": "Table 2 shows cooperation has 8+ reference groups while competition has 4 and coopetition has only 2. Section 4.2 details this distribution.",
    335       "supported": "moderate"
    336     }
    337   ],
    338   "red_flags": [
    339     {
    340       "flag": "No systematic search methodology",
    341       "detail": "The paper claims to be a survey but provides no search protocol, database list, query terms, inclusion/exclusion criteria, or PRISMA-style flow diagram. The reader cannot assess whether important works were missed or how papers were selected."
    342     },
    343     {
    344       "flag": "No quality assessment of surveyed papers",
    345       "detail": "All cited works are treated as equally valid evidence. The survey does not assess the rigor of the papers it reviews, potentially laundering weak or unreplicated results alongside strong evidence."
    346     },
    347     {
    348       "flag": "Self-comparison in Table 1",
    349       "detail": "Table 1 rates the current paper as 'High' on all four dimensions while rating all prior surveys as Low-Medium. This self-assessment is subjective and not independently verified."
    350     },
    351     {
    352       "flag": "Framework not validated",
    353       "detail": "The proposed five-dimension framework is presented as a contribution but is not validated through any empirical process (e.g., inter-rater coding, coverage analysis, or comparison with alternative taxonomies)."
    354     }
    355   ],
    356   "cited_papers": [
    357     {
    358       "title": "AgentVerse: Facilitating Multi-Agent Collaboration and Exploring Emergent Behaviors",
    359       "authors": ["Weize Chen"],
    360       "year": 2024,
    361       "relevance": "Core MAS framework for cooperative agent collaboration with emergent behaviors, relevant to agentic AI evaluation."
    362     },
    363     {
    364       "title": "MetaGPT: Meta Programming for A Multi-Agent Collaborative Framework",
    365       "authors": ["Sirui Hong"],
    366       "year": 2024,
    367       "relevance": "Key MAS framework using role-based SOPs for software development, directly relevant to agentic coding evaluation."
    368     },
    369     {
    370       "title": "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
    371       "authors": ["Qingyun Wu"],
    372       "year": 2024,
    373       "relevance": "Major open-source MAS framework for multi-agent conversation, widely used in agentic AI research."
    374     },
    375     {
    376       "title": "CAMEL: Communicative Agents for 'Mind' Exploration of Large Language Model Society",
    377       "authors": ["Guohao Li"],
    378       "year": 2023,
    379       "arxiv_id": "2303.17760",
    380       "relevance": "Role-playing framework for multi-agent cooperation, foundational work in LLM agent collaboration."
    381     },
    382     {
    383       "title": "ChatDev: Communicative Agents for Software Development",
    384       "authors": ["Chen Qian"],
    385       "year": 2024,
    386       "relevance": "Multi-agent software development system using chat chains, directly relevant to AI-assisted coding."
    387     },
    388     {
    389       "title": "Improving Factuality and Reasoning in Language Models through Multiagent Debate",
    390       "authors": ["Yilun Du"],
    391       "year": 2023,
    392       "arxiv_id": "2305.14325",
    393       "relevance": "Foundational work showing multi-agent debate improves LLM factuality and reasoning."
    394     },
    395     {
    396       "title": "AgentBench: Evaluating LLMs as Agents",
    397       "authors": ["Xiao Liu"],
    398       "year": 2024,
    399       "relevance": "Benchmark for evaluating LLM agent capabilities, relevant to evaluation methodology."
    400     },
    401     {
    402       "title": "Reflexion: language agents with verbal reinforcement learning",
    403       "authors": ["Noah Shinn"],
    404       "year": 2023,
    405       "relevance": "Self-reflection agent framework for iterative improvement, key agentic AI methodology."
    406     },
    407     {
    408       "title": "Rethinking the Bounds of LLM Reasoning: Are Multi-Agent Discussions the Key?",
    409       "authors": ["Qineng Wang"],
    410       "year": 2024,
    411       "relevance": "Shows single agents can outperform poorly designed MAS, important counter-evidence to MAS benefits."
    412     },
    413     {
    414       "title": "MapCoder: Multi-Agent Code Generation for Competitive Problem Solving",
    415       "authors": ["Md. Ashraful Islam"],
    416       "year": 2024,
    417       "relevance": "Multi-agent code generation system with role-based collaboration, relevant to AI coding evaluation."
    418     },
    419     {
    420       "title": "Self-Organized Agents: A LLM Multi-Agent Framework toward Ultra Large-Scale Code Generation and Optimization",
    421       "authors": ["Yoichi Ishibashi"],
    422       "year": 2024,
    423       "arxiv_id": "2404.02183",
    424       "relevance": "Self-organizing MAS for large-scale code generation, relevant to scalable agentic coding."
    425     },
    426     {
    427       "title": "Agent-as-a-Judge: Evaluate Agents with Agents",
    428       "authors": ["Mingchen Zhuge"],
    429       "year": 2024,
    430       "relevance": "Novel framework for evaluating agentic systems using other agents, relevant to evaluation methodology."
    431     }
    432   ]
    433 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs