scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (19772B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "survey",
      4   "paper": {
      5     "title": "Graph-based Agent Memory: Taxonomy, Techniques, and Applications",
      6     "authors": [
      7       "Chang Yang",
      8       "Chuang Zhou",
      9       "Yilin Xiao",
     10       "Su Dong",
     11       "Luyao Zhuang",
     12       "Yujing Zhang",
     13       "Zhu Wang",
     14       "Zijin Hong",
     15       "Zheng Yuan",
     16       "Zhishang Xiang",
     17       "Shengyuan Chen",
     18       "Huachi Zhou",
     19       "Qinggang Zhang",
     20       "Ninghao Liu",
     21       "Jinsong Su",
     22       "Xinrun Wang",
     23       "Yi Chang",
     24       "Xiao Huang"
     25     ],
     26     "year": 2026,
     27     "venue": "arXiv",
     28     "arxiv_id": "2602.05665",
     29     "doi": null
     30   },
     31   "checklist": {
     32     "claims_and_evidence": {
     33       "abstract_claims_supported": {
     34         "applies": true,
     35         "answer": true,
     36         "justification": "The abstract promises taxonomy, lifecycle-based technique analysis, open-source library/benchmark summary, applications, and challenges — all delivered in corresponding sections with substantial coverage.",
     37         "source": "haiku"
     38       },
     39       "causal_claims_justified": {
     40         "applies": true,
     41         "answer": false,
     42         "justification": "The paper asserts 'graph-based memory architectures have demonstrated superior performance in applications requiring multi-session coherence... and hallucination reduction' without a systematic meta-analysis or controlled comparison — citing individual papers is not adequate support for a general superiority claim.",
     43         "source": "haiku"
     44       },
     45       "generalization_bounded": {
     46         "applies": true,
     47         "answer": false,
     48         "justification": "Claims such as 'graph-based memory is a general and flexible framework' that subsumes all other paradigms are stated broadly without empirical bounds on which tasks, model scales, or conditions the advantage holds.",
     49         "source": "haiku"
     50       },
     51       "alternative_explanations_discussed": {
     52         "applies": true,
     53         "answer": false,
     54         "justification": "Trade-offs between graph types are noted in Figure 6, but the survey does not seriously consider whether flat or vector-based memory is sufficient for most practical deployments — a key alternative that would challenge the paper's central thesis.",
     55         "source": "haiku"
     56       },
     57       "proxy_outcome_distinction": {
     58         "applies": false,
     59         "answer": false,
     60         "justification": "This survey conducts no original empirical measurements, so the proxy-outcome distinction does not apply at the survey level.",
     61         "source": "haiku"
     62       }
     63     },
     64     "limitations_and_scope": {
     65       "limitations_section_present": {
     66         "applies": true,
     67         "answer": true,
     68         "justification": "Section X 'Limitations and Future Directions' is present and covers memory graph quality, scalability, privacy, dynamic schema learning, interpretability, theoretical foundations, and multi-agent coordination.",
     69         "source": "haiku"
     70       },
     71       "threats_to_validity_specific": {
     72         "applies": true,
     73         "answer": false,
     74         "justification": "The limitations section addresses technical challenges of graph memory systems but says nothing about survey-specific threats such as selection bias, non-systematic paper discovery, over-representation of arXiv preprints, or the absence of inclusion criteria based on peer-review status.",
     75         "source": "haiku"
     76       },
     77       "scope_boundaries_stated": {
     78         "applies": true,
     79         "answer": false,
     80         "justification": "The topical focus on graph-based memory is stated, but no explicit scope boundaries are given — no year range, no venue coverage criteria, and no 'we do not cover X' statements to bound what the review claims to represent.",
     81         "source": "haiku"
     82       }
     83     },
     84     "conflicts_of_interest": {
     85       "funding_disclosed": {
     86         "applies": true,
     87         "answer": false,
     88         "justification": "No funding acknowledgment appears anywhere in the paper despite 18 authors from four institutions. Author affiliations list emails but no grant support.",
     89         "source": "haiku"
     90       },
     91       "affiliations_disclosed": {
     92         "applies": true,
     93         "answer": true,
     94         "justification": "Author affiliations are fully disclosed: Hong Kong Polytechnic University, Xiamen University, Singapore Management University, and Jilin University, with email addresses provided.",
     95         "source": "haiku"
     96       },
     97       "funder_independent_of_outcome": {
     98         "applies": false,
     99         "answer": false,
    100         "justification": "No funding is disclosed, so funder independence cannot be assessed; marked not applicable.",
    101         "source": "haiku"
    102       },
    103       "financial_interests_declared": {
    104         "applies": true,
    105         "answer": false,
    106         "justification": "No competing interests statement, no declaration of patents, equity, or consulting relationships appears in the paper.",
    107         "source": "haiku"
    108       }
    109     },
    110     "scope_and_framing": {
    111       "key_terms_defined": {
    112         "applies": true,
    113         "answer": true,
    114         "justification": "AI agent (Definition II.1), basic memory operations (Definition II.2), memory lifecycle (Definition II.3), graph types (KG, temporal, hypergraph), and knowledge vs. experience memory are formally defined with mathematical notation in Section II and Appendix A.",
    115         "source": "haiku"
    116       },
    117       "intended_contribution_clear": {
    118         "applies": true,
    119         "answer": true,
    120         "justification": "Four explicit contributions are bulleted at the end of Section I: memory taxonomy, lifecycle-based technique analysis, open-source library/benchmark summary, and challenges/future directions.",
    121         "source": "haiku"
    122       },
    123       "engagement_with_prior_work": {
    124         "applies": true,
    125         "answer": true,
    126         "justification": "The paper engages substantively with CoALA, MemGPT, Reflexion, Voyager, and domain-specific systems throughout, contrasting graph-based approaches against traditional vector/log-based memory and positioning contributions relative to prior surveys.",
    127         "source": "haiku"
    128       }
    129     }
    130   },
    131   "type_checklist": {
    132     "survey": {
    133       "search_and_selection": {
    134         "search_strategy_reproducible": {
    135           "applies": true,
    136           "answer": false,
    137           "justification": "No search strategy is described anywhere. The paper provides no information about how papers were identified, what queries were used, or what databases were searched.",
    138           "source": "haiku"
    139         },
    140         "inclusion_exclusion_explicit": {
    141           "applies": true,
    142           "answer": false,
    143           "justification": "No inclusion or exclusion criteria are stated. Papers appear to have been selected based on author familiarity with the literature rather than systematic, reproducible criteria.",
    144           "source": "haiku"
    145         },
    146         "prisma_or_structured_protocol": {
    147           "applies": true,
    148           "answer": false,
    149           "justification": "No mention of PRISMA or any structured review protocol. This is a narrative survey, not a systematic review.",
    150           "source": "haiku"
    151         },
    152         "search_terms_provided": {
    153           "applies": true,
    154           "answer": false,
    155           "justification": "No search terms or queries are provided anywhere in the paper.",
    156           "source": "haiku"
    157         },
    158         "databases_listed": {
    159           "applies": true,
    160           "answer": false,
    161           "justification": "No databases (e.g., arXiv, ACL Anthology, Semantic Scholar, IEEE Xplore) are mentioned as having been systematically searched.",
    162           "source": "haiku"
    163         },
    164         "screening_process_documented": {
    165           "applies": true,
    166           "answer": false,
    167           "justification": "No screening process is documented — no PRISMA flowchart, no counts of papers identified, screened, or excluded at any stage.",
    168           "source": "haiku"
    169         },
    170         "review_scope_justified": {
    171           "applies": true,
    172           "answer": false,
    173           "justification": "The topical scope is stated but not formally justified with a methodology rationale. No year range, venue selection, or explicit topic boundary is justified.",
    174           "source": "haiku"
    175         }
    176       },
    177       "synthesis_quality": {
    178         "conflicting_findings_acknowledged": {
    179           "applies": true,
    180           "answer": false,
    181           "justification": "Trade-offs between graph types are noted in Figure 6, but conflicting empirical findings across reviewed papers — e.g., cases where graph memory provides no benefit over flat memory — are not acknowledged or discussed.",
    182           "source": "haiku"
    183         },
    184         "quality_assessment_of_sources": {
    185           "applies": true,
    186           "answer": false,
    187           "justification": "No quality rubric is applied to reviewed papers. Systems are described and categorized without evaluating their methodological rigor, and the survey does not flag that many cited papers are unreviewed preprints.",
    188           "source": "haiku"
    189         },
    190         "publication_bias_discussed": {
    191           "applies": true,
    192           "answer": false,
    193           "justification": "Publication bias is never mentioned. The survey does not acknowledge that reviewed papers systematically report positive results for graph memory, creating a skewed representation of the field.",
    194           "source": "haiku"
    195         },
    196         "quantitative_synthesis_present": {
    197           "applies": true,
    198           "answer": false,
    199           "justification": "No meta-analysis, vote counting, or effect size aggregation is performed. The synthesis is entirely narrative with no quantitative aggregation across reviewed papers.",
    200           "source": "haiku"
    201         },
    202         "recommendations_supported_by_evidence": {
    203           "applies": true,
    204           "answer": false,
    205           "justification": "Future research directions in Section X are speculative and not derived from a systematic quality assessment of reviewed work — they represent author opinion rather than evidence-grounded recommendations.",
    206           "source": "haiku"
    207         }
    208       }
    209     }
    210   },
    211   "claims": [
    212     {
    213       "claim": "Graph-based memory architectures have demonstrated superior performance in applications requiring multi-session coherence, personalized adaptation, complex task planning, and hallucination reduction.",
    214       "evidence": "Asserted in Section I and Section III.E based on citing individual systems; no systematic meta-analysis or controlled comparison against non-graph baselines is provided.",
    215       "supported": "weak"
    216     },
    217     {
    218       "claim": "Traditional memory paradigms (linear, vector, key-value) cannot adequately represent relational dependencies, hierarchical organization, and causal dependencies required by sophisticated agents.",
    219       "evidence": "Figure 3 provides an architectural comparison and Section III.D lists limitations of each traditional paradigm, but no empirical benchmarking demonstrates these failure modes at scale.",
    220       "supported": "moderate"
    221     },
    222     {
    223       "claim": "Graph-based agent memory is a general and flexible framework that subsumes traditional memory paradigms as degenerate cases.",
    224       "evidence": "Conceptual argument in Section III.E that a linear buffer corresponds to a chain graph and vector memory to a fully-connected graph — a definitional reframing rather than an empirical demonstration.",
    225       "supported": "moderate"
    226     },
    227     {
    228       "claim": "There has been a surge of research into graph-based memory architectures for LLM agents in 2025–2026.",
    229       "evidence": "Over 200 cited papers are provided, with the majority from 2025–2026, supporting active and growing research activity in this space.",
    230       "supported": "strong"
    231     },
    232     {
    233       "claim": "Open-source graph memory libraries vary substantially in their support for temporal reasoning, hierarchical structure, and agent integration.",
    234       "evidence": "Table IV in Appendix B provides a systematic comparison of 11 libraries across 12 dimensions, showing clear differentiation in capabilities.",
    235       "supported": "strong"
    236     },
    237     {
    238       "claim": "Existing benchmarks have significant limitations for evaluating graph-based memory specifically, including conflating long-context processing with dedicated memory mechanisms.",
    239       "evidence": "Section VIII.B explicitly notes this limitation for LongContext benchmarks and similar limitations for each scenario category — specific and consistent critique.",
    240       "supported": "moderate"
    241     }
    242   ],
    243   "methodology_tags": [
    244     "survey",
    245     "theoretical"
    246   ],
    247   "key_findings": "This paper presents a comprehensive taxonomy of graph-based LLM agent memory organized around the memory lifecycle (extraction, storage, retrieval, evolution), distinguishing knowledge memory (passive/static) from experience memory (proactive/dynamic) and five graph structure paradigms (knowledge graph, hierarchical, temporal, hypergraph, hybrid) with explicit trade-off analysis in Figure 6. The survey catalogues 11 open-source libraries and 50+ benchmarks across 7 scenario categories (Interaction, Personalization, Web, LongContext, Continual, Environments, Tool/Gen), finding that no benchmark adequately isolates graph-memory-specific contributions from confounding factors such as long-context processing and planning skill. The central claim is that graph structures provide computable advantages over flat memory for relational reasoning, hierarchical organization, and temporal tracking, with the paper positioning graph memory as a unified framework subsuming traditional approaches. Key open challenges include absence of intrinsic memory graph quality metrics, scalability bottlenecks, privacy vulnerabilities from relational inference attacks, and lack of theoretical foundations for memory-augmented agents.",
    248   "red_flags": [
    249     {
    250       "flag": "No systematic search methodology",
    251       "detail": "The paper presents no search strategy, inclusion/exclusion criteria, databases searched, or PRISMA-style screening counts. It is a narrative survey whose papers appear to have been selected by author familiarity, not a systematic or scoping review."
    252     },
    253     {
    254       "flag": "Unsupported superiority claims",
    255       "detail": "Claims that graph-based memory 'demonstrated superior performance' across multiple domains are asserted without meta-analysis, effect size aggregation, or systematic controlled comparison against non-graph baselines."
    256     },
    257     {
    258       "flag": "No quality assessment of reviewed papers",
    259       "detail": "Reviewed systems are described and categorized but never evaluated for methodological rigor. Many cited papers (50+) are unreviewed arXiv preprints, and this is not acknowledged."
    260     },
    261     {
    262       "flag": "Publication bias unaddressed",
    263       "detail": "The survey does not acknowledge that reviewed papers systematically report positive results for graph memory approaches, creating a positively skewed picture of the field without discussion of null or negative results."
    264     },
    265     {
    266       "flag": "Funding undisclosed",
    267       "detail": "No funding acknowledgment appears despite 18 authors from four institutions across mainland China, Hong Kong, and Singapore — an unusual omission for a large collaborative academic survey."
    268     },
    269     {
    270       "flag": "Heavy reliance on unreviewed preprints",
    271       "detail": "The majority of cited works from 2025–2026 are arXiv preprints, including foundational claims about graph memory advantages, yet the survey treats them equivalently to peer-reviewed publications."
    272     }
    273   ],
    274   "cited_papers": [
    275     {
    276       "title": "Cognitive Architectures for Language Agents (CoALA)",
    277       "relevance": "Foundational prior taxonomy for agent memory; the survey's graph-based perspective explicitly extends and builds upon this framework."
    278     },
    279     {
    280       "title": "MemGPT: Towards LLMs as Operating Systems",
    281       "relevance": "Key prior system for agent memory management with paging; positioned as a representative existing approach that graph-based memory builds upon."
    282     },
    283     {
    284       "title": "Zep: A Temporal Knowledge Graph Architecture for Agent Memory (Graphiti)",
    285       "relevance": "Central example system for temporal graph memory with bi-temporal modeling; cited extensively across storage, retrieval, and evolution sections."
    286     },
    287     {
    288       "title": "Mem0: Building Production-Ready AI Agents with Scalable Long-Term Memory",
    289       "relevance": "Representative industrial-scale knowledge graph memory system; cited as both a technique example and a comprehensive open-source library."
    290     },
    291     {
    292       "title": "From Local to Global: A Graph RAG Approach to Query-Focused Summarization",
    293       "relevance": "Microsoft GraphRAG; cited as a key technique for memory consolidation via graph merging and community summarization in the evolution section."
    294     },
    295     {
    296       "title": "Reflexion: Language Agents with Verbal Reinforcement Learning",
    297       "relevance": "Cited as foundational work on sequential trajectory extraction and experience-based memory evolution via verbal feedback loops."
    298     },
    299     {
    300       "title": "Voyager: An Open-Ended Embodied Agent with Large Language Models",
    301       "relevance": "Key motivating example of procedural memory using an ever-growing code library; cited as illustration of lifelong learning via experience memory."
    302     },
    303     {
    304       "title": "AriGraph: Learning Knowledge Graph World Models with Episodic Memory for LLM Agents",
    305       "relevance": "Representative system combining knowledge graph world models with episodic memory; extensively cited for KG construction and extraction methodology."
    306     },
    307     {
    308       "title": "LongMemEval: Benchmarking Chat Assistants on Long-Term Interactive Memory",
    309       "relevance": "Key benchmark for evaluating long-term conversational memory; cited in the benchmark taxonomy and used to illustrate evaluation challenges."
    310     },
    311     {
    312       "title": "On the Structural Memory of LLM Agents",
    313       "relevance": "Direct prior work on structural memory for LLM agents; cited as immediate motivation for the graph-based approach and as a comparative reference."
    314     }
    315   ],
    316   "engagement_factors": {
    317     "practical_relevance": {
    318       "score": 3,
    319       "justification": "Directly applicable to practitioners building LLM agent systems — covers 11 open-source libraries with URLs, 50+ benchmarks, and implementation patterns across 8 application domains."
    320     },
    321     "surprise_contrarian": {
    322       "score": 1,
    323       "justification": "Reinforces the prevailing narrative that structured/graph memory is superior to flat memory; does not challenge consensus views or present surprising findings."
    324     },
    325     "fear_safety": {
    326       "score": 1,
    327       "justification": "Briefly mentions adversarial attacks on memory (prompt injection, data poisoning, relational inference attacks on private data) as a challenge in Section X, but this is not a central focus."
    328     },
    329     "drama_conflict": {
    330       "score": 0,
    331       "justification": "No controversy, competing claims between research groups, or critical assessments of prior work; uniformly positive framing of the field."
    332     },
    333     "demo_ability": {
    334       "score": 2,
    335       "justification": "Links to Awesome-GraphMemory GitHub repository and 11 open-source libraries with direct URLs, enabling practitioners to immediately explore and try systems."
    336     },
    337     "brand_recognition": {
    338       "score": 1,
    339       "justification": "Authors from Hong Kong Polytechnic University and co-institutions; cites well-known systems (Mem0, MemGPT, Reflexion, Voyager) but no major AI lab (OpenAI, Google, Anthropic, Meta) authorship."
    340     }
    341   },
    342   "hn_data": {
    343     "threads": [],
    344     "top_points": 0,
    345     "total_points": 0,
    346     "total_comments": 0
    347   }
    348 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs