scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (26973B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Explainable and Fine-Grained Safeguarding of LLM Multi-Agent Systems via Bi-Level Graph Anomaly Detection",
      6     "authors": [
      7       "Junjun Pan",
      8       "Yixin Liu",
      9       "Rui Miao",
     10       "Kaize Ding",
     11       "Yu Zheng"
     12     ],
     13     "year": 2025,
     14     "venue": "arXiv.org",
     15     "arxiv_id": "2512.18733",
     16     "doi": "10.48550/arXiv.2512.18733"
     17   },
     18   "checklist": {
     19     "claims_and_evidence": {
     20       "abstract_claims_supported": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "The abstract claims 'extensive experiments across diverse MAS topologies and attack scenarios demonstrate robust detection performance and strong interpretability,' which is supported by Table 1 (6 datasets, 4 topologies) and Figure 5 (qualitative explanation case studies).",
     24         "source": "haiku"
     25       },
     26       "causal_claims_justified": {
     27         "applies": true,
     28         "answer": true,
     29         "justification": "Causal claims about component contributions are supported by ablation studies in Tables 2 and 3, which systematically remove the fusion module and token view to isolate their causal effects on performance.",
     30         "source": "haiku"
     31       },
     32       "generalization_bounded": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "The paper repeatedly claims 'real-world applications' and 'practical reliability' but all experiments are in simulated MAS environments; the limitations section only vaguely notes 'evaluation scope remains limited' without bounding specific generalizability claims in the conclusions.",
     36         "source": "haiku"
     37       },
     38       "alternative_explanations_discussed": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "No alternative explanations are considered for XG-Guard's superior performance; the paper does not discuss whether its gains might stem from more favorable hyperparameter tuning, the choice of SentenceBERT encoder, or an experimental setup that specifically matches XG-Guard's design assumptions.",
     42         "source": "haiku"
     43       },
     44       "proxy_outcome_distinction": {
     45         "applies": true,
     46         "answer": true,
     47         "justification": "AUROC, ASR, and ACC metrics directly measure the defense system's core objectives (detecting malicious agents and maintaining task performance) without mischaracterizing proxies as primary outcomes.",
     48         "source": "haiku"
     49       }
     50     },
     51     "limitations_and_scope": {
     52       "limitations_section_present": {
     53         "applies": true,
     54         "answer": true,
     55         "justification": "There is a dedicated 'Limitations' section appearing after the conclusion, before 'Ethical Considerations.'",
     56         "source": "haiku"
     57       },
     58       "threats_to_validity_specific": {
     59         "applies": true,
     60         "answer": true,
     61         "justification": "The limitations section identifies a specific concrete threat: 'API providers may update backend models without notice, the performance of MAS and the malicious agent detector may become unstable,' which is specific and non-boilerplate.",
     62         "source": "haiku"
     63       },
     64       "scope_boundaries_stated": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "The limitations section says 'evaluation scope remains limited' and suggests extending to 'broader task domains,' but does not explicitly state what results do NOT demonstrate — no clear boundary on where findings do not apply.",
     68         "source": "haiku"
     69       }
     70     },
     71     "conflicts_of_interest": {
     72       "funding_disclosed": {
     73         "applies": true,
     74         "answer": false,
     75         "justification": "No funding acknowledgment appears in the paper; the ethical considerations section mentions 'no conflicts of interest' but does not disclose any funding sources.",
     76         "source": "haiku"
     77       },
     78       "affiliations_disclosed": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "Author affiliations are clearly listed at the top of the paper: Griffith University, Jilin University, and Northwestern University.",
     82         "source": "haiku"
     83       },
     84       "funder_independent_of_outcome": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "Funding is not disclosed, so funder independence cannot be assessed.",
     88         "source": "haiku"
     89       },
     90       "financial_interests_declared": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "The ethical considerations section states 'We identify no ethical risks or conflicts of interest,' which is boilerplate and not a proper competing interests or financial interests declaration.",
     94         "source": "haiku"
     95       }
     96     },
     97     "scope_and_framing": {
     98       "key_terms_defined": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "Key terms receive formal mathematical definitions: MAS as directed graph G=(V,E), agent tuple (Role, State, Memory, Plugin), the unsupervised defense problem, and 'explainable MAS defense' with token-level explanation scores.",
    102         "source": "haiku"
    103       },
    104       "intended_contribution_clear": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "The paper explicitly states three contributions (scenario, methodology, experiments) at the end of the introduction, clearly articulating that XG-Guard is the first unsupervised GAD framework for MAS with inherent explainability.",
    108         "source": "haiku"
    109       },
    110       "engagement_with_prior_work": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "The paper explicitly situates XG-Guard against G-Safeguard (supervised) and BlindGuard (unsupervised, no explainability), showing how each limitation motivates a specific design decision, with Appendix A providing comprehensive related work coverage of both MAS safety and GAD literature.",
    114         "source": "haiku"
    115       }
    116     }
    117   },
    118   "type_checklist": {
    119     "empirical": {
    120       "artifacts": {
    121         "code_released": {
    122           "applies": true,
    123           "answer": false,
    124           "justification": "No code repository, link, or promise of release appears anywhere in the paper.",
    125           "source": "haiku"
    126         },
    127         "data_released": {
    128           "applies": true,
    129           "answer": true,
    130           "justification": "The paper uses publicly available benchmarks: CSQA, MMLU, GSM8K, InjecAgent, and PoisonRAG, all of which are standard public datasets usable without modification.",
    131           "source": "haiku"
    132         },
    133         "environment_specified": {
    134           "applies": true,
    135           "answer": false,
    136           "justification": "Appendix D specifies optimizer and hyperparameters but provides no environment specifications such as Python version, library versions, CUDA version, or containerization.",
    137           "source": "haiku"
    138         },
    139         "reproduction_instructions": {
    140           "applies": true,
    141           "answer": false,
    142           "justification": "No step-by-step reproduction instructions are provided; Appendix B gives algorithm pseudocode and Appendix D gives hyperparameters, but neither amounts to a reproducible end-to-end pipeline.",
    143           "source": "haiku"
    144         }
    145       },
    146       "statistical_methodology": {
    147         "confidence_intervals_or_error_bars": {
    148           "applies": true,
    149           "answer": false,
    150           "justification": "Table 1 reports single AUC and ASR values for all conditions with no confidence intervals, error bars, or indication of multiple experimental runs.",
    151           "source": "haiku"
    152         },
    153         "significance_tests": {
    154           "applies": true,
    155           "answer": false,
    156           "justification": "No statistical significance tests are performed despite extensive comparative claims against five baselines across 24 experimental conditions.",
    157           "source": "haiku"
    158         },
    159         "effect_sizes_reported": {
    160           "applies": true,
    161           "answer": true,
    162           "justification": "Numeric AUC and ASR values are reported for all methods across all conditions in Table 1, providing absolute performance differences with full baseline context for computing effect magnitudes.",
    163           "source": "haiku"
    164         },
    165         "sample_size_justified": {
    166           "applies": true,
    167           "answer": false,
    168           "justification": "No justification for the choice of datasets, number of experimental trials, or sample sizes is provided anywhere in the paper.",
    169           "source": "haiku"
    170         },
    171         "variance_reported": {
    172           "applies": true,
    173           "answer": false,
    174           "justification": "Results appear to be single experimental runs; no variance, standard deviation, or spread across multiple runs is reported.",
    175           "source": "haiku"
    176         }
    177       },
    178       "evaluation_design": {
    179         "baselines_included": {
    180           "applies": true,
    181           "answer": true,
    182           "justification": "Five baselines are included: DOMINANT, PREM, TAM (general GAD methods), BlindGuard (unsupervised MAS defense SOTA), and G-Safeguard (supervised MAS defense upper bound), plus a no-defense lower bound.",
    183           "source": "haiku"
    184         },
    185         "baselines_contemporary": {
    186           "applies": true,
    187           "answer": true,
    188           "justification": "BlindGuard (2025) and G-Safeguard (2025) are contemporary and directly comparable; older GAD methods (DOMINANT 2019, PREM 2023, TAM 2023) are appropriately included as general GAD representatives.",
    189           "source": "haiku"
    190         },
    191         "ablation_study": {
    192           "applies": true,
    193           "answer": true,
    194           "justification": "Section 4.2 includes an ablation systematically removing the fusion module ('−Fusion') and then the token view ('−Token'), with full results across all 24 conditions in Appendix E Table 3.",
    195           "source": "haiku"
    196         },
    197         "multiple_metrics": {
    198           "applies": true,
    199           "answer": true,
    200           "justification": "Three metrics are used: AUROC (detection ability), ASR@3 (attack success rate after defense), and ACC (overall MAS task accuracy after defense).",
    201           "source": "haiku"
    202         },
    203         "human_evaluation": {
    204           "applies": false,
    205           "answer": false,
    206           "justification": "The task is automated malicious agent detection with objective ground-truth labels; human evaluation is not relevant to the core detection performance claims.",
    207           "source": "haiku"
    208         },
    209         "held_out_test_set": {
    210           "applies": true,
    211           "answer": true,
    212           "justification": "Training uses unattacked MAS graphs and testing uses separate attacked graphs; the defender is trained without exposure to malicious data, constituting a proper held-out evaluation.",
    213           "source": "haiku"
    214         },
    215         "per_category_breakdown": {
    216           "applies": true,
    217           "answer": true,
    218           "justification": "Table 1 provides full breakdowns by MAS topology (chain, tree, star, random) and attack type (prompt injection, tool attack, memory attack) across six datasets.",
    219           "source": "haiku"
    220         },
    221         "failure_cases_discussed": {
    222           "applies": true,
    223           "answer": true,
    224           "justification": "The paper identifies a concrete failure mode: 'spurious tokens appearing in the explanations, like punctuation marks,' and explains the root cause (SentenceBERT embedding contextual information into punctuation tokens).",
    225           "source": "haiku"
    226         },
    227         "negative_results_reported": {
    228           "applies": true,
    229           "answer": true,
    230           "justification": "Ablation reveals the counterintuitive negative finding that naive score fusion ('−Fusion': AUC 48.27) performs far worse than removing the token level entirely ('−Token': AUC 90.67) on TA-InjecAgent, validating the prototype semantic mismatch problem.",
    231           "source": "haiku"
    232         }
    233       },
    234       "setup_transparency": {
    235         "model_versions_specified": {
    236           "applies": true,
    237           "answer": false,
    238           "justification": "GPT-4o-mini is used as the primary backbone LLM without a snapshot date or version pin; DeepSeek-V3 and Qwen3-30B-A3B are cited but the API access point and exact checkpoint are not specified.",
    239           "source": "haiku"
    240         },
    241         "prompts_provided": {
    242           "applies": true,
    243           "answer": false,
    244           "justification": "No actual system prompts or attack prompt templates are provided; attack types are described conceptually ('system prompts of malicious agents are manipulated') without showing prompt content.",
    245           "source": "haiku"
    246         },
    247         "hyperparameters_reported": {
    248           "applies": true,
    249           "answer": true,
    250           "justification": "Appendix D reports Adam optimizer, 20 epochs, L2 weight decay 2×10⁻⁴, dataset-specific learning rates (1×10⁻⁵ for MA-CSQA, 1×10⁻⁴ for others), and dataset-specific contrastive trade-off α values.",
    251           "source": "haiku"
    252         },
    253         "scaffolding_described": {
    254           "applies": true,
    255           "answer": true,
    256           "justification": "The MAS is formally described as a directed graph with agent tuple (Role, State, Memory, Plugin), communication topology matrix A, and the detect-then-remediate defense pipeline is explained with graph pruning semantics.",
    257           "source": "haiku"
    258         },
    259         "data_preprocessing_documented": {
    260           "applies": true,
    261           "answer": true,
    262           "justification": "Section 3.1 describes the full transformation from agent responses to graph attributes via SentenceBERT at both sentence and token level, with explicit equations for each encoding step.",
    263           "source": "haiku"
    264         }
    265       },
    266       "data_integrity": {
    267         "raw_data_available": {
    268           "applies": true,
    269           "answer": false,
    270           "justification": "The MAS interaction graphs generated for training and testing are not released; only the underlying benchmark task datasets are publicly available, not the dialogue data used in experiments.",
    271           "source": "haiku"
    272         },
    273         "data_collection_described": {
    274           "applies": true,
    275           "answer": false,
    276           "justification": "The paper states experiments follow 'settings of previous works' (Wang et al., 2025; Miao et al., 2025) without detailing how many MAS interactions were generated, what agent roles were assigned, or how attack injection was implemented.",
    277           "source": "haiku"
    278         },
    279         "recruitment_methods_described": {
    280           "applies": false,
    281           "answer": false,
    282           "justification": "No human participants; standard public benchmarks used as task inputs.",
    283           "source": "haiku"
    284         },
    285         "data_pipeline_documented": {
    286           "applies": true,
    287           "answer": false,
    288           "justification": "The encoding pipeline (responses → graph attributes) is documented, but the upstream pipeline from benchmark questions to MAS interactions to experimental datasets is deferred to prior work without sufficient detail for independent reproduction.",
    289           "source": "haiku"
    290         }
    291       },
    292       "contamination": {
    293         "training_cutoff_stated": {
    294           "applies": false,
    295           "answer": false,
    296           "justification": "NA — the evaluation is of a defense system trained on generated MAS interaction data, not of LLM capabilities on benchmarks; standard benchmark contamination does not apply to XG-Guard's training.",
    297           "source": "haiku"
    298         },
    299         "train_test_overlap_discussed": {
    300           "applies": false,
    301           "answer": false,
    302           "justification": "NA — XG-Guard is trained on generated normal MAS graphs; the benchmark datasets serve as task inputs for the MAS agents, not as training/test data for the defense model.",
    303           "source": "haiku"
    304         },
    305         "benchmark_contamination_addressed": {
    306           "applies": false,
    307           "answer": false,
    308           "justification": "NA — the detection target is malicious agent behavior in MAS dialogues, not LLM accuracy on benchmark questions; contamination of benchmark tasks in the backbone LLM is not the evaluation concern.",
    309           "source": "haiku"
    310         }
    311       },
    312       "human_studies": {
    313         "pre_registered": {
    314           "applies": false,
    315           "answer": false,
    316           "justification": "NA — no human participants.",
    317           "source": "haiku"
    318         },
    319         "irb_or_ethics_approval": {
    320           "applies": false,
    321           "answer": false,
    322           "justification": "NA — the paper explicitly states 'Our research involves no human subjects, animal experiments, or sensitive data.'",
    323           "source": "haiku"
    324         },
    325         "demographics_reported": {
    326           "applies": false,
    327           "answer": false,
    328           "justification": "NA — no human participants.",
    329           "source": "haiku"
    330         },
    331         "inclusion_exclusion_criteria": {
    332           "applies": false,
    333           "answer": false,
    334           "justification": "NA — no human participants.",
    335           "source": "haiku"
    336         },
    337         "randomization_described": {
    338           "applies": false,
    339           "answer": false,
    340           "justification": "NA — no human participants.",
    341           "source": "haiku"
    342         },
    343         "blinding_described": {
    344           "applies": false,
    345           "answer": false,
    346           "justification": "NA — no human participants.",
    347           "source": "haiku"
    348         },
    349         "attrition_reported": {
    350           "applies": false,
    351           "answer": false,
    352           "justification": "NA — no human participants.",
    353           "source": "haiku"
    354         }
    355       },
    356       "cost_and_practicality": {
    357         "inference_cost_reported": {
    358           "applies": true,
    359           "answer": false,
    360           "justification": "Only theoretical time complexity O(NL² + M) is given in Appendix C; no actual inference latency, API costs, or wall-clock runtime is reported.",
    361           "source": "haiku"
    362         },
    363         "compute_budget_stated": {
    364           "applies": true,
    365           "answer": false,
    366           "justification": "No total computational budget, hardware specifications, GPU hours, or API call counts are stated anywhere in the paper.",
    367           "source": "haiku"
    368         }
    369       }
    370     }
    371   },
    372   "claims": [
    373     {
    374       "claim": "XG-Guard consistently achieves superior defense performance among unsupervised methods, exceeding 90% AUROC across all topologies and attack scenarios.",
    375       "evidence": "Table 1 shows XG-Guard achieving 87–99% AUC across 24 experimental conditions (6 datasets × 4 topologies), substantially outperforming BlindGuard (55–88%) and other unsupervised baselines; note that the low end of this range (87%) falls short of the claimed 90% threshold in some conditions.",
    376       "supported": "strong"
    377     },
    378     {
    379       "claim": "XG-Guard is the first work to formulate MAS defense as an unsupervised GAD problem while providing inherent explainability.",
    380       "evidence": "The paper asserts this priority in the contributions section; prior works G-Safeguard (supervised) and BlindGuard (unsupervised, no explainability) are positioned as the predecessors being surpassed.",
    381       "supported": "moderate"
    382     },
    383     {
    384       "claim": "Token-level representations are essential for detecting malicious agents; removing them causes significant AUROC drops.",
    385       "evidence": "Ablation in Table 2 shows the '−Token' variant drops from 99.56 to 90.67 AUC on TA-InjecAgent (tree topology); full ablation in Appendix E shows consistent degradation across all settings.",
    386       "supported": "strong"
    387     },
    388     {
    389       "claim": "Naive averaging of sentence- and token-level scores performs worse than removing the token level entirely, due to prototype semantic mismatch.",
    390       "evidence": "Table 2 and Appendix E show '−Fusion' scoring 48.27 AUC on TA-InjecAgent while '−Token' scores 90.67, a counterintuitive result the paper explains via the covariance-guided fusion mechanism.",
    391       "supported": "strong"
    392     },
    393     {
    394       "claim": "XG-Guard generalizes to different LLM backbones (DeepSeek-V3, Qwen3-30B-A3B) with consistently strong performance.",
    395       "evidence": "Figure 3 shows XG-Guard maintaining the lowest ASR@3 across both alternative LLMs on CSQA and PoisonRAG datasets across four topologies, though without variance or significance reporting.",
    396       "supported": "moderate"
    397     }
    398   ],
    399   "methodology_tags": [
    400     "benchmark-eval",
    401     "case-study"
    402   ],
    403   "key_findings": "XG-Guard proposes a bi-level graph anomaly detection framework combining sentence- and token-level agent representations with a theme-based prototype detector to identify malicious agents in LLM multi-agent systems without labeled training data. It consistently achieves >90% AUROC across 6 datasets and 4 network topologies, substantially outperforming prior unsupervised methods and approaching supervised baselines. A critical finding from the ablation is that naively combining sentence- and token-level scores (−Fusion) performs far worse than removing the token level entirely, validating the prototype semantic mismatch problem and the necessity of covariance-guided fusion. Token-level explanation scores highlight specific malicious phrases in agent outputs, though spurious punctuation tokens appear in some explanations due to contextual SentenceBERT embeddings.",
    404   "red_flags": [
    405     {
    406       "flag": "No statistical significance testing",
    407       "detail": "All comparative claims are made without significance tests or confidence intervals; results appear to be single experimental runs across all 24 conditions, making performance differences statistically unvalidated."
    408     },
    409     {
    410       "flag": "No code released",
    411       "detail": "No repository or code link is provided, making reproduction dependent solely on the methodology description plus access to the prior works whose settings are followed."
    412     },
    413     {
    414       "flag": "GPT-4o-mini unversioned",
    415       "detail": "The primary backbone LLM is specified as 'GPT-4o-mini' without a snapshot date; the paper itself acknowledges that API providers may update backend models, which would undermine reproducibility."
    416     },
    417     {
    418       "flag": "Explainability evaluated only qualitatively",
    419       "detail": "Explanation quality is demonstrated through two handpicked case studies (Figure 5) without any systematic or quantitative evaluation of explanation accuracy, faithfulness, or user utility."
    420     },
    421     {
    422       "flag": "MAS interaction data not released",
    423       "detail": "The generated MAS dialogue graphs used for training and testing are not publicly available; data generation details defer to prior works without self-contained specification."
    424     },
    425     {
    426       "flag": "Simulated attacks only, real-world claims unjustified",
    427       "detail": "All attack scenarios are simulated in controlled environments, yet the paper extensively claims 'real-world applicability' and 'practical reliability' without empirical grounding in deployed systems."
    428     }
    429   ],
    430   "cited_papers": [
    431     {
    432       "title": "G-Safeguard: A Topology-Guided Security Lens and Treatment on LLM-Based Multi-Agent Systems",
    433       "relevance": "Direct predecessor: supervised GAD-based MAS defense framework that XG-Guard extends to the unsupervised setting with explainability; used as the supervised upper-bound baseline"
    434     },
    435     {
    436       "title": "BlindGuard: Safeguarding LLM-Based Multi-Agent Systems under Unknown Attacks",
    437       "relevance": "Current state-of-the-art unsupervised MAS defense baseline that XG-Guard directly competes with and improves upon"
    438     },
    439     {
    440       "title": "InjecAgent: Benchmarking Indirect Prompt Injections in Tool-Integrated LLM Agents",
    441       "relevance": "Provides the tool attack benchmark and attack scenario used in evaluation experiments"
    442     },
    443     {
    444       "title": "Deep Anomaly Detection on Attributed Networks (DOMINANT)",
    445       "relevance": "Foundational reconstruction-based unsupervised graph anomaly detection baseline"
    446     },
    447     {
    448       "title": "Truncated Affinity Maximization: One-Class Homophily Modeling for Graph Anomaly Detection (TAM)",
    449       "relevance": "Competing affinity-based unsupervised GAD baseline achieving strong prior performance"
    450     },
    451     {
    452       "title": "PREM: A Simple yet Effective Approach for Node-Level Graph Anomaly Detection",
    453       "relevance": "Graph anomaly detection baseline (prior work by the first author), used for the contrastive-learning comparison"
    454     },
    455     {
    456       "title": "CommonsenseQA: A Question Answering Challenge Targeting Commonsense Knowledge",
    457       "relevance": "Primary benchmark dataset used as MAS task under prompt injection and memory attack scenarios"
    458     }
    459   ],
    460   "engagement_factors": {
    461     "practical_relevance": {
    462       "score": 2,
    463       "justification": "Addresses a real and growing security problem for deployed multi-agent systems, but lack of code release and unversioned API dependencies limit immediate practitioner adoption."
    464     },
    465     "surprise_contrarian": {
    466       "score": 1,
    467       "justification": "The bi-level approach is intuitive; the most surprising finding (naive fusion hurts more than removing token level) is a technical insight rather than a paradigm-challenging result."
    468     },
    469     "fear_safety": {
    470       "score": 3,
    471       "justification": "Directly addresses prompt injection, memory poisoning, and tool exploitation in autonomous AI agent systems — core security concerns for increasingly deployed multi-agent AI."
    472     },
    473     "drama_conflict": {
    474       "score": 1,
    475       "justification": "Incremental improvement over existing defenses; no notable controversy or conflict with dominant paradigms."
    476     },
    477     "demo_ability": {
    478       "score": 1,
    479       "justification": "No code, demo, or interactive interface released; readers cannot try the system themselves."
    480     },
    481     "brand_recognition": {
    482       "score": 1,
    483       "justification": "Griffith University is not a leading AI brand; no involvement from major AI labs, well-known companies, or high-profile researchers."
    484     }
    485   },
    486   "hn_data": {
    487     "threads": [
    488       {
    489         "hn_id": "45657595",
    490         "title": "Binary Retrieval-Augmented Reward Mitigates Hallucinations",
    491         "points": 44,
    492         "comments": 3,
    493         "url": "https://news.ycombinator.com/item?id=45657595",
    494         "created_at": "2025-10-21T16:14:28Z"
    495       },
    496       {
    497         "hn_id": "43198812",
    498         "title": "Symmetries of Living Systems",
    499         "points": 8,
    500         "comments": 0,
    501         "url": "https://news.ycombinator.com/item?id=43198812",
    502         "created_at": "2025-02-27T21:41:54Z"
    503       },
    504       {
    505         "hn_id": "45664388",
    506         "title": "Query Decomposition for RAG",
    507         "points": 1,
    508         "comments": 0,
    509         "url": "https://news.ycombinator.com/item?id=45664388",
    510         "created_at": "2025-10-22T02:47:42Z"
    511       }
    512     ],
    513     "top_points": 44,
    514     "total_points": 53,
    515     "total_comments": 3
    516   }
    517 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs