scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (16962B)
      1 {
      2   "paper": {
      3     "title": "The Artificial Brain: A Neuroscience Inspired Architecture for Multimodal AI Systems",
      4     "authors": ["Krrish Choudhary", "Tanvi Kandoi"],
      5     "year": 2026,
      6     "venue": "International Journal of Transdisciplinary Research and Perspectives"
      7   },
      8   "checklist": {
      9     "artifacts": {
     10       "code_released": {
     11         "applies": true,
     12         "answer": false,
     13         "justification": "No repository URL, code archive, or link to any implementation is provided anywhere in the paper."
     14       },
     15       "data_released": {
     16         "applies": true,
     17         "answer": false,
     18         "justification": "No dataset is released. The paper is a proposed architecture with no original data."
     19       },
     20       "environment_specified": {
     21         "applies": true,
     22         "answer": false,
     23         "justification": "Hardware target is mentioned (RTX 4050, 6GB VRAM) and model names/sizes are listed in Table 14, but no requirements.txt, Dockerfile, or dependency specifications are provided."
     24       },
     25       "reproduction_instructions": {
     26         "applies": true,
     27         "answer": false,
     28         "justification": "No step-by-step reproduction instructions, README, or runnable scripts are provided. The paper describes an architecture conceptually but gives no instructions for building it."
     29       }
     30     },
     31     "statistical_methodology": {
     32       "confidence_intervals_or_error_bars": {
     33         "applies": false,
     34         "answer": false,
     35         "justification": "The paper presents no original experiments or quantitative results. All numbers cited (e.g., 81.5%, 10.23 MDI) are from other papers."
     36       },
     37       "significance_tests": {
     38         "applies": false,
     39         "answer": false,
     40         "justification": "No original experiments are conducted; no comparative claims are made based on the authors' own data."
     41       },
     42       "effect_sizes_reported": {
     43         "applies": false,
     44         "answer": false,
     45         "justification": "No original experiments are conducted. This is a theoretical architecture proposal."
     46       },
     47       "sample_size_justified": {
     48         "applies": false,
     49         "answer": false,
     50         "justification": "No experiments are run, so no sample size to justify. This is a theoretical paper."
     51       },
     52       "variance_reported": {
     53         "applies": false,
     54         "answer": false,
     55         "justification": "No original experiments are conducted."
     56       }
     57     },
     58     "evaluation_design": {
     59       "baselines_included": {
     60         "applies": true,
     61         "answer": false,
     62         "justification": "Tables 12-13 list related work and existing agent systems, but the proposed architecture is never evaluated against them. There is no empirical comparison."
     63       },
     64       "baselines_contemporary": {
     65         "applies": false,
     66         "answer": false,
     67         "justification": "No empirical evaluation is conducted, so baseline contemporaneity is not applicable."
     68       },
     69       "ablation_study": {
     70         "applies": false,
     71         "answer": false,
     72         "justification": "No implementation or evaluation exists to ablate. This is a theoretical architecture proposal."
     73       },
     74       "multiple_metrics": {
     75         "applies": false,
     76         "answer": false,
     77         "justification": "No evaluation is conducted with any metrics."
     78       },
     79       "human_evaluation": {
     80         "applies": false,
     81         "answer": false,
     82         "justification": "No system is built or evaluated, so human evaluation is not applicable."
     83       },
     84       "held_out_test_set": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "No evaluation is conducted."
     88       },
     89       "per_category_breakdown": {
     90         "applies": false,
     91         "answer": false,
     92         "justification": "No evaluation is conducted."
     93       },
     94       "failure_cases_discussed": {
     95         "applies": true,
     96         "answer": true,
     97         "justification": "Section 16 (Limitations) lists 10 specific limitations including coordination failures (79%), inevitable hallucination, missing cross-modal detector, and McGurk trap. Fig. 24 categorizes components by readiness."
     98       },
     99       "negative_results_reported": {
    100         "applies": false,
    101         "answer": false,
    102         "justification": "No experiments are run, so there are no negative results to report."
    103       }
    104     },
    105     "claims_and_evidence": {
    106       "abstract_claims_supported": {
    107         "applies": true,
    108         "answer": false,
    109         "justification": "The abstract claims 'The architecture is implementable today on consumer hardware (RTX 4050, 6GB VRAM)' but no implementation exists. The buildability assessment (Fig. 24) shows several components are in 'needs work' or 'open research' categories, contradicting the implementability claim."
    110       },
    111       "causal_claims_justified": {
    112         "applies": true,
    113         "answer": false,
    114         "justification": "The paper makes causal claims, e.g., that 'compare before merging' prevents hallucination and that the architecture solves multimodal dominance. These are presented as design rationale drawn from neuroscience analogy, but no empirical evidence from the proposed system supports them."
    115       },
    116       "generalization_bounded": {
    117         "applies": true,
    118         "answer": false,
    119         "justification": "The paper presents the architecture as a general solution for multimodal AI ('an architecture for a fundamentally different kind of AI') without bounding claims to any tested setting, since nothing is tested."
    120       },
    121       "alternative_explanations_discussed": {
    122         "applies": true,
    123         "answer": false,
    124         "justification": "The paper does not discuss alternative architectural approaches that could achieve similar goals, nor does it consider why the neuroscience analogy might not transfer to AI systems."
    125       }
    126     },
    127     "setup_transparency": {
    128       "model_versions_specified": {
    129         "applies": true,
    130         "answer": true,
    131         "justification": "Table 14 specifies exact models: Qwen2.5-1.5B Q4, Qwen2.5-3B Q4, distilbert-sst-2, MiniLM-L6-v2, ChromaDB+MiniLM, and Claude API for the conscious layer."
    132       },
    133       "prompts_provided": {
    134         "applies": false,
    135         "answer": false,
    136         "justification": "No prompting experiments are conducted. The paper is a theoretical architecture proposal."
    137       },
    138       "hyperparameters_reported": {
    139         "applies": false,
    140         "answer": false,
    141         "justification": "No experiments are run. Formulas for neuromodulator effects are given (Table 7) but these are architectural specifications, not experimental hyperparameters."
    142       },
    143       "scaffolding_described": {
    144         "applies": true,
    145         "answer": true,
    146         "justification": "The paper describes the agentic scaffolding in extensive detail: the 5-layer architecture, conflict detection cascade (Fig. 11), identity filter (Fig. 13), memory systems (Fig. 14), reward propagation (Fig. 17), and core processing loop (Fig. 23)."
    147       },
    148       "data_preprocessing_documented": {
    149         "applies": false,
    150         "answer": false,
    151         "justification": "No data is collected or processed. This is a theoretical architecture proposal."
    152       }
    153     },
    154     "limitations_and_scope": {
    155       "limitations_section_present": {
    156         "applies": true,
    157         "answer": true,
    158         "justification": "Section 16 'Limitations' provides a dedicated table (Table 15) listing 10 limitations with severity ratings."
    159       },
    160       "threats_to_validity_specific": {
    161         "applies": true,
    162         "answer": true,
    163         "justification": "The limitations are specific to this architecture: 'Coordination failures dominate (79%)' (citing multi-agent failure literature), 'No general cross-modal detector exists' (HIGH), 'Confidence estimation unsolved' (HIGH). These are not generic boilerplate."
    164       },
    165       "scope_boundaries_stated": {
    166         "applies": true,
    167         "answer": true,
    168         "justification": "Fig. 24 explicitly categorizes which components are buildable now (green), need work (yellow), and remain open research problems (red). Table 15 acknowledges fundamental limitations like inevitable hallucination."
    169       }
    170     },
    171     "data_integrity": {
    172       "raw_data_available": {
    173         "applies": false,
    174         "answer": false,
    175         "justification": "No original data is collected. This is a theoretical architecture proposal."
    176       },
    177       "data_collection_described": {
    178         "applies": false,
    179         "answer": false,
    180         "justification": "No data collection occurs. This is a theoretical architecture proposal."
    181       },
    182       "recruitment_methods_described": {
    183         "applies": false,
    184         "answer": false,
    185         "justification": "No participants or data sources are recruited. This is a theoretical architecture proposal."
    186       },
    187       "data_pipeline_documented": {
    188         "applies": false,
    189         "answer": false,
    190         "justification": "No data pipeline exists. This is a theoretical architecture proposal."
    191       }
    192     },
    193     "conflicts_of_interest": {
    194       "funding_disclosed": {
    195         "applies": true,
    196         "answer": false,
    197         "justification": "No funding source or acknowledgments section is present in the paper."
    198       },
    199       "affiliations_disclosed": {
    200         "applies": true,
    201         "answer": true,
    202         "justification": "Author affiliations are listed: LNM Institute of Information Technology, Jaipur and IIIT Tiruchirappalli. No product being evaluated, so no product-affiliation conflict."
    203       },
    204       "funder_independent_of_outcome": {
    205         "applies": false,
    206         "answer": false,
    207         "justification": "No funding is disclosed. Appears to be unfunded academic work from two Indian universities."
    208       },
    209       "financial_interests_declared": {
    210         "applies": true,
    211         "answer": false,
    212         "justification": "No competing interests or financial interests statement is present in the paper."
    213       }
    214     },
    215     "contamination": {
    216       "training_cutoff_stated": {
    217         "applies": false,
    218         "answer": false,
    219         "justification": "No model is evaluated on any benchmark. This is a theoretical architecture proposal."
    220       },
    221       "train_test_overlap_discussed": {
    222         "applies": false,
    223         "answer": false,
    224         "justification": "No model is evaluated on any benchmark."
    225       },
    226       "benchmark_contamination_addressed": {
    227         "applies": false,
    228         "answer": false,
    229         "justification": "No model is evaluated on any benchmark."
    230       }
    231     },
    232     "human_studies": {
    233       "pre_registered": {
    234         "applies": false,
    235         "answer": false,
    236         "justification": "No human participants in this study."
    237       },
    238       "irb_or_ethics_approval": {
    239         "applies": false,
    240         "answer": false,
    241         "justification": "No human participants in this study."
    242       },
    243       "demographics_reported": {
    244         "applies": false,
    245         "answer": false,
    246         "justification": "No human participants in this study."
    247       },
    248       "inclusion_exclusion_criteria": {
    249         "applies": false,
    250         "answer": false,
    251         "justification": "No human participants in this study."
    252       },
    253       "randomization_described": {
    254         "applies": false,
    255         "answer": false,
    256         "justification": "No human participants in this study."
    257       },
    258       "blinding_described": {
    259         "applies": false,
    260         "answer": false,
    261         "justification": "No human participants in this study."
    262       },
    263       "attrition_reported": {
    264         "applies": false,
    265         "answer": false,
    266         "justification": "No human participants in this study."
    267       }
    268     },
    269     "cost_and_practicality": {
    270       "inference_cost_reported": {
    271         "applies": false,
    272         "answer": false,
    273         "justification": "This is a theoretical architecture proposal with no running system. Cost estimates (e.g., '$0 cost 90% of the time') are speculative, not measured."
    274       },
    275       "compute_budget_stated": {
    276         "applies": false,
    277         "answer": false,
    278         "justification": "No experiments were run, so no compute budget to report. This is a theoretical paper."
    279       }
    280     }
    281   },
    282   "claims": [
    283     {
    284       "claim": "LLaVA-7B exhibits a Modality Dominance Index of 10.23, trusting its text decoder 10x more than its visual encoder",
    285       "evidence": "Cited from [1] Wu et al., arXiv:2508.10552. No original measurement by the authors.",
    286       "supported": "moderate"
    287     },
    288     {
    289       "claim": "Unimodal specialists beat all multimodal models at 81.5% accuracy",
    290       "evidence": "Table 1 cites this statistic but the source reference is unclear from the paper text.",
    291       "supported": "weak"
    292     },
    293     {
    294       "claim": "The architecture is implementable today on consumer hardware (RTX 4050, 6GB VRAM)",
    295       "evidence": "Table 14 shows model sizes totaling 3.6GB of GPU memory. However, Fig. 24 shows several components in 'needs work' or 'open research' status, contradicting full implementability.",
    296       "supported": "weak"
    297     },
    298     {
    299       "claim": "90% of the time SLMs agree (costing $0); 10% of the time they disagree and escalate to the conscious layer",
    300       "evidence": "Stated in Section 4 without any empirical measurement or citation. This appears to be an assumption, not a measured result.",
    301       "supported": "unsupported"
    302     },
    303     {
    304       "claim": "The neuroscience-inspired architecture solves multimodal integration failures including hallucination",
    305       "evidence": "The architecture is described conceptually with neuroscience analogies but no implementation or evaluation exists to verify the claim.",
    306       "supported": "unsupported"
    307     }
    308   ],
    309   "methodology_tags": ["theoretical"],
    310   "key_findings": "This paper proposes a theoretical architecture for multimodal AI inspired by neuroscience, mapping brain structures (visual cortex, ACC, PFC, DMN, hippocampus, basal ganglia) to AI components (specialized SLMs, conflict detectors, reasoning models, identity cores, vector databases, router MLPs). The key design principle is 'compare before merging' to prevent hallucination through late fusion with conflict detection. No implementation or empirical evaluation is provided. The paper acknowledges 10 limitations and categorizes components by readiness level, with several remaining open research problems.",
    311   "red_flags": [
    312     {
    313       "flag": "No implementation or evaluation",
    314       "detail": "The paper proposes a complete architecture but provides zero empirical evidence that it works. No prototype, no benchmarks, no measurements. The abstract's claim that it is 'implementable today' is contradicted by the paper's own buildability assessment showing open research problems."
    315     },
    316     {
    317       "flag": "Unsourced statistics presented as fact",
    318       "detail": "The 90%/10% SLM agreement ratio and $0 cost claim (Section 4) appear to be unsupported assumptions with no citation or measurement. Several statistics in Table 1 lack clear source attribution."
    319     },
    320     {
    321       "flag": "Neuroscience analogy treated as engineering specification",
    322       "detail": "Brain-to-AI mappings (Table 3) are presented as if biological analogy guarantees engineering viability. The paper does not address why brain-inspired architectures might not transfer to silicon, or why previous brain-inspired AI efforts had limited success."
    323     },
    324     {
    325       "flag": "Dubious venue",
    326       "detail": "Published in 'International Journal of Transdisciplinary Research and Perspectives' (E-ISSN: 3107-7935, Volume 2 Issue 2), a very new journal with no established reputation. The corresponding author email is a personal Gmail address."
    327     }
    328   ],
    329   "cited_papers": [
    330     {
    331       "title": "When language overrules: Revealing text dominance in multimodal large language models",
    332       "authors": ["H. Wu"],
    333       "year": 2025,
    334       "arxiv_id": "2508.10552",
    335       "relevance": "Empirical measurement of modality dominance bias in multimodal LLMs, relevant to understanding LLM limitations."
    336     },
    337     {
    338       "title": "Why do multi-agent LLM systems fail?",
    339       "year": 2025,
    340       "arxiv_id": "2503.13657",
    341       "relevance": "Analysis of failure modes in multi-agent LLM systems, directly relevant to agentic AI reliability."
    342     },
    343     {
    344       "title": "FrugalGPT: How to use large language models while reducing cost and improving performance",
    345       "authors": ["S. Chen"],
    346       "year": 2023,
    347       "arxiv_id": "2305.05176",
    348       "relevance": "Cost-optimization strategies for LLM usage including model routing, relevant to AI practicality research."
    349     },
    350     {
    351       "title": "RouteLLM: Model routing for cost optimization",
    352       "year": 2025,
    353       "relevance": "Model routing approach for LLM cost optimization, relevant to agentic AI efficiency."
    354     },
    355     {
    356       "title": "HalluciDoctor: Cross-MLLM consistency checking",
    357       "year": 2024,
    358       "arxiv_id": "2311.13614",
    359       "relevance": "Cross-model hallucination detection method, relevant to LLM reliability and safety."
    360     }
    361   ]
    362 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs