scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (28972B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Fine-grained Analysis of Brain-LLM Alignment through Input Attribution",
      6     "authors": [
      7       "Michela Proietti",
      8       "Roberto Capobianco",
      9       "Mariya Toneva"
     10     ],
     11     "year": 2025,
     12     "venue": "arXiv.org",
     13     "arxiv_id": "2510.12355",
     14     "doi": "10.48550/arXiv.2510.12355"
     15   },
     16   "checklist": {
     17     "claims_and_evidence": {
     18       "abstract_claims_supported": {
     19         "applies": true,
     20         "answer": true,
     21         "justification": "All abstract claims are substantiated: BA/NWP low IoU (Figure 2), NWP recency/primacy biases (Figure 5), syntactic focus of NWP vs. semantic/discourse focus of BA (Figure 4), and broader BA recency effect are each directly evidenced in main results.",
     22         "source": "haiku"
     23       },
     24       "causal_claims_justified": {
     25         "applies": true,
     26         "answer": true,
     27         "justification": "Masking experiments (Appendix C) support causal claims about word importance: removing the top 1% of attributed words nearly abolishes both NWP and BA performance, validating that identified words functionally drive each task.",
     28         "source": "haiku"
     29       },
     30       "generalization_bounded": {
     31         "applies": true,
     32         "answer": true,
     33         "justification": "Claims are validated across two datasets (HP and MRH), five model architectures (1–2B parameter range), and consistent per-subject replication; the paper's limitations section explicitly bounds scope to frozen models and specific parameter scales.",
     34         "source": "haiku"
     35       },
     36       "alternative_explanations_discussed": {
     37         "applies": true,
     38         "answer": false,
     39         "justification": "The paper does not systematically consider alternative explanations for the BA/NWP divergence—e.g., whether differences arise from the hemodynamic delay modeling, the ridge regression fitting, or the preprocessing pipeline rather than genuine feature reliance differences.",
     40         "source": "haiku"
     41       },
     42       "proxy_outcome_distinction": {
     43         "applies": true,
     44         "answer": true,
     45         "justification": "The paper explicitly defines BA as Pearson correlation between predicted and recorded brain activity via a linear encoding model, and attribution scores as gradient-based word importance for that prediction, keeping proxy measures and interpretive claims appropriately distinct.",
     46         "source": "haiku"
     47       }
     48     },
     49     "limitations_and_scope": {
     50       "limitations_section_present": {
     51         "applies": true,
     52         "answer": true,
     53         "justification": "Section 5 contains a dedicated 'Limitations' subsection with three specific points, not just a single sentence in the conclusion.",
     54         "source": "haiku"
     55       },
     56       "threats_to_validity_specific": {
     57         "applies": true,
     58         "answer": true,
     59         "justification": "Specific limitations include: gradient-based methods being sensitive to local nonlinearities (mitigated by multi-method validation), discourse annotations being coarse and predefined, and frozen models reflecting inductive biases rather than optimal BA solutions.",
     60         "source": "haiku"
     61       },
     62       "scope_boundaries_stated": {
     63         "applies": true,
     64         "answer": true,
     65         "justification": "The paper explicitly bounds scope to frozen 1–2B parameter models, two public fMRI datasets, gradient-based attribution only, and notes that results reflect inductive biases rather than optimal alignment; the Future Work section identifies what the current study does not address.",
     66         "source": "haiku"
     67       }
     68     },
     69     "conflicts_of_interest": {
     70       "funding_disclosed": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "No funding acknowledgments are present in the paper text; the absence of any acknowledgments section means funding sources cannot be verified.",
     74         "source": "haiku"
     75       },
     76       "affiliations_disclosed": {
     77         "applies": true,
     78         "answer": true,
     79         "justification": "Author affiliations are clearly stated on the title page: Sapienza University of Rome, Sony AI Zurich, and Max Planck Institute for Software Systems.",
     80         "source": "haiku"
     81       },
     82       "funder_independent_of_outcome": {
     83         "applies": false,
     84         "answer": false,
     85         "justification": "No funding is disclosed, so independence cannot be assessed.",
     86         "source": "haiku"
     87       },
     88       "financial_interests_declared": {
     89         "applies": true,
     90         "answer": false,
     91         "justification": "No competing interests statement or declaration of financial interests appears anywhere in the paper.",
     92         "source": "haiku"
     93       }
     94     },
     95     "scope_and_framing": {
     96       "key_terms_defined": {
     97         "applies": true,
     98         "answer": true,
     99         "justification": "Brain alignment (BA) is formally defined as the Pearson correlation performance of brain encoding models predicting activity from LLM representations; voxel is defined in a footnote; attribution methods (GXI, IG) are formally defined in Section 3.3 and Appendix A.3.",
    100         "source": "haiku"
    101       },
    102       "intended_contribution_clear": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "Four explicit bullet-point contributions are listed in the introduction: the novel attribution framework, the finding on transformer/SSM/hybrid behavioral similarity, the BA/NWP case study, and the specific differences in feature reliance and context integration.",
    106         "source": "haiku"
    107       },
    108       "engagement_with_prior_work": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "The paper actively situates its approach relative to prior methods (perturbation-based vs. attribution-based), builds on Merlin & Toneva (2024) and AlKhamissi et al. (2025), and explains how the new framework extends rather than merely replicates existing findings.",
    112         "source": "haiku"
    113       }
    114     }
    115   },
    116   "type_checklist": {
    117     "empirical": {
    118       "artifacts": {
    119         "code_released": {
    120           "applies": true,
    121           "answer": true,
    122           "justification": "The Reproducibility Statement explicitly links to https://github.com/michelaproietti/Brain-LLM-Alignment-Attribution with data preprocessing scripts, attribution implementations, and evaluation procedures.",
    123           "source": "haiku"
    124         },
    125         "data_released": {
    126           "applies": true,
    127           "answer": true,
    128           "justification": "Both fMRI datasets used are publicly available: the Harry Potter dataset (Wehbe et al., 2014) and Moth Radio Hour (Deniz et al., 2019).",
    129           "source": "haiku"
    130         },
    131         "environment_specified": {
    132           "applies": true,
    133           "answer": false,
    134           "justification": "The paper specifies hardware (NVIDIA H100 80GB) and mentions Captum and HuggingFace libraries, but provides no requirements.txt, Dockerfile, or pinned dependency versions in the paper itself.",
    135           "source": "haiku"
    136         },
    137         "reproduction_instructions": {
    138           "applies": true,
    139           "answer": true,
    140           "justification": "The Reproducibility Statement cross-references Sections 3, 4, 5, and Appendices A–C for all pipeline details, and released code accompanies the paper; together these provide sufficient instructions to reproduce.",
    141           "source": "haiku"
    142         }
    143       },
    144       "statistical_methodology": {
    145         "confidence_intervals_or_error_bars": {
    146           "applies": true,
    147           "answer": true,
    148           "justification": "Standard errors across subjects are reported for model-wise BA scores (Figure 22a) and standard errors across models/contexts are shown as error bars in Figures 3 and 4.",
    149           "source": "haiku"
    150         },
    151         "significance_tests": {
    152           "applies": true,
    153           "answer": true,
    154           "justification": "Two-sided paired t-tests with Benjamini-Hochberg correction are used for AUC comparisons between BA and NWP (Figure 3), and pairwise model comparison p-values are reported (Figure 22b).",
    155           "source": "haiku"
    156         },
    157         "effect_sizes_reported": {
    158           "applies": true,
    159           "answer": true,
    160           "justification": "IoU values, AUC differences, mean Pearson correlations, and percentage drops in CE loss and Pearson r are all reported with baseline context, providing meaningful effect size information.",
    161           "source": "haiku"
    162         },
    163         "sample_size_justified": {
    164           "applies": true,
    165           "answer": false,
    166           "justification": "The paper uses 8 subjects (HP) and 9 subjects (MRH) without any power analysis or explicit justification for adequacy; sample size is implicitly justified only by availability of these public datasets.",
    167           "source": "haiku"
    168         },
    169         "variance_reported": {
    170           "applies": true,
    171           "answer": true,
    172           "justification": "Standard errors across subjects and across contexts are consistently reported in figure error bars throughout the paper.",
    173           "source": "haiku"
    174         }
    175       },
    176       "evaluation_design": {
    177         "baselines_included": {
    178           "applies": true,
    179           "answer": true,
    180           "justification": "A random baseline for IoU is computed by drawing 100 pairs of random word sets and averaging their IoUs for each threshold (Figure 2); baseline CE loss and correlation are used in masking experiments.",
    181           "source": "haiku"
    182         },
    183         "baselines_contemporary": {
    184           "applies": true,
    185           "answer": true,
    186           "justification": "The five models evaluated (Falcon3-1B, Gemma-2B, Llama3.2-1B, Mamba-1.4B, Zamba2-1.2B) are all 2023–2024 releases; the random baseline is appropriate for the IoU analysis.",
    187           "source": "haiku"
    188         },
    189         "ablation_study": {
    190           "applies": true,
    191           "answer": true,
    192           "justification": "Masking experiments ablate top-attributed words to validate functional relevance; robustness checks use two attribution methods (GXI vs. IG) and two context lengths (640 vs. 80 words).",
    193           "source": "haiku"
    194         },
    195         "multiple_metrics": {
    196           "applies": true,
    197           "answer": true,
    198           "justification": "IoU, Center of Mass, Pearson correlation, AUC, CE loss change, Spearman rank correlation, and per-linguistic-feature proportions are all reported.",
    199           "source": "haiku"
    200         },
    201         "human_evaluation": {
    202           "applies": false,
    203           "answer": false,
    204           "justification": "This paper evaluates computational alignment between LLM representations and brain data; human evaluation of system outputs is not relevant to the study design.",
    205           "source": "haiku"
    206         },
    207         "held_out_test_set": {
    208           "applies": true,
    209           "answer": true,
    210           "justification": "4-fold cross-validation is used for HP and 11-fold (one story per fold) for MRH, with nested cross-validation for regularization selection, ensuring held-out evaluation.",
    211           "source": "haiku"
    212         },
    213         "per_category_breakdown": {
    214           "applies": true,
    215           "answer": true,
    216           "justification": "Results are broken down per model (5 models), per subject (8/9 subjects in Appendix F), per layer depth (early/middle/late), per linguistic feature category (semantic/syntactic/discourse), and per ROI (language-selective ROIs in Appendix C).",
    217           "source": "haiku"
    218         },
    219         "failure_cases_discussed": {
    220           "applies": true,
    221           "answer": true,
    222           "justification": "The anomalous oscillatory attribution pattern in Llama3.2-1B is extensively analyzed across multiple appendices (H, I) and shown to be context/stimulus-dependent rather than a general failure mode.",
    223           "source": "haiku"
    224         },
    225         "negative_results_reported": {
    226           "applies": true,
    227           "answer": true,
    228           "justification": "The finding that Llama3.2-1B's oscillatory pattern does not generalize to Qwen2-1.5B (same architectural features) and disappears on MRH/shorter contexts is a null result reported transparently.",
    229           "source": "haiku"
    230         }
    231       },
    232       "setup_transparency": {
    233         "model_versions_specified": {
    234           "applies": true,
    235           "answer": true,
    236           "justification": "All five models are named with their parameter counts, architectural families, and source publications/technical reports; Appendix A.2 provides detailed descriptions of each model's architecture and training data.",
    237           "source": "haiku"
    238         },
    239         "prompts_provided": {
    240           "applies": false,
    241           "answer": false,
    242           "justification": "Models are used as feature extractors with no prompting; there are no prompts or system instructions to report.",
    243           "source": "haiku"
    244         },
    245         "hyperparameters_reported": {
    246           "applies": true,
    247           "answer": true,
    248           "justification": "Context length (L=640), TR concatenation (D=4), cross-validation folds (4-fold HP, 11-fold MRH), IG steps (m=20), baseline (zero embedding), and attribution thresholds are all explicitly reported.",
    249           "source": "haiku"
    250         },
    251         "scaffolding_described": {
    252           "applies": false,
    253           "answer": false,
    254           "justification": "No agentic scaffolding is used; models are evaluated as feature extractors with a fixed ridge regression encoding model.",
    255           "source": "haiku"
    256         },
    257         "data_preprocessing_documented": {
    258           "applies": true,
    259           "answer": true,
    260           "justification": "Section 3.1 and Appendix A.1 describe fMRI preprocessing; Section 3.3.1 and Appendix A.4 detail context construction, tokenization, word/TR embedding extraction, and hemodynamic delay accounting.",
    261           "source": "haiku"
    262         }
    263       },
    264       "data_integrity": {
    265         "raw_data_available": {
    266           "applies": true,
    267           "answer": true,
    268           "justification": "Both the Harry Potter fMRI dataset (Wehbe et al., 2014) and Moth Radio Hour dataset (Deniz et al., 2019) are publicly available datasets.",
    269           "source": "haiku"
    270         },
    271         "data_collection_described": {
    272           "applies": true,
    273           "answer": true,
    274           "justification": "Section 3.1 and Appendix A.1 describe both datasets: number of subjects, stimulus presentation rate, TR sampling rate, run structure, and word-level annotations for HP.",
    275           "source": "haiku"
    276         },
    277         "recruitment_methods_described": {
    278           "applies": false,
    279           "answer": false,
    280           "justification": "The paper uses existing public datasets; no new participant recruitment was conducted in this study.",
    281           "source": "haiku"
    282         },
    283         "data_pipeline_documented": {
    284           "applies": true,
    285           "answer": true,
    286           "justification": "The full pipeline from raw fMRI data through LLM representation extraction, TR-level embedding construction, brain encoding model training, and attribution computation is documented in Sections 3.3–3.4 and Appendix A.4, with a visual illustration (Figure 7).",
    287           "source": "haiku"
    288         }
    289       },
    290       "contamination": {
    291         "training_cutoff_stated": {
    292           "applies": true,
    293           "answer": false,
    294           "justification": "The LLMs are used to generate representations for Harry Potter text, which is likely in their training corpora, but no training data cutoffs are reported and the issue is not acknowledged.",
    295           "source": "haiku"
    296         },
    297         "train_test_overlap_discussed": {
    298           "applies": true,
    299           "answer": false,
    300           "justification": "The paper does not discuss whether Harry Potter (a widely distributed copyrighted book) appears in LLM training data, which could systematically inflate NWP attribution scores for that stimulus.",
    301           "source": "haiku"
    302         },
    303         "benchmark_contamination_addressed": {
    304           "applies": true,
    305           "answer": false,
    306           "justification": "Harry Potter and the Sorcerer's Stone was published in 1998 and is available online; the paper does not address whether this widely-available text was included in any model's pre-training corpus.",
    307           "source": "haiku"
    308         }
    309       },
    310       "human_studies": {
    311         "pre_registered": {
    312           "applies": false,
    313           "answer": false,
    314           "justification": "No new human participants were recruited; the paper uses existing public fMRI datasets.",
    315           "source": "haiku"
    316         },
    317         "irb_or_ethics_approval": {
    318           "applies": false,
    319           "answer": false,
    320           "justification": "No new human study was conducted; existing public datasets are used.",
    321           "source": "haiku"
    322         },
    323         "demographics_reported": {
    324           "applies": false,
    325           "answer": false,
    326           "justification": "Existing public datasets are used; demographic reporting belongs to the original studies.",
    327           "source": "haiku"
    328         },
    329         "inclusion_exclusion_criteria": {
    330           "applies": false,
    331           "answer": false,
    332           "justification": "No new human study was conducted.",
    333           "source": "haiku"
    334         },
    335         "randomization_described": {
    336           "applies": false,
    337           "answer": false,
    338           "justification": "No new human study was conducted.",
    339           "source": "haiku"
    340         },
    341         "blinding_described": {
    342           "applies": false,
    343           "answer": false,
    344           "justification": "No new human study was conducted.",
    345           "source": "haiku"
    346         },
    347         "attrition_reported": {
    348           "applies": false,
    349           "answer": false,
    350           "justification": "No new human study was conducted.",
    351           "source": "haiku"
    352         }
    353       },
    354       "cost_and_practicality": {
    355         "inference_cost_reported": {
    356           "applies": true,
    357           "answer": true,
    358           "justification": "Appendix J provides detailed per-task, per-model compute time and peak GPU memory usage tables for all experiments across both datasets.",
    359           "source": "haiku"
    360         },
    361         "compute_budget_stated": {
    362           "applies": true,
    363           "answer": true,
    364           "justification": "Appendix J summarizes total compute: GXI attribution ~1501 hours, IG attribution ~329 hours, brain alignment training ~219 hours, all on a single NVIDIA H100 80GB GPU.",
    365           "source": "haiku"
    366         }
    367       }
    368     }
    369   },
    370   "claims": [
    371     {
    372       "claim": "Brain alignment (BA) and next-word prediction (NWP) rely on largely distinct subsets of input words, with IoU ≈0.1–0.2 at low attribution thresholds",
    373       "evidence": "Figure 2 shows IoU between top-attributed word sets for BA and NWP, consistently 1.5–2× above a random baseline but very low (≈0.1–0.2) at stringent thresholds (t≤10%), replicated on both HP and MRH datasets",
    374       "supported": "strong"
    375     },
    376     {
    377       "claim": "NWP exhibits both recency and primacy biases across transformer, SSM, and hybrid architectures, while BA shows only a broader recency bias",
    378       "evidence": "Figure 5 shows bimodal NWP attribution distributions (peaks at both ends of context) vs. BA's unimodal broader recency distribution, consistent across all 5 models and replicated on MRH (Figures 12–14)",
    379       "supported": "strong"
    380     },
    381     {
    382       "claim": "NWP emphasizes syntactic features while BA places greater weight on semantic and discourse-level information",
    383       "evidence": "Figure 4 shows that at low attribution thresholds NWP uniquely attributes more to syntactic features while BA shows a more balanced distribution with higher proportions of semantic and discourse features; replicated with IG (Figure 15)",
    384       "supported": "moderate"
    385     },
    386     {
    387       "claim": "BA has higher attribution spread (more distributed across context words) at middle and late layers, while NWP has higher spread at early layers",
    388       "evidence": "Figure 3 shows AUC for BA increases from early to late layers while NWP AUC decreases; differences are statistically significant (p<0.001, Benjamini-Hochberg corrected)",
    389       "supported": "strong"
    390     },
    391     {
    392       "claim": "Top-attributed words are functionally important: masking just the top 1% abolishes both NWP performance (>100% CE increase) and BA (nearly 100% drop in Pearson r)",
    393       "evidence": "Appendix C, Figures 8 and 9, show catastrophic performance collapse from minimal masking across all 5 models and language-selective ROIs",
    394       "supported": "strong"
    395     },
    396     {
    397       "claim": "Transformers, SSMs, and hybrid architectures behave largely similarly in BA attribution patterns, with Llama3.2-1B as a notable exception showing oscillatory positional patterns",
    398       "evidence": "Figures 5, 18–20 show consistent BA/NWP attribution patterns across 4 of 5 models; Llama3.2-1B exception is extensively investigated in Appendices E, H, I",
    399       "supported": "moderate"
    400     },
    401     {
    402       "claim": "Llama3.2-1B's oscillatory attribution pattern for BA is stimulus-dependent rather than a fixed architectural property",
    403       "evidence": "Qwen2-1.5B (sharing RoPE, GQA, FlashAttention2) shows no oscillation (Appendix H); Llama3.2-1B oscillation disappears on MRH dataset (Appendix D.3) and with 80-word contexts (Appendix I)",
    404       "supported": "moderate"
    405     }
    406   ],
    407   "methodology_tags": [
    408     "observational",
    409     "benchmark-eval"
    410   ],
    411   "key_findings": "The paper introduces a gradient-based end-to-end attribution framework to compare which input words drive brain-LLM alignment (BA) versus next-word prediction (NWP). The central finding is that BA and NWP rely on substantially distinct word subsets: NWP focuses on syntactic features with strong recency and primacy positional biases, while BA draws more heavily on semantic and discourse-level information with a broader but still primarily recency-focused attribution pattern. Attribution spread increases with layer depth for BA (suggesting higher-order semantic integration) while decreasing for NWP. These patterns are largely consistent across five models spanning three architecture families (transformers, SSMs, hybrid), with Llama3.2-1B as an unusual exception showing stimulus-dependent oscillatory attribution patterns. Together, these results provide evidence that brain alignment emerges from richer representational processing than surface-level next-word prediction alone.",
    412   "red_flags": [
    413     {
    414       "flag": "Harry Potter contamination unaddressed",
    415       "detail": "The primary stimulus is a chapter of Harry Potter—a widely distributed copyrighted book almost certainly present in LLM training data. Memorization of this familiar text, rather than genuine linguistic processing, could systematically inflate NWP attribution scores, but the paper never discusses this potential confound."
    416     },
    417     {
    418       "flag": "Very small fMRI sample sizes",
    419       "detail": "Only 8 subjects (HP) and 9 subjects (MRH) are used with no power analysis. Brain alignment metrics from fMRI are inherently noisy, and conclusions about BA vs. NWP differences may not generalize at this sample size."
    420     },
    421     {
    422       "flag": "Attribution proxy limitations underacknowledged",
    423       "detail": "Gradient-based attribution scores (GXI, IG) measure sensitivity of the model's output to input perturbations, not what the model 'really uses' in a causal sense. The paper acknowledges sensitivity to local nonlinearities but does not discuss the broader critique that attribution methods often reflect input statistics rather than model computation."
    424     },
    425     {
    426       "flag": "Coarse linguistic annotations",
    427       "detail": "Authors themselves note that HP discourse annotations are 'relatively coarse and limited to predefined categories,' yet the feature-based analysis comparing syntactic vs. semantic vs. discourse reliance is central to the paper's claims."
    428     },
    429     {
    430       "flag": "Limited model scale and type",
    431       "detail": "All five models are 1–2B parameters. Whether findings generalize to larger models or instruction-tuned models (which dominate current LLM usage) is left entirely to future work."
    432     }
    433   ],
    434   "cited_papers": [
    435     {
    436       "title": "Language models and brains align due to more than next-word prediction and word-level information",
    437       "relevance": "Direct predecessor showing BA depends on more than NWP; this paper extends that work with fine-grained attribution analysis"
    438     },
    439     {
    440       "title": "The neural architecture of language: Integrative modeling converges on predictive processing",
    441       "relevance": "Foundational work establishing the NWP-BA correlation that this paper's methodology is designed to interrogate"
    442     },
    443     {
    444       "title": "Shared computational principles for language processing in humans and deep language models",
    445       "relevance": "Key prior work claiming NWP is a major driver of brain-LLM alignment; directly contested and nuanced by this paper"
    446     },
    447     {
    448       "title": "From language to cognition: How LLMs outgrow the human language network",
    449       "relevance": "Recent work showing BA and NWP decouple during training; complementary finding to this paper's comparison at inference"
    450     },
    451     {
    452       "title": "Contextual feature extraction hierarchies converge in large language models and the brain",
    453       "relevance": "Evidence that higher-level linguistic features emerge in later layers; used to interpret attribution spread results"
    454     },
    455     {
    456       "title": "Token-wise decomposition of autoregressive language model hidden states for analyzing model predictions",
    457       "relevance": "Prior attribution-based work finding syntactic focus in NWP; this paper's findings are validated against and extend these results"
    458     },
    459     {
    460       "title": "Interpreting and improving natural-language processing (in machines) with natural language-processing (in the brain)",
    461       "relevance": "Foundational brain-LLM alignment paper introducing the core framework that this work builds upon"
    462     },
    463     {
    464       "title": "Joint processing of linguistic properties in brains and language models",
    465       "relevance": "Prior work showing syntactic information's role in brain alignment; used as a key reference point for feature-based analysis"
    466     }
    467   ],
    468   "engagement_factors": {
    469     "practical_relevance": {
    470       "score": 1,
    471       "justification": "The attribution framework has potential applicability for interpretability research, but the direct practical utility for LLM practitioners is limited given the focus on basic cognitive neuroscience questions."
    472     },
    473     "surprise_contrarian": {
    474       "score": 2,
    475       "justification": "The paper directly challenges the influential Goldstein et al. (2022) claim that NWP is a major driver of brain alignment by showing the two tasks rely on largely different input features, with quantitative evidence."
    476     },
    477     "fear_safety": {
    478       "score": 0,
    479       "justification": "The paper addresses basic science questions about brain-LLM alignment with no AI safety or risk implications."
    480     },
    481     "drama_conflict": {
    482       "score": 1,
    483       "justification": "The paper is framed around a 'contentious research question' in the literature and takes a side in an ongoing debate, though the tone is measured and collaborative rather than confrontational."
    484     },
    485     "demo_ability": {
    486       "score": 1,
    487       "justification": "Code is released but reproducing results requires access to fMRI datasets, substantial GPU compute (~1500+ hours for GXI), and neuroscience domain knowledge."
    488     },
    489     "brand_recognition": {
    490       "score": 1,
    491       "justification": "Max Planck Institute and Sony AI are recognizable institutions, but neither is among the dominant AI labs driving public attention; the third author (Toneva) is a respected neuroscience/NLP researcher."
    492     }
    493   },
    494   "hn_data": {
    495     "threads": [
    496       {
    497         "hn_id": "41929456",
    498         "title": "Quantum inspired factorization up to 100-bit RSA number in polynomial time [pdf]",
    499         "points": 4,
    500         "comments": 0,
    501         "url": "https://news.ycombinator.com/item?id=41929456"
    502       },
    503       {
    504         "hn_id": "38038429",
    505         "title": "GMEM: Generalized Memory Management for Peripheral Devices",
    506         "points": 3,
    507         "comments": 0,
    508         "url": "https://news.ycombinator.com/item?id=38038429"
    509       },
    510       {
    511         "hn_id": "42794658",
    512         "title": "Test-time regression: a unifying framework for designing sequence models",
    513         "points": 1,
    514         "comments": 0,
    515         "url": "https://news.ycombinator.com/item?id=42794658"
    516       },
    517       {
    518         "hn_id": "41933882",
    519         "title": "Quantum inspired factorization up to 100-bit RSA number in polynomial time",
    520         "points": 1,
    521         "comments": 0,
    522         "url": "https://news.ycombinator.com/item?id=41933882"
    523       }
    524     ],
    525     "top_points": 4,
    526     "total_points": 9,
    527     "total_comments": 0
    528   }
    529 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs