ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (14084B)


      1 {
      2   "paper": {
      3     "title": "Masked Hard-Attention Transformers Recognize Exactly the Star-Free Languages",
      4     "authors": ["Andy Yang", "David Chiang", "Dana Angluin"],
      5     "year": 2023,
      6     "venue": "Neural Information Processing Systems",
      7     "arxiv_id": "2310.13897",
      8     "doi": "10.52202/079017-0327"
      9   },
     10   "scan_version": 2,
     11   "active_modules": [],
     12   "methodology_tags": ["theoretical"],
     13   "key_findings": "The paper establishes exact characterizations of masked hard-attention transformers in terms of formal language classes. With strict masking and no position embeddings, these transformers recognize exactly the star-free languages (equivalent to LTL). Boolean RASP (B-RASP) serves as a convenient intermediate language. The paper further shows that position embeddings, strict vs. non-strict masking, and depth all affect expressive power in precisely characterized ways.",
     14   "checklist": {
     15     "artifacts": {
     16       "code_released": {
     17         "applies": true,
     18         "answer": true,
     19         "justification": "The paper provides a B-RASP simulator at https://b-rasp.github.io/ (Section 3.2)."
     20       },
     21       "data_released": {
     22         "applies": false,
     23         "answer": false,
     24         "justification": "Purely theoretical paper with no dataset."
     25       },
     26       "environment_specified": {
     27         "applies": false,
     28         "answer": false,
     29         "justification": "Purely theoretical paper with no computational experiments requiring environment setup."
     30       },
     31       "reproduction_instructions": {
     32         "applies": false,
     33         "answer": false,
     34         "justification": "Theoretical paper — proofs are self-contained in the paper and appendices."
     35       }
     36     },
     37     "statistical_methodology": {
     38       "confidence_intervals_or_error_bars": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "No empirical experiments are conducted."
     42       },
     43       "significance_tests": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No empirical experiments are conducted."
     47       },
     48       "effect_sizes_reported": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No empirical experiments are conducted."
     52       },
     53       "sample_size_justified": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "Theoretical paper with no samples."
     57       },
     58       "variance_reported": {
     59         "applies": false,
     60         "answer": false,
     61         "justification": "No empirical experiments are conducted."
     62       }
     63     },
     64     "evaluation_design": {
     65       "baselines_included": {
     66         "applies": false,
     67         "answer": false,
     68         "justification": "Theoretical paper establishing equivalences via proofs, not experimental comparisons."
     69       },
     70       "baselines_contemporary": {
     71         "applies": false,
     72         "answer": false,
     73         "justification": "No experimental baselines; the paper positions against prior theoretical results."
     74       },
     75       "ablation_study": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "No system with components to ablate."
     79       },
     80       "multiple_metrics": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "No empirical evaluation with metrics."
     84       },
     85       "human_evaluation": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "Theoretical paper; human evaluation is irrelevant."
     89       },
     90       "held_out_test_set": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "No data or evaluation sets used."
     94       },
     95       "per_category_breakdown": {
     96         "applies": false,
     97         "answer": false,
     98         "justification": "No empirical results to break down."
     99       },
    100       "failure_cases_discussed": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "Section 6 (Limitations) discusses what the results do NOT cover: softmax attention, unmasked attention, and the restriction of position embeddings to finite image."
    104       },
    105       "negative_results_reported": {
    106         "applies": true,
    107         "answer": true,
    108         "justification": "The paper shows that non-strict masking reduces expressivity (Section 5.2), and identifies limitations of the framework (Section 6). These are negative results about the model's capabilities."
    109       }
    110     },
    111     "claims_and_evidence": {
    112       "abstract_claims_supported": {
    113         "applies": true,
    114         "answer": true,
    115         "justification": "All abstract claims (equivalence with star-free languages, role of B-RASP, effects of position embeddings/strict masking/depth) are supported by formal theorems with proofs in the paper and appendices."
    116       },
    117       "causal_claims_justified": {
    118         "applies": false,
    119         "answer": false,
    120         "justification": "The paper makes mathematical equivalence claims, not causal claims."
    121       },
    122       "generalization_bounded": {
    123         "applies": true,
    124         "answer": true,
    125         "justification": "The paper is precise about what variants of transformers are covered (hard attention, specific masking types) and explicitly states in Section 6 that results do not apply to softmax-attention or unmasked transformers."
    126       },
    127       "alternative_explanations_discussed": {
    128         "applies": false,
    129         "answer": false,
    130         "justification": "Theoretical paper with formal proofs; alternative explanations are not applicable to mathematical proofs."
    131       },
    132       "proxy_outcome_distinction": {
    133         "applies": false,
    134         "answer": false,
    135         "justification": "Theoretical paper with no measurements."
    136       }
    137     },
    138     "setup_transparency": {
    139       "model_versions_specified": {
    140         "applies": false,
    141         "answer": false,
    142         "justification": "No LLM or model is used in experiments."
    143       },
    144       "prompts_provided": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No prompting is used."
    148       },
    149       "hyperparameters_reported": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "No experiments with hyperparameters."
    153       },
    154       "scaffolding_described": {
    155         "applies": false,
    156         "answer": false,
    157         "justification": "No agentic scaffolding is used."
    158       },
    159       "data_preprocessing_documented": {
    160         "applies": false,
    161         "answer": false,
    162         "justification": "No data preprocessing; purely theoretical."
    163       }
    164     },
    165     "limitations_and_scope": {
    166       "limitations_section_present": {
    167         "applies": true,
    168         "answer": true,
    169         "justification": "Section 6 is titled 'Limitations' and substantively discusses three key limitations."
    170       },
    171       "threats_to_validity_specific": {
    172         "applies": true,
    173         "answer": true,
    174         "justification": "Section 6 identifies specific limitations: results apply only to hard attention (not softmax), only to masked attention (not unmasked), and the finite-image restriction on position embeddings does not exactly match standard definitions."
    175       },
    176       "scope_boundaries_stated": {
    177         "applies": true,
    178         "answer": true,
    179         "justification": "Section 6 explicitly states what the results do NOT apply to: softmax-attention transformers, unmasked attention, and standard (non-rational) sinusoidal position embeddings."
    180       }
    181     },
    182     "data_integrity": {
    183       "raw_data_available": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "No empirical data collected."
    187       },
    188       "data_collection_described": {
    189         "applies": false,
    190         "answer": false,
    191         "justification": "No data collection; theoretical paper."
    192       },
    193       "recruitment_methods_described": {
    194         "applies": false,
    195         "answer": false,
    196         "justification": "No participants or data recruitment."
    197       },
    198       "data_pipeline_documented": {
    199         "applies": false,
    200         "answer": false,
    201         "justification": "No data pipeline; theoretical paper."
    202       }
    203     },
    204     "conflicts_of_interest": {
    205       "funding_disclosed": {
    206         "applies": true,
    207         "answer": false,
    208         "justification": "No funding or acknowledgments section mentioning grants or funding sources is present (the Acknowledgements section thanks individuals and reviewers but does not mention funding)."
    209       },
    210       "affiliations_disclosed": {
    211         "applies": true,
    212         "answer": true,
    213         "justification": "Author affiliations (University of Notre Dame, Yale University) are listed on the first page."
    214       },
    215       "funder_independent_of_outcome": {
    216         "applies": false,
    217         "answer": false,
    218         "justification": "No funding disclosed; appears to be unfunded academic work."
    219       },
    220       "financial_interests_declared": {
    221         "applies": true,
    222         "answer": false,
    223         "justification": "No competing interests statement is present in the paper."
    224       }
    225     },
    226     "contamination": {
    227       "training_cutoff_stated": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "The paper does not evaluate any pre-trained model on a benchmark."
    231       },
    232       "train_test_overlap_discussed": {
    233         "applies": false,
    234         "answer": false,
    235         "justification": "The paper does not evaluate any pre-trained model on a benchmark."
    236       },
    237       "benchmark_contamination_addressed": {
    238         "applies": false,
    239         "answer": false,
    240         "justification": "The paper does not evaluate any pre-trained model on a benchmark."
    241       }
    242     },
    243     "human_studies": {
    244       "pre_registered": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants."
    248       },
    249       "irb_or_ethics_approval": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants."
    253       },
    254       "demographics_reported": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants."
    258       },
    259       "inclusion_exclusion_criteria": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants."
    263       },
    264       "randomization_described": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants."
    268       },
    269       "blinding_described": {
    270         "applies": false,
    271         "answer": false,
    272         "justification": "No human participants."
    273       },
    274       "attrition_reported": {
    275         "applies": false,
    276         "answer": false,
    277         "justification": "No human participants."
    278       }
    279     },
    280     "cost_and_practicality": {
    281       "inference_cost_reported": {
    282         "applies": false,
    283         "answer": false,
    284         "justification": "Purely theoretical paper."
    285       },
    286       "compute_budget_stated": {
    287         "applies": false,
    288         "answer": false,
    289         "justification": "Purely theoretical paper."
    290       }
    291     }
    292   },
    293   "claims": [
    294     {
    295       "claim": "Masked hard-attention transformers with strict future masking and no position embeddings recognize exactly the star-free languages.",
    296       "evidence": "Theorems 1-4 establish equivalence chains: LTL ↔ B-RASP ↔ masked hard-attention transformers. Full proofs in Appendices B and C.",
    297       "supported": "strong"
    298     },
    299     {
    300       "claim": "Non-strict masking reduces expressivity to the stutter-invariant star-free languages.",
    301       "evidence": "Theorem 6, building on Peled and Wilke (1997), with proof adaptation described in Section 5.2.",
    302       "supported": "strong"
    303     },
    304     {
    305       "claim": "With rational sinusoidal position embeddings, masked hard-attention transformers recognize exactly the regular languages in AC⁰.",
    306       "evidence": "Corollary 8, proved via connection to FO[<, MOD] (Appendix D.2).",
    307       "supported": "strong"
    308     },
    309     {
    310       "claim": "Adding more self-attention layers strictly increases expressive power at every depth level.",
    311       "evidence": "Theorem 10, using separating languages STAIR_{k+1} from Etessami and Wilke (2000).",
    312       "supported": "strong"
    313     }
    314   ],
    315   "red_flags": [],
    316   "cited_papers": [
    317     {
    318       "title": "Attention is all you need",
    319       "authors": ["Ashish Vaswani", "Noam Shazeer", "Niki Parmar"],
    320       "year": 2017,
    321       "relevance": "Foundational transformer architecture paper that this work theoretically analyzes."
    322     },
    323     {
    324       "title": "Thinking like Transformers",
    325       "authors": ["Gail Weiss", "Yoav Goldberg", "Eran Yahav"],
    326       "year": 2021,
    327       "relevance": "Introduced RASP programming language for reasoning about transformer computations; B-RASP builds on this."
    328     },
    329     {
    330       "title": "Theoretical limitations of self-attention in neural sequence models",
    331       "authors": ["Michael Hahn"],
    332       "year": 2020,
    333       "relevance": "Prior work on expressivity bounds of transformers, showing hard-attention cannot compute parity."
    334     },
    335     {
    336       "title": "Formal language recognition by hard attention Transformers: Perspectives from circuit complexity",
    337       "authors": ["Yiding Hao", "Dana Angluin", "Robert Frank"],
    338       "year": 2022,
    339       "relevance": "Established AC⁰ upper bound for hard-attention transformers; this paper resolves open questions from this work."
    340     },
    341     {
    342       "title": "The expressive power of transformers with chain of thought",
    343       "authors": ["William Merrill", "Ashish Sabharwal"],
    344       "year": 2024,
    345       "relevance": "Studies transformer expressivity with chain-of-thought, another exact characterization result."
    346     },
    347     {
    348       "title": "Overcoming a theoretical limitation of self-attention",
    349       "authors": ["David Chiang", "Peter Cholak"],
    350       "year": 2022,
    351       "relevance": "Shows soft-attention transformers can compute parity, contrasting with hard-attention limitations studied here."
    352     },
    353     {
    354       "title": "Learning Transformer programs",
    355       "authors": ["Dan Friedman", "Alexander Wettig", "Danqi Chen"],
    356       "year": 2023,
    357       "relevance": "Studies learning in transformer-like programs; related to the associative recall task example in this paper."
    358     }
    359   ]
    360 }

Impressum · Datenschutz