ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (29545B)


      1 {
      2   "paper": {
      3     "title": "Probing the Emergence of Cross-lingual Alignment during LLM Training",
      4     "authors": [
      5       "Hetong Wang",
      6       "Pasquale Minervini",
      7       "Edoardo M. Ponti"
      8     ],
      9     "year": 2024,
     10     "venue": "Annual Meeting of the Association for Computational Linguistics",
     11     "arxiv_id": "2406.13229",
     12     "doi": "10.48550/arXiv.2406.13229"
     13   },
     14   "scan_version": 3,
     15   "active_modules": ["experimental_rigor", "data_leakage"],
     16   "methodology_tags": ["benchmark-eval"],
     17   "key_findings": "Cross-lingual neuron overlap in BLOOM checkpoints strongly correlates with zero-shot cross-lingual transfer performance on XNLI and POS tagging (Pearson r up to 0.94). Alignment dynamics are non-monotonic: smaller models (560m, 1b1) experience severe drops in both neuron overlap and downstream performance at certain training steps, while the largest model (1b7) shows monotonic growth. The findings suggest a model-size threshold for stable cross-lingual alignment emergence, consistent with the 'curse of multilinguality' hypothesis.",
     18   "checklist": {
     19     "artifacts": {
     20       "code_released": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "Code is released at https://github.com/ErikaaWang/probing-multilingual-dynamics, stated in Section 1 footnote."
     24       },
     25       "data_released": {
     26         "applies": true,
     27         "answer": true,
     28         "justification": "All datasets used are publicly available: Universal Dependencies treebanks v2.1, XNLI, and BLOOM intermediate checkpoints on HuggingFace (https://huggingface.co/bigscience/bloom-intermediate)."
     29       },
     30       "environment_specified": {
     31         "applies": true,
     32         "answer": false,
     33         "justification": "The paper mentions 'a single 80GB NVIDIA A100 GPU' and uses qLoRA, but no requirements.txt, Dockerfile, or detailed dependency specifications are provided."
     34       },
     35       "reproduction_instructions": {
     36         "applies": true,
     37         "answer": false,
     38         "justification": "No step-by-step reproduction instructions are provided in the paper. The GitHub repository is referenced but the paper itself does not include instructions for replicating the experiments."
     39       }
     40     },
     41     "statistical_methodology": {
     42       "confidence_intervals_or_error_bars": {
     43         "applies": true,
     44         "answer": false,
     45         "justification": "All results in Table 1, Figures 3-5, and Appendix B are reported as point estimates. No confidence intervals or error bars are shown on any figure or in any table."
     46       },
     47       "significance_tests": {
     48         "applies": true,
     49         "answer": true,
     50         "justification": "Pearson correlation coefficients with p-values are reported in Table 1. Statistical significance is tested against the null hypothesis, with color-coded p-values indicating significance levels (p<0.05, p<0.001)."
     51       },
     52       "effect_sizes_reported": {
     53         "applies": true,
     54         "answer": true,
     55         "justification": "Pearson correlation coefficients (r) are reported in Table 1, which are standard effect size measures. Absolute performance values and overlap rates are also reported, providing context for the magnitude of effects."
     56       },
     57       "sample_size_justified": {
     58         "applies": true,
     59         "answer": false,
     60         "justification": "No justification for why 4-8 checkpoints per model scale were used, no power analysis, and no acknowledgment that the small number of data points (e.g., 6 checkpoints for 560m) limits the reliability of correlation estimates."
     61       },
     62       "variance_reported": {
     63         "applies": true,
     64         "answer": false,
     65         "justification": "All results appear to be single-run. No standard deviations, variance across seeds, or spread measures are reported for either the probing or downstream fine-tuning experiments."
     66       }
     67     },
     68     "evaluation_design": {
     69       "baselines_included": {
     70         "applies": true,
     71         "answer": false,
     72         "justification": "No alternative cross-lingual alignment metrics are compared against (e.g., centered kernel alignment (CKA) or representational similarity analysis (RSA)). The neuron overlap metric is used without comparison to other approaches for measuring alignment."
     73       },
     74       "baselines_contemporary": {
     75         "applies": true,
     76         "answer": false,
     77         "justification": "No baselines are included, so contemporariness cannot be assessed."
     78       },
     79       "ablation_study": {
     80         "applies": false,
     81         "answer": false,
     82         "justification": "The probing methodology is a single pipeline without modular components to ablate. The paper does vary layers and linguistic features, but these are analysis parameters rather than system components."
     83       },
     84       "multiple_metrics": {
     85         "applies": true,
     86         "answer": true,
     87         "justification": "Multiple metrics are used: neuron overlap rate, XNLI accuracy, POS tagging F1, and Pearson correlation coefficients between alignment and downstream performance."
     88       },
     89       "human_evaluation": {
     90         "applies": false,
     91         "answer": false,
     92         "justification": "Human evaluation is not relevant to this probing/correlation study; all metrics are computed automatically."
     93       },
     94       "held_out_test_set": {
     95         "applies": true,
     96         "answer": true,
     97         "justification": "Section 3.1 describes train/validation/test splits for probing with lemma-based splitting. Section 3.2 states: 'we evaluate the best finetuned models on the test set of each target language' with model selection on development sets."
     98       },
     99       "per_category_breakdown": {
    100         "applies": true,
    101         "answer": true,
    102         "justification": "Results are broken down by model scale (Figures 1, 3), training step (Figures 1, 3, 5), target language (Figures 4, 5), morphosyntactic feature (Figures 1, 2), and layer depth (Figure 2). Appendix B provides exhaustive pairwise overlap heatmaps."
    103       },
    104       "failure_cases_discussed": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "The paper extensively discusses unexpected drops in alignment during training (Section 4.1, 4.3), the near-random performance at drop points (Figure 5), and the degradation affecting both in-language and cross-lingual abilities."
    108       },
    109       "negative_results_reported": {
    110         "applies": true,
    111         "answer": true,
    112         "justification": "Several negative findings are reported: non-monotonic alignment dynamics contradicting initial assumptions (Section 4.1), weak and non-significant correlations for BLOOM-1b7 on average metrics (Table 1, r=0.395/0.258 with p≥0.05), and the drastic overlap decrease at the last hidden layer for autoregressive models."
    113       }
    114     },
    115     "claims_and_evidence": {
    116       "abstract_claims_supported": {
    117         "applies": true,
    118         "answer": true,
    119         "justification": "The abstract claims high correlation between neuron overlap and downstream performance (supported by Table 1), degradation in certain training phases (supported by Figures 1, 3), and new insights into multilingual pretraining dynamics (supported by Section 4). All claims are substantiated in the results."
    120       },
    121       "causal_claims_justified": {
    122         "applies": true,
    123         "answer": false,
    124         "justification": "Section 6 states 'the same sub-networks are activated at inference time and updated during fine-tuning, which contributes to the cross-lingual generalisation ability of LMs' — this is a causal claim based on correlational evidence. The study design (observing correlations between neuron overlap and performance across checkpoints) cannot establish causation."
    125       },
    126       "generalization_bounded": {
    127         "applies": true,
    128         "answer": false,
    129         "justification": "The title 'Probing the Emergence of Cross-lingual Alignment during LLM Training' frames findings broadly for LLMs, but experiments use only BLOOM (one model family, autoregressive only, up to 1.7B parameters). The limitations section acknowledges this but the title and abstract do not bound the claims to BLOOM."
    130       },
    131       "alternative_explanations_discussed": {
    132         "applies": true,
    133         "answer": false,
    134         "justification": "Section 4.3 discusses possible explanations for the drop points (bad minima, curse of multilinguality, scaling effects) but does not consider alternative explanations for the main correlation finding. The correlation between neuron overlap and downstream performance could be driven by a confound (e.g., both improving simply as a function of training compute)."
    135       },
    136       "proxy_outcome_distinction": {
    137         "applies": true,
    138         "answer": true,
    139         "justification": "The paper is clear that neuron overlap is a proxy metric for cross-lingual alignment, and they validate it against downstream task performance. The measurements (overlap rate, accuracy, F1) match the granularity of the claims made."
    140       }
    141     },
    142     "setup_transparency": {
    143       "model_versions_specified": {
    144         "applies": true,
    145         "answer": true,
    146         "justification": "Specific BLOOM model sizes (560m, 1b1, 1b7) are identified with exact checkpoint references from HuggingFace (https://huggingface.co/bigscience/bloom-intermediate). Specific training steps are stated for each checkpoint."
    147       },
    148       "prompts_provided": {
    149         "applies": false,
    150         "answer": false,
    151         "justification": "The paper does not use prompting. Probing is done with trained linear probes (Eq. 4), and downstream evaluation uses fine-tuning with task-specific heads."
    152       },
    153       "hyperparameters_reported": {
    154         "applies": true,
    155         "answer": true,
    156         "justification": "Section 3.2 reports: AdamW optimizer, learning rate 2×10⁻⁵, 5 epochs for XNLI, 10 epochs for POS, evaluation every 100/500 steps, 4-bit quantization with qLoRA. Section 3.1 reports k=50 neurons for probing. Uniform prior for p(C), Poisson sampling for variational distribution."
    157       },
    158       "scaffolding_described": {
    159         "applies": false,
    160         "answer": false,
    161         "justification": "No agentic scaffolding is used in this work."
    162       },
    163       "data_preprocessing_documented": {
    164         "applies": true,
    165         "answer": true,
    166         "justification": "Section 3.1 describes the full pipeline: UD labels mapped to UniMorph Schema via McCarthy et al. (2018) converter, contextual representations extracted from BLOOM at selected layers, subword tokens averaged per word (following Vulić et al., 2020), grouped by linguistic feature, split by lemma (same lemma in same split), and words with lemmas occurring <20 times discarded."
    167       }
    168     },
    169     "limitations_and_scope": {
    170       "limitations_section_present": {
    171         "applies": true,
    172         "answer": true,
    173         "justification": "A dedicated 'Limitations' section follows the Conclusions, containing substantive discussion of methodological constraints."
    174       },
    175       "threats_to_validity_specific": {
    176         "applies": true,
    177         "answer": true,
    178         "justification": "The limitations section identifies specific threats: large checkpoint intervals may miss finer-grained dynamics, results limited to autoregressive models with the same objective and training dataset, experimental design choices (selected layers and morphosyntactic categories) not directly transferable to other architectures, and findings limited to languages seen during pretraining."
    179       },
    180       "scope_boundaries_stated": {
    181         "applies": true,
    182         "answer": true,
    183         "justification": "The limitations explicitly state scope boundaries: 'we consider only autoregressive models with the same objective and training dataset', 'findings on the trend of alignment might be not applicable if zooming in on a particular window of training', and 'the generalisation to unseen languages is left for future research.'"
    184       }
    185     },
    186     "data_integrity": {
    187       "raw_data_available": {
    188         "applies": true,
    189         "answer": true,
    190         "justification": "All raw data is publicly available: UD treebanks v2.1, XNLI dataset, BLOOM intermediate checkpoints on HuggingFace. Code for probing is released on GitHub, enabling independent verification."
    191       },
    192       "data_collection_described": {
    193         "applies": true,
    194         "answer": true,
    195         "justification": "Section 3.1 describes in detail how probing datasets are constructed from UD treebanks: annotation mapping, embedding extraction, grouping, splitting criteria, and filtering thresholds. Section 3.2 describes downstream data sources with specific references."
    196       },
    197       "recruitment_methods_described": {
    198         "applies": false,
    199         "answer": false,
    200         "justification": "No human participants. All data sources are standard public benchmarks (UD, XNLI) and publicly available model checkpoints."
    201       },
    202       "data_pipeline_documented": {
    203         "applies": true,
    204         "answer": true,
    205         "justification": "The full pipeline from raw UD annotations to final probing datasets is documented in Section 3.1: annotation → UniMorph mapping → representation extraction → subword averaging → feature grouping → lemma-based splitting → frequency filtering. Checkpoint selection and deduplication are also documented (footnote 3)."
    206       }
    207     },
    208     "conflicts_of_interest": {
    209       "funding_disclosed": {
    210         "applies": true,
    211         "answer": false,
    212         "justification": "The acknowledgements only mention compute resources ('This work used resources provided by the Edinburgh Compute and Data Facility'). No funding grants, agencies, or financial support are disclosed."
    213       },
    214       "affiliations_disclosed": {
    215         "applies": true,
    216         "answer": true,
    217         "justification": "Author affiliations are clearly listed: University of Edinburgh and University of Cambridge. These are academic institutions with no direct financial interest in BLOOM's performance."
    218       },
    219       "funder_independent_of_outcome": {
    220         "applies": true,
    221         "answer": false,
    222         "justification": "No funder is disclosed, so independence cannot be assessed. The work appears to be unfunded academic research, but this is not explicitly stated."
    223       },
    224       "financial_interests_declared": {
    225         "applies": true,
    226         "answer": false,
    227         "justification": "No competing interests statement or financial disclosure is provided in the paper."
    228       }
    229     },
    230     "contamination": {
    231       "training_cutoff_stated": {
    232         "applies": true,
    233         "answer": false,
    234         "justification": "The paper describes the ROOTS training corpus but does not state a specific training data cutoff date for BLOOM. The temporal relationship between training data collection and benchmark creation is not discussed."
    235       },
    236       "train_test_overlap_discussed": {
    237         "applies": true,
    238         "answer": false,
    239         "justification": "No discussion of whether XNLI (2018) or UD treebank (2017) test examples could have appeared in the ROOTS corpus used to train BLOOM."
    240       },
    241       "benchmark_contamination_addressed": {
    242         "applies": true,
    243         "answer": false,
    244         "justification": "XNLI (2018) and UD treebanks (2017) were publicly available before BLOOM's training. No discussion of whether these benchmarks or their solutions appeared in the training data."
    245       }
    246     },
    247     "human_studies": {
    248       "pre_registered": {
    249         "applies": false,
    250         "answer": false,
    251         "justification": "No human participants in this study."
    252       },
    253       "irb_or_ethics_approval": {
    254         "applies": false,
    255         "answer": false,
    256         "justification": "No human participants in this study."
    257       },
    258       "demographics_reported": {
    259         "applies": false,
    260         "answer": false,
    261         "justification": "No human participants in this study."
    262       },
    263       "inclusion_exclusion_criteria": {
    264         "applies": false,
    265         "answer": false,
    266         "justification": "No human participants in this study."
    267       },
    268       "randomization_described": {
    269         "applies": false,
    270         "answer": false,
    271         "justification": "No human participants in this study."
    272       },
    273       "blinding_described": {
    274         "applies": false,
    275         "answer": false,
    276         "justification": "No human participants in this study."
    277       },
    278       "attrition_reported": {
    279         "applies": false,
    280         "answer": false,
    281         "justification": "No human participants in this study."
    282       }
    283     },
    284     "cost_and_practicality": {
    285       "inference_cost_reported": {
    286         "applies": true,
    287         "answer": false,
    288         "justification": "No inference cost, latency, or tokens consumed are reported. The paper mentions using 'a single 80GB NVIDIA A100 GPU' but does not quantify the cost of probing or fine-tuning experiments."
    289       },
    290       "compute_budget_stated": {
    291         "applies": true,
    292         "answer": false,
    293         "justification": "No total computational budget (GPU hours, training time) is stated. Hardware is mentioned but total compute for the full experimental pipeline across all checkpoints and model scales is not quantified."
    294       }
    295     },
    296     "experimental_rigor": {
    297       "seed_sensitivity_reported": {
    298         "applies": true,
    299         "answer": false,
    300         "justification": "No mention of multiple random seeds. Both probing and fine-tuning results appear to be single-run, with no seed sensitivity analysis."
    301       },
    302       "number_of_runs_stated": {
    303         "applies": true,
    304         "answer": false,
    305         "justification": "The number of experimental runs per configuration is not explicitly stated. Results appear to be from single runs but this is never confirmed."
    306       },
    307       "hyperparameter_search_budget": {
    308         "applies": true,
    309         "answer": false,
    310         "justification": "Hyperparameters are stated as fixed values (k=50 from prior work, learning rate 2×10⁻⁵) but no search budget, number of configurations tried, or search method is reported."
    311       },
    312       "best_config_selection_justified": {
    313         "applies": true,
    314         "answer": true,
    315         "justification": "Section 3.2 states: 'We perform model selection based on development set performance, evaluating models every 100 (POS) or 500 (XNLI) steps.' Selection is on validation set, not test set."
    316       },
    317       "multiple_comparison_correction": {
    318         "applies": true,
    319         "answer": false,
    320         "justification": "Table 1 reports 12 correlation tests (3 scales × 2 tasks × 2 granularities) without any correction for multiple comparisons (e.g., Bonferroni or Holm correction)."
    321       },
    322       "self_comparison_bias_addressed": {
    323         "applies": true,
    324         "answer": false,
    325         "justification": "The probing methodology and layer/feature selection choices are made by the authors without discussion of potential bias in these choices. No independent replication or cross-validation of the probing setup is conducted."
    326       },
    327       "compute_budget_vs_performance": {
    328         "applies": false,
    329         "answer": false,
    330         "justification": "The paper does not compare competing methods with different compute budgets. The x-axis of figures is training steps (showing dynamics), not a method-vs-method compute comparison."
    331       },
    332       "benchmark_construct_validity": {
    333         "applies": true,
    334         "answer": false,
    335         "justification": "XNLI and POS tagging are used as proxies for cross-lingual transfer ability without discussion of whether these benchmarks adequately capture the construct. No discussion of construct validity or comparison with alternative evaluation benchmarks."
    336       },
    337       "scaffold_confound_addressed": {
    338         "applies": false,
    339         "answer": false,
    340         "justification": "No scaffolding is involved in this probing and fine-tuning study."
    341       }
    342     },
    343     "data_leakage": {
    344       "temporal_leakage_addressed": {
    345         "applies": true,
    346         "answer": false,
    347         "justification": "XNLI (2018) and UD treebanks (2017) predate BLOOM's training. No discussion of whether benchmark solutions could have appeared in the ROOTS training corpus."
    348       },
    349       "feature_leakage_addressed": {
    350         "applies": true,
    351         "answer": false,
    352         "justification": "No discussion of whether the evaluation setup introduces information leakage through the fine-tuning process or probing data."
    353       },
    354       "non_independence_addressed": {
    355         "applies": true,
    356         "answer": false,
    357         "justification": "No discussion of whether the UD treebank data used for probing and for POS tagging evaluation share structural similarities or overlap. The probing datasets and downstream evaluation use the same UD source."
    358       },
    359       "leakage_detection_method": {
    360         "applies": true,
    361         "answer": false,
    362         "justification": "No concrete leakage detection or prevention methods (canary strings, membership inference, n-gram overlap analysis) are applied."
    363       }
    364     }
    365   },
    366   "claims": [
    367     {
    368       "claim": "Cross-lingual neuron overlap strongly correlates with zero-shot cross-lingual transfer performance across model scales.",
    369       "evidence": "Table 1 reports Pearson correlations: BLOOM-560m r=0.808/0.940 (XNLI/POS average), BLOOM-1b1 r=0.804/0.831, with statistically significant p-values. Pairwise correlations are significant across all scales (Section 4.2, Figures 3-4).",
    370       "supported": "moderate"
    371     },
    372     {
    373       "claim": "Cross-lingual alignment dynamics are non-monotonic during pre-training, with severe drops at certain training steps in smaller models.",
    374       "evidence": "Figure 1 and Figure 3 show neuron overlap and downstream performance dropping near 600k steps for BLOOM-560m and 400k steps for BLOOM-1b1. Figure 5 shows near-random target-language performance at drop points (Section 4.1).",
    375       "supported": "moderate"
    376     },
    377     {
    378       "claim": "Only the largest model scale (1.7B) shows monotonic growth in cross-lingual alignment, suggesting a model-size threshold.",
    379       "evidence": "Figure 1 and Section 4.1 show BLOOM-1b7 displays monotonic overlap growth while 560m and 1b1 experience drops. However, BLOOM-1b7 has only 4 valid checkpoints, limiting confidence in this trend.",
    380       "supported": "weak"
    381     },
    382     {
    383       "claim": "Autoregressive models show drastically decreased neuron overlap at the last hidden layer, unlike encoder-only models.",
    384       "evidence": "Figure 2 shows overlap rates dropping sharply at the final layer of BLOOM-560m across all 11 morphosyntactic features. This is contrasted with prior findings on m-BERT and XLM-R (Stańczak et al., 2023; Stańczak et al., 2022) in Section 3.1.",
    385       "supported": "moderate"
    386     },
    387     {
    388       "claim": "The drop in alignment also degrades in-language (English) performance, not just cross-lingual transfer.",
    389       "evidence": "Figure 5 shows English POS F1 and XNLI accuracy dropping at the same training steps where cross-lingual transfer degrades (Section 4.3).",
    390       "supported": "moderate"
    391     }
    392   ],
    393   "red_flags": [
    394     {
    395       "flag": "No error bars or variance across runs",
    396       "detail": "All probing and fine-tuning results appear to be from single runs. Given the stochastic nature of both probe training and qLoRA fine-tuning, the reported correlations and performance numbers could vary substantially across seeds."
    397     },
    398     {
    399       "flag": "Very few checkpoints per model scale",
    400       "detail": "Correlations are computed on 4-8 data points per model scale. With 6 checkpoints for BLOOM-560m, a Pearson correlation has very limited statistical power and is highly sensitive to individual data points, especially the drop-point outliers."
    401     },
    402     {
    403       "flag": "Duplicate checkpoint anomaly unexplained",
    404       "detail": "Footnote 3 reveals that multiple released checkpoints were duplicates (560m at 10k/500k, 1b7 at 1k/10k and 250k/300k). These are removed but the anomaly is not investigated — it could indicate broader checkpointing issues affecting the analysis."
    405     },
    406     {
    407       "flag": "Confound in correlation analysis",
    408       "detail": "Both neuron overlap and downstream performance naturally increase with training steps. The observed correlation could be substantially driven by this shared dependency on training progress rather than a direct causal link between alignment and transfer ability."
    409     }
    410   ],
    411   "cited_papers": [
    412     {
    413       "title": "BLOOM: A 176b-parameter open-access multilingual language model",
    414       "authors": ["BigScience Workshop"],
    415       "year": 2023,
    416       "relevance": "The primary model family studied; a major open-access multilingual LLM whose training checkpoints enable the analysis of cross-lingual dynamics."
    417     },
    418     {
    419       "title": "Scaling laws for neural language models",
    420       "authors": ["Jared Kaplan", "Sam McCandlish", "Tom Henighan"],
    421       "year": 2020,
    422       "arxiv_id": "2001.08361",
    423       "relevance": "Foundational work on scaling laws referenced in the paper's hypothesis that alignment follows scaling-dependent trajectories."
    424     },
    425     {
    426       "title": "Training trajectories of language models across scales",
    427       "authors": ["Mengzhou Xia", "Mikel Artetxe", "Chunting Zhou"],
    428       "year": 2023,
    429       "relevance": "Studies training dynamics across model scales in monolingual LMs, finding that larger models escape sub-optimal distributions — directly relevant to the drop-point phenomenon observed."
    430     },
    431     {
    432       "title": "Analyzing the mono- and cross-lingual pretraining dynamics of multilingual language models",
    433       "authors": ["Terra Blevins", "Hila Gonen", "Luke Zettlemoyer"],
    434       "year": 2022,
    435       "relevance": "Most directly related prior work: studies inconsistency between in-language and cross-language ability emergence in encoder LMs, including similar performance degradation during training."
    436     },
    437     {
    438       "title": "Same neurons, different languages: Probing morphosyntax in multilingual pre-trained models",
    439       "authors": ["Karolina Stanczak", "Edoardo Ponti", "Lucas Torroba Hennigen"],
    440       "year": 2022,
    441       "relevance": "Establishes the intrinsic probing methodology for neuron overlap in multilingual models that this paper extends to training dynamics."
    442     },
    443     {
    444       "title": "First align, then predict: Understanding the cross-lingual ability of multilingual BERT",
    445       "authors": ["Benjamin Muller", "Yanai Elazar", "Benoît Sagot"],
    446       "year": 2021,
    447       "relevance": "Demonstrates correlation between representation similarity and cross-lingual transfer in converged mBERT, which this paper extends to training trajectories."
    448     },
    449     {
    450       "title": "Unsupervised cross-lingual representation learning at scale",
    451       "authors": ["Alexis Conneau", "Kartikay Khandelwal", "Naman Goyal"],
    452       "year": 2020,
    453       "relevance": "Introduces the 'curse of multilinguality' concept — with a fixed parameter budget, adding more languages degrades performance — central to explaining the alignment drops observed."
    454     },
    455     {
    456       "title": "Scaling monosemanticity: Extracting interpretable features from Claude 3 Sonnet",
    457       "authors": ["Adly Templeton", "Tom Conerly", "Jonathan Marcus"],
    458       "year": 2024,
    459       "relevance": "Demonstrates sparse autoencoder feature extraction in production LLMs, related to the neuron-level interpretability approach used in this paper."
    460     },
    461     {
    462       "title": "QLoRA: Efficient finetuning of quantized LLMs",
    463       "authors": ["Tim Dettmers", "Artidoro Pagnoni", "Ari Holtzman"],
    464       "year": 2023,
    465       "relevance": "The fine-tuning method used for all downstream evaluation experiments in this paper, enabling evaluation on a single GPU."
    466     },
    467     {
    468       "title": "LoRA: Low-rank adaptation of large language models",
    469       "authors": ["Edward J Hu", "Yelong Shen", "Phillip Wallis"],
    470       "year": 2022,
    471       "relevance": "Foundational parameter-efficient fine-tuning method underlying the qLoRA approach used in this paper's downstream evaluations."
    472     },
    473     {
    474       "title": "Sudden drops in the loss: Syntax acquisition, phase transitions, and simplicity bias in MLMs",
    475       "authors": ["Angelica Chen", "Ravid Shwartz-Ziv", "Kyunghyun Cho"],
    476       "year": 2024,
    477       "relevance": "Studies phase transitions during training of masked language models, finding sudden drops analogous to the alignment drops observed in this multilingual study."
    478     }
    479   ],
    480   "engagement_factors": {
    481     "practical_relevance": {
    482       "score": 0,
    483       "justification": "Pure analysis of training dynamics with no actionable tool or technique for practitioners."
    484     },
    485     "surprise_contrarian": {
    486       "score": 1,
    487       "justification": "The non-monotonic alignment dynamics and drop points are somewhat surprising but primarily of interest within the NLP interpretability research community."
    488     },
    489     "fear_safety": {
    490       "score": 0,
    491       "justification": "No safety or security concerns raised; this is a mechanistic interpretability study."
    492     },
    493     "drama_conflict": {
    494       "score": 0,
    495       "justification": "No controversy or conflict; a straightforward empirical analysis."
    496     },
    497     "demo_ability": {
    498       "score": 1,
    499       "justification": "Code is released on GitHub, but there is no interactive demo or easily runnable tool."
    500     },
    501     "brand_recognition": {
    502       "score": 1,
    503       "justification": "BLOOM is a known open-source LLM and ACL is a top NLP venue, but neither the model nor the authors have broad mainstream recognition."
    504     }
    505   }
    506 }

Impressum · Datenschutz