ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan-v4.json (27986B)


      1 {
      2   "scan_version": 4,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Evidence of Phase Transitions in Small Transformer-Based Language Models",
      6     "authors": [
      7       "Noah Hong",
      8       "Tao Hong"
      9     ],
     10     "year": 2025,
     11     "venue": "arXiv.org",
     12     "arxiv_id": "2511.12768",
     13     "doi": "10.48550/arXiv.2511.12768"
     14   },
     15   "checklist": {
     16     "claims_and_evidence": {
     17       "abstract_claims_supported": {
     18         "applies": true,
     19         "answer": true,
     20         "justification": "The three claims in the abstract (transitions in small models, detectable in linear space, occurring early in training) are all supported by the results in Sections IV and V.",
     21         "source": "opus"
     22       },
     23       "causal_claims_justified": {
     24         "applies": true,
     25         "answer": false,
     26         "justification": "The paper uses causal language ('driven by the abrupt emergence of longer, coherent words', 'fragment proliferation precedes word consolidation') but the study design is observational — it observes correlations between metrics, not causal mechanisms. The claims about 'barrier-crossing dynamics' and 'first-order phase transitions' are interpretive analogies, not causally demonstrated.",
     27         "source": "opus"
     28       },
     29       "generalization_bounded": {
     30         "applies": true,
     31         "answer": true,
     32         "justification": "Section V.F explicitly states limitations: single architecture, single dataset, character-level tokenization, and that 'generalization to larger models, multilingual corpora, or instruction-tuned datasets remains untested.'",
     33         "source": "opus"
     34       },
     35       "alternative_explanations_discussed": {
     36         "applies": true,
     37         "answer": true,
     38         "justification": "Section II.E discusses the Schaeffer et al. critique that emergent abilities may be metric artifacts, and Section V.E explains how their methodology addresses this concern using continuous metrics rather than binary thresholds.",
     39         "source": "opus"
     40       },
     41       "proxy_outcome_distinction": {
     42         "applies": true,
     43         "answer": false,
     44         "justification": "The paper measures word-level statistics (dispersion, KL divergence, vocabulary counts) and frames these as evidence of 'phase transitions' and 'emergent abilities,' but does not discuss whether these vocabulary-level probes are adequate proxies for the broader claims about phase transitions in neural learning. The gap between character-level word formation and genuine emergent abilities is not acknowledged.",
     45         "source": "opus"
     46       }
     47     },
     48     "limitations_and_scope": {
     49       "limitations_section_present": {
     50         "applies": true,
     51         "answer": true,
     52         "justification": "Section V.F 'Limitations and Scope' provides a dedicated subsection with six specific limitations.",
     53         "source": "opus"
     54       },
     55       "threats_to_validity_specific": {
     56         "applies": true,
     57         "answer": true,
     58         "justification": "Section V.F lists specific threats: single architecture and dataset, character-level tokenization differs from BPE, correct/incorrect heuristic may misclassify rare valid words, external vs. internal metrics gap, alternative decoding methods might shift results.",
     59         "source": "opus"
     60       },
     61       "scope_boundaries_stated": {
     62         "applies": true,
     63         "answer": true,
     64         "justification": "Section V.F explicitly states what was NOT tested: 'generalization to larger models, multilingual corpora, or instruction-tuned datasets remains untested' and 'we have not yet examined universality across model sizes, datasets, or architectural families.'",
     65         "source": "opus"
     66       }
     67     },
     68     "conflicts_of_interest": {
     69       "funding_disclosed": {
     70         "applies": true,
     71         "answer": false,
     72         "justification": "No funding disclosure, acknowledgments section, or mention of financial support anywhere in the paper.",
     73         "source": "opus"
     74       },
     75       "affiliations_disclosed": {
     76         "applies": true,
     77         "answer": true,
     78         "justification": "Author affiliations are listed: Noah Hong at Lynbrook High School, Tao Hong at Keysight Technologies.",
     79         "source": "opus"
     80       },
     81       "funder_independent_of_outcome": {
     82         "applies": true,
     83         "answer": false,
     84         "justification": "No funding is disclosed, so independence cannot be assessed. The absence of a funding statement is not the same as confirming no conflicts.",
     85         "source": "opus"
     86       },
     87       "financial_interests_declared": {
     88         "applies": true,
     89         "answer": false,
     90         "justification": "No competing interests or financial disclosure statement is present in the paper.",
     91         "source": "opus"
     92       }
     93     },
     94     "scope_and_framing": {
     95       "key_terms_defined": {
     96         "applies": true,
     97         "answer": true,
     98         "justification": "Key terms are defined: 'phase transition' is grounded in Landau-Lifshitz statistical mechanics; 'emergent abilities' references Wei et al.; 'dispersion' (D=σ²/μ) and 'sub-Poisson/Poisson' regimes are precisely defined; 'correct/incorrect words' are operationally defined by corpus vocabulary membership.",
     99         "source": "haiku"
    100       },
    101       "intended_contribution_clear": {
    102         "applies": true,
    103         "answer": true,
    104         "justification": "The paper explicitly states a threefold contribution in both the abstract and the positioning section: (1) phase transitions observable in small models, (2) detectable in linear training space, (3) emerging early in training.",
    105         "source": "haiku"
    106       },
    107       "engagement_with_prior_work": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "The related work section systematically reviews statistical physics foundations, emergence in complex systems, grokking, emergent abilities in LLMs, the Schaeffer et al. critique, and formal phase-transition models, showing how this work builds on and differs from each tradition.",
    111         "source": "haiku"
    112       }
    113     }
    114   },
    115   "type_checklist": {
    116     "empirical": {
    117       "artifacts": {
    118         "code_released": {
    119           "applies": true,
    120           "answer": false,
    121           "justification": "No repository URL, code archive, or link to source code is provided anywhere in the paper.",
    122           "source": "opus"
    123         },
    124         "data_released": {
    125           "applies": true,
    126           "answer": true,
    127           "justification": "The paper uses the publicly available Tiny Shakespeare corpus (~1.1M character tokens), a standard public dataset.",
    128           "source": "opus"
    129         },
    130         "environment_specified": {
    131           "applies": true,
    132           "answer": false,
    133           "justification": "No environment specifications, dependency files, or hardware details are provided. The model architecture is described but no software stack or library versions are mentioned.",
    134           "source": "opus"
    135         },
    136         "reproduction_instructions": {
    137           "applies": true,
    138           "answer": false,
    139           "justification": "No step-by-step reproduction instructions, README, or scripts are provided. The methods section describes the setup conceptually but not enough to reproduce without guessing implementation details.",
    140           "source": "opus"
    141         }
    142       },
    143       "statistical_methodology": {
    144         "confidence_intervals_or_error_bars": {
    145           "applies": true,
    146           "answer": true,
    147           "justification": "Section III.F states 'shaded error bands (where plotted) denoting ± one standard deviation' and figures show mean ± s.d. across 5 seeds.",
    148           "source": "opus"
    149         },
    150         "significance_tests": {
    151           "applies": true,
    152           "answer": false,
    153           "justification": "The paper claims discontinuities and phase transitions but uses no formal statistical tests (no p-values, no change-point detection tests) to confirm that the observed cusps are statistically significant rather than visual impressions.",
    154           "source": "opus"
    155         },
    156         "effect_sizes_reported": {
    157           "applies": true,
    158           "answer": true,
    159           "justification": "The paper reports concrete magnitudes: word length jumps from ~1.5 to ~2.5 characters, dispersion values shift from D≈1 to D<1, and specific epoch ranges (230–250) for transitions.",
    160           "source": "opus"
    161         },
    162         "sample_size_justified": {
    163           "applies": true,
    164           "answer": false,
    165           "justification": "The paper uses 5 seeds and 30,000 tokens per checkpoint but provides no justification for why these numbers are sufficient. No power analysis or discussion of whether 5 seeds is adequate.",
    166           "source": "opus"
    167         },
    168         "variance_reported": {
    169           "applies": true,
    170           "answer": true,
    171           "justification": "Results are averaged across 5 independent seeds with standard deviation bands shown on figures, as stated in Section III.F.",
    172           "source": "opus"
    173         }
    174       },
    175       "evaluation_design": {
    176         "baselines_included": {
    177           "applies": true,
    178           "answer": true,
    179           "justification": "The Poisson distribution serves as the statistical baseline throughout. The paper compares empirical distributions against fitted Poisson baselines (Figs. 1–3, KL divergence analysis).",
    180           "source": "opus"
    181         },
    182         "baselines_contemporary": {
    183           "applies": true,
    184           "answer": false,
    185           "justification": "No comparison with other methods for detecting phase transitions or training dynamics (e.g., loss landscape analysis, representation similarity analysis, or other recently proposed probes).",
    186           "source": "opus"
    187         },
    188         "ablation_study": {
    189           "applies": true,
    190           "answer": false,
    191           "justification": "No ablation study is performed. The paper does not vary architecture size, dataset, tokenization, or other components to test which factors drive the observed transition.",
    192           "source": "opus"
    193         },
    194         "multiple_metrics": {
    195           "applies": true,
    196           "answer": true,
    197           "justification": "Multiple metrics are used: index of dispersion, KL divergence, average word length, unique vocabulary counts, and word frequency snapshots.",
    198           "source": "opus"
    199         },
    200         "human_evaluation": {
    201           "applies": false,
    202           "answer": false,
    203           "justification": "Human evaluation is not relevant to the claims about statistical signatures of phase transitions in training dynamics.",
    204           "source": "opus"
    205         },
    206         "held_out_test_set": {
    207           "applies": false,
    208           "answer": false,
    209           "justification": "This is not a benchmark evaluation paper. The study analyzes generated text during training, not performance on a test set.",
    210           "source": "opus"
    211         },
    212         "per_category_breakdown": {
    213           "applies": true,
    214           "answer": true,
    215           "justification": "Results are broken down by correct vs. incorrect words, and shown across multiple checkpoint stages (steps 0, 150, 250, 300, 350, 500, 599).",
    216           "source": "opus"
    217         },
    218         "failure_cases_discussed": {
    219           "applies": true,
    220           "answer": false,
    221           "justification": "No discussion of cases where the transition was less clear, seeds that behaved differently, or conditions under which the diagnostics might fail.",
    222           "source": "opus"
    223         },
    224         "negative_results_reported": {
    225           "applies": true,
    226           "answer": true,
    227           "justification": "The paper reports that standard loss/validation curves do NOT reveal the transition, which is a negative result about conventional metrics. Section IV.A: 'not visible in the smooth training/validation losses.'",
    228           "source": "opus"
    229         }
    230       },
    231       "setup_transparency": {
    232         "model_versions_specified": {
    233           "applies": true,
    234           "answer": true,
    235           "justification": "The model is custom-built and fully specified: embedding dimension 192, 8 transformer layers, 6 attention heads, ~3.6M parameters, context length 128 characters.",
    236           "source": "opus"
    237         },
    238         "prompts_provided": {
    239           "applies": false,
    240           "answer": false,
    241           "justification": "The paper does not use prompting. The model is trained from scratch with next-character prediction.",
    242           "source": "opus"
    243         },
    244         "hyperparameters_reported": {
    245           "applies": true,
    246           "answer": false,
    247           "justification": "Only decoding parameters are stated (temperature T=1.0, greedy top-1). Training hyperparameters (learning rate, optimizer, batch size, weight decay) are not reported.",
    248           "source": "opus"
    249         },
    250         "scaffolding_described": {
    251           "applies": false,
    252           "answer": false,
    253           "justification": "No agentic scaffolding is used. This is a standard model training study.",
    254           "source": "opus"
    255         },
    256         "data_preprocessing_documented": {
    257           "applies": true,
    258           "answer": true,
    259           "justification": "Section III.B describes the text segmentation procedure (whitespace and punctuation boundaries) and correct/incorrect classification (corpus vocabulary membership). Section III.D describes the windowing procedure (W=21 words).",
    260           "source": "opus"
    261         }
    262       },
    263       "data_integrity": {
    264         "raw_data_available": {
    265           "applies": true,
    266           "answer": false,
    267           "justification": "No generated text samples, computed statistics, or intermediate data are released for independent verification.",
    268           "source": "opus"
    269         },
    270         "data_collection_described": {
    271           "applies": true,
    272           "answer": true,
    273           "justification": "Section III.A describes the data source (Tiny Shakespeare, ~1.1M character tokens, 65 unique characters) and sampling procedure (30,000 tokens per checkpoint, 5 seeds, 0–600 epochs).",
    274           "source": "opus"
    275         },
    276         "recruitment_methods_described": {
    277           "applies": false,
    278           "answer": false,
    279           "justification": "No human participants. Data is a standard public corpus.",
    280           "source": "opus"
    281         },
    282         "data_pipeline_documented": {
    283           "applies": true,
    284           "answer": true,
    285           "justification": "The pipeline from text generation → segmentation → correct/incorrect labeling → windowing → dispersion/KL computation is described across Sections III.B–III.E.",
    286           "source": "opus"
    287         }
    288       },
    289       "contamination": {
    290         "training_cutoff_stated": {
    291           "applies": false,
    292           "answer": false,
    293           "justification": "The paper trains its own model from scratch on a known corpus. It does not evaluate a pre-trained model's capability on any benchmark.",
    294           "source": "opus"
    295         },
    296         "train_test_overlap_discussed": {
    297           "applies": false,
    298           "answer": false,
    299           "justification": "No pre-trained model is evaluated on a benchmark. The study analyzes training dynamics of a custom model.",
    300           "source": "opus"
    301         },
    302         "benchmark_contamination_addressed": {
    303           "applies": false,
    304           "answer": false,
    305           "justification": "No benchmark evaluation of a pre-trained model is performed.",
    306           "source": "opus"
    307         }
    308       },
    309       "human_studies": {
    310         "pre_registered": {
    311           "applies": false,
    312           "answer": false,
    313           "justification": "No human participants in this study.",
    314           "source": "opus"
    315         },
    316         "irb_or_ethics_approval": {
    317           "applies": false,
    318           "answer": false,
    319           "justification": "No human participants in this study.",
    320           "source": "opus"
    321         },
    322         "demographics_reported": {
    323           "applies": false,
    324           "answer": false,
    325           "justification": "No human participants in this study.",
    326           "source": "opus"
    327         },
    328         "inclusion_exclusion_criteria": {
    329           "applies": false,
    330           "answer": false,
    331           "justification": "No human participants in this study.",
    332           "source": "opus"
    333         },
    334         "randomization_described": {
    335           "applies": false,
    336           "answer": false,
    337           "justification": "No human participants in this study.",
    338           "source": "opus"
    339         },
    340         "blinding_described": {
    341           "applies": false,
    342           "answer": false,
    343           "justification": "No human participants in this study.",
    344           "source": "opus"
    345         },
    346         "attrition_reported": {
    347           "applies": false,
    348           "answer": false,
    349           "justification": "No human participants in this study.",
    350           "source": "opus"
    351         }
    352       },
    353       "cost_and_practicality": {
    354         "inference_cost_reported": {
    355           "applies": true,
    356           "answer": false,
    357           "justification": "No training or inference costs, wall-clock time, or hardware specifications are reported despite training across 5 seeds for 600 epochs.",
    358           "source": "opus"
    359         },
    360         "compute_budget_stated": {
    361           "applies": true,
    362           "answer": false,
    363           "justification": "No GPU hours, hardware used, or total compute budget is stated.",
    364           "source": "opus"
    365         }
    366       }
    367     }
    368   },
    369   "claims": [
    370     {
    371       "claim": "Phase-transition-like reorganizations are observable in small (3.6M parameter) transformers, not only in billion-parameter LLMs.",
    372       "evidence": "Synchronized discontinuities in dispersion, KL divergence, word length, and vocabulary counts observed across 5 seeds at epochs 230–250 in a 3.6M-parameter model.",
    373       "supported": "moderate"
    374     },
    375     {
    376       "claim": "Phase transitions in language model training can be detected directly in linear training space without logarithmic rescaling.",
    377       "evidence": "Dispersion and KL divergence cusps appear on the raw epoch axis (230–250) without any log transformation applied.",
    378       "supported": "moderate"
    379     },
    380     {
    381       "claim": "The transition occurs surprisingly early — around epochs 230–250 — well before loss convergence.",
    382       "evidence": "Loss and validation curves remain smooth while Poisson-based metrics show sharp discontinuities in the same epoch range; average word length jumps from ~1.5 to ~2.5 characters at this point.",
    383       "supported": "strong"
    384     },
    385     {
    386       "claim": "A dispersion flip characterizes the transition: correct words move from near-Poisson to sub-Poisson, incorrect words move from sub-Poisson to Poisson.",
    387       "evidence": "Figures 14–15 show these opposite trajectories across 5 seeds; the flip is synchronized with KL divergence cusps.",
    388       "supported": "strong"
    389     },
    390     {
    391       "claim": "Temporary performance degradation (error surge, dispersion reversion) immediately precedes consolidation, analogous to first-order phase transition barrier-crossing.",
    392       "evidence": "Incorrect vocabulary peaks at step 250 while correct-word dispersion temporarily returns to D≈1, matching theoretical predictions from Rubin et al.'s grokking model.",
    393       "supported": "moderate"
    394     },
    395     {
    396       "claim": "Standard loss and validation curves are insufficient to detect these phase transitions; custom statistical probes are necessary.",
    397       "evidence": "The paper explicitly states transitions 'are not apparent in standard loss or validation curves, but become visible through our vocabulary- and statistics-based probes.'",
    398       "supported": "strong"
    399     }
    400   ],
    401   "methodology_tags": [
    402     "observational",
    403     "theoretical"
    404   ],
    405   "key_findings": "A 3.6M-parameter character-level GPT transformer trained on Tiny Shakespeare exhibits synchronized discontinuities in Poisson dispersion, KL divergence, average word length, and vocabulary diversity around training epochs 230–250, which the authors interpret as a phase-transition-like reorganization from fragmentary to coherent word generation. These discontinuities are invisible in standard loss curves but visible through vocabulary-based statistical probes, and occur directly in linear training space without log rescaling. The paper argues this demonstrates that emergent-like reorganizations are not unique to large models, though the evidence is limited to a single architecture on a single small corpus. A temporary surge in errors and metric instability preceding consolidation is offered as evidence of barrier-crossing dynamics analogous to grokking.",
    406   "red_flags": [
    407     {
    408       "flag": "Single model, single corpus overgeneralization",
    409       "detail": "All findings come from one 3.6M-parameter model on Tiny Shakespeare (~1.1M characters), yet the paper claims 'phase-transition reorganizations are a general feature of language model training.' No other architecture, tokenization scheme, or corpus is tested."
    410     },
    411     {
    412       "flag": "No code or data released",
    413       "detail": "The training code, model checkpoints, sampled text, and computed metrics are all unreleased, so the reported results cannot be independently verified; only the training corpus (Tiny Shakespeare) is publicly available."
    414     },
    415     {
    416       "flag": "Key training hyperparameters missing",
    417       "detail": "Learning rate, optimizer, batch size, weight decay, and learning rate schedule — all factors that could substantially affect training dynamics and transition timing — are never reported."
    418     },
    419     {
    420       "flag": "No formal statistical tests",
    421       "detail": "All claims about synchronized discontinuities and transition timing rely on visual inspection of plots; no significance tests, permutation tests, or quantitative criteria for identifying a 'cusp' are provided."
    422     },
    423     {
    424       "flag": "Trivially small corpus may drive results",
    425       "detail": "Tiny Shakespeare has only 65 unique characters and ~1.1M character tokens, all from a single author's stylized text. The observed 'phase transition' in spelling may simply reflect the model memorizing a small vocabulary rather than a general principle of language model training."
    426     },
    427     {
    428       "flag": "Correct/incorrect word heuristic is corpus-dependent",
    429       "detail": "Words are labeled correct/incorrect based on presence in the Shakespeare corpus vocabulary, which the paper acknowledges 'excludes words beyond the training distribution.' This circularity means the metric measures alignment to the specific corpus rather than general linguistic coherence."
    430     }
    431   ],
    432   "cited_papers": [
    433     {
    434       "title": "Emergent Abilities of Large Language Models",
    435       "relevance": "Central motivation; paper investigates whether phase transitions underlying emergent abilities extend to small models"
    436     },
    437     {
    438       "title": "Grokking: Generalization Beyond Overfitting on Small Algorithmic Datasets",
    439       "relevance": "Primary small-model analogue; grokking is the closest known phenomenon to what this paper observes"
    440     },
    441     {
    442       "title": "Are Emergent Abilities of Large Language Models a Mirage?",
    443       "relevance": "Foundational critique the paper's methodology is explicitly designed to address; motivates use of continuous internal metrics over binary task thresholds"
    444     },
    445     {
    446       "title": "Grokking as a First Order Phase Transition in Two Layer Networks",
    447       "relevance": "Provides formal statistical-mechanics framework for interpreting the observed dispersion flip as barrier-crossing between competing representational minima"
    448     },
    449     {
    450       "title": "Statistical Mechanics of Deep Learning",
    451       "relevance": "Theoretical grounding for applying phase-transition concepts to deep learning systems"
    452     },
    453     {
    454       "title": "Progress Measures for Grokking via Mechanistic Interpretability",
    455       "relevance": "Mechanistic complement showing internal representation reorganization during grokking, paralleling this paper's external statistical signatures"
    456     },
    457     {
    458       "title": "Language Models are Few-Shot Learners (GPT-3)",
    459       "relevance": "Empirical anchor for emergent abilities at scale that this paper situates its small-model findings against"
    460     }
    461   ],
    462   "engagement_factors": {
    463     "practical_relevance": {
    464       "score": 1,
    465       "justification": "The proposed Poisson-based diagnostics could in principle be applied during training to detect transitions, but no code is released and the single-model evidence is too narrow for practitioners to adopt."
    466     },
    467     "surprise_contrarian": {
    468       "score": 2,
    469       "justification": "Challenges the widely held assumption that phase transitions and emergent abilities require massive scale, arguing a 3.6M-parameter toy model shows the same phenomenon."
    470     },
    471     "fear_safety": {
    472       "score": 0,
    473       "justification": "No safety or risk angle; the paper is a mechanistic study of training dynamics."
    474     },
    475     "drama_conflict": {
    476       "score": 1,
    477       "justification": "Engages directly with the Schaeffer et al. 'emergence is a mirage' controversy, positioning itself as an empirical rebuttal using continuous metrics."
    478     },
    479     "demo_ability": {
    480       "score": 1,
    481       "justification": "The model is small enough to retrain cheaply, but no code is provided, making it difficult for readers to replicate or demo."
    482     },
    483     "brand_recognition": {
    484       "score": 0,
    485       "justification": "Authors are a high school student and a Keysight Technologies employee; no affiliation with a recognized ML lab."
    486     }
    487   },
    488   "hn_data": {
    489     "threads": [
    490       {
    491         "hn_id": "33793174",
    492         "title": "Program Repair",
    493         "points": 25,
    494         "comments": 6,
    495         "url": "https://news.ycombinator.com/item?id=33793174",
    496         "created_at": "2022-11-29T20:56:48Z"
    497       },
    498       {
    499         "hn_id": "38422264",
    500         "title": "Prompting Frameworks for Large Language Models: A Survey",
    501         "points": 25,
    502         "comments": 4,
    503         "url": "https://news.ycombinator.com/item?id=38422264",
    504         "created_at": "2023-11-26T15:22:00Z"
    505       },
    506       {
    507         "hn_id": "46665309",
    508         "title": "Reverse Engineering the ESP32-C3 Wi-Fi Drivers for Static Worst-Case Analysis",
    509         "points": 8,
    510         "comments": 0,
    511         "url": "https://news.ycombinator.com/item?id=46665309",
    512         "created_at": "2026-01-18T06:27:12Z"
    513       },
    514       {
    515         "hn_id": "33745326",
    516         "title": "Program Repair",
    517         "points": 5,
    518         "comments": 0,
    519         "url": "https://news.ycombinator.com/item?id=33745326",
    520         "created_at": "2022-11-25T18:26:49Z"
    521       },
    522       {
    523         "hn_id": "42911811",
    524         "title": "Preserving Culinary Traditions. A Crowdsourced Digital Collection of Cookbooks",
    525         "points": 3,
    526         "comments": 0,
    527         "url": "https://news.ycombinator.com/item?id=42911811",
    528         "created_at": "2025-02-02T21:04:34Z"
    529       },
    530       {
    531         "hn_id": "38391666",
    532         "title": "Prompting Frameworks for Large Language Models: A Survey",
    533         "points": 2,
    534         "comments": 0,
    535         "url": "https://news.ycombinator.com/item?id=38391666",
    536         "created_at": "2023-11-23T11:28:55Z"
    537       },
    538       {
    539         "hn_id": "42204850",
    540         "title": "SEFD: Semantic-Enhanced Framework for Detecting LLM-Generated Text",
    541         "points": 1,
    542         "comments": 0,
    543         "url": "https://news.ycombinator.com/item?id=42204850",
    544         "created_at": "2024-11-21T14:54:19Z"
    545       },
    546       {
    547         "hn_id": "38473609",
    548         "title": "AviationGPT: A Large Language Model for the Aviation Domain",
    549         "points": 1,
    550         "comments": 0,
    551         "url": "https://news.ycombinator.com/item?id=38473609",
    552         "created_at": "2023-11-30T14:00:57Z"
    553       },
    554       {
    555         "hn_id": "38388226",
    556         "title": "Prompting Frameworks for Large Language Models: A Survey",
    557         "points": 1,
    558         "comments": 0,
    559         "url": "https://news.ycombinator.com/item?id=38388226",
    560         "created_at": "2023-11-23T01:55:17Z"
    561       }
    562     ],
    563     "top_points": 25,
    564     "total_points": 71,
    565     "total_comments": 10
    566   }
    567 }

Impressum · Datenschutz