scan-v4.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v4.json (28779B)
      1 {
      2   "scan_version": 4,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "FairMindSim: Alignment of Behavior, Emotion, and Belief in Humans and LLM Agents Amid Ethical Dilemmas",
      6     "authors": [
      7       "Yu Lei",
      8       "Hao Liu",
      9       "Chengxing Xie",
     10       "Songjia Liu",
     11       "Zhiyu Yin"
     12     ],
     13     "year": 2024,
     14     "venue": "arXiv.org",
     15     "arxiv_id": "2410.10398",
     16     "doi": "10.48550/arXiv.2410.10398"
     17   },
     18   "checklist": {
     19     "claims_and_evidence": {
     20       "abstract_claims_supported": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "The abstract claims GPT-4o 'exhibits a stronger sense of social justice' (supported by rejection rate data in Table 2/Figure 4) and humans 'display a richer range of emotions' (supported by entropy analysis in Figure 5). Claims are broadly supported by the presented data.",
     24         "source": "opus"
     25       },
     26       "causal_claims_justified": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "Section 4.4 states 'emotions influence decisions' and 'emotions influence human decision-making.' These are causal claims based on correlational evidence (heatmaps in Figure 7). The with/without emotion comparison in BREM is model-based, not a controlled manipulation of actual emotions.",
     30         "source": "opus"
     31       },
     32       "generalization_bounded": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "The title and abstract claim alignment between 'Humans and LLM Agents' generally, but results are from 100 participants using RMB (likely Chinese participants) and only GPT-series models. The paper's broad framing ('LLM Agents') is not bounded to the tested GPT models and specific cultural context.",
     36         "source": "opus"
     37       },
     38       "alternative_explanations_discussed": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "The paper mentions the 'Stochastic Parrot' hypothesis in the introduction (Section 1) but does not discuss alternative explanations for its specific findings. For instance, GPT-4o's higher rejection rate could reflect RLHF training bias toward 'fair' responses rather than genuine fairness reasoning, but this is not explored.",
     42         "source": "opus"
     43       },
     44       "proxy_outcome_distinction": {
     45         "applies": true,
     46         "answer": false,
     47         "justification": "The paper measures game rejection rates and frames them as 'social justice' and 'fairness,' and measures self-reported emotion grid values from LLMs and frames them as 'emotions.' No discussion of whether rejection rates in an economic game adequately proxy for moral values, or whether LLM numerical outputs on a valence-arousal grid constitute emotions.",
     48         "source": "opus"
     49       }
     50     },
     51     "limitations_and_scope": {
     52       "limitations_section_present": {
     53         "applies": true,
     54         "answer": true,
     55         "justification": "Section 7 'Limitations and Future Work' is a dedicated section discussing specific limitations.",
     56         "source": "opus"
     57       },
     58       "threats_to_validity_specific": {
     59         "applies": true,
     60         "answer": true,
     61         "justification": "Section 7 identifies specific threats: no cross-country comparison (cultural differences may influence decision-making), limited to GPT series only (not tested on open-source LLMs), and no verification of applicability across different models.",
     62         "source": "opus"
     63       },
     64       "scope_boundaries_stated": {
     65         "applies": true,
     66         "answer": true,
     67         "justification": "Section 7 explicitly states what was not tested: 'This study does not account for potential differences between countries' and 'the current research is limited to testing on the GPT series of models and has not yet expanded to include other open-source LLMs.'",
     68         "source": "opus"
     69       }
     70     },
     71     "conflicts_of_interest": {
     72       "funding_disclosed": {
     73         "applies": true,
     74         "answer": false,
     75         "justification": "No funding source or acknowledgments section is present in the paper. There is no mention of grants, corporate sponsors, or funding agencies.",
     76         "source": "opus"
     77       },
     78       "affiliations_disclosed": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "Author affiliations are clearly listed: Tsinghua University, University of Oxford, KAUST, Fudan University, Illinois Institute of Technology, Stevens Institute of Technology, and CAMEL-AI.org.",
     82         "source": "opus"
     83       },
     84       "funder_independent_of_outcome": {
     85         "applies": true,
     86         "answer": false,
     87         "justification": "Since no funding is disclosed, independence of funders cannot be assessed. The use of OpenAI models (GPT series) raises the question of whether API access was funded or provided by OpenAI, which is not addressed.",
     88         "source": "opus"
     89       },
     90       "financial_interests_declared": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "No competing interests statement or financial disclosure is present. One author is affiliated with CAMEL-AI.org, and the experiments use the CAMEL framework, but no conflict is declared.",
     94         "source": "opus"
     95       }
     96     },
     97     "scope_and_framing": {
     98       "key_terms_defined": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "'Beliefs' is explicitly defined as 'factors not related to rewards but still impacting subsequent behavior'; 'alignment' is contextualized as behavior/emotion/belief alignment; 'fairness' is operationalized through the economic game structure.",
    102         "source": "haiku"
    103       },
    104       "intended_contribution_clear": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "The paper lists four explicit contributions: a value-alignment perspective, the FairMindSim framework, the BREM theoretical model, and empirical findings on GPT-4o vs. human behavioral differences.",
    108         "source": "haiku"
    109       },
    110       "engagement_with_prior_work": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "Section 2 covers related work on ethical values in humans and AI, LLM agent simulation, and economic game theory; the paper situates FairMindSim relative to prior ultimatum game simulations and alignment research.",
    114         "source": "haiku"
    115       }
    116     }
    117   },
    118   "type_checklist": {
    119     "empirical": {
    120       "artifacts": {
    121         "code_released": {
    122           "applies": true,
    123           "answer": true,
    124           "justification": "GitHub repository linked in footnote on page 1: https://github.com/leiyu0210/FairMindSim",
    125           "source": "opus"
    126         },
    127         "data_released": {
    128           "applies": true,
    129           "answer": false,
    130           "justification": "No mention of releasing human participant data or LLM output data. The paper provides no dataset download link or supplementary data files.",
    131           "source": "opus"
    132         },
    133         "environment_specified": {
    134           "applies": true,
    135           "answer": false,
    136           "justification": "The paper mentions using the CAMEL framework (Li et al., 2023) but provides no requirements.txt, Dockerfile, or dependency version listing. No environment setup section is present.",
    137           "source": "opus"
    138         },
    139         "reproduction_instructions": {
    140           "applies": true,
    141           "answer": false,
    142           "justification": "No step-by-step reproduction instructions are provided in the paper. While code is released, the paper itself contains no 'Reproducing Results' section or commands to run.",
    143           "source": "opus"
    144         }
    145       },
    146       "statistical_methodology": {
    147         "confidence_intervals_or_error_bars": {
    148           "applies": true,
    149           "answer": false,
    150           "justification": "Table 2 reports raw reward scores without confidence intervals or error bars. Figures 4-7 show distributions and rates but no uncertainty quantification on the main results.",
    151           "source": "opus"
    152         },
    153         "significance_tests": {
    154           "applies": true,
    155           "answer": false,
    156           "justification": "The paper claims differences between groups (e.g., GPT-4o has higher rejection rates, humans have more diverse emotions) but reports no statistical significance tests — no p-values, t-tests, chi-squared tests, or any inferential statistics.",
    157           "source": "opus"
    158         },
    159         "effect_sizes_reported": {
    160           "applies": true,
    161           "answer": false,
    162           "justification": "No effect sizes (Cohen's d, odds ratios, etc.) are reported. Comparisons are made by presenting raw numbers and rates side by side without quantifying effect magnitudes.",
    163           "source": "opus"
    164         },
    165         "sample_size_justified": {
    166           "applies": true,
    167           "answer": false,
    168           "justification": "N=100 human participants (50 per condition) with no justification for this sample size and no power analysis. No justification given for the number of LLM agent runs either.",
    169           "source": "opus"
    170         },
    171         "variance_reported": {
    172           "applies": true,
    173           "answer": false,
    174           "justification": "Standard deviations are reported only for participant age (Table 1: SD=5.76 and 5.58). No variance, standard deviation, or spread measures are reported for the main outcome variables (rejection rates, reward scores, emotion entropy, belief values).",
    175           "source": "opus"
    176         }
    177       },
    178       "evaluation_design": {
    179         "baselines_included": {
    180           "applies": true,
    181           "answer": true,
    182           "justification": "The study compares four groups: humans, GPT-3.5, GPT-4 Turbo, and GPT-4o, which serve as baselines for each other. Results are presented in Table 2 and Figures 4-7.",
    183           "source": "opus"
    184         },
    185         "baselines_contemporary": {
    186           "applies": true,
    187           "answer": true,
    188           "justification": "GPT-4o was the most recent OpenAI model at time of writing. GPT-4-1106 and GPT-3.5-turbo-0125 were contemporary versions.",
    189           "source": "opus"
    190         },
    191         "ablation_study": {
    192           "applies": true,
    193           "answer": true,
    194           "justification": "The BREM model is tested with and without the emotion component (Figures 6a vs 6b, Figures 7a vs 7b), constituting an ablation of the emotion factor's contribution to belief evolution.",
    195           "source": "opus"
    196         },
    197         "multiple_metrics": {
    198           "applies": true,
    199           "answer": true,
    200           "justification": "Multiple metrics are used: rejection rates (behavior), reward scores, emotional entropy (valence and arousal dimensions), and belief trajectory values.",
    201           "source": "opus"
    202         },
    203         "human_evaluation": {
    204           "applies": true,
    205           "answer": false,
    206           "justification": "The human participants are subjects in the experiment, not evaluators of LLM outputs. No humans evaluate the quality or alignment of LLM responses. The evaluation of LLM behavior is done by comparing aggregate statistics.",
    207           "source": "opus"
    208         },
    209         "held_out_test_set": {
    210           "applies": true,
    211           "answer": false,
    212           "justification": "The BREM model parameters (β1, β2, γ) are fit to the behavioral data, but there is no held-out test set or cross-validation to validate the model. All data appears used for both fitting and evaluation.",
    213           "source": "opus"
    214         },
    215         "per_category_breakdown": {
    216           "applies": true,
    217           "answer": true,
    218           "justification": "Results are broken down by condition (Condition 1 vs 2 in Figure 4b), gender (Figure 4c), and model type (Figures 4-7, Table 2).",
    219           "source": "opus"
    220         },
    221         "failure_cases_discussed": {
    222           "applies": true,
    223           "answer": false,
    224           "justification": "No failure cases are discussed. The paper does not examine where LLM agents make unexpected or inconsistent decisions, nor does it analyze cases where the BREM model poorly fits observed behavior.",
    225           "source": "opus"
    226         },
    227         "negative_results_reported": {
    228           "applies": true,
    229           "answer": false,
    230           "justification": "Every comparison shows the expected or positive narrative (GPT-4o is more fair, humans are more emotional). No experiments that failed or approaches that were tried and abandoned are reported.",
    231           "source": "opus"
    232         }
    233       },
    234       "setup_transparency": {
    235         "model_versions_specified": {
    236           "applies": true,
    237           "answer": false,
    238           "justification": "Section 3.1.3 lists 'GPT-4o, GPT-4-1106, GPT-3.5-turbo-0125.' GPT-4-1106 and GPT-3.5-turbo-0125 are versioned, but 'GPT-4o' is a marketing name without a snapshot date or API version, and model behavior changes across GPT-4o versions.",
    239           "source": "opus"
    240         },
    241         "prompts_provided": {
    242           "applies": true,
    243           "answer": true,
    244           "justification": "Full prompt text is provided in the appendices: system prompt (C.1), game prompt with emotion measurement instructions (C.2), and a complete persona prompt example (E.1) with all AQ and SDS items.",
    245           "source": "opus"
    246         },
    247         "hyperparameters_reported": {
    248           "applies": true,
    249           "answer": false,
    250           "justification": "No temperature, top-p, max tokens, or other API parameters are reported for the LLM calls. These settings significantly affect LLM output behavior.",
    251           "source": "opus"
    252         },
    253         "scaffolding_described": {
    254           "applies": true,
    255           "answer": true,
    256           "justification": "Section 3.1 describes the agent architecture with three modules (profiling, memory, decision-making), the CAMEL framework (Li et al., 2023) is named, Algorithm 1 in Appendix B details the procedure, and full prompts are provided.",
    257           "source": "opus"
    258         },
    259         "data_preprocessing_documented": {
    260           "applies": true,
    261           "answer": false,
    262           "justification": "Emotion normalization to [0,1] is described in Section 4.2, and entropy computation is specified. However, the pipeline from raw game outputs (LLM API responses) to the analyzed data is not documented — no description of how LLM text responses were parsed into structured emotion scores and decisions.",
    263           "source": "opus"
    264         }
    265       },
    266       "data_integrity": {
    267         "raw_data_available": {
    268           "applies": true,
    269           "answer": false,
    270           "justification": "No raw data (human participant responses, LLM outputs, game logs) is made available. Only aggregated results are presented.",
    271           "source": "opus"
    272         },
    273         "data_collection_described": {
    274           "applies": true,
    275           "answer": true,
    276           "justification": "Section 3.1 describes the experimental procedure in detail: 20-round third-party ultimatum game, allocation schemes (Figure 2), emotion grid measurement at three points per trial (Section 3.1.2), and post-game questionnaires (AQ, SDS).",
    277           "source": "opus"
    278         },
    279         "recruitment_methods_described": {
    280           "applies": true,
    281           "answer": false,
    282           "justification": "Section 3.1.2 states '100 participants from various regions' were 'randomly assigned' to conditions, but provides no information about how participants were recruited (online platform, university recruitment, crowdsourcing, etc.).",
    283           "source": "opus"
    284         },
    285         "data_pipeline_documented": {
    286           "applies": true,
    287           "answer": false,
    288           "justification": "There is no documentation of how raw game data (participant clicks on emotion grid, accept/reject decisions, LLM text outputs) was transformed into the analyzed metrics. The pipeline from collection to final analysis is not described.",
    289           "source": "opus"
    290         }
    291       },
    292       "contamination": {
    293         "training_cutoff_stated": {
    294           "applies": false,
    295           "answer": false,
    296           "justification": "The paper tests LLM behavioral responses in a custom economic game scenario, not model capability on any benchmark. There is no benchmark that could be contaminated by training data.",
    297           "source": "opus"
    298         },
    299         "train_test_overlap_discussed": {
    300           "applies": false,
    301           "answer": false,
    302           "justification": "Not applicable — the game scenarios are constructed experimentally and there is no established benchmark or test set that could overlap with training data.",
    303           "source": "opus"
    304         },
    305         "benchmark_contamination_addressed": {
    306           "applies": false,
    307           "answer": false,
    308           "justification": "Not applicable — no benchmark evaluation is performed. The study tests behavioral responses in a novel experimental paradigm.",
    309           "source": "opus"
    310         }
    311       },
    312       "human_studies": {
    313         "pre_registered": {
    314           "applies": true,
    315           "answer": false,
    316           "justification": "No mention of pre-registration on OSF, AsPredicted, or any other registry.",
    317           "source": "opus"
    318         },
    319         "irb_or_ethics_approval": {
    320           "applies": true,
    321           "answer": true,
    322           "justification": "Section 3.1.2 states: 'The study received ethical approval from the university's ethics committee and informed consent was obtained from all participants prior to the experiment.'",
    323           "source": "opus"
    324         },
    325         "demographics_reported": {
    326           "applies": true,
    327           "answer": true,
    328           "justification": "Table 1 reports participant demographics: group sizes (50 each), average age, standard deviation of age, and gender distribution for both conditions.",
    329           "source": "opus"
    330         },
    331         "inclusion_exclusion_criteria": {
    332           "applies": true,
    333           "answer": false,
    334           "justification": "No inclusion or exclusion criteria for participants are stated. The paper only says '100 participants from various regions' without describing any screening process or eligibility requirements.",
    335           "source": "opus"
    336         },
    337         "randomization_described": {
    338           "applies": true,
    339           "answer": false,
    340           "justification": "Section 3.1.2 states participants were 'randomly assigned to either a selfish group or an extreme selfish group,' but the randomization procedure is not described — no stratification method, randomization tool, or allocation concealment is mentioned.",
    341           "source": "opus"
    342         },
    343         "blinding_described": {
    344           "applies": true,
    345           "answer": false,
    346           "justification": "No blinding is described. It is unclear whether participants knew which condition (selfish vs. extremely selfish allocation scheme) they were assigned to.",
    347           "source": "opus"
    348         },
    349         "attrition_reported": {
    350           "applies": true,
    351           "answer": false,
    352           "justification": "No information on participant attrition or dropout. The paper starts with 100 participants and presents results without reporting whether all 100 completed all 20 rounds.",
    353           "source": "opus"
    354         }
    355       },
    356       "cost_and_practicality": {
    357         "inference_cost_reported": {
    358           "applies": true,
    359           "answer": false,
    360           "justification": "No API costs, tokens consumed, or latency are reported despite running 100 agents across 20 rounds each for 3 different LLM models (at least 6,000 LLM calls).",
    361           "source": "opus"
    362         },
    363         "compute_budget_stated": {
    364           "applies": true,
    365           "answer": false,
    366           "justification": "No computational budget or total API spend is stated for the LLM experiments.",
    367           "source": "opus"
    368         }
    369       }
    370     }
    371   },
    372   "claims": [
    373     {
    374       "claim": "GPT-4o exhibits a stronger sense of social justice than humans and other LLMs, reflected in higher rejection rates of unfair allocations.",
    375       "evidence": "Table 2 shows GPT-4o total reward score of 603 vs. humans at 1167 and GPT-3.5 at 1598, with Figure 4a confirming GPT-4o's higher rejection rate across conditions.",
    376       "supported": "moderate"
    377     },
    378     {
    379       "claim": "Humans display a richer and more diverse range of emotions than LLMs in response to fairness dilemmas.",
    380       "evidence": "Figure 5 shows human entropy values are highest for both valence and arousal dimensions compared to all three GPT models.",
    381       "supported": "moderate"
    382     },
    383     {
    384       "claim": "Beliefs influence decision-making more than immediate monetary rewards for both humans and LLMs (β1 > β2).",
    385       "evidence": "BREM fitting results reported in Section 4.3 state β1 > β2 for all groups, but raw fitted parameter values are not provided for verification.",
    386       "supported": "weak"
    387     },
    388     {
    389       "claim": "Emotions significantly modulate human beliefs and behavior-belief correlation but have minimal impact on LLM behavior.",
    390       "evidence": "Figure 7 heatmaps show that without emotions human behavior-belief correlation is non-significant, but becomes significant when emotions are included; LLM heatmaps show little change.",
    391       "supported": "moderate"
    392     },
    393     {
    394       "claim": "GPT-4o's beliefs remain stable across trial evolution while human beliefs fluctuate and decrease over time.",
    395       "evidence": "Figure 6a shows human belief values decreasing as trials progress while GPT-4o belief values remain relatively stable.",
    396       "supported": "moderate"
    397     }
    398   ],
    399   "methodology_tags": [
    400     "observational",
    401     "benchmark-eval",
    402     "qualitative"
    403   ],
    404   "key_findings": "GPT-4o demonstrates substantially higher altruistic punishment behavior (rejection of unfair allocations) compared to humans and other GPT models in a third-party ultimatum game, interpreted as stronger alignment with fairness and social justice norms. Humans exhibit higher emotional diversity (entropy) across valence and arousal dimensions than all tested LLMs (a descriptive comparison; no significance test is reported). The proposed BREM model finds that beliefs (socioeconomic motivations) consistently exert stronger influence on decisions than immediate rewards (β1 > β2). Human beliefs and behavior-belief correlations are substantially modulated by emotional state, while LLM belief patterns remain stable regardless of whether emotional factors are incorporated into the model.",
    405   "red_flags": [
    406     {
    407       "flag": "No statistical testing",
    408       "detail": "All comparative claims between humans and LLMs are asserted without statistical significance tests, confidence intervals, or effect sizes, making it impossible to assess whether observed differences exceed chance."
    409     },
    410     {
    411       "flag": "Proxy measure conflation",
    412       "detail": "Rejection rate in a stylized economic game is equated with 'sense of social justice' and 'moral alignment' without validation that game behavior captures these broader constructs."
    413     },
    414     {
    415       "flag": "Contamination ignored",
    416       "detail": "The altruistic punishment ultimatum game is extensively documented in economics literature that LLMs were likely trained on; GPT-4o's high rejection rate may reflect training data patterns rather than genuine moral alignment."
    417     },
    418     {
    419       "flag": "GPT-only generalization",
    420       "detail": "Conclusions about 'LLM agents' broadly are drawn from three GPT models sharing the same training pipeline and RLHF approach; findings may not generalize to other model families."
    421     },
    422     {
    423       "flag": "BREM parameters underreported",
    424       "detail": "The central claim that β1 > β2 (beliefs dominate rewards) is stated without providing the actual fitted parameter values, preventing independent verification."
    425     },
    426     {
    427       "flag": "Recruitment opacity",
    428       "detail": "100 participants are recruited from 'various regions' with no description of recruitment method, compensation, platform, or eligibility criteria."
    429     },
    430     {
    431       "flag": "No power analysis",
    432       "detail": "Sample size of 100 participants (50 per condition) is unjustified given the multiple subgroup analyses performed across condition, gender, and model comparisons."
    433     }
    434   ],
    435   "cited_papers": [
    436     {
    437       "title": "Altruistic punishment in humans (Fehr & Gächter, 2002)",
    438       "relevance": "Foundational economic game paradigm — the third-party ultimatum game that FairMindSim is built upon"
    439     },
    440     {
    441       "title": "Scalable agent alignment via reward modeling: a research direction (Leike et al., 2018)",
    442       "relevance": "Basis for the recursive reward model (RRM) that the proposed BREM model extends"
    443     },
    444     {
    445       "title": "Artificial intelligence, values, and alignment (Gabriel, 2020)",
    446       "relevance": "Conceptual framing for value alignment that motivates the study design"
    447     },
    448     {
    449       "title": "Large language models as simulated economic agents: What can we learn from homo silicus? (Horton, 2023)",
    450       "relevance": "Prior work on using LLMs to simulate economic behavior, directly analogous to FairMindSim's approach"
    451     },
    452     {
    453       "title": "CAMEL: Communicative agents for 'mind' exploration of large language model society (Li et al., 2023)",
    454       "relevance": "The multi-agent framework used to implement all LLM experiments in FairMindSim"
    455     },
    456     {
    457       "title": "Can large language model agents simulate human trust behaviors? (Xie et al., 2024)",
    458       "relevance": "Related work directly comparing LLM agents and humans in social game-theoretic settings"
    459     },
    460     {
    461       "title": "Testing theory of mind in large language models and humans (Strachan et al., 2024)",
    462       "relevance": "Related benchmark comparing LLMs and humans on psychological constructs"
    463     },
    464     {
    465       "title": "Automated social science: Language models as scientist and subjects (Manning et al., 2024)",
    466       "relevance": "Closely related methodology using LLMs as simulated participants in social science studies"
    467     },
    468     {
    469       "title": "Managing extreme AI risks amid rapid progress (Bengio et al., 2024)",
    470       "relevance": "Contextualizes AI alignment as a critical safety priority motivating this research"
    471     },
    472     {
    473       "title": "Can large language models transform computational social science? (Ziems et al., 2024)",
    474       "relevance": "Broader survey of applying LLMs to social science simulations, situating FairMindSim's contribution"
    475     }
    476   ],
    477   "engagement_factors": {
    478     "practical_relevance": {
    479       "score": 0,
    480       "justification": "Academic research on LLM fairness in economic games with no immediately usable tool or technique for practitioners."
    481     },
    482     "surprise_contrarian": {
    483       "score": 1,
    484       "justification": "The finding that GPT-4o displays 'more social justice' than humans is mildly surprising but aligns with known RLHF tendencies."
    485     },
    486     "fear_safety": {
    487       "score": 1,
    488       "justification": "Touches on AI alignment and value alignment but does not demonstrate novel risks or attacks."
    489     },
    490     "drama_conflict": {
    491       "score": 0,
    492       "justification": "No controversy or provocative claims about industry practices."
    493     },
    494     "demo_ability": {
    495       "score": 1,
    496       "justification": "Code is on GitHub but it's an academic experiment requiring API keys and human subjects, not a tryable demo."
    497     },
    498     "brand_recognition": {
    499       "score": 1,
    500       "justification": "Uses GPT-4o (OpenAI) but is from university researchers, not a major AI lab."
    501     }
    502   },
    503   "hn_data": {
    504     "threads": [
    505       {
    506         "hn_id": "47180140",
    507         "title": "Chorba: A novel CRC32 implementation (2024)",
    508         "points": 70,
    509         "comments": 20,
    510         "url": "https://news.ycombinator.com/item?id=47180140"
    511       },
    512       {
    513         "hn_id": "42027043",
    514         "title": "Smoothed asymptotics: from number theory to quantum field theory",
    515         "points": 3,
    516         "comments": 0,
    517         "url": "https://news.ycombinator.com/item?id=42027043"
    518       },
    519       {
    520         "hn_id": "42458903",
    521         "title": "Pattern Matching in AI Compilers and Its Formalization (Extended Version)",
    522         "points": 2,
    523         "comments": 0,
    524         "url": "https://news.ycombinator.com/item?id=42458903"
    525       },
    526       {
    527         "hn_id": "42627675",
    528         "title": "The Reliability Issue in ReRam-Based CIM Architecture for SNN: A Survey",
    529         "points": 1,
    530         "comments": 0,
    531         "url": "https://news.ycombinator.com/item?id=42627675"
    532       },
    533       {
    534         "hn_id": "39420324",
    535         "title": "Smoothed asymptotics: from number theory to quantum field theory",
    536         "points": 1,
    537         "comments": 0,
    538         "url": "https://news.ycombinator.com/item?id=39420324"
    539       }
    540     ],
    541     "top_points": 70,
    542     "total_points": 77,
    543     "total_comments": 20
    544   }
    545 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs