scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (28979B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Disagreements in Reasoning: How a Model's Thinking Process Dictates Persuasion in Multi-Agent Systems",
      6     "authors": [
      7       "Haodong Zhao",
      8       "Jidong Li",
      9       "Zhaomin Wu",
     10       "Tianjie Ju",
     11       "Zhuosheng Zhang"
     12     ],
     13     "year": 2025,
     14     "venue": "arXiv.org",
     15     "arxiv_id": "2509.21054",
     16     "doi": "10.48550/arXiv.2509.21054"
     17   },
     18   "checklist": {
     19     "claims_and_evidence": {
     20       "abstract_claims_supported": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "All major abstract claims are supported by experimental data: LRMs show greater persuasion resistance (heatmaps in Figs 1-2 show lower PR rows for thinking models), and sharing thinking content dramatically increases persuasive power (avg 21.07% increase reported).",
     24         "source": "haiku"
     25       },
     26       "causal_claims_justified": {
     27         "applies": true,
     28         "answer": true,
     29         "justification": "The paper uses switchable thinking/non-thinking modes on the same model families (Gemini-2.5-flash, Qwen3-32B, Hunyuan-7B) as a within-model ablation, providing a reasonable basis for causal claims; Fig 6 further isolates content quality from length effects.",
     30         "source": "haiku"
     31       },
     32       "generalization_bounded": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "Conclusions about 'safer and more resilient MAS architectures' and implications for 'future MAS' are stated broadly, but experiments cover only two datasets (MMLU, PersuasionBench/Perspectrum) and 7 model families. No explicit scope boundaries are stated.",
     36         "source": "haiku"
     37       },
     38       "alternative_explanations_discussed": {
     39         "applies": true,
     40         "answer": true,
     41         "justification": "The paper explicitly tests output length as an alternative explanation for why thinking content improves persuasion via the padding condition in Fig 6, distinguishing verbosity from semantic content quality.",
     42         "source": "haiku"
     43       },
     44       "proxy_outcome_distinction": {
     45         "applies": true,
     46         "answer": true,
     47         "justification": "The paper clearly defines LLM persuasion as a behavioral metric (option change rate) distinct from human belief change, explicitly noting in Section 2.2 that LLMs 'lack mental states in the human sense' and adopting a behavioral definition.",
     48         "source": "haiku"
     49       }
     50     },
     51     "limitations_and_scope": {
     52       "limitations_section_present": {
     53         "applies": true,
     54         "answer": false,
     55         "justification": "No dedicated limitations section exists. The conclusion (Section 5) only summarizes contributions and calls for future research without discussing limitations.",
     56         "source": "haiku"
     57       },
     58       "threats_to_validity_specific": {
     59         "applies": true,
     60         "answer": false,
     61         "justification": "No threats-to-validity are discussed. The artificial setup (correct answers standardized to option A, persuasion target fixed to option D) and narrow dataset scope are not acknowledged as validity threats.",
     62         "source": "haiku"
     63       },
     64       "scope_boundaries_stated": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "No explicit scope boundaries are stated about what results do NOT show. Findings are generalized to 'multi-agent systems' broadly without qualifying the narrow experimental conditions.",
     68         "source": "haiku"
     69       }
     70     },
     71     "conflicts_of_interest": {
     72       "funding_disclosed": {
     73         "applies": true,
     74         "answer": false,
     75         "justification": "No acknowledgments or funding section is present in the paper. No mention of funding sources anywhere in the text.",
     76         "source": "haiku"
     77       },
     78       "affiliations_disclosed": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "Author affiliations are clearly disclosed on the title page: Shanghai Jiao Tong University, National University of Singapore, and Inner Mongolia Research Institute.",
     82         "source": "haiku"
     83       },
     84       "funder_independent_of_outcome": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "No funding is disclosed, so funder independence cannot be assessed.",
     88         "source": "haiku"
     89       },
     90       "financial_interests_declared": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "No competing interests statement or declaration of financial interests is present in the paper.",
     94         "source": "haiku"
     95       }
     96     },
     97     "scope_and_framing": {
     98       "key_terms_defined": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "Key terms are formally defined: LLMs vs. LRMs (Section 2), human vs. LLM persuasion (Definitions 2.1-2.2), and quantitative metrics PR/RR/OR are given with explicit formulas (Equations 1-3).",
    102         "source": "haiku"
    103       },
    104       "intended_contribution_clear": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "Four explicit contributions are listed in the introduction: linking LRM cognitive architecture to persuasion behavior, formalizing the Persuasion Duality, multi-hop chain analysis, and attention-based explanation plus prompt mitigation.",
    108         "source": "haiku"
    109       },
    110       "engagement_with_prior_work": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "The paper explicitly challenges Breum et al. (2024)'s scale hypothesis, builds on Jones & Bergen (2024)'s LLM persuasion framework, and situates itself relative to PersuasionBench and PMIYC frameworks.",
    114         "source": "haiku"
    115       }
    116     }
    117   },
    118   "type_checklist": {
    119     "empirical": {
    120       "artifacts": {
    121         "code_released": {
    122           "applies": true,
    123           "answer": false,
    124           "justification": "No code repository or release is mentioned anywhere in the paper. No GitHub link or promise of release.",
    125           "source": "haiku"
    126         },
    127         "data_released": {
    128           "applies": true,
    129           "answer": true,
    130           "justification": "The paper uses only standard public benchmarks: MMLU (Hendrycks et al., 2020), PersuasionBench (Durmus et al., 2024), and Perspectrum (Chen et al., 2019), all publicly available.",
    131           "source": "haiku"
    132         },
    133         "environment_specified": {
    134           "applies": true,
    135           "answer": true,
    136           "justification": "Environment specified in Appendix A.3: VLLM v0.10.0, transformers v4.56.0, temperature=0.7, top_p=0.8.",
    137           "source": "haiku"
    138         },
    139         "reproduction_instructions": {
    140           "applies": true,
    141           "answer": false,
    142           "justification": "No step-by-step reproduction instructions are provided. The appendix covers datasets, models, and hyperparameters but not a reproducible pipeline for running the full experiments.",
    143           "source": "haiku"
    144         }
    145       },
    146       "statistical_methodology": {
    147         "confidence_intervals_or_error_bars": {
    148           "applies": true,
    149           "answer": true,
    150           "justification": "All heatmap results (Figs 1-4, 13-14) include ± error bars consistently (e.g., '7.0 ± 1.6' in Fig 1a).",
    151           "source": "haiku"
    152         },
    153         "significance_tests": {
    154           "applies": true,
    155           "answer": false,
    156           "justification": "No formal statistical significance tests are reported. Comparative conclusions rely on visual inspection of heatmaps and average percentage differences without p-values.",
    157           "source": "haiku"
    158         },
    159         "effect_sizes_reported": {
    160           "applies": true,
    161           "answer": true,
    162           "justification": "Effect sizes reported as percentage differences throughout (e.g., '21.07% average increase in persuasion rate' when thinking content added; '19% relative improvement' for native thinking content in Fig 6).",
    163           "source": "haiku"
    164         },
    165         "sample_size_justified": {
    166           "applies": true,
    167           "answer": false,
    168           "justification": "The datasets (~10,000 MMLU questions, 1,000 subjective claims) are described but not justified. No power analysis or sample size rationale is provided.",
    169           "source": "haiku"
    170         },
    171         "variance_reported": {
    172           "applies": true,
    173           "answer": true,
    174           "justification": "± error bars are consistently reported across all heatmaps throughout the paper.",
    175           "source": "haiku"
    176         }
    177       },
    178       "evaluation_design": {
    179         "baselines_included": {
    180           "applies": true,
    181           "answer": true,
    182           "justification": "Each thinking-mode LRM is compared against its own non-thinking counterpart as a within-model baseline; direct pairwise comparisons serve as baselines for multi-hop experiments.",
    183           "source": "haiku"
    184         },
    185         "baselines_contemporary": {
    186           "applies": true,
    187           "answer": true,
    188           "justification": "Baselines include current frontier models (o4-mini, DeepSeek-R1, Gemini-2.5-flash, Qwen3-32B) all from 2025.",
    189           "source": "haiku"
    190         },
    191         "ablation_study": {
    192           "applies": true,
    193           "answer": true,
    194           "justification": "Figure 6 presents an explicit ablation: native thinking content vs. equal-length padding tokens vs. mismatched thinking content vs. no thinking content baseline, isolating verbosity from semantic quality.",
    195           "source": "haiku"
    196         },
    197         "multiple_metrics": {
    198           "applies": true,
    199           "answer": true,
    200           "justification": "Three metrics are used and formally defined: Persuaded-Rate (PR), Remain-Rate (RR), and Other-Rate (OR) (Section 2.2, Equations 1-3).",
    201           "source": "haiku"
    202         },
    203         "human_evaluation": {
    204           "applies": false,
    205           "answer": false,
    206           "justification": "The study evaluates LLM-to-LLM persuasion only; human evaluation is not relevant to this experimental design.",
    207           "source": "haiku"
    208         },
    209         "held_out_test_set": {
    210           "applies": false,
    211           "answer": false,
    212           "justification": "This is a behavioral experiment, not a prediction task requiring train/test split.",
    213           "source": "haiku"
    214         },
    215         "per_category_breakdown": {
    216           "applies": true,
    217           "answer": true,
    218           "justification": "Results are broken down by objective vs. subjective datasets (Figs 1-4) and per model pair in comprehensive 10×10 heatmaps.",
    219           "source": "haiku"
    220         },
    221         "failure_cases_discussed": {
    222           "applies": true,
    223           "answer": true,
    224           "justification": "Figure 12 and Appendix C.1 provide a detailed case study of a persuaded model reasoning incorrectly about a pandemic influenza question, tracing the step-by-step reasoning deviation.",
    225           "source": "haiku"
    226         },
    227         "negative_results_reported": {
    228           "applies": true,
    229           "answer": true,
    230           "justification": "Section 3.2 explicitly reports mixed effects of enabling thinking mode on the persuader side ('average gains of -7.41%, -1.92%, and 2.07%' for Gemini, Qwen, and Hunyuan), so negative and inconsistent results are disclosed rather than omitted.",
    231           "source": "haiku"
    232         }
    233       },
    234       "setup_transparency": {
    235         "model_versions_specified": {
    236           "applies": true,
    237           "answer": false,
    238           "justification": "Two model card references (Gemini 2.5 Flash and OpenAI o4-mini) contain 'Accessed: YYYY-MM-DD' placeholder dates, indicating model version documentation was not completed before submission.",
    239           "source": "haiku"
    240         },
    241         "prompts_provided": {
    242           "applies": true,
    243           "answer": true,
    244           "justification": "Full prompts for persuader content generation and persuadee evaluation are provided in Appendix A.4 for both objective and subjective tasks; the adversarial detection prompt is shown in Figure 15.",
    245           "source": "haiku"
    246         },
    247         "hyperparameters_reported": {
    248           "applies": true,
    249           "answer": true,
    250           "justification": "Hyperparameters reported in Appendix A.3: temperature=0.7, top_p=0.8, VLLM v0.10.0, transformers v4.56.0.",
    251           "source": "haiku"
    252         },
    253         "scaffolding_described": {
    254           "applies": true,
    255           "answer": true,
    256           "justification": "The multi-agent persuasion scaffolding is described: persuader generates content, content is appended to persuadee's context as a prior participant response, persuadee responds with a single-letter choice. Multi-hop chain setup is also described.",
    257           "source": "haiku"
    258         },
    259         "data_preprocessing_documented": {
    260           "applies": true,
    261           "answer": true,
    262           "justification": "Preprocessing is documented: MMLU correct answers standardized to option A, persuasion targets fixed to option D; subjective stances mapped to A/B/C options; persuasion target set based on initial response (neutral if support/oppose, random if neutral).",
    263           "source": "haiku"
    264         }
    265       },
    266       "data_integrity": {
    267         "raw_data_available": {
    268           "applies": true,
    269           "answer": false,
    270           "justification": "No raw experimental output data (model responses, persuasion interaction logs) is released or linked to an external repository.",
    271           "source": "haiku"
    272         },
    273         "data_collection_described": {
    274           "applies": true,
    275           "answer": true,
    276           "justification": "Data collection is described in Appendix A.1: MMLU selection criteria, 1,000 claim sample from PersuasionBench and Perspectrum, and how model responses are recorded.",
    277           "source": "haiku"
    278         },
    279         "recruitment_methods_described": {
    280           "applies": false,
    281           "answer": false,
    282           "justification": "No human participants; standard benchmarks used, so recruitment is not applicable.",
    283           "source": "haiku"
    284         },
    285         "data_pipeline_documented": {
    286           "applies": true,
    287           "answer": false,
    288           "justification": "The experimental pipeline is conceptually described but lacks complete documentation (scripts, API call logic, response parsing, how multi-hop chains were orchestrated).",
    289           "source": "haiku"
    290         }
    291       },
    292       "contamination": {
    293         "training_cutoff_stated": {
    294           "applies": true,
    295           "answer": false,
    296           "justification": "No training data cutoffs are stated for any of the 7 model families despite evaluating them on MMLU, a benchmark widely present in training corpora.",
    297           "source": "haiku"
    298         },
    299         "train_test_overlap_discussed": {
    300           "applies": true,
    301           "answer": false,
    302           "justification": "No discussion of whether MMLU or PersuasionBench examples appeared in any model's training data.",
    303           "source": "haiku"
    304         },
    305         "benchmark_contamination_addressed": {
    306           "applies": true,
    307           "answer": false,
    308           "justification": "MMLU is a widely-used benchmark that frontier models (o4-mini, Qwen3, DeepSeek-R1) are likely trained on; this is not acknowledged or addressed.",
    309           "source": "haiku"
    310         }
    311       },
    312       "human_studies": {
    313         "pre_registered": {
    314           "applies": false,
    315           "answer": false,
    316           "justification": "No human participants in this study.",
    317           "source": "haiku"
    318         },
    319         "irb_or_ethics_approval": {
    320           "applies": false,
    321           "answer": false,
    322           "justification": "No human participants in this study.",
    323           "source": "haiku"
    324         },
    325         "demographics_reported": {
    326           "applies": false,
    327           "answer": false,
    328           "justification": "No human participants in this study.",
    329           "source": "haiku"
    330         },
    331         "inclusion_exclusion_criteria": {
    332           "applies": false,
    333           "answer": false,
    334           "justification": "No human participants in this study.",
    335           "source": "haiku"
    336         },
    337         "randomization_described": {
    338           "applies": false,
    339           "answer": false,
    340           "justification": "No human participants in this study.",
    341           "source": "haiku"
    342         },
    343         "blinding_described": {
    344           "applies": false,
    345           "answer": false,
    346           "justification": "No human participants in this study.",
    347           "source": "haiku"
    348         },
    349         "attrition_reported": {
    350           "applies": false,
    351           "answer": false,
    352           "justification": "No human participants in this study.",
    353           "source": "haiku"
    354         }
    355       },
    356       "cost_and_practicality": {
    357         "inference_cost_reported": {
    358           "applies": true,
    359           "answer": false,
    360           "justification": "No inference cost or latency is reported, despite experiments involving hundreds of thousands of model calls (10 persuader modes × 10 persuadee modes × ~1,000 questions × multiple experimental conditions).",
    361           "source": "haiku"
    362         },
    363         "compute_budget_stated": {
    364           "applies": true,
    365           "answer": false,
    366           "justification": "No compute budget is stated. The scale of computation (multiple GPU-served models, full pairwise evaluation) is substantial but undisclosed.",
    367           "source": "haiku"
    368         }
    369       }
    370     }
    371   },
    372   "claims": [
    373     {
    374       "claim": "LRMs with thinking mode enabled are significantly more resistant to persuasion than their non-thinking counterparts",
    375       "evidence": "Heatmaps (Figs 1-2) show thinking-mode models have substantially lower Persuaded-Rate; thinking mode reduces PR by an average of 7.82% on objective and 29.68% on subjective datasets; Fig 7 shows R²=0.85, r=-0.92 between PR and RR for model pairs",
    376       "supported": "strong"
    377     },
    378     {
    379       "claim": "Sharing LRM thinking content with persuadees dramatically increases persuasive effectiveness",
    380       "evidence": "Comparing Fig 1a vs 1b, adding thinking content yields an average 21.07% increase in PR; individual model rows show increases from ~12% to >80% PR for some persuadee targets",
    381       "supported": "strong"
    382     },
    383     {
    384       "claim": "Persuasive efficacy is primarily driven by cognitive process (reasoning mode) rather than model scale",
    385       "evidence": "Fig 4 column-wise analysis shows no clear PR increase with stronger persuaders, while Fig 3 shows a clear row-wise effect of persuadee capability, supporting the process-centric over scale-centric view",
    386       "supported": "moderate"
    387     },
    388     {
    389       "claim": "Models are substantially more easily persuaded on subjective questions than objective ones",
    390       "evidence": "PR values are consistently and substantially higher throughout the subjective heatmaps (Fig 2) compared to objective heatmaps (Fig 1) across all model pairs",
    391       "supported": "strong"
    392     },
    393     {
    394       "claim": "Persuasive content length positively correlates with persuasion effectiveness",
    395       "evidence": "Fig 5 shows PR generally increases with token limit while RR decreases, with the effect being non-monotonic at very high token counts",
    396       "supported": "moderate"
    397     },
    398     {
    399       "claim": "Adversarial argument detection prompt substantially reduces model susceptibility to persuasion",
    400       "evidence": "Fig 11 shows consistent PR reduction and RR increase across four persuadee models (Hunyuan w/o T, w/ T, Llama-3-8B, Qwen2.5-7B) when the detection prompt is applied",
    401       "supported": "strong"
    402     }
    403   ],
    404   "methodology_tags": [
    405     "benchmark-eval",
    406     "observational"
    407   ],
    408   "key_findings": "The paper identifies a 'Persuasion Duality' in LLM multi-agent systems: enabling explicit reasoning (thinking mode) in LRMs simultaneously increases their resistance to persuasion (lower Persuaded-Rate) and dramatically increases their persuasive power when thinking content is shared with persuadees (avg 21.07% PR increase on objective tasks). Persuasion dynamics depend more on cognitive architecture (reasoning mode) than model scale, as stronger persuaders do not reliably raise persuasion rates when analyzed column-wise across the heatmaps. Models are substantially more persuadable on subjective questions than objective ones, likely because subjective claims lack ground-truth anchors. Multi-hop persuasion propagates non-linearly through agent chains with both amplification and attenuation effects depending on chain composition, and a simple adversarial argument detection prompt consistently reduces persuasion vulnerability across diverse model types.",
    409   "red_flags": [
    410     {
    411       "flag": "No limitations section",
    412       "detail": "The paper contains no dedicated limitations section. Critical gaps such as the highly artificial experimental setup (correct answers forced to option A, persuasion target fixed to option D) and narrow dataset scope are not acknowledged as limitations."
    413     },
    414     {
    415       "flag": "Incomplete model version documentation",
    416       "detail": "Two model card references (Gemini 2.5 Flash and OpenAI o4-mini system card) contain 'Accessed: YYYY-MM-DD' placeholder dates, indicating model version documentation was not finalized before submission."
    417     },
    418     {
    419       "flag": "No significance testing",
    420       "detail": "Comparative claims about 'significant' differences rely on visual heatmap comparison and average percentages. No formal statistical tests are reported despite the availability of per-trial data needed to compute them."
    421     },
    422     {
    423       "flag": "Benchmark contamination unaddressed",
    424       "detail": "MMLU is used as the primary objective evaluation dataset but training data cutoffs for any model are not stated, and potential contamination of MMLU in training data for frontier models (o4-mini, DeepSeek-R1, Qwen3) is not discussed."
    425     },
    426     {
    427       "flag": "No code released",
    428       "detail": "No code repository is provided for reproducing the experiments despite the computational complexity involving pairwise model evaluations across thousands of questions."
    429     },
    430     {
    431       "flag": "Artificial persuasion design",
    432       "detail": "Standardizing all correct answers to option A and fixing persuasion targets to option D creates an artificial setup whose generalizability to real multi-agent tasks is not discussed."
    433     },
    434     {
    435       "flag": "Funding not disclosed",
    436       "detail": "No acknowledgment or funding section is present in the paper."
    437     }
    438   ],
    439   "cited_papers": [
    440     {
    441       "title": "The persuasive power of large language models",
    442       "relevance": "Prior work establishing that LLM persuasive efficacy scales with model size — the dominant hypothesis this paper directly challenges"
    443     },
    444     {
    445       "title": "Scaling language model size yields diminishing returns for single-message political persuasion",
    446       "relevance": "Key supporting evidence for diminishing returns from scale in persuasion, motivating the shift to cognitive architecture focus"
    447     },
    448     {
    449       "title": "Measuring massive multitask language understanding",
    450       "relevance": "MMLU benchmark used as the primary objective evaluation dataset in all experiments"
    451     },
    452     {
    453       "title": "Measuring the persuasiveness of language models",
    454       "relevance": "PersuasionBench dataset used for subjective evaluation; Anthropic study on persuasion measurement methodology"
    455     },
    456     {
    457       "title": "Lies, damned lies, and distributional language statistics: Persuasion and deception with large language models",
    458       "relevance": "Framework distinguishing roles for LLMs in persuasive contexts (persuader, persuadee, judge) that the paper builds upon"
    459     },
    460     {
    461       "title": "Conformity in large language models",
    462       "relevance": "Related work on LLM susceptibility to social influence; provides the PR/RR/OR metrics framework used in this paper"
    463     },
    464     {
    465       "title": "DeepSeek-R1: Incentivizing reasoning capability in LLMs via reinforcement learning",
    466       "relevance": "Defines the LRM architecture category central to the paper's thesis; one of the key evaluated models"
    467     },
    468     {
    469       "title": "Chain-of-thought prompting elicits reasoning in large language models",
    470       "relevance": "Foundational work for the CoT-as-persuasion-resistance finding in Section 3.4.2"
    471     },
    472     {
    473       "title": "Flooding spread of manipulated knowledge in LLM-based multi-agent communities",
    474       "relevance": "Prior work by overlapping authors on knowledge manipulation in MAS, directly related context"
    475     },
    476     {
    477       "title": "Encouraging divergent thinking in large language models through multi-agent debate",
    478       "relevance": "Multi-agent debate framework relevant to the MAS persuasion dynamics studied; foundational for multi-hop analysis"
    479     }
    480   ],
    481   "engagement_factors": {
    482     "practical_relevance": {
    483       "score": 2,
    484       "justification": "MAS designers can immediately apply the adversarial argument detection prompt; findings directly inform architecture choices between LLMs and LRMs for agent robustness."
    485     },
    486     "surprise_contrarian": {
    487       "score": 3,
    488       "justification": "The Persuasion Duality is genuinely counterintuitive: the same thinking mechanism that makes an agent more persuasive also makes it harder to persuade, directly challenging the dominant scale hypothesis."
    489     },
    490     "fear_safety": {
    491       "score": 2,
    492       "justification": "Demonstrates that agents can be systematically manipulated via shared thinking content and that influence cascades non-linearly through MAS chains, raising concrete safety concerns."
    493     },
    494     "drama_conflict": {
    495       "score": 1,
    496       "justification": "The challenge to the scale hypothesis creates intellectual tension, but there is no high-profile named controversy or adversarial replication involved."
    497     },
    498     "demo_ability": {
    499       "score": 1,
    500       "justification": "The adversarial detection prompt is provided verbatim and is immediately usable, but reproducing the full experiments requires access to multiple closed-source frontier models and no code is released."
    501     },
    502     "brand_recognition": {
    503       "score": 1,
    504       "justification": "Shanghai Jiao Tong University and National University of Singapore are well-regarded academic institutions but not top AI labs; no famous lab affiliation."
    505     }
    506   },
    507   "hn_data": {
    508     "threads": [
    509       {
    510         "hn_id": "43243109",
    511         "title": "An Attempt to Catch Up with JIT Compilers",
    512         "points": 203,
    513         "comments": 142,
    514         "url": "https://news.ycombinator.com/item?id=43243109",
    515         "created_at": "2025-03-03T16:06:50Z"
    516       },
    517       {
    518         "hn_id": "44433899",
    519         "title": "Converting a large mathematical software package written in C++ to C++20 modules",
    520         "points": 141,
    521         "comments": 42,
    522         "url": "https://news.ycombinator.com/item?id=44433899",
    523         "created_at": "2025-07-01T13:46:56Z"
    524       },
    525       {
    526         "hn_id": "46339300",
    527         "title": "Signaling in the Age of AI: Evidence from Cover Letters",
    528         "points": 17,
    529         "comments": 1,
    530         "url": "https://news.ycombinator.com/item?id=46339300",
    531         "created_at": "2025-12-20T20:23:28Z"
    532       },
    533       {
    534         "hn_id": "45472586",
    535         "title": "Physics of Learning: A Lagrangian perspective to different learning paradigms",
    536         "points": 3,
    537         "comments": 0,
    538         "url": "https://news.ycombinator.com/item?id=45472586",
    539         "created_at": "2025-10-04T11:38:44Z"
    540       },
    541       {
    542         "hn_id": "47195084",
    543         "title": "Limitations on Safe, Trusted, Artificial General Intelligence",
    544         "points": 2,
    545         "comments": 0,
    546         "url": "https://news.ycombinator.com/item?id=47195084",
    547         "created_at": "2026-02-28T13:25:35Z"
    548       },
    549       {
    550         "hn_id": "45418635",
    551         "title": "Can LLMs Be Creative? Paper: Combinatorial Creativity: A New Frontier",
    552         "points": 2,
    553         "comments": 0,
    554         "url": "https://news.ycombinator.com/item?id=45418635",
    555         "created_at": "2025-09-29T20:53:22Z"
    556       },
    557       {
    558         "hn_id": "24567265",
    559         "title": "Context-Theoretic Semantics for Natural Language: An Algebraic Framework (2007)",
    560         "points": 2,
    561         "comments": 0,
    562         "url": "https://news.ycombinator.com/item?id=24567265",
    563         "created_at": "2020-09-23T14:11:23Z"
    564       },
    565       {
    566         "hn_id": "46479718",
    567         "title": "FakeParts: A New Family of AI-Generated DeepFakes",
    568         "points": 1,
    569         "comments": 0,
    570         "url": "https://news.ycombinator.com/item?id=46479718",
    571         "created_at": "2026-01-03T18:14:11Z"
    572       },
    573       {
    574         "hn_id": "45069333",
    575         "title": "A multi-task neural network for atypical mitosis recognition under domain shift",
    576         "points": 1,
    577         "comments": 0,
    578         "url": "https://news.ycombinator.com/item?id=45069333",
    579         "created_at": "2025-08-29T21:00:57Z"
    580       }
    581     ],
    582     "top_points": 203,
    583     "total_points": 372,
    584     "total_comments": 185
    585   }
    586 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs