scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (26791B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Detection Method for Prompt Injection by Integrating Pre-trained Model and Heuristic Feature Engineering",
      6     "authors": [
      7       "Yi Ji",
      8       "Runzhi Li",
      9       "Baolei Mao"
     10     ],
     11     "year": 2025,
     12     "venue": "Knowledge Science, Engineering and Management",
     13     "arxiv_id": "2506.06384",
     14     "doi": "10.48550/arXiv.2506.06384"
     15   },
     16   "checklist": {
     17     "claims_and_evidence": {
     18       "abstract_claims_supported": {
     19         "applies": true,
     20         "answer": true,
     21         "justification": "Abstract claims (outperforming baselines, reducing attack success rates) are supported by Tables 1 and 3 showing superior accuracy and lower ASR across LLMs.",
     22         "source": "haiku"
     23       },
     24       "causal_claims_justified": {
     25         "applies": true,
     26         "answer": true,
     27         "justification": "Table 2 ablation study shows each module (M1, M2, M3) improves metrics, justifying the causal claim that dual-channel fusion improves detection.",
     28         "source": "haiku"
     29       },
     30       "generalization_bounded": {
     31         "applies": true,
     32         "answer": false,
     33         "justification": "Title claims 'Detection Method for Prompt Injection' (broad), but Table 1 shows 97.94% on safeguard-v2 vs. 91.24% on deepset-v2. Paper acknowledges distribution differences but doesn't bound claims to specific attack/dataset types.",
     34         "source": "haiku"
     35       },
     36       "alternative_explanations_discussed": {
     37         "applies": true,
     38         "answer": false,
     39         "justification": "Paper presents no alternative explanations for improved performance. No discussion of whether results could be due to dataset artifacts, training/test similarity, or other confounds.",
     40         "source": "haiku"
     41       },
     42       "proxy_outcome_distinction": {
     43         "applies": true,
     44         "answer": true,
     45         "justification": "Paper distinguishes between benchmark accuracy (Table 1) and actual attack success rate on real LLMs (Table 3), the true outcome of interest.",
     46         "source": "haiku"
     47       }
     48     },
     49     "limitations_and_scope": {
     50       "limitations_section_present": {
     51         "applies": true,
     52         "answer": false,
     53         "justification": "Section 5 contains only a single-paragraph limitations statement in the conclusion ('precision requires further enhancement'). No dedicated limitations or threats-to-validity section.",
     54         "source": "haiku"
     55       },
     56       "threats_to_validity_specific": {
     57         "applies": true,
     58         "answer": false,
     59         "justification": "Limitations are boilerplate ('precision requires further enhancement'). No discussion of specific threats: dataset bias, test-set contamination, attack pattern representativeness, or robustness to novel attacks.",
     60         "source": "haiku"
     61       },
     62       "scope_boundaries_stated": {
     63         "applies": true,
     64         "answer": false,
     65         "justification": "Paper focuses on direct prompt injection (not indirect) and English datasets but doesn't explicitly state what it does NOT show regarding novel attacks, cross-lingual transfer, or edge cases.",
     66         "source": "haiku"
     67       }
     68     },
     69     "conflicts_of_interest": {
     70       "funding_disclosed": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "No funding source is mentioned anywhere in the paper.",
     74         "source": "haiku"
     75       },
     76       "affiliations_disclosed": {
     77         "applies": true,
     78         "answer": true,
     79         "justification": "All three authors are affiliated with Zhengzhou University, clearly disclosed at paper header. No evaluation of their own product.",
     80         "source": "haiku"
     81       },
     82       "funder_independent_of_outcome": {
     83         "applies": false,
     84         "answer": false,
     85         "justification": "No funder disclosed.",
     86         "source": "haiku"
     87       },
     88       "financial_interests_declared": {
     89         "applies": true,
     90         "answer": false,
     91         "justification": "No competing interests statement included.",
     92         "source": "haiku"
     93       }
     94     },
     95     "scope_and_framing": {
     96       "key_terms_defined": {
     97         "applies": true,
     98         "answer": true,
     99         "justification": "Prompt injection categories (direct vs. indirect, semantic vs. structure-based) are clearly defined with examples. DeBERTa and heuristic feature engineering are explained in the Method section.",
    100         "source": "haiku"
    101       },
    102       "intended_contribution_clear": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "Contributions explicitly stated: (1) dual-channel detection framework, (2) heuristic rules for attack patterns, (3) evaluation demonstrating effectiveness. Clear what paper adds.",
    106         "source": "haiku"
    107       },
    108       "engagement_with_prior_work": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "Section 2 categorizes prior defenses (detection-based, architecture-based, self-supervision) and explains gaps that DMPI-PMHFE addresses. Good positioning relative to existing work.",
    112         "source": "haiku"
    113       }
    114     }
    115   },
    116   "type_checklist": {
    117     "empirical": {
    118       "artifacts": {
    119         "code_released": {
    120           "applies": true,
    121           "answer": false,
    122           "justification": "No mention of code release, GitHub repository, or implementation details beyond algorithm descriptions.",
    123           "source": "haiku"
    124         },
    125         "data_released": {
    126           "applies": true,
    127           "answer": false,
    128           "justification": "External test sets (deepset, ivanleomk) are public HuggingFace datasets. However, the augmented safeguard-v2 dataset (10,400 samples, 3,000 GPT-4o generated) is not mentioned as released.",
    129           "source": "haiku"
    130         },
    131         "environment_specified": {
    132           "applies": true,
    133           "answer": false,
    134           "justification": "No requirements.txt, Dockerfile, or dependency versions provided. Only model names (DeBERTa-v3-base, en_core_web_sm) mentioned without version pins.",
    135           "source": "haiku"
    136         },
    137         "reproduction_instructions": {
    138           "applies": true,
    139           "answer": false,
    140           "justification": "No step-by-step instructions to reproduce experiments. Method is described conceptually but not operationally.",
    141           "source": "haiku"
    142         }
    143       },
    144       "statistical_methodology": {
    145         "confidence_intervals_or_error_bars": {
    146           "applies": true,
    147           "answer": false,
    148           "justification": "Tables 1, 2, 3 report single point estimates (accuracy, precision, recall, F1) with no confidence intervals, error bars, or variance measures.",
    149           "source": "haiku"
    150         },
    151         "significance_tests": {
    152           "applies": true,
    153           "answer": false,
    154           "justification": "No significance tests reported. Differences between methods (e.g., 97.94% vs 97.87%) lack p-values or statistical justification.",
    155           "source": "haiku"
    156         },
    157         "effect_sizes_reported": {
    158           "applies": true,
    159           "answer": false,
    160           "justification": "Improvements shown as absolute percentage-point differences (e.g., 97.94% - 97.87% = 0.07pp) but not formally reported as effect sizes with interpretation.",
    161           "source": "haiku"
    162         },
    163         "sample_size_justified": {
    164           "applies": true,
    165           "answer": false,
    166           "justification": "Test set sizes (1,300 / 354 / 610 / 251) are provided but no power analysis or justification for adequacy of sample sizes.",
    167           "source": "haiku"
    168         },
    169         "variance_reported": {
    170           "applies": true,
    171           "answer": false,
    172           "justification": "Results are single point estimates. No mention of multiple runs, standard deviation, or variance across experimental repetitions.",
    173           "source": "haiku"
    174         }
    175       },
    176       "evaluation_design": {
    177         "baselines_included": {
    178           "applies": true,
    179           "answer": true,
    180           "justification": "Detection compared against Fmops, ProtectAI, SafeGuard, InjecGuard. Defense evaluated against Self-Reminder and Self-Defense baselines.",
    181           "source": "haiku"
    182         },
    183         "baselines_contemporary": {
    184           "applies": true,
    185           "answer": true,
    186           "justification": "Baselines are described as 'currently widely applied on Hugging Face, enjoying high recognition and practical value' and date from 2023-2024, making them contemporary with the 2025 paper.",
    187           "source": "haiku"
    188         },
    189         "ablation_study": {
    190           "applies": true,
    191           "answer": true,
    192           "justification": "Table 2 ablates modules: M1 (DeBERTa only), M1+M2 (add synonym matching), M1+M2+M3 (add pattern matching), showing each contributes to performance.",
    193           "source": "haiku"
    194         },
    195         "multiple_metrics": {
    196           "applies": true,
    197           "answer": true,
    198           "justification": "Detection uses accuracy, precision, recall, F1-score. Defense evaluation uses attack success rate (ASR). Multiple dimensions assessed.",
    199           "source": "haiku"
    200         },
    201         "human_evaluation": {
    202           "applies": false,
    203           "answer": false,
    204           "justification": "Not applicable. Detection model evaluated on automated benchmarks; human judgment not needed for classification correctness.",
    205           "source": "haiku"
    206         },
    207         "held_out_test_set": {
    208           "applies": true,
    209           "answer": true,
    210           "justification": "safeguard-v2 split 80/10/10 (train/val/test = 10,400/1,300/1,300). External test sets also held separate.",
    211           "source": "haiku"
    212         },
    213         "per_category_breakdown": {
    214           "applies": true,
    215           "answer": false,
    216           "justification": "Results reported by dataset (Table 1) and by LLM (Table 3). No breakdown by attack type (semantic vs. structure-based) or per-attack-pattern results.",
    217           "source": "haiku"
    218         },
    219         "failure_cases_discussed": {
    220           "applies": true,
    221           "answer": false,
    222           "justification": "No concrete failure examples or analysis. Only a brief note that precision drops from 99.58% to 98.00% when pattern matching is added, attributed to increased false positives.",
    223           "source": "haiku"
    224         },
    225         "negative_results_reported": {
    226           "applies": true,
    227           "answer": true,
    228           "justification": "Paper reports precision degradation as pattern matching is added (99.58% → 98.00%), explicitly acknowledging trade-off. This is a negative result on one metric.",
    229           "source": "haiku"
    230         }
    231       },
    232       "setup_transparency": {
    233         "model_versions_specified": {
    234           "applies": true,
    235           "answer": true,
    236           "justification": "DeBERTa-v3-base clearly specified. Sufficient to identify the exact pretrained model.",
    237           "source": "haiku"
    238         },
    239         "prompts_provided": {
    240           "applies": true,
    241           "answer": false,
    242           "justification": "For defense eval, 251 attack samples are used but actual attack prompts are not provided in paper. Only attack pattern categories mentioned.",
    243           "source": "haiku"
    244         },
    245         "hyperparameters_reported": {
    246           "applies": true,
    247           "answer": true,
    248           "justification": "Optimizer (Adam), learning rate (2e-5), batch size (16), weight decay (0.02), early stopping (patience=3) all specified.",
    249           "source": "haiku"
    250         },
    251         "scaffolding_described": {
    252           "applies": false,
    253           "answer": false,
    254           "justification": "Not applicable. This is a detection classifier, not an agentic system with scaffolding.",
    255           "source": "haiku"
    256         },
    257         "data_preprocessing_documented": {
    258           "applies": true,
    259           "answer": false,
    260           "justification": "Paper mentions tokenization, lemmatization, lowercase conversion but doesn't document complete preprocessing pipeline or filtering steps.",
    261           "source": "haiku"
    262         }
    263       },
    264       "data_integrity": {
    265         "raw_data_available": {
    266           "applies": true,
    267           "answer": false,
    268           "justification": "External test sets (deepset, ivanleomk) are publicly available on HuggingFace. Safeguard-v2 training data (augmented with 3,000 GPT-4o samples) is not mentioned as released.",
    269           "source": "haiku"
    270         },
    271         "data_collection_described": {
    272           "applies": true,
    273           "answer": true,
    274           "justification": "Safeguard-v2 creation described: augmented xTRam1/safeguard-prompt-injections, 15 attack patterns, 3,000 GPT-4o samples, three-stage QA (manual verification, dedup, balanced sampling).",
    275           "source": "haiku"
    276         },
    277         "recruitment_methods_described": {
    278           "applies": false,
    279           "answer": false,
    280           "justification": "Not applicable. No human participants or recruitment.",
    281           "source": "haiku"
    282         },
    283         "data_pipeline_documented": {
    284           "applies": true,
    285           "answer": true,
    286           "justification": "Data creation and splitting pipeline is described at a conceptual level (augmentation, QA steps, 80/10/10 split). Adequate as an overview, but not detailed enough to reproduce.",
    287           "source": "haiku"
    288         }
    289       },
    290       "contamination": {
    291         "training_cutoff_stated": {
    292           "applies": false,
    293           "answer": false,
    294           "justification": "Not applicable. This is a detection model on labeled benchmarks, not evaluating LLM capabilities on contaminated benchmarks. However, whether the underlying LLMs in defense eval have seen these attack patterns in pretraining is not discussed.",
    295           "source": "haiku"
    296         },
    297         "train_test_overlap_discussed": {
    298           "applies": true,
    299           "answer": false,
    300           "justification": "Paper does not discuss whether attack patterns in safeguard-v2 training set overlap with external test sets (ivanleomk-v2, deepset-v2) or if benchmarks have contamination.",
    301           "source": "haiku"
    302         },
    303         "benchmark_contamination_addressed": {
    304           "applies": true,
    305           "answer": false,
    306           "justification": "External datasets are noted as separate but their construction and uniqueness vs. training set not discussed. Whether the 15 attack patterns are novel or overlap with benchmark sources is not addressed.",
    307           "source": "haiku"
    308         }
    309       },
    310       "human_studies": {
    311         "pre_registered": {
    312           "applies": false,
    313           "answer": false,
    314           "justification": "Not applicable. No human participants.",
    315           "source": "haiku"
    316         },
    317         "irb_or_ethics_approval": {
    318           "applies": false,
    319           "answer": false,
    320           "justification": "Not applicable. No human participants.",
    321           "source": "haiku"
    322         },
    323         "demographics_reported": {
    324           "applies": false,
    325           "answer": false,
    326           "justification": "Not applicable. No human participants.",
    327           "source": "haiku"
    328         },
    329         "inclusion_exclusion_criteria": {
    330           "applies": false,
    331           "answer": false,
    332           "justification": "Not applicable. No human participants.",
    333           "source": "haiku"
    334         },
    335         "randomization_described": {
    336           "applies": false,
    337           "answer": false,
    338           "justification": "Not applicable. No human participants.",
    339           "source": "haiku"
    340         },
    341         "blinding_described": {
    342           "applies": false,
    343           "answer": false,
    344           "justification": "Not applicable. No human participants.",
    345           "source": "haiku"
    346         },
    347         "attrition_reported": {
    348           "applies": false,
    349           "answer": false,
    350           "justification": "Not applicable. No human participants.",
    351           "source": "haiku"
    352         }
    353       },
    354       "cost_and_practicality": {
    355         "inference_cost_reported": {
    356           "applies": true,
    357           "answer": false,
    358           "justification": "No inference latency, computational cost, or resource requirements reported for running the detector.",
    359           "source": "haiku"
    360         },
    361         "compute_budget_stated": {
    362           "applies": true,
    363           "answer": false,
    364           "justification": "No compute budget stated for training or evaluation (GPU hours, memory, etc.).",
    365           "source": "haiku"
    366         }
    367       }
    368     }
    369   },
    370   "claims": [
    371     {
    372       "claim": "DMPI-PMHFE achieves 97.94% accuracy on safeguard-v2, outperforming baselines (InjecGuard 97.87%)",
    373       "evidence": "Table 1 shows DMPI-PMHFE: 97.94% accuracy vs. InjecGuard 97.87% on safeguard-v2 test set",
    374       "supported": "strong"
    375     },
    376     {
    377       "claim": "Dual-channel feature fusion (DeBERTa + heuristic rules) is more effective than DeBERTa alone",
    378       "evidence": "Table 2 ablation: M1 alone 97.26% → M1+M2+M3 97.94% accuracy on safeguard-v2",
    379       "supported": "strong"
    380     },
    381     {
    382       "claim": "DMPI-PMHFE reduces attack success rates across mainstream LLMs (10-14% vs. 25-72% baseline)",
    383       "evidence": "Table 3 shows ASR drops from baseline (14.34% to 71.71%) to DMPI-PMHFE (10.35% to 14.34%) across 5 LLMs",
    384       "supported": "strong"
    385     },
    386     {
    387       "claim": "Pattern matching (M3) improves recall while trading off precision",
    388       "evidence": "Table 2: safeguard-v2 recall improves 95.64% (M1+M2) → 98.59% (M1+M2+M3), precision drops 98.77% → 98.00%",
    389       "supported": "strong"
    390     },
    391     {
    392       "claim": "Heuristic rules capture 8 semantic-based and 2 structure-based attack patterns",
    393       "evidence": "Appendices A.1 and A.2 list 8 semantic patterns and 2 structure patterns with matching rules. Evaluation shows coverage via F1-score but not per-pattern breakdown.",
    394       "supported": "moderate"
    395     },
    396     {
    397       "claim": "DMPI-PMHFE generalizes across external test sets (ivanleomk-v2, deepset-v2)",
    398       "evidence": "Table 1: 94.75% accuracy on ivanleomk-v2, 91.24% on deepset-v2. Performance drops vs. safeguard-v2 (97.94%) indicate distribution shift, not strong generalization.",
    399       "supported": "moderate"
    400     }
    401   ],
    402   "methodology_tags": [
    403     "benchmark-eval",
    404     "observational"
    405   ],
    406   "key_findings": "DMPI-PMHFE combines DeBERTa semantic embeddings with heuristic pattern matching to detect 10 attack patterns. It achieves 97.94% accuracy on the safeguard-v2 benchmark and reduces attack success rates from 10–72% to 10–14% across five mainstream LLMs (including GPT-4o, Qwen, Llama, and GLM-4). However, performance degrades substantially on external test sets (91.24% on deepset-v2), suggesting overfitting to the training distribution, and pattern matching introduces false positives (precision drops from 99.58% to 98.00%), indicating the dual-channel approach trades precision for coverage.",
    407   "red_flags": [
    408     {
    409       "flag": "No code or data release",
    410       "detail": "Reproducibility severely limited. Augmented safeguard-v2 dataset (10,400 samples, 3,000 GPT-4o-generated) not released. Code not available."
    411     },
    412     {
    413       "flag": "No statistical significance testing",
    414       "detail": "Point estimates only (e.g., 97.94% vs. 97.87%). Differences of 0.07–0.13 percentage points lack p-values; may not be statistically significant."
    415     },
    416     {
    417       "flag": "Overfitting to training distribution",
    418       "detail": "Performance optimal on safeguard-v2 (97.94%) but drops to 91.24% on deepset-v2. Suggests model memorized patterns from training data rather than learning generalizable features."
    419     },
    420     {
    421       "flag": "Data augmentation via generative model",
    422       "detail": "3,000 of 10,400 training samples generated by GPT-4o. Potential for synthetic artifacts, mode collapse, or biases in attack pattern representation."
    423     },
    424     {
    425       "flag": "Precision-recall trade-off unresolved",
    426       "detail": "Pattern matching (M3) increases recall (95.64% → 98.59%) but decreases precision (98.77% → 98.00%). False positives not analyzed or mitigated."
    427     },
    428     {
    429       "flag": "No analysis of attack pattern coverage",
    430       "detail": "Paper claims to detect '15 mainstream attack patterns' but provides no breakdown of per-pattern precision/recall or discussion of novel attack robustness."
    431     },
    432     {
    433       "flag": "Limited generalization analysis across LLMs",
    434       "detail": "Table 3 shows defense effectiveness varies 3–5x across LLMs (e.g., GLM-4: 71.71% baseline vs. Llama-3.3: 25.09%). Detector trained on fixed patterns; unclear how it will perform on LLMs beyond tested set."
    435     },
    436     {
    437       "flag": "Boilerplate limitations section",
    438       "detail": "Single sentence in conclusion: 'precision requires further enhancement.' No discussion of scope boundaries, dataset bias, or threats to validity."
    439     },
    440     {
    441       "flag": "No variance or error estimates",
    442       "detail": "Single point estimate per result. No standard deviation, 95% CIs, or multiple runs reported."
    443     }
    444   ],
    445   "cited_papers": [
    446     {
    447       "title": "Struq: Defending against prompt injection with structured queries",
    448       "authors": "Sizhe Chen, Julien Piet, Chawin Sitawarin, David Wagner",
    449       "year": 2024,
    450       "relevance": "Architecture-based defense alternative; separates prompts and data to prevent instruction injection."
    451     },
    452     {
    453       "title": "Jatmo: Prompt injection defense by task-specific finetuning",
    454       "authors": "Julien Piet, Maha Alrashed, Chawin Sitawarin, et al.",
    455       "year": 2024,
    456       "relevance": "Defense method trading generalization for task-specific robustness; illustrates design trade-offs in prompt defense."
    457     },
    458     {
    459       "title": "Defending chatgpt against jailbreak attack via self-reminders",
    460       "authors": "Yueqi Xie, Jingwei Yi, Jiawei Shao, et al.",
    461       "year": 2023,
    462       "relevance": "Self-supervision baseline for prompt injection defense; compared against DMPI-PMHFE in Table 3."
    463     },
    464     {
    465       "title": "Ignore previous prompt: Attack techniques for language models",
    466       "authors": "Fábio Perez, Ian Ribeiro",
    467       "year": 2022,
    468       "relevance": "Early taxonomy of direct prompt injection attacks; foundational for understanding attack patterns."
    469     },
    470     {
    471       "title": "Not what you've signed up for: Compromising real-world llm-integrated applications with indirect prompt injection",
    472       "authors": "Kai Greshake, Sahar Abdelnabi, Shailesh Mishra, et al.",
    473       "year": 2023,
    474       "relevance": "Indirect prompt injection attacks; complements paper's focus on direct injection."
    475     },
    476     {
    477       "title": "Formalizing and benchmarking prompt injection attacks and defenses",
    478       "authors": "Yupei Liu, Yuqi Jia, Runpeng Geng, et al.",
    479       "year": 2024,
    480       "relevance": "Benchmark and formalization framework for evaluating prompt injection defenses."
    481     },
    482     {
    483       "title": "Cybersecurity evaluation suite for large language models (CybersecEval 2)",
    484       "authors": "Manish Bhatt, Sahana Chennabasappa, Yue Li, et al.",
    485       "year": 2024,
    486       "relevance": "Defense effectiveness evaluation benchmark; provides 251 attack samples used in Table 3."
    487     }
    488   ],
    489   "engagement_factors": {
    490     "practical_relevance": {
    491       "score": 3,
    492       "justification": "Deployed as input filter for mainstream LLMs (GPT-4o, Qwen, Llama, GLM-4). Reduces attack success rates from 10–72% to 10–14%, directly applicable to production systems."
    493     },
    494     "surprise_contrarian": {
    495       "score": 1,
    496       "justification": "Dual-channel approach (semantic + syntactic) is intuitive; combining pretrained models with heuristic rules is unsurprising and standard."
    497     },
    498     "fear_safety": {
    499       "score": 2,
    500       "justification": "Addresses real LLM security threat (prompt injection ranked #1 by OWASP), but is a defense paper rather than surfacing new risks."
    501     },
    502     "drama_conflict": {
    503       "score": 1,
    504       "justification": "Straightforward technical problem with engineering solution. No methodological drama, controversial claims, or conflict narrative."
    505     },
    506     "demo_ability": {
    507       "score": 2,
    508       "justification": "Code not released, but benchmark datasets (deepset, ivanleomk) are public. Could reimplement heuristic rules and test with public data, but would require effort."
    509     },
    510     "brand_recognition": {
    511       "score": 1,
    512       "justification": "Authors from Zhengzhou University, China. Not a well-known AI lab. No affiliation with major AI companies or research institutes."
    513     }
    514   },
    515   "hn_data": {
    516     "threads": [
    517       {
    518         "hn_id": "31636401",
    519         "title": "End-to-End 3D Hand Pose Estimation from Stereo Cameras",
    520         "points": 80,
    521         "comments": 4,
    522         "url": "https://news.ycombinator.com/item?id=31636401",
    523         "created_at": "2022-06-06T01:07:13Z"
    524       },
    525       {
    526         "hn_id": "36373410",
    527         "title": "A Survey of Modern Compiler Fuzzing",
    528         "points": 29,
    529         "comments": 2,
    530         "url": "https://news.ycombinator.com/item?id=36373410",
    531         "created_at": "2023-06-17T19:05:42Z"
    532       },
    533       {
    534         "hn_id": "27521090",
    535         "title": "SimSwap: An Efficient Framework for High Fidelity Face Swapping",
    536         "points": 2,
    537         "comments": 1,
    538         "url": "https://news.ycombinator.com/item?id=27521090",
    539         "created_at": "2021-06-15T20:30:01Z"
    540       },
    541       {
    542         "hn_id": "45044093",
    543         "title": "Omni Geometry Representation Learning vs. LLMs for Geospatial Entity Resolution",
    544         "points": 2,
    545         "comments": 0,
    546         "url": "https://news.ycombinator.com/item?id=45044093",
    547         "created_at": "2025-08-27T19:38:10Z"
    548       },
    549       {
    550         "hn_id": "43548771",
    551         "title": "Large Language Models Share Representations of Latent Grammatical Concepts",
    552         "points": 2,
    553         "comments": 0,
    554         "url": "https://news.ycombinator.com/item?id=43548771",
    555         "created_at": "2025-04-01T16:34:21Z"
    556       },
    557       {
    558         "hn_id": "43436502",
    559         "title": "Optimization of Monolithically Stackable Gain Cell Memory for Last-Level Cache",
    560         "points": 2,
    561         "comments": 0,
    562         "url": "https://news.ycombinator.com/item?id=43436502",
    563         "created_at": "2025-03-21T14:58:30Z"
    564       },
    565       {
    566         "hn_id": "44524946",
    567         "title": "Finding Compiler Bugs: Cross-Language Code Generator and Differential Testing",
    568         "points": 1,
    569         "comments": 0,
    570         "url": "https://news.ycombinator.com/item?id=44524946",
    571         "created_at": "2025-07-10T20:07:28Z"
    572       },
    573       {
    574         "hn_id": "43389464",
    575         "title": "Decoupling the components of geometric understanding in Vision Language Models",
    576         "points": 1,
    577         "comments": 0,
    578         "url": "https://news.ycombinator.com/item?id=43389464",
    579         "created_at": "2025-03-17T15:16:52Z"
    580       }
    581     ],
    582     "top_points": 80,
    583     "total_points": 119,
    584     "total_comments": 7
    585   }
    586 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs