ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan-v5.json (28814B)


      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Defending Against Indirect Prompt Injection Attacks With Spotlighting",
      6     "authors": [
      7       "Keegan Hines",
      8       "Gary Lopez",
      9       "Matthew Hall",
     10       "Federico Zarfati",
     11       "Yonatan Zunger"
     12     ],
     13     "year": 2024,
     14     "venue": "CAMLIS",
     15     "arxiv_id": "2403.14720",
     16     "doi": "10.48550/arXiv.2403.14720"
     17   },
     18   "checklist": {
     19     "claims_and_evidence": {
     20       "abstract_claims_supported": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "Abstract claims (ASR reduction from >50% to <2%) are supported by Figures 4-6, though specific numbers vary by technique and model. Datamarking achieves 0-3.1% ASR; encoding achieves 0-1.8% ASR.",
     24         "source": "haiku"
     25       },
     26       "causal_claims_justified": {
     27         "applies": true,
     28         "answer": true,
     29         "justification": "Causal claims (spotlighting reduces ASR, does not impair performance) are tested via before/after comparisons with/without techniques. No randomization, but appropriate comparative design for prompt engineering evaluation.",
     30         "source": "haiku"
     31       },
     32       "generalization_bounded": {
     33         "applies": true,
     34         "answer": true,
     35         "justification": "Results explicitly bounded to GPT-family models (text-davinci-003, GPT-3.5, GPT-4) and 2 task types (summarization, Q&A). Paper notes encoding only suitable for high-capacity models, but doesn't discuss applicability to non-OpenAI architectures.",
     36         "source": "haiku"
     37       },
     38       "alternative_explanations_discussed": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "Paper states 'we lack a clear understanding of why spotlighting actually helps' (Section 6). Provides telecommunications analogy but no rigorous mechanism exploration or alternative hypotheses tested.",
     42         "source": "haiku"
     43       },
     44       "proxy_outcome_distinction": {
     45         "applies": true,
     46         "answer": true,
     47         "justification": "Attack Success Rate is precisely defined in Section 4.2 and Appendix 8.1 as return of specific keyword; distinguished from Affected Success Rate (AffSR) in appendix. Clear mapping between measured outcome and claim.",
     48         "source": "haiku"
     49       }
     50     },
     51     "limitations_and_scope": {
     52       "limitations_section_present": {
     53         "applies": true,
     54         "answer": false,
     55         "justification": "No dedicated limitations section. Caveats scattered across Results (Section 5.2-5.4), Discussion (Section 6), and Appendix (8.2), but not compiled into formal threats-to-validity discussion.",
     56         "source": "haiku"
     57       },
     58       "threats_to_validity_specific": {
     59         "applies": true,
     60         "answer": true,
     61         "justification": "Specific threats discussed: encoding only for high-capacity models (5.2-5.3), few-shot knowledge-boundedness (Appendix 8.2), adversarial subversion paths per technique (5.4). Not systematic, but concrete.",
     62         "source": "haiku"
     63       },
     64       "scope_boundaries_stated": {
     65         "applies": true,
     66         "answer": true,
     67         "justification": "Explicitly bounded to GPT-family black-box models (Section 4.1), summarization and Q&A tasks (Sections 4-5), synthetic keyword-based attacks. Does not discuss generalization to open-source models, other domains, or sophisticated attack strategies.",
     68         "source": "haiku"
     69       }
     70     },
     71     "conflicts_of_interest": {
     72       "funding_disclosed": {
     73         "applies": true,
     74         "answer": false,
     75         "justification": "No funding disclosure or acknowledgments section visible in paper. Authors list Microsoft affiliation but no funding source stated.",
     76         "source": "haiku"
     77       },
     78       "affiliations_disclosed": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "All authors listed as Microsoft. Relevant because paper evaluates OpenAI models (competitors), but affiliation clearly stated.",
     82         "source": "haiku"
     83       },
     84       "funder_independent_of_outcome": {
     85         "applies": true,
     86         "answer": true,
     87         "justification": "Microsoft (employer) does not provide the models being evaluated (OpenAI). Microsoft benefits from LLM security broadly, but not directly from OpenAI product improvement.",
     88         "source": "haiku"
     89       },
     90       "financial_interests_declared": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "No competing interests statement or financial interests declaration present.",
     94         "source": "haiku"
     95       }
     96     },
     97     "scope_and_framing": {
     98       "key_terms_defined": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "Key terms defined: Indirect prompt injection attacks/XPIA (2.2), Attack Success Rate (4.2, Appendix 8.1), spotlighting family (3.0), datamarking/encoding/delimiting (3.2-3.4).",
    102         "source": "haiku"
    103       },
    104       "intended_contribution_clear": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "Contribution explicitly stated: introduce spotlighting (family of three prompt engineering techniques: delimiting, datamarking, encoding) for defending against indirect prompt injection attacks. Evaluation on effectiveness is clearly framed.",
    108         "source": "haiku"
    109       },
    110       "engagement_with_prior_work": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "Related work section (2.2-2.3) cites Yi et al. 2023 on XPIA, Greshake/Bard attacks, safety alignment work. Paper states 'Early versions of some of these techniques have been described previously [2], and here we expand the results,' but doesn't deeply contrast novelty from prior approaches.",
    114         "source": "haiku"
    115       }
    116     }
    117   },
    118   "type_checklist": {
    119     "empirical": {
    120       "artifacts": {
    121         "code_released": {
    122           "applies": true,
    123           "answer": false,
    124           "justification": "No code repository, GitHub link, or supplementary code mentioned. Techniques described in prose and example prompts provided, but no deployable implementation.",
    125           "source": "haiku"
    126         },
    127         "data_released": {
    128           "applies": true,
    129           "answer": false,
    130           "justification": "Synthetic 1000-document attack dataset not released or available. Standard benchmarks (SQuAD, IMDB, SuperGLUE) are public but not the paper's attack corpus.",
    131           "source": "haiku"
    132         },
    133         "environment_specified": {
    134           "applies": true,
    135           "answer": false,
    136           "justification": "Model names and temperature (1.0) specified, but no requirements.txt, Dockerfile, conda env, or reproducibility config provided. API details minimal.",
    137           "source": "haiku"
    138         },
    139         "reproduction_instructions": {
    140           "applies": true,
    141           "answer": false,
    142           "justification": "Techniques described in natural language with example prompts shown (Sections 3.2-3.4), but no step-by-step reproduction instructions or automation scripts. Implementation would require custom development.",
    143           "source": "haiku"
    144         }
    145       },
    146       "statistical_methodology": {
    147         "confidence_intervals_or_error_bars": {
    148           "applies": true,
    149           "answer": false,
    150           "justification": "Figures 3-8 show point estimates without error bars or confidence intervals. Single run reported per condition, no variance bounds.",
    151           "source": "haiku"
    152         },
    153         "significance_tests": {
    154           "applies": true,
    155           "answer": false,
    156           "justification": "No p-values, t-tests, or statistical significance tests reported. ASR reductions presented as raw percentages without hypothesis testing.",
    157           "source": "haiku"
    158         },
    159         "effect_sizes_reported": {
    160           "applies": true,
    161           "answer": true,
    162           "justification": "ASR reductions reported in percentage points (e.g., 50%→3%, 60%→0%). Effect sizes are quantified as raw ASR differences; no p-values are reported.",
    163           "source": "haiku"
    164         },
    165         "sample_size_justified": {
    166           "applies": true,
    167           "answer": false,
    168           "justification": "Paper states 'we generated a synthetic dataset of 1000 documents' but does not justify this sample size or discuss power analysis. No minimum sample size calculated.",
    169           "source": "haiku"
    170         },
    171         "variance_reported": {
    172           "applies": true,
    173           "answer": false,
    174           "justification": "No multiple runs with different random seeds shown. No SD/variance/min-max ranges reported. Results presented as single-point estimates.",
    175           "source": "haiku"
    176         }
    177       },
    178       "evaluation_design": {
    179         "baselines_included": {
    180           "applies": true,
    181           "answer": true,
    182           "justification": "Multiple baselines compared: no defense (baseline ASR), instruction-only, delimiting, datamarking, encoding. Also compared to few-shot approach in appendix.",
    183           "source": "haiku"
    184         },
    185         "baselines_contemporary": {
    186           "applies": true,
    187           "answer": true,
    188           "justification": "Baselines are contemporary GPT models (June 2023 snapshots). However, no comparison to other defense methods from Section 2.3 (fine-tuning, other prompt-engineering defenses).",
    189           "source": "haiku"
    190         },
    191         "ablation_study": {
    192           "applies": true,
    193           "answer": true,
    194           "justification": "Three spotlighting instantiations (delimiting, datamarking, encoding) serve as ablations of increasing sophistication. Progressive improvements shown (Figures 3-6).",
    195           "source": "haiku"
    196         },
    197         "multiple_metrics": {
    198           "applies": true,
    199           "answer": true,
    200           "justification": "Primary metric: Attack Success Rate (ASR). Secondary metrics: task performance on 4 NLP benchmarks (SQuAD, IMDB, SuperGLUE BoolQ, SuperGLUE WiC). Figures 7-8 show accuracy impacts.",
    201           "source": "haiku"
    202         },
    203         "human_evaluation": {
    204           "applies": false,
    205           "answer": false,
    206           "justification": "No human evaluation of model outputs. Not clearly required for technical evaluation of prompt injection defense.",
    207           "source": "haiku"
    208         },
    209         "held_out_test_set": {
    210           "applies": true,
    211           "answer": true,
    212           "justification": "Standard benchmarks use held-out test sets (SQuAD, IMDB, SuperGLUE are standard). For synthetic attack corpus, no train/test split mentioned; single 1000-document set.",
    213           "source": "haiku"
    214         },
    215         "per_category_breakdown": {
    216           "applies": true,
    217           "answer": true,
    218           "justification": "Results broken down by model (text-davinci-003, GPT-3.5-Turbo, GPT-4), task type (summarization, Q&A), and technique (delimiting, datamarking, encoding). Benchmark breakdowns in Figures 7-8.",
    219           "source": "haiku"
    220         },
    221         "failure_cases_discussed": {
    222           "applies": true,
    223           "answer": true,
    224           "justification": "Encoding fails with GPT-3.5-Turbo (Figure 8, task performance degradation). Delimiting shown insufficient (Figure 3). Appendix 8.2 discusses few-shot caveats. Limited analysis of attack vectors spotlighting cannot defend against.",
    225           "source": "haiku"
    226         },
    227         "negative_results_reported": {
    228           "applies": true,
    229           "answer": true,
    230           "justification": "Paper reports: delimiting alone insufficient, encoding hurts task accuracy with weaker models, few-shot examples overfit to known attacks. Honest about limitations.",
    231           "source": "haiku"
    232         }
    233       },
    234       "setup_transparency": {
    235         "model_versions_specified": {
    236           "applies": true,
    237           "answer": true,
    238           "justification": "Specific model snapshots: text-davinci-003, GPT-3.5-Turbo (June 2023), GPT-4 (June 2023). Dates provided for reproducibility.",
    239           "source": "haiku"
    240         },
    241         "prompts_provided": {
    242           "applies": true,
    243           "answer": true,
    244           "justification": "Full example system prompts shown for: instructions-only baseline (4.2), delimiting (3.2), datamarking (3.3), encoding (3.4). Templates can be copied directly.",
    245           "source": "haiku"
    246         },
    247         "hyperparameters_reported": {
    248           "applies": true,
    249           "answer": true,
    250           "justification": "Temperature=1.0 specified with note: 'We examined the effect of temperature on XPIA susceptibility and found no notable impact.' Only temperature reported; no top-p, frequency_penalty, etc.",
    251           "source": "haiku"
    252         },
    253         "scaffolding_described": {
    254           "applies": false,
    255           "answer": false,
    256           "justification": "Not an agentic system; pure prompt engineering. No scaffolding (tools, actions, loops) to describe.",
    257           "source": "haiku"
    258         },
    259         "data_preprocessing_documented": {
    260           "applies": true,
    261           "answer": false,
    262           "justification": "Attack dataset described as 'synthetic... containing prompt injection attacks' with 'variations on a simple keyword payload attack,' but generation algorithm/process not documented. No code for reproducing dataset.",
    263           "source": "haiku"
    264         }
    265       },
    266       "data_integrity": {
    267         "raw_data_available": {
    268           "applies": true,
    269           "answer": false,
    270           "justification": "Synthetic 1000-document attack corpus not released or available for verification. Standard benchmark raw data (SQuAD, IMDB) are publicly available but not paper-specific.",
    271           "source": "haiku"
    272         },
    273         "data_collection_described": {
    274           "applies": true,
    275           "answer": true,
    276           "justification": "Attack data: 'generated synthetic dataset of 1000 documents... variations on simple keyword payload attack.' Benchmarks: uses standard published datasets. Description adequate for understanding but not for reproduction.",
    277           "source": "haiku"
    278         },
    279         "recruitment_methods_described": {
    280           "applies": false,
    281           "answer": false,
    282           "justification": "No human subjects; N/A.",
    283           "source": "haiku"
    284         },
    285         "data_pipeline_documented": {
    286           "applies": true,
    287           "answer": true,
    288           "justification": "For benchmarks, standard pipelines used. For attack dataset, pipeline partially described: documents → prompts with models → responses → ASR scoring. Full generation process not detailed.",
    289           "source": "haiku"
    290         }
    291       },
    292       "contamination": {
    293         "training_cutoff_stated": {
    294           "applies": true,
    295           "answer": false,
    296           "justification": "Models identified by snapshot (June 2023) but exact training data cutoff dates not stated. Paper does not discuss what dates these versions were trained on.",
    297           "source": "haiku"
    298         },
    299         "train_test_overlap_discussed": {
    300           "applies": true,
    301           "answer": false,
    302           "justification": "No discussion of whether benchmark test sets (SQuAD 2016, IMDB, SuperGLUE) may have been in training data of June 2023 model snapshots.",
    303           "source": "haiku"
    304         },
    305         "benchmark_contamination_addressed": {
    306           "applies": true,
    307           "answer": false,
    308           "justification": "Synthetic attack dataset is new, so no contamination there. But standard benchmarks potentially contaminated—not addressed. Paper evaluates model performance on these benchmarks without discussing potential data leakage.",
    309           "source": "haiku"
    310         }
    311       },
    312       "human_studies": {
    313         "pre_registered": {
    314           "applies": false,
    315           "answer": false,
    316           "justification": "No human subjects; N/A.",
    317           "source": "haiku"
    318         },
    319         "irb_or_ethics_approval": {
    320           "applies": false,
    321           "answer": false,
    322           "justification": "No human subjects; N/A.",
    323           "source": "haiku"
    324         },
    325         "demographics_reported": {
    326           "applies": false,
    327           "answer": false,
    328           "justification": "No human subjects; N/A.",
    329           "source": "haiku"
    330         },
    331         "inclusion_exclusion_criteria": {
    332           "applies": false,
    333           "answer": false,
    334           "justification": "No human subjects; N/A.",
    335           "source": "haiku"
    336         },
    337         "randomization_described": {
    338           "applies": false,
    339           "answer": false,
    340           "justification": "No human subjects; N/A.",
    341           "source": "haiku"
    342         },
    343         "blinding_described": {
    344           "applies": false,
    345           "answer": false,
    346           "justification": "No human subjects; N/A.",
    347           "source": "haiku"
    348         },
    349         "attrition_reported": {
    350           "applies": false,
    351           "answer": false,
    352           "justification": "No human subjects; N/A.",
    353           "source": "haiku"
    354         }
    355       },
    356       "cost_and_practicality": {
    357         "inference_cost_reported": {
    358           "applies": true,
    359           "answer": false,
    360           "justification": "No inference cost ($ per API call) or latency reported. Experiments used OpenAI API but no pricing/time data disclosed.",
    361           "source": "haiku"
    362         },
    363         "compute_budget_stated": {
    364           "applies": true,
    365           "answer": false,
    366           "justification": "Total computational budget ($ or compute hours) not stated. 1000 attack documents × 3 models × multiple tasks = thousands of API calls, but no aggregate cost reported.",
    367           "source": "haiku"
    368         }
    369       }
    370     }
    371   },
    372   "claims": [
    373     {
    374       "claim": "Spotlighting via datamarking reduces attack success rate (ASR) from ~50% to ~3% with GPT-3.5-Turbo and to 0% with text-davinci-003",
    375       "evidence": "Figure 4 (document summarization) and Figure 5 (Q&A tasks) show ASR percentages across models. Specific numbers: GPT-3.5-Turbo 50%→3.1%, text-davinci-003 40%→0%.",
    376       "supported": "strong"
    377     },
    378     {
    379       "claim": "Spotlighting via encoding reduces ASR to 0-1.8% across tasks",
    380       "evidence": "Figure 6 shows encoding results: summarization 0.0% ASR with GPT-3.5-Turbo, Q&A 1.8% ASR. Consistent across models.",
    381       "supported": "strong"
    382     },
    383     {
    384       "claim": "Datamarking transformations have minimal detrimental impact on downstream NLP task performance",
    385       "evidence": "Figure 7 shows no detrimental effect on SQuAD, IMDB, SuperGLUE BoolQ/WiC benchmarks with datamarking present.",
    386       "supported": "strong"
    387     },
    388     {
    389       "claim": "Encoding transformations degrade task performance with GPT-3.5-Turbo but not GPT-4",
    390       "evidence": "Figure 8 shows GPT-3.5-Turbo accuracy drops significantly with encoding (top row), while GPT-4 maintains high accuracy (bottom row).",
    391       "supported": "strong"
    392     },
    393     {
    394       "claim": "Simple instructions to avoid prompt injection have 'almost no added benefit' for GPT-3.5-Turbo",
    395       "evidence": "Figure 2 shows instructions-only approach yields minimal ASR reduction for GPT-3.5-Turbo vs baseline.",
    396       "supported": "moderate"
    397     },
    398     {
    399       "claim": "Spotlighting is more robust than simple delimiting because adversaries with knowledge of system prompts can easily subvert delimiters",
    400       "evidence": "Section 5.4 discusses adversary considerations: 'If an adversary gains knowledge of our system prompt... it would be simple to craft a string that contains our delimiters.' Datamarking/encoding harder to subvert with dynamic tokens.",
    401       "supported": "moderate"
    402     },
    403     {
    404       "claim": "Few-shot examples can reduce ASR below 5% but risk overfitting to known attack patterns",
    405       "evidence": "Appendix 8.2 shows Figure 9 with few-shot examples achieving <5% ASR, but text cautions: 'relying on in-context learning will always be limited by our current understanding of typical attack tactics.'",
    406       "supported": "moderate"
    407     }
    408   ],
    409   "methodology_tags": [
    410     "benchmark-eval"
    411   ],
    412   "key_findings": "Spotlighting, a family of three prompt engineering techniques (delimiting, datamarking, encoding), significantly reduces indirect prompt injection attack success rate from 50%+ to below 2%. Datamarking achieves this reduction with minimal impact on downstream NLP task performance across multiple benchmarks. Encoding is most effective but only suitable for high-capacity models (GPT-4), as it degrades performance in GPT-3.5-Turbo. The findings suggest that structural transformations making input provenance more salient to models are necessary because simple instructions alone are insufficient defense.",
    413   "red_flags": [
    414     {
    415       "flag": "No statistical significance testing",
    416       "detail": "All results reported as point estimates without confidence intervals, error bars, standard deviations, or p-values. Cannot assess whether observed ASR differences are statistically reliable or due to random variation."
    417     },
    418     {
    419       "flag": "Synthetic attack dataset not released",
    420       "detail": "The 1000-document corpus used for evaluation is not available for independent verification or reproduction. Limits scientific reproducibility."
    421     },
    422     {
    423       "flag": "Sample size not justified",
    424       "detail": "No power analysis or justification provided for why 1000 attack documents is sufficient. No minimum sample size calculated based on effect sizes."
    425     },
    426     {
    427       "flag": "Limited to GPT models only",
    428       "detail": "Evaluation only on OpenAI models (text-davinci-003, GPT-3.5, GPT-4). Generalization to Llama, Claude, and other LLMs unknown."
    429     },
    430     {
    431       "flag": "Attacks are simplistic",
    432       "detail": "All attacks are 'variations on a simple keyword payload attack.' May not reflect sophisticated adversarial strategies that target semantic vulnerabilities or use knowledge of spotlighting techniques."
    433     },
    434     {
    435       "flag": "No code or data release",
    436       "detail": "No GitHub repository, supplementary materials, or code artifacts provided. Implementation requires custom development from prose descriptions."
    437     },
    438     {
    439       "flag": "No comparison to alternative defenses",
    440       "detail": "Paper discusses other approaches (fine-tuning, alignment tuning, classifiers) in Section 2.3 but does not empirically compare spotlighting to any competing defense methods."
    441     },
    442     {
    443       "flag": "Training data contamination not addressed",
    444       "detail": "Benchmark test sets (SQuAD 2016, IMDB, SuperGLUE) may have been in training data of June 2023 LLM snapshots. Potential data leakage not discussed."
    445     },
    446     {
    447       "flag": "Mechanism unclear",
    448       "detail": "Paper acknowledges 'we lack a clear understanding of why spotlighting actually helps' (Section 6). No mechanistic explanation or ablation to understand which aspects of marking/encoding are necessary."
    449     },
    450     {
    451       "flag": "Adversarial evaluation incomplete",
    452       "detail": "Section 5.4 discusses attack vectors against each technique but does not empirically test whether sophisticated adversaries can craft attacks that bypass spotlighting."
    453     }
    454   ],
    455   "cited_papers": [
    456     {
    457       "title": "Benchmarking and Defending Against Indirect Prompt Injection Attacks on Large Language Models",
    458       "relevance": "Core prior work on XPIA problem; paper extends some spotlighting techniques from this baseline [Yi et al. 2023]"
    459     },
    460     {
    461       "title": "More than you've asked for: A Comprehensive Analysis of Novel Prompt Injection Threats to Application-Integrated Large Language Models",
    462       "relevance": "Foundational work identifying indirect prompt injection threats in LLM systems [Greshake et al.]"
    463     },
    464     {
    465       "title": "How We Broke LLMs: Indirect Prompt Injection",
    466       "relevance": "Early demonstration of XPIA vulnerability in practice [Greshake blog post, 2022]"
    467     },
    468     {
    469       "title": "Hacking Google Bard - From Prompt Injection to Data Exfiltration",
    470       "relevance": "Empirical demonstration of XPIA attack enabling data exfiltration in real deployed system [Wunderwuzzi]"
    471     },
    472     {
    473       "title": "Chain-of-Thought Prompting Elicits Reasoning in Large Language Models",
    474       "relevance": "Foundation for understanding prompt engineering effectiveness and model instruction-following behavior [Wei et al.]"
    475     },
    476     {
    477       "title": "Universal and Transferable Adversarial Attacks on Aligned Language Models",
    478       "relevance": "Relevant for understanding adversarial robustness of LLMs and potential attack transferability [Zou et al. 2023]"
    479     },
    480     {
    481       "title": "SQuAD: 100,000+ Questions for Machine Comprehension of Text",
    482       "relevance": "Benchmark used for evaluating downstream task performance impact of spotlighting transformations [Rajpurkar et al.]"
    483     },
    484     {
    485       "title": "SuperGLUE: A Stickier Benchmark for General-Purpose Language Understanding Systems",
    486       "relevance": "Benchmark used to evaluate spotlighting impact on multiple NLP tasks [Wang et al.]"
    487     }
    488   ],
    489   "engagement_factors": {
    490     "practical_relevance": {
    491       "score": 3,
    492       "justification": "Directly implementable in production LLM systems today. Requires only prompt engineering changes, not model retraining. Teams can add datamarking/encoding immediately."
    493     },
    494     "surprise_contrarian": {
    495       "score": 2,
    496       "justification": "Core insight (marking provenance helps models distinguish code from data) is intuitive once stated, though specific techniques are novel. Does not challenge conventional wisdom fundamentally."
    497     },
    498     "fear_safety": {
    499       "score": 2,
    500       "justification": "Addresses real prompt injection vulnerability in deployed systems. However, positions spotlighting as limited defense ('security against interference' not 'perfectly secure'), avoiding overclaiming."
    501     },
    502     "demo_ability": {
    503       "score": 2,
    504       "justification": "Practitioners can implement spotlighting prompts immediately, but full evaluation requires GPT API access and attack corpus. Not fully reproducible without released code/data."
    505     },
    506     "brand_recognition": {
    507       "score": 2,
    508       "justification": "Authors from Microsoft (reputable), but no Nobel laureate labs or breakthrough-tier recognition. Venue (CAMLIS) is specialized security conference, not top-tier ML venue."
    509     },
    510     "drama_conflict": {
    511       "score": 1,
    512       "justification": "Straightforward technical contribution with no controversy. No competing claims, no debate about methods or findings. Lacking narrative tension."
    513     }
    514   },
    515   "hn_data": {
    516     "threads": [
    517       {
    518         "hn_id": "22768143",
    519         "title": "Deep Molecular Programming",
    520         "points": 130,
    521         "comments": 11,
    522         "url": "https://news.ycombinator.com/item?id=22768143"
    523       },
    524       {
    525         "hn_id": "39466681",
    526         "title": "Coercing LLMs to do and reveal almost anything",
    527         "points": 12,
    528         "comments": 1,
    529         "url": "https://news.ycombinator.com/item?id=39466681"
    530       },
    531       {
    532         "hn_id": "45489599",
    533         "title": "Tutorials for Sandia's Lammps Simulation Package",
    534         "points": 8,
    535         "comments": 1,
    536         "url": "https://news.ycombinator.com/item?id=45489599"
    537       },
    538       {
    539         "hn_id": "44478832",
    540         "title": "CodingGenie: A Proactive LLM-Powered Programming Assistant",
    541         "points": 5,
    542         "comments": 0,
    543         "url": "https://news.ycombinator.com/item?id=44478832"
    544       },
    545       {
    546         "hn_id": "23363404",
    547         "title": "“Periodic table” for protons in the nucleus",
    548         "points": 4,
    549         "comments": 0,
    550         "url": "https://news.ycombinator.com/item?id=23363404"
    551       },
    552       {
    553         "hn_id": "44415220",
    554         "title": "Storm – Help LLMs to write very long articles",
    555         "points": 2,
    556         "comments": 0,
    557         "url": "https://news.ycombinator.com/item?id=44415220"
    558       },
    559       {
    560         "hn_id": "43540243",
    561         "title": "AttentionRAG: Attention-Guided Context Pruning in Retrieval-Augmented Generation",
    562         "points": 2,
    563         "comments": 0,
    564         "url": "https://news.ycombinator.com/item?id=43540243"
    565       },
    566       {
    567         "hn_id": "41125541",
    568         "title": "Solving the Traveling Salesman Problem Using a Single Qubit",
    569         "points": 2,
    570         "comments": 0,
    571         "url": "https://news.ycombinator.com/item?id=41125541"
    572       },
    573       {
    574         "hn_id": "41066825",
    575         "title": "Solving the Travelling Salesman Problem Using a Single Qubit",
    576         "points": 2,
    577         "comments": 0,
    578         "url": "https://news.ycombinator.com/item?id=41066825"
    579       },
    580       {
    581         "hn_id": "40822524",
    582         "title": "Do LLMs Have Distinct and Consistent Personality?",
    583         "points": 2,
    584         "comments": 0,
    585         "url": "https://news.ycombinator.com/item?id=40822524"
    586       }
    587     ],
    588     "top_points": 130,
    589     "total_points": 169,
    590     "total_comments": 13
    591   }
    592 }

Impressum · Datenschutz