ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (28082B)


      1 {
      2   "paper": {
      3     "title": "Review of Generative AI Methods in Cybersecurity",
      4     "authors": [
      5       "Yagmur Yigit",
      6       "William J Buchanan",
      7       "Madjid G Tehrani",
      8       "Leandros Maglaras"
      9     ],
     10     "year": 2024,
     11     "venue": "arXiv.org",
     12     "arxiv_id": "2403.08701",
     13     "doi": "10.48550/arXiv.2403.08701"
     14   },
     15   "scan_version": 3,
     16   "active_modules": ["survey_methodology"],
     17   "methodology_tags": ["meta-analysis", "qualitative", "case-study"],
     18   "key_findings": "This narrative review surveys GenAI applications in cybersecurity across offensive (jailbreaking, phishing, malware generation, automated hacking) and defensive (threat intelligence, vulnerability detection, secure code generation) use cases. The authors informally demonstrate that ChatGPT-4 and Google Gemini remain susceptible to jailbreaking and reverse psychology manipulation despite built-in safeguards, and show GPT-4 can generate attack payloads and educational ransomware code. The paper emphasizes the dual-use nature of GenAI and calls for interdisciplinary collaboration on ethical frameworks, but lacks systematic methodology or quantitative analysis.",
     19   "checklist": {
     20     "artifacts": {
     21       "code_released": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "No source code, repository URL, or archive is provided anywhere in the paper. The jailbreaking and malware demonstrations are shown only as screenshots in figures."
     25       },
     26       "data_released": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "No dataset, corpus of reviewed papers, or structured analysis data is released. The paper does not provide any downloadable data."
     30       },
     31       "environment_specified": {
     32         "applies": true,
     33         "answer": false,
     34         "justification": "No environment specifications, dependency lists, or tool versions are provided. The paper mentions using ChatGPT-4 and Google Gemini but provides no setup details for reproducing the demonstrations."
     35       },
     36       "reproduction_instructions": {
     37         "applies": true,
     38         "answer": false,
     39         "justification": "No reproduction instructions are provided. Neither the literature review process nor the informal demonstrations include step-by-step instructions for replication."
     40       }
     41     },
     42     "statistical_methodology": {
     43       "confidence_intervals_or_error_bars": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "This is a narrative review/survey paper with no quantitative experiments producing statistical results."
     47       },
     48       "significance_tests": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No statistical comparisons are made. The paper is a qualitative literature review with informal demonstrations."
     52       },
     53       "effect_sizes_reported": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No experiments are conducted that would produce effect sizes. The paper reviews and summarizes existing literature."
     57       },
     58       "sample_size_justified": {
     59         "applies": false,
     60         "answer": false,
     61         "justification": "No experimental sample is defined. This is a survey paper."
     62       },
     63       "variance_reported": {
     64         "applies": false,
     65         "answer": false,
     66         "justification": "No experimental runs are conducted. This is a survey paper with qualitative demonstrations."
     67       }
     68     },
     69     "evaluation_design": {
     70       "baselines_included": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "The survey does not compare its scope, methodology, or coverage against prior surveys on GenAI in cybersecurity. Section 1.2 mentions related works but does not systematically compare against them."
     74       },
     75       "baselines_contemporary": {
     76         "applies": true,
     77         "answer": false,
     78         "justification": "No prior surveys are used as baselines for comparison. The paper does not position itself against existing reviews or explain what additional coverage it provides."
     79       },
     80       "ablation_study": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "Not applicable to a survey paper — there is no system with components to ablate."
     84       },
     85       "multiple_metrics": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "Not applicable — no experiments are conducted that would require evaluation metrics."
     89       },
     90       "human_evaluation": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "Not applicable — the paper does not produce system outputs that require human evaluation."
     94       },
     95       "held_out_test_set": {
     96         "applies": false,
     97         "answer": false,
     98         "justification": "Not applicable — no experiments with train/test splits are conducted."
     99       },
    100       "per_category_breakdown": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "The paper organizes findings into distinct categories: attacking GenAI (Section 2: jailbreaks, reverse psychology, prompt injection), cyber offense (Section 3: social engineering, phishing, automated hacking, payload generation, malware), cyber defense (Section 4: automation, reporting, threat intelligence, code security, vulnerability detection), and ethical/legal implications (Section 5)."
    104       },
    105       "failure_cases_discussed": {
    106         "applies": true,
    107         "answer": true,
    108         "justification": "The paper discusses cases where GenAI fails or has limitations: ChatGPT hallucinations (Section 5.6), limitations in dynamic code analysis (Section 4.4), ChatGPT's decreased accuracy with job specificity (Section 4.8), and Gemini producing the most code vulnerabilities (Section 4.6)."
    109       },
    110       "negative_results_reported": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "The paper reports several negative findings: existing DAN jailbreak techniques no longer work on current GPT-4 (Section 3.5), ChatGPT and GPT-3 matched only dummy classifier performance for vulnerability detection (Section 4.5, citing Cheshkov et al.), and GPT's accuracy decreases with job specificity (Section 4.8)."
    114       }
    115     },
    116     "claims_and_evidence": {
    117       "abstract_claims_supported": {
    118         "applies": true,
    119         "answer": true,
    120         "justification": "The abstract claims are generally matched by the paper content: it provides an overview of GenAI attacks (jailbreaking, prompt injection, reverse psychology in Section 2), cybercrimes (Section 3), defensive applications (Section 4), and ethical considerations (Section 5). The claims are descriptive rather than empirical, and the paper delivers on the described scope."
    121       },
    122       "causal_claims_justified": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "The paper makes causal-adjacent claims like 'GenAI can significantly improve the automation of defensive cyber security processes' (abstract) and 'ChatGPT can simplify the process of launching complex phishing attacks' (citing ref [28]). These claims are supported only by narrative review and informal demonstrations, not by controlled experiments or rigorous causal evidence."
    126       },
    127       "generalization_bounded": {
    128         "applies": true,
    129         "answer": false,
    130         "justification": "The title claims to review 'Generative AI Methods in Cybersecurity' broadly, but the paper focuses almost exclusively on ChatGPT (GPT-3.5/4) and Google Gemini/Bard. Other GenAI systems (WormGPT, FraudGPT, XXXGPT, WolfGPT) are mentioned but not evaluated. The demonstrations are limited to a few informal tests, yet conclusions are drawn about GenAI capabilities in general."
    131       },
    132       "alternative_explanations_discussed": {
    133         "applies": true,
    134         "answer": false,
    135         "justification": "The paper does not consider alternative explanations for its observations. For example, when demonstrating jailbreaking, it does not consider whether the responses would actually be useful for real attacks, or whether the generated code would function in practice. No confounds or alternative interpretations are discussed."
    136       },
    137       "proxy_outcome_distinction": {
    138         "applies": true,
    139         "answer": false,
    140         "justification": "The paper shows that GenAI can produce text that resembles malware code, attack payloads, and phishing emails, but does not distinguish between producing such text and its actual operational viability. Generating skeleton ransomware code (Fig 11) or a rootkit snippet (Fig A6) is treated as evidence that GenAI enables these attacks, without verifying whether the outputs are functional, evasive, or practically deployable."
    141       }
    142     },
    143     "setup_transparency": {
    144       "model_versions_specified": {
    145         "applies": true,
    146         "answer": false,
    147         "justification": "The paper refers to 'ChatGPT 4', 'GPT-4', 'GPT-3.5', and 'Google's Gemini' without specifying exact model versions, API snapshot dates, or access dates for the demonstrations. No version identifiers like 'gpt-4-0613' are provided."
    148       },
    149       "prompts_provided": {
    150         "applies": true,
    151         "answer": true,
    152         "justification": "The jailbreaking prompt text is provided verbatim in Section 2.1 (the 'JailBreak' persona prompt). Conversation screenshots showing prompts and responses are included in Figures 2-8. The attack payload and malware prompts are shown in appendix figures."
    153       },
    154       "hyperparameters_reported": {
    155         "applies": true,
    156         "answer": false,
    157         "justification": "No hyperparameters (temperature, top-p, max tokens) are reported for any of the ChatGPT or Gemini interactions shown in the paper."
    158       },
    159       "scaffolding_described": {
    160         "applies": false,
    161         "answer": false,
    162         "justification": "No agentic scaffolding is used. The demonstrations involve direct conversational prompting of ChatGPT and Gemini."
    163       },
    164       "data_preprocessing_documented": {
    165         "applies": true,
    166         "answer": false,
    167         "justification": "The paper provides no description of how the literature was collected, searched, or filtered. There is no search strategy, database selection, keyword specification, or inclusion/exclusion criteria for the reviewed papers."
    168       }
    169     },
    170     "limitations_and_scope": {
    171       "limitations_section_present": {
    172         "applies": true,
    173         "answer": false,
    174         "justification": "There is no dedicated limitations or threats-to-validity section. The Discussion (Section 6) and Conclusion (Section 7) mention broad challenges and future directions but do not substantively discuss limitations of the review itself."
    175       },
    176       "threats_to_validity_specific": {
    177         "applies": true,
    178         "answer": false,
    179         "justification": "No specific threats to validity are discussed. The paper does not acknowledge limitations of its informal demonstration methodology, potential selection bias in reviewed literature, or the narrow scope of GenAI tools tested."
    180       },
    181       "scope_boundaries_stated": {
    182         "applies": true,
    183         "answer": false,
    184         "justification": "The paper does not explicitly state what it does not cover. There is no mention of excluded topics (e.g., GenAI for privacy, GenAI in non-English contexts, open-source LLMs beyond those mentioned), excluded time periods, or explicit non-claims."
    185       }
    186     },
    187     "data_integrity": {
    188       "raw_data_available": {
    189         "applies": true,
    190         "answer": false,
    191         "justification": "No raw data is available. The corpus of reviewed papers is not provided as a structured dataset, conversation logs from demonstrations are not released, and no supplementary materials are provided."
    192       },
    193       "data_collection_described": {
    194         "applies": true,
    195         "answer": false,
    196         "justification": "The paper does not describe how the reviewed literature was collected. There is no mention of which databases were searched, what search queries were used, or what time period was covered."
    197       },
    198       "recruitment_methods_described": {
    199         "applies": false,
    200         "answer": false,
    201         "justification": "No human participants are involved. The reviewed literature is the 'data', but the absence of systematic selection methodology is captured under data_collection_described."
    202       },
    203       "data_pipeline_documented": {
    204         "applies": true,
    205         "answer": false,
    206         "justification": "No data pipeline is documented. The paper goes from referencing works to presenting findings without describing any intermediate filtering, categorization, or analysis process for the reviewed literature."
    207       }
    208     },
    209     "conflicts_of_interest": {
    210       "funding_disclosed": {
    211         "applies": true,
    212         "answer": false,
    213         "justification": "No funding source, acknowledgments section, or grant information is mentioned anywhere in the paper."
    214       },
    215       "affiliations_disclosed": {
    216         "applies": true,
    217         "answer": true,
    218         "justification": "Author affiliations are listed via numbered superscripts on the title page (institutions 1-3), following standard academic practice."
    219       },
    220       "funder_independent_of_outcome": {
    221         "applies": true,
    222         "answer": false,
    223         "justification": "No funding is disclosed, so independence of funder cannot be verified. The absence of a funding statement makes this impossible to assess."
    224       },
    225       "financial_interests_declared": {
    226         "applies": true,
    227         "answer": false,
    228         "justification": "No competing interests or financial interests statement is included in the paper."
    229       }
    230     },
    231     "contamination": {
    232       "training_cutoff_stated": {
    233         "applies": false,
    234         "answer": false,
    235         "justification": "This is a survey/review paper. It does not evaluate a pre-trained model's capability on any benchmark — the informal demonstrations test model safety guardrails, not knowledge."
    236       },
    237       "train_test_overlap_discussed": {
    238         "applies": false,
    239         "answer": false,
    240         "justification": "Not applicable. The paper does not evaluate model performance on benchmarks where train/test overlap would be a concern."
    241       },
    242       "benchmark_contamination_addressed": {
    243         "applies": false,
    244         "answer": false,
    245         "justification": "Not applicable. No benchmark evaluation is conducted in this paper."
    246       }
    247     },
    248     "human_studies": {
    249       "pre_registered": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants are involved in this survey paper."
    253       },
    254       "irb_or_ethics_approval": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants are involved in this survey paper."
    258       },
    259       "demographics_reported": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants are involved in this survey paper."
    263       },
    264       "inclusion_exclusion_criteria": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants are involved in this survey paper."
    268       },
    269       "randomization_described": {
    270         "applies": false,
    271         "answer": false,
    272         "justification": "No human participants are involved in this survey paper."
    273       },
    274       "blinding_described": {
    275         "applies": false,
    276         "answer": false,
    277         "justification": "No human participants are involved in this survey paper."
    278       },
    279       "attrition_reported": {
    280         "applies": false,
    281         "answer": false,
    282         "justification": "No human participants are involved in this survey paper."
    283       }
    284     },
    285     "cost_and_practicality": {
    286       "inference_cost_reported": {
    287         "applies": false,
    288         "answer": false,
    289         "justification": "This is a survey paper. It does not propose a method with its own inference costs."
    290       },
    291       "compute_budget_stated": {
    292         "applies": false,
    293         "answer": false,
    294         "justification": "This is a survey paper with no significant computational requirements beyond the informal demonstrations."
    295       }
    296     },
    297     "survey_methodology": {
    298       "prisma_or_structured_protocol": {
    299         "applies": true,
    300         "answer": false,
    301         "justification": "The paper does not follow PRISMA or any structured review protocol. There is no flow diagram, no protocol registration, no reproducible search strategy, and no systematic methodology for paper selection. It is a narrative review."
    302       },
    303       "quality_assessment_of_sources": {
    304         "applies": true,
    305         "answer": false,
    306         "justification": "The paper does not assess the quality of its source papers. All cited works are treated equally regardless of their methodological rigor — preprints, peer-reviewed papers, blog posts, and company announcements are presented alongside each other without quality differentiation."
    307       },
    308       "publication_bias_discussed": {
    309         "applies": true,
    310         "answer": false,
    311         "justification": "No discussion of publication bias. The paper does not consider whether the literature it reviews skews toward positive results for GenAI capabilities or whether negative findings are underrepresented."
    312       }
    313     }
    314   },
    315   "claims": [
    316     {
    317       "claim": "ChatGPT-4 remains vulnerable to jailbreaking prompts despite improved safeguards, though it becomes more robust after exposure to similar prompts in the same chat session.",
    318       "evidence": "Informal demonstrations shown in Figures 2 and 3 where jailbreaking prompts elicit forbidden responses, and subsequent robustness within the same conversation.",
    319       "supported": "weak"
    320     },
    321     {
    322       "claim": "Google Gemini is more resistant to jailbreaking than ChatGPT-4, refusing all existing prompts and name-changing scenarios at the beginning of the chat.",
    323       "evidence": "Informal comparison shown in Figure 4, where Gemini refused jailbreaking entries that ChatGPT-4 responded to.",
    324       "supported": "weak"
    325     },
    326     {
    327       "claim": "Both ChatGPT-4 and Google Gemini can be manipulated through reverse psychology to produce phishing email examples.",
    328       "evidence": "Demonstrations in Figures 5 and 6 showing both models eventually providing phishing email examples after conversational manipulation.",
    329       "supported": "moderate"
    330     },
    331     {
    332       "claim": "GPT-4 can generate functional attack payloads and embed them into files using reverse proxies, leveraging frameworks like Metasploit, Veil, and TheFatRat.",
    333       "evidence": "Figure 10 shows a script for payload generation. The paper lists six frameworks GPT-4 can use for payload code generation (Section 3.4).",
    334       "supported": "weak"
    335     },
    336     {
    337       "claim": "GenAI can generate educational ransomware code with basic code obfuscation techniques like renaming and control flow flattening.",
    338       "evidence": "Figure 11 shows educational ransomware code generated by ChatGPT. Previous DAN jailbreak techniques from [48] were no longer functional at time of research (Section 3.5).",
    339       "supported": "moderate"
    340     },
    341     {
    342       "claim": "SecurityLLM achieves 98% overall accuracy in identifying 14 types of cyberattacks using LLMs combined with a basic classification model.",
    343       "evidence": "Cited from Ferrag et al. [86] in Section 4.9. This is a claim from a reviewed paper, not independently verified.",
    344       "supported": "weak"
    345     },
    346     {
    347       "claim": "LLift, combining static analysis with an LLM, achieves 50% precision and 100% recall on real-world UBI bugs, uncovering 13 new bugs in the Linux kernel.",
    348       "evidence": "Cited from Li et al. [71] in Section 4.5. This is a claim from a reviewed paper, not independently verified by the authors.",
    349       "supported": "weak"
    350     },
    351     {
    352       "claim": "GenAI has the potential to dramatically increase cybersecurity standards through automating defenses, enhancing threat intelligence, and improving cybersecurity protocols.",
    353       "evidence": "Narrative synthesis across Section 4 and the Discussion (Section 6). No original quantitative evidence provided.",
    354       "supported": "weak"
    355     }
    356   ],
    357   "red_flags": [
    358     {
    359       "flag": "No systematic review methodology",
    360       "detail": "The paper claims to be a review but follows no PRISMA protocol, has no systematic search strategy, no inclusion/exclusion criteria, no quality assessment of sources, and no reproducible paper selection process. This is a narrative review that blends heterogeneous sources of varying rigor, obscuring which findings rest on reliable evidence."
    361     },
    362     {
    363       "flag": "Informal demonstrations presented as evidence",
    364       "detail": "The jailbreaking, reverse psychology, and code generation demonstrations (Figs 2-11) are ad-hoc conversational tests with no systematic methodology, no repeated trials, no controlled conditions, and no reproducibility information (model versions, dates, hyperparameters). These anecdotal demonstrations are presented as supporting broad claims about GenAI capabilities."
    365     },
    366     {
    367       "flag": "Claims significantly outrun evidence",
    368       "detail": "The paper makes sweeping claims about GenAI's transformative potential for both offense and defense in cybersecurity based on a narrative literature review and a handful of informal demonstrations. No original quantitative analysis supports the conclusions."
    369     },
    370     {
    371       "flag": "No quality differentiation among sources",
    372       "detail": "Peer-reviewed papers, arXiv preprints, blog posts, company press releases, and medium.com articles are cited alongside each other and treated with equal authority. For example, Yandex press releases [4], Microsoft blog posts [5], MIT project pages [6-7], and Medium articles [8] are presented alongside NeurIPS papers [19] without any quality weighting."
    373     },
    374     {
    375       "flag": "Proxy-outcome conflation",
    376       "detail": "The paper equates GenAI producing text that resembles malware code or phishing emails with GenAI enabling actual cyberattacks. Generating a skeleton rootkit or ransomware snippet (Figs A2, A6, 11) does not establish that these outputs are functional, evasive, or practically deployable, but the paper treats them as evidence of a real-world threat."
    377     },
    378     {
    379       "flag": "Missing limitations section",
    380       "detail": "Despite being a review paper making broad claims about GenAI's impact on cybersecurity, there is no limitations section, no threats to validity, and no discussion of the review's own methodological weaknesses."
    381     }
    382   ],
    383   "cited_papers": [
    384     {
    385       "title": "From chatgpt to threatgpt: Impact of generative ai in cybersecurity and privacy",
    386       "authors": ["Maanak Gupta", "CharanKumar Akiri", "Kshitiz Aryal", "Eli Parker", "Lopamudra Praharaj"],
    387       "year": 2023,
    388       "relevance": "Directly examines ChatGPT's capabilities for social engineering attacks, phishing, automated hacking, and malware creation — core survey-scope topics."
    389     },
    390     {
    391       "title": "Evaluating large language models trained on code",
    392       "authors": ["Mark Chen"],
    393       "year": 2021,
    394       "relevance": "Introduces Codex and evaluates LLM code generation capability on HumanEval, foundational for understanding AI code generation quality."
    395     },
    396     {
    397       "title": "An Analysis of the Automatic Bug Fixing Performance of ChatGPT",
    398       "authors": ["Dominik Sobania", "Carol Hanna", "Martin Briesch", "Justyna Petke"],
    399       "year": 2023,
    400       "arxiv_id": "2301.08653",
    401       "relevance": "Evaluates ChatGPT for automated program repair on QuixBugs benchmark, relevant to LLM code repair capabilities."
    402     },
    403     {
    404       "title": "PentestGPT: An LLM-empowered Automatic Penetration Testing Tool",
    405       "authors": ["Gelei Deng"],
    406       "year": 2023,
    407       "relevance": "Describes an agentic LLM tool for automated penetration testing, directly relevant to LLM-based security tooling."
    408     },
    409     {
    410       "title": "Exploring the dark side of ai: Advanced phishing attack design and deployment using chatgpt",
    411       "authors": ["Nicolas Begou", "Jeremy Vinoy", "Andrzej Duda", "Maciej Korczynski"],
    412       "year": 2023,
    413       "arxiv_id": "2309.10463",
    414       "relevance": "Examines ChatGPT's role in automating sophisticated phishing campaigns including website cloning and credential theft."
    415     },
    416     {
    417       "title": "Evaluating llms for privilege-escalation scenarios",
    418       "authors": ["Andreas Happe", "Aaron Kaplan", "Juergen Cito"],
    419       "year": 2023,
    420       "arxiv_id": "2310.11409",
    421       "relevance": "Benchmarks LLMs on Linux privilege escalation tasks, finding GPT-4 achieves up to 100% success rate in some scenarios."
    422     },
    423     {
    424       "title": "Revolutionizing cyber threat detection with large language models",
    425       "authors": ["Mohamed Amine Ferrag"],
    426       "year": 2023,
    427       "arxiv_id": "2306.14263",
    428       "relevance": "Introduces SecurityLLM for cybersecurity threat identification achieving 98% accuracy across 14 attack types."
    429     },
    430     {
    431       "title": "The Hitchhiker's Guide to Program Analysis: A Journey with Large Language Models",
    432       "authors": ["Haonan Li", "Yu Hao", "Yizhuo Zhai", "Zhiyun Qian"],
    433       "year": 2023,
    434       "arxiv_id": "2308.00245",
    435       "relevance": "Introduces LLift combining LLMs with static analysis for bug detection, uncovering 13 new UBI bugs in the Linux kernel."
    436     },
    437     {
    438       "title": "THE FORMAI DATASET: GENERATIVE AI IN SOFTWARE SECURITY THROUGH THE LENS OF FORMAL VERIFICATION",
    439       "authors": ["Norbert Tihanyi", "Tamas Bisztray", "Ridhi Jain", "Mohamed Amine Ferrag"],
    440       "year": 2023,
    441       "arxiv_id": "2307.02192",
    442       "relevance": "Provides 112K AI-generated C programs with vulnerability classifications, relevant to understanding AI code security."
    443     },
    444     {
    445       "title": "Ocassionally secure: A comparative analysis of code generation assistants",
    446       "authors": ["Rami Elgedawy"],
    447       "year": 2024,
    448       "arxiv_id": "2402.00689",
    449       "relevance": "Compares security of code generated by GPT-3.5, GPT-4, Google Bard, and Gemini across multiple dimensions."
    450     },
    451     {
    452       "title": "The Scope of ChatGPT in Software Engineering: A Thorough Investigation",
    453       "authors": ["Wei Ma", "Shangqing Liu", "Wenhan Wang"],
    454       "year": 2023,
    455       "relevance": "Evaluates ChatGPT's understanding of code semantics including syntax, static behavior, and dynamic behavior."
    456     },
    457     {
    458       "title": "Defending chatgpt against jailbreak attack via self-reminder",
    459       "authors": ["Yueqi Xie", "Jingwei Yi", "Jiawei Shao"],
    460       "year": 2023,
    461       "doi": "10.1038/s42256-023-00765-8",
    462       "relevance": "Proposes System-Mode Self-Reminder defense reducing jailbreak success from 67.21% to 19.34%, relevant to LLM safety."
    463     }
    464   ],
    465   "engagement_factors": {
    466     "practical_relevance": {
    467       "score": 2,
    468       "justification": "Discusses real attack and defense techniques with GenAI that security practitioners should be aware of, though no tools or actionable methods are released."
    469     },
    470     "surprise_contrarian": {
    471       "score": 1,
    472       "justification": "Mostly confirms widely expected concerns about GenAI dual-use in cybersecurity; does not challenge conventional wisdom."
    473     },
    474     "fear_safety": {
    475       "score": 3,
    476       "justification": "Directly demonstrates AI-generated malware, rootkits, phishing attacks, data exfiltration scripts, and jailbreaking — highly fear-inducing content for general audiences."
    477     },
    478     "drama_conflict": {
    479       "score": 1,
    480       "justification": "Presents offense vs defense tension but avoids controversial claims about specific companies or products."
    481     },
    482     "demo_ability": {
    483       "score": 1,
    484       "justification": "Shows conversation screenshots but releases no code, tools, or demos that readers could try."
    485     },
    486     "brand_recognition": {
    487       "score": 2,
    488       "justification": "Prominently features ChatGPT/GPT-4 and Google Gemini — well-known products — but the paper itself is from a lesser-known lab."
    489     }
    490   }
    491 }

Impressum · Datenschutz