ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (19395B)


      1 {
      2   "paper": {
      3     "title": "Comprehensive Analysis of Machine Learning and Deep Learning models on Prompt Injection Classification using Natural Language Processing techniques",
      4     "authors": ["Bhavvya Jain", "Pranav Pawar", "Dhruv Gada", "Tanish Patwa", "Pratik Kanani", "Deepali Patil", "Lakshmi Kurup"],
      5     "year": 2025,
      6     "venue": "International Research Journal of Multidisciplinary Technovation",
      7     "doi": "10.54392/irjmt2523"
      8   },
      9   "checklist": {
     10     "artifacts": {
     11       "code_released": {
     12         "applies": true,
     13         "answer": false,
     14         "justification": "No repository URL, code archive, or link to source code is provided anywhere in the paper."
     15       },
     16       "data_released": {
     17         "applies": true,
     18         "answer": true,
     19         "justification": "The dataset is publicly available from Hugging Face (deepset prompt injection dataset), as stated in Section 3.1."
     20       },
     21       "environment_specified": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "No requirements.txt, Dockerfile, or detailed environment specification is provided. Libraries such as scikit-learn and nltk, and the BERT model, are mentioned, but no library versions are given."
     25       },
     26       "reproduction_instructions": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "No step-by-step reproduction instructions, README, or runnable scripts are provided."
     30       }
     31     },
     32     "statistical_methodology": {
     33       "confidence_intervals_or_error_bars": {
     34         "applies": true,
     35         "answer": false,
     36         "justification": "Only point estimates of accuracy are reported (e.g., 94.74% for RNN). No confidence intervals or error bars are provided."
     37       },
     38       "significance_tests": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "The paper claims RNN outperforms other models but provides no statistical significance tests to support these comparisons."
     42       },
     43       "effect_sizes_reported": {
     44         "applies": true,
     45         "answer": false,
     46         "justification": "Only raw accuracy numbers are reported. No effect sizes (Cohen's d, etc.) or contextual improvement measures beyond raw differences are given."
     47       },
     48       "sample_size_justified": {
     49         "applies": true,
     50         "answer": false,
     51         "justification": "The dataset size is not stated explicitly in the paper text (only that it was 'sampled out from a larger dataset'), and no justification for the sample size is provided."
     52       },
     53       "variance_reported": {
     54         "applies": true,
     55         "answer": false,
     56         "justification": "No variance, standard deviation, or results across multiple runs are reported. All results appear to be from single runs."
     57       }
     58     },
     59     "evaluation_design": {
     60       "baselines_included": {
     61         "applies": true,
     62         "answer": true,
     63         "justification": "Multiple ML models (LR, SVM, Decision Tree, RF, AdaBoost, XGBoost, KNN, Gradient Boosting) and DL models (CNN, RNN, LSTM, Bi-LSTM) are compared against each other."
     64       },
     65       "baselines_contemporary": {
     66         "applies": true,
     67         "answer": false,
     68         "justification": "The models used (Logistic Regression, SVM, basic RNN, LSTM) are all classical approaches. No contemporary prompt injection detection methods or transformer-based classifiers are compared against."
     69       },
     70       "ablation_study": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "No ablation study is performed. The comparison of vectorization techniques is not an ablation — it compares independent approaches rather than removing components from a single system."
     74       },
     75       "multiple_metrics": {
     76         "applies": true,
     77         "answer": false,
     78         "justification": "Only accuracy is reported as the evaluation metric. No F1, precision, recall, or AUC metrics are provided despite this being a binary classification task where class balance matters."
     79       },
     80       "human_evaluation": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "Human evaluation is not relevant to this binary classification benchmark evaluation."
     84       },
     85       "held_out_test_set": {
     86         "applies": true,
     87         "answer": true,
     88         "justification": "The dataset was split using train_test_split from scikit-learn (Section 3.4), and results are reported on the test set."
     89       },
     90       "per_category_breakdown": {
     91         "applies": true,
     92         "answer": true,
     93         "justification": "Results are broken down by vectorization technique (TF-IDF, Word2Vec, BoW) and by model in Tables 2 and 3."
     94       },
     95       "failure_cases_discussed": {
     96         "applies": true,
     97         "answer": false,
     98         "justification": "No failure cases, error analysis, or qualitative examples of misclassified prompts are discussed."
     99       },
    100       "negative_results_reported": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "Word2Vec's lower performance is reported and discussed (Section 4.1), noting its 'static embeddings may not fully capture the context.' KNN with BoW achieving only 54.14% is also reported."
    104       }
    105     },
    106     "claims_and_evidence": {
    107       "abstract_claims_supported": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "The abstract claims RNN achieved 94.74% detection rate, which matches Table 3. The claim that deep learning architectures capturing sequential dependencies are effective is supported by the results."
    111       },
    112       "causal_claims_justified": {
    113         "applies": true,
    114         "answer": false,
    115         "justification": "The paper makes causal claims such as 'RNN's ability to capture sequential dependency helped to detect the pattern' (Section 4.2) without controlled experiments isolating this mechanism."
    116       },
    117       "generalization_bounded": {
    118         "applies": true,
    119         "answer": false,
    120         "justification": "The paper claims to 'enhance the security, integrity, and trustworthiness of AI-driven technologies' in the abstract, but only tests on a single Hugging Face dataset. No discussion of generalization boundaries."
    121       },
    122       "alternative_explanations_discussed": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "No alternative explanations for the results are discussed. For example, dataset characteristics (keyword-heavy malicious prompts) that could explain why simple BoW nearly matches deep learning (93.99% vs 94.74%) are not explored."
    126       }
    127     },
    128     "setup_transparency": {
    129       "model_versions_specified": {
    130         "applies": true,
    131         "answer": true,
    132         "justification": "The BERT model is specified as 'bert-base-uncased' (Section 3.5.5/Table 3). The ML models are standard scikit-learn implementations."
    133       },
    134       "prompts_provided": {
    135         "applies": false,
    136         "answer": false,
    137         "justification": "This paper does not use prompting — it trains ML/DL classifiers on text data."
    138       },
    139       "hyperparameters_reported": {
    140         "applies": true,
    141         "answer": true,
    142         "justification": "Hyperparameters are reported: 10 epochs, batch size 32, 0.2 validation split, Adam optimizer with learning rate 0.001, Word2Vec dimensionality 100, window size 5, min frequency 1 (Sections 3.4.2, 3.5)."
    143       },
    144       "scaffolding_described": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No agentic scaffolding is used in this work."
    148       },
    149       "data_preprocessing_documented": {
    150         "applies": true,
    151         "answer": true,
    152         "justification": "Preprocessing steps are documented in Section 3.3: removal of special characters via regex, lowercasing, stopword removal, with a worked example."
    153       }
    154     },
    155     "limitations_and_scope": {
    156       "limitations_section_present": {
    157         "applies": true,
    158         "answer": false,
    159         "justification": "There is no limitations or threats-to-validity section. The conclusion briefly mentions future work but does not discuss limitations."
    160       },
    161       "threats_to_validity_specific": {
    162         "applies": true,
    163         "answer": false,
    164         "justification": "No threats to validity are discussed anywhere in the paper."
    165       },
    166       "scope_boundaries_stated": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "No explicit scope boundaries are stated. The paper does not discuss what the results do NOT show or what settings were not tested."
    170       }
    171     },
    172     "data_integrity": {
    173       "raw_data_available": {
    174         "applies": true,
    175         "answer": true,
    176         "justification": "The dataset is publicly available on Hugging Face (deepset prompt injection dataset), enabling independent verification."
    177       },
    178       "data_collection_described": {
    179         "applies": true,
    180         "answer": false,
    181         "justification": "The paper only states the dataset was 'obtained from Hugging Face datasets' and 'developed by deepset' and 'sampled out from a larger dataset' (Section 3.1). No details on how deepset created it or the sampling process."
    182       },
    183       "recruitment_methods_described": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "No human participants; the data source is a standard public benchmark dataset."
    187       },
    188       "data_pipeline_documented": {
    189         "applies": true,
    190         "answer": true,
    191         "justification": "The data pipeline from raw text through preprocessing (special character removal, denoising) to vectorization to model training is documented in Sections 3.2-3.5 with a flowchart (Figure 3)."
    192       }
    193     },
    194     "conflicts_of_interest": {
    195       "funding_disclosed": {
    196         "applies": true,
    197         "answer": false,
    198         "justification": "No funding source or acknowledgments section is present in the paper."
    199       },
    200       "affiliations_disclosed": {
    201         "applies": true,
    202         "answer": true,
    203         "justification": "All authors are listed as affiliated with Dwarkadas J. Sanghvi College of Engineering, Mumbai, India."
    204       },
    205       "funder_independent_of_outcome": {
    206         "applies": false,
    207         "answer": false,
    208         "justification": "No funding is disclosed; appears to be unfunded academic work from a single university."
    209       },
    210       "financial_interests_declared": {
    211         "applies": true,
    212         "answer": false,
    213         "justification": "No competing interests or financial interests statement is present in the paper."
    214       }
    215     },
    216     "contamination": {
    217       "training_cutoff_stated": {
    218         "applies": false,
    219         "answer": false,
    220         "justification": "This paper trains ML/DL classifiers from scratch on a dataset; it does not evaluate a pre-trained model's capability on a benchmark."
    221       },
    222       "train_test_overlap_discussed": {
    223         "applies": false,
    224         "answer": false,
    225         "justification": "Same as above — no pre-trained model benchmark evaluation is performed."
    226       },
    227       "benchmark_contamination_addressed": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "Same as above — no pre-trained model benchmark evaluation is performed."
    231       }
    232     },
    233     "human_studies": {
    234       "pre_registered": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "No human participants in this study."
    238       },
    239       "irb_or_ethics_approval": {
    240         "applies": false,
    241         "answer": false,
    242         "justification": "No human participants in this study."
    243       },
    244       "demographics_reported": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants in this study."
    248       },
    249       "inclusion_exclusion_criteria": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants in this study."
    253       },
    254       "randomization_described": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants in this study."
    258       },
    259       "blinding_described": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants in this study."
    263       },
    264       "attrition_reported": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants in this study."
    268       }
    269     },
    270     "cost_and_practicality": {
    271       "inference_cost_reported": {
    272         "applies": true,
    273         "answer": false,
    274         "justification": "No inference cost, latency, or per-example computation time is reported despite using BERT embeddings and multiple model architectures."
    275       },
    276       "compute_budget_stated": {
    277         "applies": true,
    278         "answer": false,
    279         "justification": "No hardware specifications, GPU hours, or total computation budget is stated."
    280       }
    281     }
    282   },
    283   "claims": [
    284     {
    285       "claim": "RNN model achieved the highest overall accuracy of 94.74% for prompt injection detection, outperforming all other ML and DL models.",
    286       "evidence": "Table 3 shows RNN accuracy of 0.9474 with BERT embeddings, compared to CNN (0.9398), LSTM (0.9248), and Bi-LSTM (0.9245). Table 2 shows best ML accuracy was 93.99%.",
    287       "supported": "weak"
    288     },
    289     {
    290       "claim": "Deep learning architectures that capture sequential dependencies are highly effective in identifying prompt injection threats.",
    291       "evidence": "RNN (94.74%) outperformed CNN (93.98%), but LSTM (92.48%) and Bi-LSTM (92.45%) performed worse than CNN, contradicting the sequential dependency narrative. Results from Table 3.",
    292       "supported": "weak"
    293     },
    294     {
    295       "claim": "TF-IDF and BoW vectorization techniques achieved comparable highest accuracies of 93.99% with Random Forest and Logistic Regression respectively.",
    296       "evidence": "Table 2 shows Random Forest with TF-IDF at 0.9399 and Logistic Regression with BoW at 0.9399.",
    297       "supported": "moderate"
    298     },
    299     {
    300       "claim": "Word2Vec showed lower overall performance compared to TF-IDF and BoW for prompt injection classification.",
    301       "evidence": "Table 2 shows Word2Vec's best accuracy was 84.96% (Gradient Boosting) vs 93.99% for TF-IDF and BoW.",
    302       "supported": "moderate"
    303     }
    304   ],
    305   "methodology_tags": ["benchmark-eval"],
    306   "key_findings": "The paper evaluates 8 ML models with 3 vectorization techniques (TF-IDF, Word2Vec, BoW) and 4 DL models with BERT embeddings for prompt injection classification. RNN with BERT embeddings achieved the highest accuracy at 94.74%, while simple approaches like Logistic Regression with BoW and Random Forest with TF-IDF both achieved 93.99%. The marginal improvement of deep learning over traditional ML (0.75 percentage points) is not statistically validated, and only accuracy is reported as a metric.",
    307   "red_flags": [
    308     {
    309       "flag": "Single metric evaluation",
    310       "detail": "Only accuracy is reported for a binary classification task. No precision, recall, F1, or AUC-ROC are provided, which is problematic since class distribution is not clearly specified and accuracy alone can be misleading for imbalanced datasets."
    311     },
    312     {
    313       "flag": "No statistical validation of claims",
    314       "detail": "The RNN's 94.74% vs CNN's 93.98% (0.76pp difference) is presented as a meaningful finding, but no significance tests or confidence intervals are provided. Single-run results without variance make it impossible to determine if this difference is real."
    315     },
    316     {
    317       "flag": "No limitations section",
    318       "detail": "The paper has no limitations, threats to validity, or discussion of what the results do not show. Dataset size, single-dataset evaluation, and lack of real-world validation are not discussed."
    319     },
    320     {
    321       "flag": "Claims outrun evidence",
    322       "detail": "The abstract claims the study 'enhances the security, integrity, and trustworthiness of AI-driven technologies, ensuring their safe use across diverse applications' based on a single-dataset classification experiment with no real-world deployment or validation."
    323     },
    324     {
    325       "flag": "Contradictory narrative",
    326       "detail": "The paper emphasizes sequential dependency modeling as key, but LSTM and Bi-LSTM (designed for long-range sequential dependencies) performed worse than both RNN and CNN, contradicting this interpretation."
    327     }
    328   ],
    329   "cited_papers": [
    330     {
    331       "title": "Prompt Injection attack against LLM Integrated Applications",
    332       "authors": ["Y. Liu", "G. Deng", "Y. Li", "K. Wang", "Z. Wang", "X. Wang", "T. Zhang", "Y. Liu", "H. Wang", "Y. Zheng", "Y. Liu"],
    333       "year": 2023,
    334       "arxiv_id": "2306.05499",
    335       "relevance": "Foundational work on prompt injection attacks against LLM-integrated applications using the HOUYI toolkit."
    336     },
    337     {
    338       "title": "Not what you've signed up for: Compromising real-world llm-integrated applications with indirect prompt injection",
    339       "authors": ["K. Greshake", "S. Abdelnabi", "S. Mishra", "C. Endres", "T. Holz", "M. Fritz"],
    340       "year": 2023,
    341       "relevance": "Key study on indirect prompt injection attacks and LLM vulnerabilities in real-world applications."
    342     },
    343     {
    344       "title": "Benchmarking and defending against indirect prompt injection attacks on large language models",
    345       "authors": ["J. Yi", "Y. Xie", "B. Zhu", "E. Kiciman", "G. Sun", "X. Xie", "F. Wu"],
    346       "year": 2023,
    347       "arxiv_id": "2312.14197",
    348       "relevance": "Benchmark and defense evaluation for indirect prompt injection attacks on LLMs."
    349     },
    350     {
    351       "title": "Ignore previous prompt: Attack techniques for language models",
    352       "authors": ["F. Perez", "I. Ribeiro"],
    353       "year": 2022,
    354       "arxiv_id": "2211.09527",
    355       "relevance": "Early systematic study of prompt injection attack techniques against language models."
    356     },
    357     {
    358       "title": "Injecagent: Benchmarking indirect prompt injections in tool-integrated large language model agents",
    359       "authors": ["Q. Zhan", "Z. Liang", "Z. Ying", "D. Kang"],
    360       "year": 2024,
    361       "arxiv_id": "2403.02691",
    362       "relevance": "Benchmark for evaluating LLM agent susceptibility to indirect prompt injection attacks."
    363     },
    364     {
    365       "title": "Optimization-based Prompt Injection Attack to LLM-as-a-Judge",
    366       "authors": ["J. Shi", "Z. Yuan", "Y. Liu", "Y. Huang", "P. Zhou", "L. Sun", "N. Z. Gong"],
    367       "year": 2024,
    368       "relevance": "Advanced prompt injection attack technique targeting LLM-as-a-Judge evaluation systems."
    369     },
    370     {
    371       "title": "Guardian: A multi-tiered defense architecture for thwarting prompt injection attacks on llms",
    372       "authors": ["P. Rai", "S. Sood", "V. K. Madisetti", "A. Bahga"],
    373       "year": 2024,
    374       "relevance": "Multi-layered defense architecture for prompt injection attack prevention on LLMs."
    375     },
    376     {
    377       "title": "Strengthening LLM Trust Boundaries: A Survey of Prompt Injection Attacks",
    378       "authors": ["S.S. Kumar", "M.L. Cummings", "A. Stimpson"],
    379       "year": 2024,
    380       "relevance": "Survey categorizing prompt injection attacks by prompt types, trust boundaries, and required expertise."
    381     },
    382     {
    383       "title": "A new era in llm security: Exploring security concerns in real-world llm-based systems",
    384       "authors": ["F. Wu", "N. Zhang", "S. Jha", "P. McDaniel", "C. Xiao"],
    385       "year": 2024,
    386       "arxiv_id": "2402.18649",
    387       "relevance": "Comprehensive exploration of security concerns in deployed LLM-based systems."
    388     }
    389   ]
    390 }

Impressum · Datenschutz