scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (28964B)
      1 {
      2   "paper": {
      3     "title": "Adversarial Bug Reports as a Security Risk in Language Model-Based Automated Program Repair",
      4     "authors": ["Piotr Przymus", "Andreas Happe", "Jürgen Cito"],
      5     "year": 2026,
      6     "venue": "23rd International Conference on Mining Software Repositories (MSR '26)",
      7     "arxiv_id": "2509.05372",
      8     "doi": "10.1145/3793302.3793352"
      9   },
     10   "checklist": {
     11     "artifacts": {
     12       "code_released": {
     13         "applies": true,
     14         "answer": true,
     15         "justification": "Section 1.1.2 states 'We provide a complete replication package, including automation scripts for generating malicious bug reports, code to orchestrate the attack workflows, all defense mechanisms with prompts, as well as the full set of issues, generated patches, and execution trajectories' with DOI 10.6084/m9.figshare.31140619 (reference [12])."
     16       },
     17       "data_released": {
     18         "applies": true,
     19         "answer": true,
     20         "justification": "The replication package (figshare DOI provided) includes 'the full set of issues, generated patches, and execution trajectories' used in the experiments, as stated in Section 1.1.2."
     21       },
     22       "environment_specified": {
     23         "applies": true,
     24         "answer": false,
     25         "justification": "The paper names specific tool versions (SWE-agent v1.1.0, SWE-ReX v1.2.2, Claude Sonnet claude-sonnet-4-20250514) and names the models used (DevStral:24B, GPT-4.1-mini, o4-mini, etc.) but does not provide a requirements file, Dockerfile, or comprehensive environment specification sufficient to recreate the full experimental environment."
     26       },
     27       "reproduction_instructions": {
     28         "applies": true,
     29         "answer": true,
     30         "justification": "A complete replication package is provided via figshare (Section 1.1.2), which the authors describe as containing automation scripts and code to orchestrate attack workflows. The paper references the package for full reproduction."
     31       }
     32     },
     33     "statistical_methodology": {
     34       "confidence_intervals_or_error_bars": {
     35         "applies": true,
     36         "answer": false,
     37         "justification": "Cost figures include standard deviations (e.g., mean APR cost '$0.87 ± $0.39'), but the main detection/attack success rates are reported as raw counts and percentages without confidence intervals or error bars. The core results in Table 2 are point estimates only."
     38       },
     39       "significance_tests": {
     40         "applies": true,
     41         "answer": false,
     42         "justification": "No statistical significance tests are reported. The paper compares detection rates across defense mechanisms (e.g., o4-mini detecting 47% vs. GPT-4.1-mini detecting 23%) without any statistical tests to establish whether differences are significant given the small sample of 51 attacks."
     43       },
     44       "effect_sizes_reported": {
     45         "applies": true,
     46         "answer": false,
     47         "justification": "The paper reports raw counts and percentages but does not report effect sizes in a statistical sense (e.g., Cohen's d, odds ratios). Percentage comparisons are given but without baseline context sufficient to constitute formal effect size reporting."
     48       },
     49       "sample_size_justified": {
     50         "applies": true,
     51         "answer": false,
     52         "justification": "The paper generated 51 adversarial bug reports but does not justify why 51 was chosen or discuss statistical power. The sample size appears to be determined by practical feasibility rather than a principled analysis of what N is needed to detect differences in defense effectiveness."
     53       },
     54       "variance_reported": {
     55         "applies": true,
     56         "answer": false,
     57         "justification": "LLMs are noted to be non-deterministic (Section 8), and the paper explicitly states that the authors could not fix the temperature to 0. However, results are reported from single runs without variance measures across repeated trials. Cost standard deviations are reported, but no variance is reported for detection rates."
     58       }
     59     },
     60     "evaluation_design": {
     61       "baselines_included": {
     62         "applies": true,
     63         "answer": true,
     64         "justification": "Multiple defense mechanisms are compared against each other: PromptGuard, PromptGuardV2, LlamaGuard v3 and v4, Granite Guardian, GPT-4.1-mini (custom prompt), o4-mini (custom prompt), CodeQL, and GitHub Copilot. The study compares these systematically in Table 2."
     65       },
     66       "baselines_contemporary": {
     67         "applies": true,
     68         "answer": true,
     69         "justification": "All evaluated defense mechanisms are contemporary (2023-2025 models), including LlamaGuard v4, PromptGuardV2, GPT-4.1-mini, o4-mini, and Granite Guardian. The APR system SWE-agent v1.1.0 was selected as the top-ranked system on the SWE-bench Verified leaderboard at the start of the evaluation."
     70       },
     71       "ablation_study": {
     72         "applies": true,
     73         "answer": true,
     74         "justification": "The paper evaluates individual defense components and their combinations (ensemble), effectively performing an ablation of the defense pipeline. Section 6.5 explicitly evaluates how combining pre- and post-APR countermeasures changes detection rates."
     75       },
     76       "multiple_metrics": {
     77         "applies": true,
     78         "answer": true,
     79         "justification": "The evaluation uses multiple metrics including: attack success rate, detection rate per defense mechanism, false-positive rate (on 100 real issues), cost per issue, and test suite failure rates. Table 2 captures several of these dimensions."
     80       },
     81       "human_evaluation": {
     82         "applies": true,
     83         "answer": true,
     84         "justification": "Section 5 states 'Manual validation was performed by two independent experts—a security researcher and a penetration tester—who jointly reviewed each issue and patch' to determine whether patches fulfilled attacker objectives."
     85       },
     86       "held_out_test_set": {
     87         "applies": true,
     88         "answer": true,
     89         "justification": "The false-positive rate evaluation used 100 real issues from the psf/requests project (issues #6000-6100) as a separate validation set from the 51 adversarial bug reports used in the main attack evaluation."
     90       },
     91       "per_category_breakdown": {
     92         "applies": true,
     93         "answer": true,
     94         "justification": "Table 2 provides a detailed per-attack-type breakdown (Naive APR, Inject, CI/CD, Revert CVE, Noise APR) with counts and percentages for each defense mechanism, allowing comparison across attack categories."
     95       },
     96       "failure_cases_discussed": {
     97         "applies": true,
     98         "answer": true,
     99         "justification": "Section 6.3 discusses failure cases of defense mechanisms (e.g., LlamaGuard failing to detect any malicious issues, PromptGuard detecting only 3/51). Section 7.2 explicitly discusses limitations and failure modes of the defense strategies."
    100       },
    101       "negative_results_reported": {
    102         "applies": true,
    103         "answer": true,
    104         "justification": "Section 6.4 reports that 'Using CodeQL with the python rule-set was not able to detect any of the created patches as malicious' — a clear negative result. LlamaGuard also returned zero detections (0%), and PromptGuard detected only 3/51 (5.88%)."
    105       }
    106     },
    107     "claims_and_evidence": {
    108       "abstract_claims_supported": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "The abstract claims 90% of crafted bug reports triggered attacker-aligned patches (supported by Table 2: 46/51=90.2%), best pre-repair filter blocked only 47% (o4-mini unstructured: 24/51=47.05%), and post-repair was effective in 58% (Copilot all-levels: 30/51=58.82%). All abstract figures are consistent with results."
    112       },
    113       "causal_claims_justified": {
    114         "applies": true,
    115         "answer": true,
    116         "justification": "The paper makes causal claims about adversarial bug reports causing insecure patches, but this is demonstrated directly through controlled experiments where each adversarial report was manually crafted and submitted, and outcomes were verified by expert annotators. The controlled experiment design (attack → observed outcome) provides adequate causal identification."
    117       },
    118       "generalization_bounded": {
    119         "applies": true,
    120         "answer": true,
    121         "justification": "Section 8 explicitly bounds external validity: 'Our experiments focus on a subset of open-source projects and a single APR system (SWE-agent). While the selected repositories are diverse and representative...our findings may not generalize to proprietary software or APR tools with substantially different architectures.' The limitations section is specific about scope."
    122       },
    123       "alternative_explanations_discussed": {
    124         "applies": true,
    125         "answer": true,
    126         "justification": "Section 8 (Threats to Validity) discusses internal validity threats (subjectivity in manual annotation, LLM non-determinism), external validity (single APR system, open-source only), and construct validity (whether the attack success definition captures downstream harm). These are specific threats to this study."
    127       }
    128     },
    129     "setup_transparency": {
    130       "model_versions_specified": {
    131         "applies": true,
    132         "answer": true,
    133         "justification": "The paper specifies: SWE-agent v1.1.0 with SWE-ReX v1.2.2, Claude Sonnet (claude-sonnet-4-20250514), DevStral:24B, GPT-4.1-mini, o4-mini, LlamaGuard v3 and v4, PromptGuard and PromptGuardV2, Granite Guardian. These are sufficiently specific model identifiers."
    134       },
    135       "prompts_provided": {
    136         "applies": true,
    137         "answer": true,
    138         "justification": "Figure 3 shows the full template used for the Revert CVE attack, and Figure 4 shows the full template for pre-APR issue classification. The actual prompt texts are provided in the paper, not just descriptions of what they do."
    139       },
    140       "hyperparameters_reported": {
    141         "applies": true,
    142         "answer": false,
    143         "justification": "Section 5 states 'when using LLMs, we used their default temperature' but does not specify what that default temperature is for each model. No other hyperparameters (top-p, max tokens, etc.) are reported. Using defaults without stating what those defaults are is insufficient for reproducibility."
    144       },
    145       "scaffolding_described": {
    146         "applies": true,
    147         "answer": true,
    148         "justification": "The attack scaffolding is described in detail: context-building using LSH and rapidfuzz (Section 3.2.1), the DevStral model for generation, the GitHub API workflow for submitting issues and PRs, and the SWE-agent configuration. Figure 1 illustrates the full pipeline architecture."
    149       },
    150       "data_preprocessing_documented": {
    151         "applies": true,
    152         "answer": true,
    153         "justification": "Section 3.2.1 describes how diffs are sanitized ('removing tests, documentation, and unrelated changes') for context-building. Section 5 describes how the 51 adversarial reports were allocated across attack types. The false-positive study used issues #6000-6100 excluding one deleted entry."
    154       }
    155     },
    156     "limitations_and_scope": {
    157       "limitations_section_present": {
    158         "applies": true,
    159         "answer": true,
    160         "justification": "Section 8 is titled 'Threats to Validity' and provides substantive discussion of internal, external, construct, and tool limitations as separate subsections."
    161       },
    162       "threats_to_validity_specific": {
    163         "applies": true,
    164         "answer": true,
    165         "justification": "Threats are specific to this study: the dual-annotator protocol addresses annotation subjectivity; LLM non-determinism is acknowledged, with an explicit note that temperature could not be fixed without violating construct validity; and the single-APR-system limitation is acknowledged, with a justification for why similar vulnerabilities would likely manifest in other systems."
    166       },
    167       "scope_boundaries_stated": {
    168         "applies": true,
    169         "answer": true,
    170         "justification": "Section 8 explicitly states the work is limited to 'open-source projects and a single APR system (SWE-agent)' and that findings 'may not generalize to proprietary software or APR tools with substantially different architectures or training regimes.' Construct validity section notes attack success definitions may not capture downstream harm."
    171       }
    172     },
    173     "data_integrity": {
    174       "raw_data_available": {
    175         "applies": true,
    176         "answer": true,
    177         "justification": "The replication package (figshare DOI: 10.6084/m9.figshare.31140619) includes 'the full set of issues, generated patches, and execution trajectories,' providing raw data for independent verification."
    178       },
    179       "data_collection_described": {
    180         "applies": true,
    181         "answer": true,
    182         "justification": "Section 5 describes the data collection in detail: which projects were used (django, flask, pip, requests, scikit-learn from SWE-bench), how attack reports were generated (DevStral with context-building), and how outcomes were recorded (Copilot comments, CodeQL results, test suite outcomes, manual validation)."
    183       },
    184       "recruitment_methods_described": {
    185         "applies": false,
    186         "answer": false,
    187         "justification": "This is a controlled experiment on software repositories with no participant recruitment; the 'data' is adversarial bug reports generated by the authors themselves and existing OSS repositories. NA for this criterion."
    188       },
    189       "data_pipeline_documented": {
    190         "applies": true,
    191         "answer": true,
    192         "justification": "The full pipeline is documented: adversarial issue generation → submission to GitHub forks → SWE-agent processing → PR creation → test suite execution → CodeQL scan → Copilot review → manual expert validation. Each step is described in Sections 3-5 and outcomes are captured in Table 2."
    193       }
    194     },
    195     "conflicts_of_interest": {
    196       "funding_disclosed": {
    197         "applies": true,
    198         "answer": true,
    199         "justification": "Section 1.1.3 states 'This work was supported by the Polish National Agency for Academic Exchange (NAWA) - BPN/BAT/2025/1/00019 and Agentur für Bildung und Internationalisierung (ÖeAD).' Funding is clearly disclosed."
    200       },
    201       "affiliations_disclosed": {
    202         "applies": true,
    203         "answer": true,
    204         "justification": "Author affiliations are stated on the first page: Piotr Przymus at Nicolaus Copernicus University, Andreas Happe and Jürgen Cito at TU Wien. None of the authors are affiliated with OpenAI, Anthropic, Meta, or GitHub, whose tools are evaluated."
    205       },
    206       "funder_independent_of_outcome": {
    207         "applies": true,
    208         "answer": true,
    209         "justification": "The funders (NAWA and ÖeAD) are academic exchange agencies with no financial stake in the outcomes of evaluating APR security or any of the evaluated tools."
    210       },
    211       "financial_interests_declared": {
    212         "applies": true,
    213         "answer": false,
    214         "justification": "There is no competing interests statement or declaration of financial interests (patents, equity, etc.) in the paper. Absence of such a declaration means this criterion is not satisfied."
    215       }
    216     },
    217     "contamination": {
    218       "training_cutoff_stated": {
    219         "applies": true,
    220         "answer": false,
    221         "justification": "The paper uses Claude Sonnet (claude-sonnet-4-20250514) and other LLMs for patch generation and filtering, but does not state the training data cutoffs for any of the models. The SWE-bench benchmark is used as the source of target repositories, which raises contamination concerns."
    222       },
    223       "train_test_overlap_discussed": {
    224         "applies": true,
    225         "answer": false,
    226         "justification": "The paper does not discuss whether SWE-agent (using Claude Sonnet) may have been trained on data from the five target repositories (django, flask, pip, requests, scikit-learn), which are all major Python projects with extensive online presence. This is a relevant concern but is not addressed."
    227       },
    228       "benchmark_contamination_addressed": {
    229         "applies": true,
    230         "answer": false,
    231         "justification": "The experiments use repositories from SWE-bench, and the APR system uses Claude Sonnet trained before May 2025. The SWE-bench benchmark was published in 2024, meaning these repositories and potentially their issue histories were available during model training. This contamination risk is not discussed."
    232       }
    233     },
    234     "human_studies": {
    235       "pre_registered": {
    236         "applies": false,
    237         "answer": false,
    238         "justification": "The paper involves expert manual validation of patches by two annotators, but this is not a human subjects study — the humans are researchers performing analysis, not participants. Human studies criteria do not apply."
    239       },
    240       "irb_or_ethics_approval": {
    241         "applies": false,
    242         "answer": false,
    243         "justification": "No human participants are involved; the study operates on software repositories and generated code. IRB approval is not applicable. The paper does note responsible disclosure practices in Section 7.5."
    244       },
    245       "demographics_reported": {
    246         "applies": false,
    247         "answer": false,
    248         "justification": "No human participants in the study; the two expert annotators are described by role only (security researcher, penetration tester), which is appropriate for the validation task rather than a human subjects study."
    249       },
    250       "inclusion_exclusion_criteria": {
    251         "applies": false,
    252         "answer": false,
    253         "justification": "No human participant recruitment; not a human subjects study."
    254       },
    255       "randomization_described": {
    256         "applies": false,
    257         "answer": false,
    258         "justification": "No human participants; not applicable."
    259       },
    260       "blinding_described": {
    261         "applies": false,
    262         "answer": false,
    263         "justification": "No human participants; not applicable."
    264       },
    265       "attrition_reported": {
    266         "applies": false,
    267         "answer": false,
    268         "justification": "No human participants; not applicable."
    269       }
    270     },
    271     "cost_and_practicality": {
    272       "inference_cost_reported": {
    273         "applies": true,
    274         "answer": true,
    275         "justification": "Section 6.6 provides detailed cost analysis: attack generation cost $0.000295 per bug report, GPT-4.1-mini pre-filter cost $0.00101 per issue, o4-mini cost $0.00268 per issue, APR execution cost $0.87 ± $0.39 per issue. Costs are reported with standard deviations."
    276       },
    277       "compute_budget_stated": {
    278         "applies": true,
    279         "answer": false,
    280         "justification": "API costs are reported per-issue but total computational budget (total API spend for the study, hardware used for local model runs) is not stated. The paper mentions models ran locally on consumer-grade GPUs but gives no specifics on GPU hours or total expenditure."
    281       }
    282     }
    283   },
    284   "claims": [
    285     {
    286       "claim": "90% of crafted adversarial bug reports triggered attacker-aligned patches when submitted to SWE-agent.",
    287       "evidence": "Table 2 shows 46 out of 51 adversarial issues (90.2%) led to patches consistent with attacker objectives. Section 6.1 provides breakdown by attack type: all 8 Noise APR, 5/5 Naive APR (noise), all 10 CI/CD, 10/14 Injection, 13/14 Revert CVE.",
    288       "supported": "strong"
    289     },
    290     {
    291       "claim": "The best pre-repair filter (o4-mini with unstructured output) blocked only 47% of adversarial bug reports.",
    292       "evidence": "Table 2 and Section 6.3 report o4-mini unstructured output detected 24/51 issues (47.05%), making it the best-performing pre-APR detection mechanism among those tested.",
    293       "supported": "strong"
    294     },
    295     {
    296       "claim": "CodeQL static analysis failed to detect any of the adversarial patches as malicious.",
    297       "evidence": "Section 6.4 states 'Using CodeQL with the python rule-set was not able to detect any of the created patches as malicious.' Table 2 shows 0 detections for CodeQL across all 51 cases.",
    298       "supported": "strong"
    299     },
    300     {
    301       "claim": "Generating adversarial bug reports is orders of magnitude cheaper than defending against them, creating a structural asymmetry that favors attackers.",
    302       "evidence": "Section 6.6 reports attack cost ~$0.000295/report, pre-filter cost ~$0.00268/issue (o4-mini), APR execution ~$0.87/issue — approximately 3 orders of magnitude difference between attack and APR cost.",
    303       "supported": "strong"
    304     },
    305     {
    306       "claim": "Project test suites are insufficient to detect adversarial patches: 68.6% of all adversarial patches produced no new test failures.",
    307       "evidence": "Section 6.2 reports 35/51 (68.6%) adversarial patches showed no additional test failures; 12/51 (23.5%) failed the test suite; 4/51 (7.8%) increased failures within expected thresholds.",
    308       "supported": "strong"
    309     },
    310     {
    311       "claim": "Combining best pre-APR filter and Copilot post-APR review blocks 68.6% of attacks, leaving 31.4% undetected.",
    312       "evidence": "Section 6.5 reports the targeted configuration (o4-mini + Copilot) blocked 35/51 attacks (68.6%), while the full pre-APR ensemble + Copilot blocked 37/51 (72.5%).",
    313       "supported": "strong"
    314     }
    315   ],
    316   "methodology_tags": ["benchmark-eval", "case-study"],
    317   "key_findings": "LLM-based APR systems are highly vulnerable to adversarial bug reports: 46/51 crafted reports (90.2%) induced attacker-aligned patches including CVE reversions, vulnerability injections, and CI/CD exploits. Current defenses are severely insufficient — the best pre-filter (o4-mini) blocked only 47% of attacks, CodeQL detected zero malicious patches, and GitHub Copilot flagged only 58%, with its output requiring manual review to be useful. A fundamental economic asymmetry exists: adversarial bug report generation costs ~$0.0003 per report while APR execution costs ~$0.87 per issue, structurally favoring attackers. The paper contributes an open-source framework for generating and evaluating adversarial bug reports to support future security testing of APR systems.",
    318   "red_flags": [
    319     {
    320       "flag": "Small sample size (n=51) for comparing many defense mechanisms",
    321       "detail": "The study evaluates 10+ defense mechanisms on only 51 adversarial bug reports, which is too small to reliably detect statistically significant differences between mechanisms. Differences like 23% vs 35% detection rate between GPT-4.1-mini structured vs o4-mini structured could easily be noise at this sample size."
    322     },
    323     {
    324       "flag": "No statistical tests for comparative claims",
    325       "detail": "The paper makes comparative claims about defense effectiveness (e.g., o4-mini outperforms GPT-4.1-mini) without any statistical tests. With n=51, many of these differences are not distinguishable from chance."
    326     },
    327     {
    328       "flag": "LLM non-determinism not controlled",
    329       "detail": "Section 8 acknowledges that LLMs are non-deterministic and that temperature could not be set to 0 (to maintain construct validity of default configurations). All results are from single runs, so result variability across repeated experiments is unknown."
    330     },
    331     {
    332       "flag": "Single APR system evaluated",
    333       "detail": "Only SWE-agent is evaluated as the APR system. While the paper argues structural properties are shared across systems, the attack success rates may differ substantially for other APR systems with different prompt engineering or safety measures."
    334     },
    335     {
    336       "flag": "Benchmark contamination not addressed",
    337       "detail": "The study uses repositories from SWE-bench with Claude Sonnet (trained before May 2025) as the APR backend. The SWE-bench repositories and issues may have been in Claude's training data, potentially inflating APR success rates on these specific projects."
    338     }
    339   ],
    340   "cited_papers": [
    341     {
    342       "title": "SWE-bench: Can Language Models Resolve Real-World GitHub Issues?",
    343       "authors": ["Carlos E. Jimenez", "John Yang", "Alexander Wettig", "Shunyu Yao", "Kexin Pei", "Ofir Press", "Karthik Narasimhan"],
    344       "year": 2024,
    345       "arxiv_id": "2310.06770",
    346       "doi": "10.48550/arXiv.2310.06770",
    347       "relevance": "Primary benchmark used in the study; core evaluation platform for LLM-based APR systems."
    348     },
    349     {
    350       "title": "SWE-agent: Agent-Computer Interfaces Enable Automated Software Engineering",
    351       "authors": ["John Yang", "Carlos E. Jimenez", "Alexander Wettig", "Kilian Lieret", "Shunyu Yao", "Karthik R. Narasimhan", "Ofir Press"],
    352       "year": 2024,
    353       "arxiv_id": "2405.15793",
    354       "relevance": "The APR system evaluated in this paper; a leading LLM agent for automated code repair."
    355     },
    356     {
    357       "title": "RepairAgent: An Autonomous, LLM-Based Agent for Program Repair",
    358       "authors": ["Islem Bouzenia", "Premkumar Devanbu", "Michael Pradel"],
    359       "year": 2024,
    360       "arxiv_id": "2403.17134",
    361       "doi": "10.48550/arXiv.2403.17134",
    362       "relevance": "Related LLM-based APR system demonstrating autonomous agent-based program repair."
    363     },
    364     {
    365       "title": "OpenHands: An Open Platform for AI Software Developers as Generalist Agents",
    366       "authors": ["Xingyao Wang", "Boxuan Li", "Yufan Song"],
    367       "year": 2025,
    368       "arxiv_id": "2407.16741",
    369       "relevance": "Related agentic AI system for software engineering; cited as an example of APR system with known prompt injection vulnerabilities."
    370     },
    371     {
    372       "title": "AutoCodeRover: Autonomous Program Improvement",
    373       "authors": ["Yuntong Zhang", "Haifeng Ruan", "Zhiyu Fan", "Abhik Roychoudhury"],
    374       "year": 2024,
    375       "doi": "10.1145/3650212.3680384",
    376       "relevance": "Another leading LLM-based APR system; cited as sharing structural properties that make it potentially vulnerable to similar attacks."
    377     },
    378     {
    379       "title": "Not what you've signed up for: Compromising Real-World LLM-Integrated Applications with Indirect Prompt Injection",
    380       "authors": ["Kai Greshake", "Sahar Abdelnabi", "Shailesh Mishra", "Christoph Endres", "Thorsten Holz", "Mario Fritz"],
    381       "year": 2023,
    382       "arxiv_id": "2302.12173",
    383       "relevance": "Foundational work on indirect prompt injection attacks against LLM applications, directly relevant to this paper's threat model."
    384     },
    385     {
    386       "title": "Llama guard: LLM-based input-output safeguard for human-AI conversations",
    387       "authors": ["Hakan Inan", "Kartikeya Upasani", "Jianfeng Chi"],
    388       "year": 2023,
    389       "arxiv_id": "2312.06674",
    390       "relevance": "LlamaGuard defense mechanism evaluated in this paper as a pre-APR filter."
    391     },
    392     {
    393       "title": "Granite Guardian",
    394       "authors": ["Inkit Padhi", "Manish Nagireddy", "Giandomenico Cornacchia"],
    395       "year": 2024,
    396       "arxiv_id": "2412.07724",
    397       "relevance": "IBM's safety classifier evaluated as a pre-APR defense mechanism in this paper."
    398     },
    399     {
    400       "title": "Jailbreak Attacks and Defenses Against Large Language Models: A Survey",
    401       "authors": ["Sibo Yi", "Yule Liu", "Zhen Sun", "Tianshuo Cong", "Xinlei He", "Jiaxing Song", "Ke Xu", "Qi Li"],
    402       "year": 2024,
    403       "arxiv_id": "2407.04295",
    404       "relevance": "Survey of jailbreak attacks and defenses relevant to pre-APR filtering design."
    405     },
    406     {
    407       "title": "Automatic Programming: Large Language Models and Beyond",
    408       "authors": ["Michael R. Lyu", "Baishakhi Ray", "Abhik Roychoudhury", "Shin Hwei Tan", "Patanamon Thongtanunam"],
    409       "year": 2024,
    410       "arxiv_id": "2405.02213",
    411       "doi": "10.48550/arXiv.2405.02213",
    412       "relevance": "Survey on LLM-based automated programming including APR; relevant background for the survey's scope."
    413     },
    414     {
    415       "title": "Evaluating agent-based program repair at Google",
    416       "authors": ["Pat Rondon", "Renyao Wei", "José Cambronero", "Jürgen Cito", "Aaron Sun", "Siddhant Sanyam", "Michele Tufano", "Satish Chandra"],
    417       "year": 2025,
    418       "arxiv_id": "2501.07531",
    419       "relevance": "Industrial-scale evaluation of LLM-based APR at Google; provides context for real-world APR deployment."
    420     },
    421     {
    422       "title": "Defeating Prompt Injections by Design",
    423       "authors": ["Edoardo Debenedetti", "Ilia Shumailov", "Tianqi Fan", "Jamie Hayes", "Nicholas Carlini"],
    424       "year": 2025,
    425       "arxiv_id": "2503.18813",
    426       "relevance": "Defense mechanism (CaMeL) against prompt injection attacks relevant to APR security."
    427     },
    428     {
    429       "title": "SWE-bench Multimodal: Do AI Systems Generalize to Visual Software Domains?",
    430       "authors": ["John Yang", "Carlos E. Jimenez", "Alex L. Zhang", "Kilian Lieret"],
    431       "year": 2024,
    432       "arxiv_id": "2410.03859",
    433       "doi": "10.48550/arXiv.2410.03859",
    434       "relevance": "Extension of SWE-bench benchmark; relevant as broader evaluation framework for LLM software engineering."
    435     }
    436   ]
    437 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs