scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (20128B)
      1 {
      2   "paper": {
      3     "title": "Explainable AI In Software Engineering: Enhancing Developer-AI Collaboration",
      4     "authors": ["Jyoti Kunal Shah"],
      5     "year": 2024,
      6     "venue": "The American Journal of Engineering and Technology",
      7     "doi": "10.37547/tajet/Volume06Issue07-11"
      8   },
      9   "scan_version": 3,
     10   "active_modules": [],
     11   "methodology_tags": ["theoretical", "case-study"],
     12   "key_findings": "The paper proposes a modular three-layer architecture (AI Layer, Explanation & Integration Layer, User Interaction Layer) for integrating Explainable AI into software engineering workflows. It argues that XAI can enhance developer trust, enable knowledge transfer, and improve adoption of AI-assisted tools. A hypothetical case study of explainable code review illustrates how transparent AI suggestions could improve collaboration. No empirical evaluation of the proposed framework is conducted.",
     13   "checklist": {
     14     "artifacts": {
     15       "code_released": {
     16         "applies": true,
     17         "answer": false,
     18         "justification": "No source code, repository URL, or prototype implementation is released. The paper describes a conceptual architecture and a hypothetical case study but provides no downloadable artifacts."
     19       },
     20       "data_released": {
     21         "applies": true,
     22         "answer": false,
     23         "justification": "No dataset is released. The paper is theoretical with no empirical data collection."
     24       },
     25       "environment_specified": {
     26         "applies": false,
     27         "answer": false,
     28         "justification": "The paper is purely theoretical/conceptual with no implemented system or experiments that would require an environment specification."
     29       },
     30       "reproduction_instructions": {
     31         "applies": false,
     32         "answer": false,
     33         "justification": "No experiments or implementations were conducted; there are no results to reproduce."
     34       }
     35     },
     36     "statistical_methodology": {
     37       "confidence_intervals_or_error_bars": {
     38         "applies": false,
     39         "answer": false,
     40         "justification": "The paper is theoretical with no quantitative experiments. No statistical results are produced."
     41       },
     42       "significance_tests": {
     43         "applies": false,
     44         "answer": false,
     45         "justification": "No experiments are conducted; there are no comparative claims based on the paper's own data."
     46       },
     47       "effect_sizes_reported": {
     48         "applies": false,
     49         "answer": false,
     50         "justification": "No experiments are conducted; no effect sizes are applicable."
     51       },
     52       "sample_size_justified": {
     53         "applies": false,
     54         "answer": false,
     55         "justification": "No experiments with samples are conducted in this theoretical paper."
     56       },
     57       "variance_reported": {
     58         "applies": false,
     59         "answer": false,
     60         "justification": "No experimental runs are conducted; there are no results to report variance for."
     61       }
     62     },
     63     "evaluation_design": {
     64       "baselines_included": {
     65         "applies": true,
     66         "answer": false,
     67         "justification": "The proposed framework is not compared against any alternative frameworks or existing approaches for integrating XAI into software engineering."
     68       },
     69       "baselines_contemporary": {
     70         "applies": true,
     71         "answer": false,
     72         "justification": "No baselines are included at all, so contemporaneity cannot be assessed."
     73       },
     74       "ablation_study": {
     75         "applies": false,
     76         "answer": false,
     77         "justification": "The system is a conceptual architecture that was not implemented; there are no components to ablate."
     78       },
     79       "multiple_metrics": {
     80         "applies": false,
     81         "answer": false,
     82         "justification": "No quantitative evaluation of the framework is conducted."
     83       },
     84       "human_evaluation": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "No evaluation is conducted at all. The case study is a hypothetical narrative, not an actual user study."
     88       },
     89       "held_out_test_set": {
     90         "applies": false,
     91         "answer": false,
     92         "justification": "No datasets or experiments are involved in this theoretical paper."
     93       },
     94       "per_category_breakdown": {
     95         "applies": false,
     96         "answer": false,
     97         "justification": "No quantitative results are produced to break down."
     98       },
     99       "failure_cases_discussed": {
    100         "applies": true,
    101         "answer": false,
    102         "justification": "The case study only presents a success scenario (Alice accepts the AI suggestion). No failure cases or scenarios where the framework would not work are discussed."
    103       },
    104       "negative_results_reported": {
    105         "applies": false,
    106         "answer": false,
    107         "justification": "No experiments are conducted from which negative results could emerge."
    108       }
    109     },
    110     "claims_and_evidence": {
    111       "abstract_claims_supported": {
    112         "applies": true,
    113         "answer": false,
    114         "justification": "The abstract claims 'A case study on explainable code review demonstrates how transparent AI suggestions can improve developer trust and team learning.' However, the case study is a hypothetical scenario with a fictional developer (Alice), not an empirical demonstration. The conclusion also claims '80% acceptance' without empirical backing from this work."
    115       },
    116       "causal_claims_justified": {
    117         "applies": true,
    118         "answer": false,
    119         "justification": "The paper makes numerous causal claims: 'explainability improves both trust and effectiveness,' 'explanations increase developers' trust and efficiency,' 'providing such explanations for refactoring decisions can significantly enhance developer trust and transparency.' These are stated as conclusions but are not supported by any causal study design in this paper — they are assertions drawn from narrative literature review."
    120       },
    121       "generalization_bounded": {
    122         "applies": true,
    123         "answer": false,
    124         "justification": "The paper makes broad claims about XAI in software engineering generally (e.g., 'XAI has the potential to become a standard feature of the next generation of software development environments') without bounding to specific settings, tools, or developer populations. The title implies general applicability but no evaluation is conducted."
    125       },
    126       "alternative_explanations_discussed": {
    127         "applies": true,
    128         "answer": false,
    129         "justification": "No alternative explanations are discussed for any of the claims. The paper does not consider whether factors other than explainability (e.g., accuracy, novelty, or developer training) might account for the trust and adoption effects attributed to XAI."
    130       },
    131       "proxy_outcome_distinction": {
    132         "applies": true,
    133         "answer": false,
    134         "justification": "The paper frames its claims in terms of 'trust,' 'collaboration,' and 'team learning' but never defines or measures these constructs. The case study uses acceptance of a suggestion as a proxy for trust without acknowledging this gap. The conclusion references '80% acceptance rate' and equates it with 'developer confidence' without discussion."
    135       }
    136     },
    137     "setup_transparency": {
    138       "model_versions_specified": {
    139         "applies": false,
    140         "answer": false,
    141         "justification": "No AI models are used in experiments. The paper is a theoretical framework proposal."
    142       },
    143       "prompts_provided": {
    144         "applies": false,
    145         "answer": false,
    146         "justification": "No prompting is used in this theoretical paper."
    147       },
    148       "hyperparameters_reported": {
    149         "applies": false,
    150         "answer": false,
    151         "justification": "No models are run; no hyperparameters are applicable."
    152       },
    153       "scaffolding_described": {
    154         "applies": false,
    155         "answer": false,
    156         "justification": "No agentic scaffolding is used. The paper proposes a conceptual architecture but does not implement or run any system."
    157       },
    158       "data_preprocessing_documented": {
    159         "applies": false,
    160         "answer": false,
    161         "justification": "No data is collected or preprocessed. The paper is theoretical."
    162       }
    163     },
    164     "limitations_and_scope": {
    165       "limitations_section_present": {
    166         "applies": true,
    167         "answer": false,
    168         "justification": "The paper references 'the limitations we discussed in Section 10' in the future directions, but no dedicated limitations section appears in the paper text. The Challenges section discusses general challenges of XAI in SE, not specific limitations of this paper's proposed framework or methodology."
    169       },
    170       "threats_to_validity_specific": {
    171         "applies": true,
    172         "answer": false,
    173         "justification": "No threats to validity are discussed. The paper does not address specific threats to its own claims, case study validity, or framework design."
    174       },
    175       "scope_boundaries_stated": {
    176         "applies": true,
    177         "answer": false,
    178         "justification": "No explicit scope boundaries are stated. The paper does not specify what the framework does NOT cover or what settings/contexts are excluded from its claims."
    179       }
    180     },
    181     "data_integrity": {
    182       "raw_data_available": {
    183         "applies": false,
    184         "answer": false,
    185         "justification": "The paper is theoretical with no empirical data collection. There is no raw data to make available."
    186       },
    187       "data_collection_described": {
    188         "applies": false,
    189         "answer": false,
    190         "justification": "No structured data collection is performed. The literature review is narrative, not systematic."
    191       },
    192       "recruitment_methods_described": {
    193         "applies": false,
    194         "answer": false,
    195         "justification": "No human participants are involved and no standard benchmark is used. The case study features a fictional developer."
    196       },
    197       "data_pipeline_documented": {
    198         "applies": false,
    199         "answer": false,
    200         "justification": "No data pipeline exists in this theoretical paper."
    201       }
    202     },
    203     "conflicts_of_interest": {
    204       "funding_disclosed": {
    205         "applies": true,
    206         "answer": false,
    207         "justification": "No funding source is disclosed anywhere in the paper. The author is listed as 'Independent Researcher, USA' but no statement about funding or its absence is provided."
    208       },
    209       "affiliations_disclosed": {
    210         "applies": true,
    211         "answer": true,
    212         "justification": "The author's affiliation is listed as 'Independent Researcher, USA.' No product or company is being evaluated, so there is no hidden conflict."
    213       },
    214       "funder_independent_of_outcome": {
    215         "applies": false,
    216         "answer": false,
    217         "justification": "The work appears to be unfunded independent research."
    218       },
    219       "financial_interests_declared": {
    220         "applies": true,
    221         "answer": false,
    222         "justification": "No competing interests or financial interests statement is included in the paper."
    223       }
    224     },
    225     "contamination": {
    226       "training_cutoff_stated": {
    227         "applies": false,
    228         "answer": false,
    229         "justification": "The paper does not evaluate a pre-trained model on any benchmark. It is a theoretical framework proposal."
    230       },
    231       "train_test_overlap_discussed": {
    232         "applies": false,
    233         "answer": false,
    234         "justification": "No model evaluation is conducted; contamination is structurally inapplicable."
    235       },
    236       "benchmark_contamination_addressed": {
    237         "applies": false,
    238         "answer": false,
    239         "justification": "No benchmark evaluation is performed."
    240       }
    241     },
    242     "human_studies": {
    243       "pre_registered": {
    244         "applies": false,
    245         "answer": false,
    246         "justification": "No human participants are involved. The case study describes a hypothetical scenario with a fictional developer."
    247       },
    248       "irb_or_ethics_approval": {
    249         "applies": false,
    250         "answer": false,
    251         "justification": "No human subjects research is conducted."
    252       },
    253       "demographics_reported": {
    254         "applies": false,
    255         "answer": false,
    256         "justification": "No human participants are involved."
    257       },
    258       "inclusion_exclusion_criteria": {
    259         "applies": false,
    260         "answer": false,
    261         "justification": "No human participants are recruited."
    262       },
    263       "randomization_described": {
    264         "applies": false,
    265         "answer": false,
    266         "justification": "No experimental study with human participants is conducted."
    267       },
    268       "blinding_described": {
    269         "applies": false,
    270         "answer": false,
    271         "justification": "No experimental study with human participants is conducted."
    272       },
    273       "attrition_reported": {
    274         "applies": false,
    275         "answer": false,
    276         "justification": "No human participants are involved."
    277       }
    278     },
    279     "cost_and_practicality": {
    280       "inference_cost_reported": {
    281         "applies": false,
    282         "answer": false,
    283         "justification": "The paper is purely theoretical. No system is run; no inference costs apply."
    284       },
    285       "compute_budget_stated": {
    286         "applies": false,
    287         "answer": false,
    288         "justification": "No computation is performed. The paper proposes a conceptual architecture only."
    289       }
    290     }
    291   },
    292   "claims": [
    293     {
    294       "claim": "XAI integration enhances developer trust and collaboration in software engineering",
    295       "evidence": "Supported by narrative references to [3] (PyExplainer) and [4] (X-SBR), plus a hypothetical case study in the code review section. No original empirical evidence is presented.",
    296       "supported": "weak"
    297     },
    298     {
    299       "claim": "With explanations, AI suggestions were accepted about 80% of the time",
    300       "evidence": "Stated in the conclusion with citation [4], but this figure does not come from any experiment conducted in this paper. The case study is a hypothetical walkthrough with one fictional developer.",
    301       "supported": "unsupported"
    302     },
    303     {
    304       "claim": "The proposed three-layer modular architecture can integrate explanation engines with AI models, IDEs, dashboards, and CI tools",
    305       "evidence": "Architecture described conceptually in the Architecture Overview section with a figure. No implementation, prototype, or feasibility evaluation is provided.",
    306       "supported": "unsupported"
    307     },
    308     {
    309       "claim": "Explainability in AI tools enables knowledge transfer and team learning",
    310       "evidence": "Illustrated through the hypothetical case study where Alice learns about SecureRandom from the AI's suggestion. No empirical measurement of knowledge transfer is attempted.",
    311       "supported": "weak"
    312     }
    313   ],
    314   "red_flags": [
    315     {
    316       "flag": "Hypothetical case study presented as empirical",
    317       "detail": "The paper states 'we conducted a case study using a prototype implementation' but the scenario is entirely hypothetical with a fictional developer (Alice). No actual prototype, no real developers, and no measured outcomes. This framing is misleading."
    318     },
    319     {
    320       "flag": "Unsupported quantitative claim in conclusion",
    321       "detail": "The conclusion states 'AI suggestions were accepted about 80% of the time in the scenario' with citation [4]. This figure does not come from any experiment in this paper and is misattributed to the hypothetical case study, conflating referenced literature with original findings."
    322     },
    323     {
    324       "flag": "Extremely thin reference list",
    325       "detail": "The paper cites only 6 references while claiming to provide a 'comprehensive exploration' of XAI in software engineering. Major XAI frameworks (SHAP, LIME), foundational SE works, and relevant developer-tool interaction studies are mentioned in text but not formally cited."
    326     },
    327     {
    328       "flag": "No empirical evaluation of proposed framework",
    329       "detail": "The paper proposes a three-layer architecture but provides no implementation, feasibility analysis, user feedback, or any form of empirical validation. The entire contribution is conceptual."
    330     },
    331     {
    332       "flag": "Claims significantly outrun evidence",
    333       "detail": "Broad causal claims like 'explainability improves both trust and effectiveness' and 'XAI has the potential to become a standard feature of the next generation of software development environments' are stated as conclusions but are not supported by any evidence from this paper."
    334     },
    335     {
    336       "flag": "Questionable venue quality",
    337       "detail": "Published in 'The American Journal of Engineering and Technology' (theamericanjournals.com), a journal with no apparent impact factor or established reputation in the software engineering or AI communities."
    338     }
    339   ],
    340   "cited_papers": [
    341     {
    342       "title": "A Systematic Literature Review of Explainable AI for Software Engineering",
    343       "authors": ["A. H. Mohammadkhani", "N. S. Bommi", "M. Daboussi", "O. Sabnis", "C. Tantithamthavorn", "H. Hemmati"],
    344       "year": 2023,
    345       "arxiv_id": "2302.06065",
    346       "relevance": "Systematic review of XAI applications in software engineering, directly relevant to understanding the state of explainability in SE tools."
    347     },
    348     {
    349       "title": "PyExplainer: Explaining the Predictions of Just-In-Time Defect Models",
    350       "authors": ["C. Pornprasit", "C. Tantithamthavorn", "J. Jiarpakdee", "M. Fu", "P. Thongtanunam"],
    351       "year": 2021,
    352       "doi": "10.1109/ASE51524.2021.9678820",
    353       "relevance": "Tool for generating human-understandable explanations for defect prediction models, relevant to explainable AI-assisted code analysis."
    354     },
    355     {
    356       "title": "X-SBR: On the Use of the History of Refactorings for Explainable Search-Based Refactoring and Intelligent Change Operators",
    357       "authors": ["C. Abid", "D. E. Rzig", "T. do N. Ferreira", "M. Kessentini", "T. Sharma"],
    358       "year": 2022,
    359       "doi": "10.1109/TSE.2022.3172576",
    360       "relevance": "Applies explainability to AI-driven code refactoring, relevant to understanding how transparency affects developer trust in automated code changes."
    361     },
    362     {
    363       "title": "Aligning XAI Explanations with Software Developers' Expectations: A Case Study with Code Smell Prioritization",
    364       "authors": ["Z. Huang", "H. Yu", "G. Fan", "Z. Shao", "M. Li", "Y. Liang"],
    365       "year": 2024,
    366       "doi": "10.1016/j.eswa.2023.121999",
    367       "relevance": "Empirical study on the gap between XAI-generated explanations and developer expectations, relevant to human-AI collaboration in code quality."
    368     },
    369     {
    370       "title": "Evaluation Metrics in Explainable Artificial Intelligence (XAI)",
    371       "authors": ["M. Coroamă", "A. Groza"],
    372       "year": 2022,
    373       "doi": "10.1007/978-3-031-19238-2_30",
    374       "relevance": "Proposes evaluation metrics for XAI systems, relevant to assessing the quality and effectiveness of AI explanations."
    375     }
    376   ],
    377   "engagement_factors": {
    378     "practical_relevance": {
    379       "score": 1,
    380       "justification": "Proposes a conceptual framework for XAI in SE but provides no implementation, tools, or actionable techniques a practitioner could use."
    381     },
    382     "surprise_contrarian": {
    383       "score": 0,
    384       "justification": "Confirms the widely held view that explainability is beneficial for AI tool adoption — no contrarian or surprising findings."
    385     },
    386     "fear_safety": {
    387       "score": 0,
    388       "justification": "No AI risk, security, or safety concerns are raised beyond the general observation that opaque AI can cause mistrust."
    389     },
    390     "drama_conflict": {
    391       "score": 0,
    392       "justification": "No controversy, no criticism of specific tools or companies, no provocative claims."
    393     },
    394     "demo_ability": {
    395       "score": 0,
    396       "justification": "No code, prototype, demo, or downloadable artifact is provided."
    397     },
    398     "brand_recognition": {
    399       "score": 0,
    400       "justification": "Solo independent researcher published in an unknown journal; no major lab or well-known product involved."
    401     }
    402   }
    403 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs