scan-v5.json (17934B)
1 { 2 "scan_version": 5, 3 "paper_type": "survey", 4 "paper": { 5 "title": "In-IDE Human-AI Experience in the Era of Large Language Models; A Literature Review", 6 "authors": [ 7 "Agnia Sergeyuk", 8 "Sergey Titov", 9 "Maliheh Izadi" 10 ], 11 "year": 2024, 12 "venue": "ICSE", 13 "arxiv_id": "2401.10739", 14 "doi": "10.1145/3643796.3648463" 15 }, 16 "checklist": { 17 "claims_and_evidence": { 18 "abstract_claims_supported": { 19 "applies": true, 20 "answer": true, 21 "justification": "Abstract claims 36 papers analyzed in three branches (Design, Impact, Quality). Text confirms 14+13+9=36 papers across these categories with detailed evidence for each branch's findings.", 22 "source": "haiku" 23 }, 24 "causal_claims_justified": { 25 "applies": true, 26 "answer": true, 27 "justification": "This is a survey synthesizing other papers' findings, not making original causal claims. Appropriately reports correlative findings from reviewed studies (e.g., 'AI tools increase productivity') without overreaching.", 28 "source": "haiku" 29 }, 30 "generalization_bounded": { 31 "applies": true, 32 "answer": true, 33 "justification": "Scope explicitly bounded to in-IDE AI assistance (IDEs, code editors, LLM-based tools) from 2020-2024. Title specifies 'Large Language Models' era. Limitations section acknowledges temporal boundary of focus.", 34 "source": "haiku" 35 }, 36 "alternative_explanations_discussed": { 37 "applies": true, 38 "answer": true, 39 "justification": "Discussion notes multiple factors shaping effectiveness: 'The context in which AI tools are used, the quality of suggestions, and compatibility issues play crucial roles.' 
Acknowledges trade-offs rather than single narratives.", 40 "source": "haiku" 41 }, 42 "proxy_outcome_distinction": { 43 "applies": true, 44 "answer": true, 45 "justification": "Paper distinguishes between measured and claimed outcomes: 'Using AI tools increases productivity but may involve a trade-off in code quality since developers sometimes struggle to receive... outputs that would align with their requirements.'", 46 "source": "haiku" 47 } 48 }, 49 "limitations_and_scope": { 50 "limitations_section_present": { 51 "applies": true, 52 "answer": true, 53 "justification": "Dedicated Section 4.2 'Threats to Validity' explicitly discusses sampling bias, temporal bias, source reliability, and interpretation bias with substantive discussion of each.", 54 "source": "haiku" 55 }, 56 "threats_to_validity_specific": { 57 "applies": true, 58 "answer": true, 59 "justification": "Specific threats articulated: sampling bias acknowledged despite refinement efforts; temporal bias from 2020+ cutoff stated; ArXiv non-peer-review concern flagged; interpretation bias from large information volume noted.", 60 "source": "haiku" 61 }, 62 "scope_boundaries_stated": { 63 "applies": true, 64 "answer": true, 65 "justification": "Explicit boundaries: databases listed (ACM, DBLP, IEEE, ArXiv), year range (2020-2024), initial pool (211) and final selection (36) documented. Search terms published in full.", 66 "source": "haiku" 67 } 68 }, 69 "conflicts_of_interest": { 70 "funding_disclosed": { 71 "applies": true, 72 "answer": false, 73 "justification": "No funding source is stated. Authors are from JetBrains Research and Delft University, but funding statement is absent from paper.", 74 "source": "haiku" 75 }, 76 "affiliations_disclosed": { 77 "applies": true, 78 "answer": true, 79 "justification": "Author affiliations clearly stated: JetBrains Research (Belgrade, Cyprus) and Delft University. 
However, JetBrains manufactures IDEs, creating potential conflict of interest not addressed.", 80 "source": "haiku" 81 }, 82 "funder_independent_of_outcome": { 83 "applies": true, 84 "answer": false, 85 "justification": "Funding source not explicitly stated. JetBrains (likely implicit funder given author affiliations) has commercial interest in AI-in-IDE adoption, creating potential non-independence not disclosed.", 86 "source": "haiku" 87 }, 88 "financial_interests_declared": { 89 "applies": true, 90 "answer": false, 91 "justification": "No competing interests statement present. No declaration of patents, equity, consulting, or other financial interests related to reviewed tools or AI assistants.", 92 "source": "haiku" 93 } 94 }, 95 "scope_and_framing": { 96 "key_terms_defined": { 97 "applies": true, 98 "answer": true, 99 "justification": "Key terms defined: HAX as 'deeper integration of AI within user interactions where AI is not just a tool but collaborative partner.' IDEs contextualized. Some terms (productivity, code quality) used without precise operational definition.", 100 "source": "haiku" 101 }, 102 "intended_contribution_clear": { 103 "applies": true, 104 "answer": true, 105 "justification": "Contribution explicitly stated: 'offers an overview of existing in-IDE HAX research, distilling main directions and insights to guide future investigations.' Bridges gap in centralized knowledge of developer-AI interactions in IDEs.", 106 "source": "haiku" 107 }, 108 "engagement_with_prior_work": { 109 "applies": true, 110 "answer": true, 111 "justification": "Introduction situates work within HCI and Human-AI Experience frameworks, citing Amershi et al. 2019. However, positioning relative to other literature reviews of AI tooling or developer productivity is limited. 
Adequate but not deep engagement.", 112 "source": "haiku" 113 } 114 } 115 }, 116 "type_checklist": { 117 "survey": { 118 "search_and_selection": { 119 "search_strategy_reproducible": { 120 "applies": true, 121 "answer": true, 122 "justification": "Exact search string provided: ('Integrated Development Environment' OR 'IDE'...) AND ('AI assistant' OR 'AI features'...). Strategy is explicit and reproducible; no ambiguity in query formulation.", 123 "source": "haiku" 124 }, 125 "inclusion_exclusion_explicit": { 126 "applies": true, 127 "answer": true, 128 "justification": "Inclusion/exclusion criteria stated: published 2020+, English only, thematically relevant, no blog posts. Criteria applied systematically to filter 211→36 papers.", 129 "source": "haiku" 130 }, 131 "prisma_or_structured_protocol": { 132 "applies": true, 133 "answer": false, 134 "justification": "Paper does not reference PRISMA guidelines or register protocol. Methodology is described informally without adherence to structured review standards.", 135 "source": "haiku" 136 }, 137 "search_terms_provided": { 138 "applies": true, 139 "answer": true, 140 "justification": "Full search string provided verbatim in Method section. All search terms for both IDE types and AI/interaction concepts are explicit.", 141 "source": "haiku" 142 }, 143 "databases_listed": { 144 "applies": true, 145 "answer": true, 146 "justification": "Four databases explicitly listed: ACM Digital Library, DBLP, IEEE Digital Library, ArXiv. Rationale for including ArXiv (rapid development in field) explained.", 147 "source": "haiku" 148 }, 149 "screening_process_documented": { 150 "applies": true, 151 "answer": true, 152 "justification": "Initial search yielded 211 papers, final selection 36. Table 1 provides venue breakdown (11 conferences, 3 journals, 22 ArXiv). 
Intermediate screening stages (title, abstract, full-text filtering) not detailed by count.", 153 "source": "haiku" 154 }, 155 "review_scope_justified": { 156 "applies": true, 157 "answer": true, 158 "justification": "2020+ cutoff justified: 'aligning with the recent advancements in the field' following LLM advances. Database selection justified by relevance and inclusivity rationale. Scope boundaries clearly motivated.", 159 "source": "haiku" 160 } 161 }, 162 "synthesis_quality": { 163 "conflicting_findings_acknowledged": { 164 "applies": true, 165 "answer": true, 166 "justification": "Paper notes productivity-quality trade-off: 'increases productivity but may involve a trade-off in code quality.' Acknowledges variability in correctness ('While AI assistants can provide relevant solutions...they still might be erroneous'). Limited deep engagement with fundamental contradictions.", 167 "source": "haiku" 168 }, 169 "quality_assessment_of_sources": { 170 "applies": true, 171 "answer": false, 172 "justification": "No quality rubric, risk-of-bias assessment, or differentiation applied to reviewed papers. Treats 22 ArXiv preprints and 14 peer-reviewed papers (11 conference, 3 journal) equally without quality weighting.", 173 "source": "haiku" 174 }, 175 "publication_bias_discussed": { 176 "applies": true, 177 "answer": true, 178 "justification": "Acknowledges ArXiv's mitigating effect: 'open publication environment of ArXiv encourages publishing negative or null results.' Broader publication bias (positive findings more likely in peer-reviewed venues) not discussed.", 179 "source": "haiku" 180 }, 181 "quantitative_synthesis_present": { 182 "applies": true, 183 "answer": false, 184 "justification": "Vote counting by category present (14 papers on design, 13 on impact, 9 on quality). No meta-analysis, effect size pooling, or statistical aggregation. 
Synthesis is categorical, not quantitative.", 185 "source": "haiku" 186 }, 187 "recommendations_supported_by_evidence": { 188 "applies": true, 189 "answer": true, 190 "justification": "Recommendations (task-specific UI, trust, readability) flow from identified gaps in reviewed papers. However, recommendations are somewhat generic and not tightly grounded in quantitative evidence or majority finding.", 191 "source": "haiku" 192 } 193 } 194 } 195 }, 196 "claims": [ 197 { 198 "claim": "User interface of in-IDE AI assistance affects the usefulness of this tool and should be built thoughtfully", 199 "evidence": "Design principles papers [16, 20, 34, 38] highlight importance of clear communication, user control, adaptability, and snoozability", 200 "supported": "strong" 201 }, 202 { 203 "claim": "Using AI tools increases productivity but may involve a trade-off in code quality", 204 "evidence": "Multiple papers [10, 13, 17, 35, 36, 39, 43] show productivity gains but developers struggle to receive outputs aligned with requirements", 205 "supported": "moderate" 206 }, 207 { 208 "claim": "AI assistants generally produce understandable code that might be even less complex than code written by humans", 209 "evidence": "Several research papers [8, 24, 40] show assistants produce understandable code, possibly less complex than human-written code", 210 "supported": "moderate" 211 }, 212 { 213 "claim": "Security vulnerabilities in AI-generated code can be significant, with rates reaching 40% for C language programs", 214 "evidence": "Security assessment studies [13, 25, 30] find vulnerability rates vary by model, with some cases reaching 40% for C language", 215 "supported": "moderate" 216 }, 217 { 218 "claim": "In-IDE Human-AI interaction significantly affects and changes developers' workflows", 219 "evidence": "Impact studies [5, 21, 26] show interaction introduces dedicated time for AI output processing and creates multiple modes of HAX interaction", 220 "supported": "strong" 
221 }, 222 { 223 "claim": "Fine-tuning foundational models can improve quality of interaction through accuracy and timing of suggestions", 224 "evidence": "Papers [11, 22, 41] investigate how model fine-tuning improves suggestion accuracy and acceptance rates", 225 "supported": "moderate" 226 }, 227 { 228 "claim": "While AI tools positively influence programming education and motivation, challenges include over-reliance requiring educational strategies", 229 "evidence": "Novice-focused studies [2, 15, 26] show educational benefits but also overreliance concerns needing instructional attention", 230 "supported": "moderate" 231 } 232 ], 233 "methodology_tags": [ 234 "literature-review", 235 "qualitative" 236 ], 237 "key_findings": "The survey identifies three primary research branches in in-IDE Human-AI Experience: Design (emphasizing UI principles for transparency and user control), Impact (showing productivity gains offset by code quality trade-offs and workflow changes), and Quality (demonstrating that AI output correctness varies significantly and security vulnerabilities remain a concern). The field would benefit from research on task-specific interfaces, trust mechanisms, and code readability alignment rather than generic chat-based interaction.", 238 "red_flags": [ 239 { 240 "flag": "No quality assessment of sources", 241 "detail": "22 of 36 papers are ArXiv preprints; no risk-of-bias assessment, quality rubric, or differentiation between peer-reviewed and preprint papers despite their reliability differences." 242 }, 243 { 244 "flag": "Unaddressed conflict of interest", 245 "detail": "Authors from JetBrains (IDE vendor) reviewing in-IDE AI tools. No competing interests statement despite clear financial incentive for positive framing of AI-in-IDE adoption." 246 }, 247 { 248 "flag": "No funding disclosure", 249 "detail": "Funding source not stated despite author affiliations with JetBrains Research; whether JetBrains funded this review is undisclosed." 
250 }, 251 { 252 "flag": "Screening process documentation incomplete", 253 "detail": "Jumps from 211 initial papers to 36 final without documenting title/abstract screening counts, making reproducibility difficult." 254 }, 255 { 256 "flag": "No PRISMA adherence", 257 "detail": "Survey does not reference PRISMA guidelines or structured protocol despite this being standard for literature reviews." 258 }, 259 { 260 "flag": "Limited publication bias discussion", 261 "detail": "Acknowledges ArXiv's positive contribution but does not address broader publication bias (positive findings overrepresented in peer-reviewed venues)." 262 }, 263 { 264 "flag": "Vote counting only, no quantitative synthesis", 265 "detail": "Synthesis groups 36 papers into three categories with counts (14, 13, 9) but performs no meta-analysis, effect aggregation, or statistical pooling despite calling itself a 'review.'" 266 }, 267 { 268 "flag": "Generic recommendations weakly grounded in evidence", 269 "detail": "Recommendations for 'task-specific UI,' 'trust,' and 'readability' are somewhat obvious directions not tightly linked to quantitative findings or majority patterns in reviewed papers." 
270 } 271 ], 272 "cited_papers": [ 273 { 274 "title": "Guidelines for Human-AI Interaction", 275 "authors": "Amershi et al.", 276 "year": 2019, 277 "relevance": "Foundational framework for Human-AI Experience (HAX) concept that frames the entire survey" 278 }, 279 { 280 "title": "Human–Computer Interaction", 281 "authors": "Rapp, A.", 282 "year": 2023, 283 "relevance": "Foundational HCI theory contextualized in introduction; positions AI integration as shift in HCI paradigms" 284 }, 285 { 286 "title": "Deep reinforcement learning from human preferences", 287 "authors": "Christiano et al.", 288 "year": 2017, 289 "relevance": "Alignment methods relevant to discussed approaches for improving model output quality" 290 }, 291 { 292 "title": "A Unified Approach to Interpreting Model Predictions", 293 "authors": "Lundberg & Lee", 294 "year": 2017, 295 "relevance": "Explainability approach cited for trust-building in HAX; interpretable AI relevant to readability theme" 296 }, 297 { 298 "title": "Automatically assessing code understandability: How far are we?", 299 "authors": "Scalabrino et al.", 300 "year": 2017, 301 "relevance": "Foundational work on code readability metrics; directly relevant to Quality theme recommendation on readability alignment" 302 } 303 ], 304 "engagement_factors": { 305 "practical_relevance": { 306 "score": 2, 307 "justification": "Practitioners (IDE vendors, AI tool builders) can identify research directions and pain points, but survey provides no actionable design guidelines or implementation strategies." 308 }, 309 "surprise_contrarian": { 310 "score": 1, 311 "justification": "Findings largely confirm conventional wisdom (productivity increases, trade-offs exist, UI matters). No contrarian or counter-intuitive claims; measured tone avoids controversy." 
312 }, 313 "fear_safety": { 314 "score": 1, 315 "justification": "Brief mention of security vulnerabilities (40% C code could be vulnerable) but safety/risk concerns are not emphasized or foregrounded as major findings." 316 }, 317 "drama_conflict": { 318 "score": 2, 319 "justification": "Topic (AI in code editors) is trendy and touches automation anxieties, but paper maintains academic neutrality without controversy, conflict framing, or provocative claims." 320 }, 321 "demo_ability": { 322 "score": 0, 323 "justification": "Literature review with no interactive elements, prototypes, or demonstrations. Readers must consult original 36 papers to engage with examples." 324 }, 325 "brand_recognition": { 326 "score": 2, 327 "justification": "Authors from JetBrains (recognizable IDE vendor) and Delft University. Published at ICSE 2024 (top-tier SE venue). Moderate recognition without celebrity authors." 328 } 329 }, 330 "hn_data": { 331 "threads": [], 332 "top_points": 0, 333 "total_points": 0, 334 "total_comments": 0 335 } 336 }