ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan-v5.json (17934B)


      1 {
      2   "scan_version": 5,
      3   "paper_type": "survey",
      4   "paper": {
      5     "title": "In-IDE Human-AI Experience in the Era of Large Language Models; A Literature Review",
      6     "authors": [
      7       "Agnia Sergeyuk",
      8       "Sergey Titov",
      9       "Maliheh Izadi"
     10     ],
     11     "year": 2024,
     12     "venue": "ICSE",
     13     "arxiv_id": "2401.10739",
     14     "doi": "10.1145/3643796.3648463"
     15   },
     16   "checklist": {
     17     "claims_and_evidence": {
     18       "abstract_claims_supported": {
     19         "applies": true,
     20         "answer": true,
     21         "justification": "Abstract claims 36 papers analyzed in three branches (Design, Impact, Quality). Text confirms 14+13+9=36 papers across these categories with detailed evidence for each branch's findings.",
     22         "source": "haiku"
     23       },
     24       "causal_claims_justified": {
     25         "applies": true,
     26         "answer": true,
     27         "justification": "This is a survey synthesizing other papers' findings, not making original causal claims. Appropriately reports correlative findings from reviewed studies (e.g., 'AI tools increase productivity') without overreaching.",
     28         "source": "haiku"
     29       },
     30       "generalization_bounded": {
     31         "applies": true,
     32         "answer": true,
     33         "justification": "Scope explicitly bounded to in-IDE AI assistance (IDEs, code editors, LLM-based tools) from 2020-2024. Title specifies 'Large Language Models' era. Limitations section acknowledges temporal boundary of focus.",
     34         "source": "haiku"
     35       },
     36       "alternative_explanations_discussed": {
     37         "applies": true,
     38         "answer": true,
     39         "justification": "Discussion notes multiple factors shaping effectiveness: 'The context in which AI tools are used, the quality of suggestions, and compatibility issues play crucial roles.' Acknowledges trade-offs rather than single narratives.",
     40         "source": "haiku"
     41       },
     42       "proxy_outcome_distinction": {
     43         "applies": true,
     44         "answer": true,
     45         "justification": "Paper distinguishes between measured and claimed outcomes: 'Using AI tools increases productivity but may involve a trade-off in code quality since developers sometimes struggle to receive... outputs that would align with their requirements.'",
     46         "source": "haiku"
     47       }
     48     },
     49     "limitations_and_scope": {
     50       "limitations_section_present": {
     51         "applies": true,
     52         "answer": true,
     53         "justification": "Dedicated Section 4.2 'Threats to Validity' explicitly discusses sampling bias, temporal bias, source reliability, and interpretation bias with substantive discussion of each.",
     54         "source": "haiku"
     55       },
     56       "threats_to_validity_specific": {
     57         "applies": true,
     58         "answer": true,
     59         "justification": "Specific threats articulated: sampling bias acknowledged despite refinement efforts; temporal bias from 2020+ cutoff stated; ArXiv non-peer-review concern flagged; interpretation bias from large information volume noted.",
     60         "source": "haiku"
     61       },
     62       "scope_boundaries_stated": {
     63         "applies": true,
     64         "answer": true,
     65         "justification": "Explicit boundaries: databases listed (ACM, DBLP, IEEE, ArXiv), year range (2020-2024), initial pool (211) and final selection (36) documented. Search terms published in full.",
     66         "source": "haiku"
     67       }
     68     },
     69     "conflicts_of_interest": {
     70       "funding_disclosed": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "No funding source is stated. Authors are from JetBrains Research and Delft University, but funding statement is absent from paper.",
     74         "source": "haiku"
     75       },
     76       "affiliations_disclosed": {
     77         "applies": true,
     78         "answer": true,
     79         "justification": "Author affiliations clearly stated: JetBrains Research (Belgrade, Cyprus) and Delft University. However, JetBrains manufactures IDEs, creating potential conflict of interest not addressed.",
     80         "source": "haiku"
     81       },
     82       "funder_independent_of_outcome": {
     83         "applies": true,
     84         "answer": false,
     85         "justification": "Funding source not explicitly stated. JetBrains (likely implicit funder given author affiliations) has commercial interest in AI-in-IDE adoption, creating potential non-independence not disclosed.",
     86         "source": "haiku"
     87       },
     88       "financial_interests_declared": {
     89         "applies": true,
     90         "answer": false,
     91         "justification": "No competing interests statement present. No declaration of patents, equity, consulting, or other financial interests related to reviewed tools or AI assistants.",
     92         "source": "haiku"
     93       }
     94     },
     95     "scope_and_framing": {
     96       "key_terms_defined": {
     97         "applies": true,
     98         "answer": true,
     99         "justification": "Key terms defined: HAX as 'deeper integration of AI within user interactions where AI is not just a tool but collaborative partner.' IDEs contextualized. Some terms (productivity, code quality) used without precise operational definition.",
    100         "source": "haiku"
    101       },
    102       "intended_contribution_clear": {
    103         "applies": true,
    104         "answer": true,
    105         "justification": "Contribution explicitly stated: 'offers an overview of existing in-IDE HAX research, distilling main directions and insights to guide future investigations.' Bridges gap in centralized knowledge of developer-AI interactions in IDEs.",
    106         "source": "haiku"
    107       },
    108       "engagement_with_prior_work": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "Introduction situates work within HCI and Human-AI Experience frameworks, citing Amershi et al. 2019. However, positioning relative to other literature reviews of AI tooling or developer productivity is limited. Adequate but not deep engagement.",
    112         "source": "haiku"
    113       }
    114     }
    115   },
    116   "type_checklist": {
    117     "survey": {
    118       "search_and_selection": {
    119         "search_strategy_reproducible": {
    120           "applies": true,
    121           "answer": true,
    122           "justification": "Exact search string provided: ('Integrated Development Environment' OR 'IDE'...) AND ('AI assistant' OR 'AI features'...). Strategy is explicit and reproducible; no ambiguity in query formulation.",
    123           "source": "haiku"
    124         },
    125         "inclusion_exclusion_explicit": {
    126           "applies": true,
    127           "answer": true,
    128           "justification": "Inclusion/exclusion criteria stated: published 2020+, English only, thematically relevant, no blog posts. Criteria applied systematically to filter 211→36 papers.",
    129           "source": "haiku"
    130         },
    131         "prisma_or_structured_protocol": {
    132           "applies": true,
    133           "answer": false,
    134           "justification": "Paper does not reference PRISMA guidelines or register protocol. Methodology is described informally without adherence to structured review standards.",
    135           "source": "haiku"
    136         },
    137         "search_terms_provided": {
    138           "applies": true,
    139           "answer": true,
    140           "justification": "Full search string provided verbatim in Method section. All search terms for both IDE types and AI/interaction concepts are explicit.",
    141           "source": "haiku"
    142         },
    143         "databases_listed": {
    144           "applies": true,
    145           "answer": true,
    146           "justification": "Four databases explicitly listed: ACM Digital Library, DBLP, IEEE Digital Library, ArXiv. Rationale for including ArXiv (rapid development in field) explained.",
    147           "source": "haiku"
    148         },
    149         "screening_process_documented": {
    150           "applies": true,
    151           "answer": true,
    152           "justification": "Initial search yielded 211 papers, final selection 36. Table 1 provides venue breakdown (11 conferences, 3 journals, 22 ArXiv). Intermediate screening stages (title, abstract, full-text filtering) not detailed by count.",
    153           "source": "haiku"
    154         },
    155         "review_scope_justified": {
    156           "applies": true,
    157           "answer": true,
    158           "justification": "2020+ cutoff justified: 'aligning with the recent advancements in the field' following LLM advances. Database selection justified by relevance and inclusivity rationale. Scope boundaries clearly motivated.",
    159           "source": "haiku"
    160         }
    161       },
    162       "synthesis_quality": {
    163         "conflicting_findings_acknowledged": {
    164           "applies": true,
    165           "answer": true,
    166           "justification": "Paper notes productivity-quality trade-off: 'increases productivity but may involve a trade-off in code quality.' Acknowledges variability in correctness ('While AI assistants can provide relevant solutions...they still might be erroneous'). Limited deep engagement with fundamental contradictions.",
    167           "source": "haiku"
    168         },
    169         "quality_assessment_of_sources": {
    170           "applies": true,
    171           "answer": false,
    172           "justification": "No quality rubric, risk-of-bias assessment, or differentiation applied to reviewed papers. Treats 22 ArXiv preprints and 11 peer-reviewed conference papers equally without quality weighting.",
    173           "source": "haiku"
    174         },
    175         "publication_bias_discussed": {
    176           "applies": true,
    177           "answer": true,
    178           "justification": "Acknowledges ArXiv's mitigating effect: 'open publication environment of ArXiv encourages publishing negative or null results.' Broader publication bias (positive findings more likely in peer-reviewed venues) not discussed.",
    179           "source": "haiku"
    180         },
    181         "quantitative_synthesis_present": {
    182           "applies": true,
    183           "answer": false,
    184           "justification": "Vote counting by category present (14 papers on design, 13 on impact, 9 on quality). No meta-analysis, effect size pooling, or statistical aggregation. Synthesis is categorical, not quantitative.",
    185           "source": "haiku"
    186         },
    187         "recommendations_supported_by_evidence": {
    188           "applies": true,
    189           "answer": true,
    190           "justification": "Recommendations (task-specific UI, trust, readability) flow from identified gaps in reviewed papers. However, recommendations are somewhat generic and not tightly grounded in quantitative evidence or majority findings.",
    191           "source": "haiku"
    192         }
    193       }
    194     }
    195   },
    196   "claims": [
    197     {
    198       "claim": "User interface of in-IDE AI assistance affects the usefulness of this tool and should be built thoughtfully",
    199       "evidence": "Design principles papers [16, 20, 34, 38] highlight importance of clear communication, user control, adaptability, and snoozability",
    200       "supported": "strong"
    201     },
    202     {
    203       "claim": "Using AI tools increases productivity but may involve a trade-off in code quality",
    204       "evidence": "Multiple papers [10, 13, 17, 35, 36, 39, 43] show productivity gains but developers struggle to receive outputs aligned with requirements",
    205       "supported": "moderate"
    206     },
    207     {
    208       "claim": "AI assistants generally produce understandable code that might be even less complex than code written by humans",
    209       "evidence": "Several research papers [8, 24, 40] show assistants produce understandable code, possibly less complex than human-written code",
    210       "supported": "moderate"
    211     },
    212     {
    213       "claim": "Security vulnerabilities in AI-generated code can be significant, with rates reaching 40% for C language programs",
    214       "evidence": "Security assessment studies [13, 25, 30] find vulnerability rates vary by model, with some cases reaching 40% for C language",
    215       "supported": "moderate"
    216     },
    217     {
    218       "claim": "In-IDE Human-AI interaction significantly affects and changes developers' workflows",
    219       "evidence": "Impact studies [5, 21, 26] show interaction introduces dedicated time for AI output processing and creates multiple modes of HAX interaction",
    220       "supported": "strong"
    221     },
    222     {
    223       "claim": "Fine-tuning foundational models can improve quality of interaction through accuracy and timing of suggestions",
    224       "evidence": "Papers [11, 22, 41] investigate how model fine-tuning improves suggestion accuracy and acceptance rates",
    225       "supported": "moderate"
    226     },
    227     {
    228       "claim": "While AI tools positively influence programming education and motivation, challenges include over-reliance requiring educational strategies",
    229       "evidence": "Novice-focused studies [2, 15, 26] show educational benefits but also overreliance concerns needing instructional attention",
    230       "supported": "moderate"
    231     }
    232   ],
    233   "methodology_tags": [
    234     "literature-review",
    235     "qualitative"
    236   ],
    237   "key_findings": "The survey identifies three primary research branches in in-IDE Human-AI Experience: Design (emphasizing UI principles for transparency and user control), Impact (showing productivity gains offset by code quality trade-offs and workflow changes), and Quality (demonstrating that AI output correctness varies significantly and security vulnerabilities remain a concern). The field would benefit from research on task-specific interfaces, trust mechanisms, and code readability alignment rather than generic chat-based interaction.",
    238   "red_flags": [
    239     {
    240       "flag": "No quality assessment of sources",
    241       "detail": "22 of 36 papers are ArXiv preprints; no risk-of-bias assessment, quality rubric, or differentiation between peer-reviewed and preprint papers despite their reliability differences."
    242     },
    243     {
    244       "flag": "Unaddressed conflict of interest",
    245       "detail": "Authors from JetBrains (IDE vendor) reviewing in-IDE AI tools. No competing interests statement despite clear financial incentive for positive framing of AI-in-IDE adoption."
    246     },
    247     {
    248       "flag": "No funding disclosure",
    249       "detail": "Funding source not stated despite author affiliations with JetBrains Research; whether JetBrains funded this review is undisclosed."
    250     },
    251     {
    252       "flag": "Screening process documentation incomplete",
    253       "detail": "Jumps from 211 initial papers to 36 final without documenting title/abstract screening counts, making reproducibility difficult."
    254     },
    255     {
    256       "flag": "No PRISMA adherence",
    257       "detail": "Survey does not reference PRISMA guidelines or structured protocol despite this being standard for literature reviews."
    258     },
    259     {
    260       "flag": "Limited publication bias discussion",
    261       "detail": "Acknowledges ArXiv's positive contribution but does not address broader publication bias (positive findings overrepresented in peer-reviewed venues)."
    262     },
    263     {
    264       "flag": "Vote counting only, no quantitative synthesis",
    265       "detail": "Synthesis groups 36 papers into three categories with counts (14, 13, 9) but performs no meta-analysis, effect aggregation, or statistical pooling despite calling itself a 'review.'"
    266     },
    267     {
    268       "flag": "Generic recommendations weakly grounded in evidence",
    269       "detail": "Recommendations for 'task-specific UI,' 'trust,' and 'readability' are somewhat obvious directions not tightly linked to quantitative findings or majority patterns in reviewed papers."
    270     }
    271   ],
    272   "cited_papers": [
    273     {
    274       "title": "Guidelines for Human-AI Interaction",
    275       "authors": "Amershi et al.",
    276       "year": 2019,
    277       "relevance": "Foundational framework for Human-AI Experience (HAX) concept that frames the entire survey"
    278     },
    279     {
    280       "title": "Human–Computer Interaction",
    281       "authors": "Rapp, A.",
    282       "year": 2023,
    283       "relevance": "Foundational HCI theory contextualized in introduction; positions AI integration as shift in HCI paradigms"
    284     },
    285     {
    286       "title": "Deep reinforcement learning from human preferences",
    287       "authors": "Christiano et al.",
    288       "year": 2017,
    289       "relevance": "Alignment methods relevant to discussed approaches for improving model output quality"
    290     },
    291     {
    292       "title": "A Unified Approach to Interpreting Model Predictions",
    293       "authors": "Lundberg & Lee",
    294       "year": 2017,
    295       "relevance": "Explainability approach cited for trust-building in HAX; interpretable AI relevant to readability theme"
    296     },
    297     {
    298       "title": "Automatically assessing code understandability: How far are we?",
    299       "authors": "Scalabrino et al.",
    300       "year": 2017,
    301       "relevance": "Foundational work on code readability metrics; directly relevant to Quality theme recommendation on readability alignment"
    302     }
    303   ],
    304   "engagement_factors": {
    305     "practical_relevance": {
    306       "score": 2,
    307       "justification": "Practitioners (IDE vendors, AI tool builders) can identify research directions and pain points, but survey provides no actionable design guidelines or implementation strategies."
    308     },
    309     "surprise_contrarian": {
    310       "score": 1,
    311       "justification": "Findings largely confirm conventional wisdom (productivity increases, trade-offs exist, UI matters). No contrarian or counter-intuitive claims; measured tone avoids controversy."
    312     },
    313     "fear_safety": {
    314       "score": 1,
    315       "justification": "Brief mention of security vulnerabilities (up to 40% of AI-generated C programs could be vulnerable) but safety/risk concerns are not emphasized or foregrounded as major findings."
    316     },
    317     "drama_conflict": {
    318       "score": 2,
    319       "justification": "Topic (AI in code editors) is trendy and touches automation anxieties, but paper maintains academic neutrality without controversy, conflict framing, or provocative claims."
    320     },
    321     "demo_ability": {
    322       "score": 0,
    323       "justification": "Literature review with no interactive elements, prototypes, or demonstrations. Readers must consult original 36 papers to engage with examples."
    324     },
    325     "brand_recognition": {
    326       "score": 2,
    327       "justification": "Authors from JetBrains (recognizable IDE vendor) and Delft University. Published at ICSE 2024 (top-tier SE venue). Moderate recognition without celebrity authors."
    328     }
    329   },
    330   "hn_data": {
    331     "threads": [],
    332     "top_points": 0,
    333     "total_points": 0,
    334     "total_comments": 0
    335   }
    336 }

Impressum · Datenschutz