ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan-v5.json (29358B)


      1 {
      2   "scan_version": 5,
      3   "paper_type": "empirical",
      4   "paper": {
      5     "title": "Evolving with AI: A Longitudinal Analysis of Developer Logs",
      6     "authors": [
      7       "Agnia Sergeyuk",
      8       "Eric Huang",
      9       "Dariia Karaeva",
     10       "Anastasiia Serova",
     11       "Yaroslav Golubev",
     12       "Iftekhar Ahmed"
     13     ],
     14     "year": 2026,
     15     "venue": "ICSE 2026",
     16     "arxiv_id": "2601.10258",
     17     "doi": "10.1145/3744916.3787811"
     18   },
     19   "checklist": {
     20     "claims_and_evidence": {
     21       "abstract_claims_supported": {
     22         "applies": true,
     23         "answer": true,
     24         "justification": "The abstract claim that AI users 'produce substantially more code but also delete significantly more' is directly supported by telemetry: +587 characters/month vs +75 for non-users; +102 deletions/month vs +7.6. The 82.3% perceived productivity gain is confirmed by survey data.",
     25         "source": "haiku"
     26       },
     27       "causal_claims_justified": {
     28         "applies": true,
     29         "answer": false,
     30         "justification": "The conclusion states 'AI redistributes and reshapes development work' using causal language, but the design is entirely observational with self-selected groups. Authors explicitly acknowledge in threats to validity: 'These interpretations cannot be fully disentangled without experimental assignment to conditions.'",
     31         "source": "haiku"
     32       },
     33       "generalization_bounded": {
     34         "applies": true,
     35         "answer": true,
     36         "justification": "The threats to validity section explicitly limits generalization to JetBrains IDEs and the specific JetBrains AI Assistant, stating 'findings may not generalize to all development environments.' Findings are also bounded to sustained early adopters rather than casual users.",
     37         "source": "haiku"
     38       },
     39       "alternative_explanations_discussed": {
     40         "applies": true,
     41         "answer": true,
     42         "justification": "The internal validity section explicitly discusses that 'AI users are generally more active in the IDE than AI non-users' independent of AI, and that 'early adopters maintain elevated activity levels regardless,' providing a clear self-selection alternative explanation.",
     43         "source": "haiku"
     44       },
     45       "proxy_outcome_distinction": {
     46         "applies": true,
     47         "answer": true,
     48         "justification": "The paper consistently labels metrics as proxies throughout: 'As a proxy for productivity, we counted the number of typed characters'; limitations of each proxy are acknowledged, and the conclusion discusses what the proxies capture versus what is claimed.",
     49         "source": "haiku"
     50       }
     51     },
     52     "limitations_and_scope": {
     53       "limitations_section_present": {
     54         "applies": true,
     55         "answer": true,
     56         "justification": "Section 6 'Threats to Validity' is a dedicated section with three subsections (construct, internal, external validity), well beyond a single sentence in the conclusion.",
     57         "source": "haiku"
     58       },
     59       "threats_to_validity_specific": {
     60         "applies": true,
     61         "answer": true,
     62         "justification": "Specific threats are enumerated: JetBrains-only IDE data, misclassification of users of non-JetBrains AI tools, self-selection bias in AI user group, telemetry not capturing developer intent, and the survey's retrospective holistic nature measuring a different construct than behavioral data.",
     63         "source": "haiku"
     64       },
     65       "scope_boundaries_stated": {
     66         "applies": true,
     67         "answer": true,
     68         "justification": "External validity section explicitly states 'our findings may not generalize to all development environments or interface paradigms' and scopes results to the JetBrains ecosystem and the AI code-completion transition period (not agentic AI).",
     69         "source": "haiku"
     70       }
     71     },
     72     "conflicts_of_interest": {
     73       "funding_disclosed": {
     74         "applies": true,
     75         "answer": false,
     76         "justification": "No funding disclosure or acknowledgments section is present in the paper. The collaboration with JetBrains as data provider is described in the methodology but no formal funding statement appears.",
     77         "source": "haiku"
     78       },
     79       "affiliations_disclosed": {
     80         "applies": true,
     81         "answer": true,
     82         "justification": "Author affiliations are clearly disclosed: three authors are affiliated with JetBrains or JetBrains Research, and the data is explicitly described as 'provided by JetBrains in an anonymized form,' making the industry connection transparent.",
     83         "source": "haiku"
     84       },
     85       "funder_independent_of_outcome": {
     86         "applies": true,
     87         "answer": false,
     88         "justification": "The data is provided by JetBrains, multiple authors are JetBrains employees, and the study evaluates JetBrains AI Assistant specifically. The data provider and the evaluated product belong to the same organization.",
     89         "source": "haiku"
     90       },
     91       "financial_interests_declared": {
     92         "applies": true,
     93         "answer": false,
     94         "justification": "No competing interests statement or declaration of financial interests (patents, equity, consulting) is present in the paper despite clear industry ties.",
     95         "source": "haiku"
     96       }
     97     },
     98     "scope_and_framing": {
     99       "key_terms_defined": {
    100         "applies": true,
    101         "answer": true,
    102         "justification": "All five workflow dimensions are operationally defined: productivity = typed characters, code quality = debugging instances, code editing = deletion count, code reuse = external pastes, context switching = IDE window activations. Each operationalization is explicitly motivated.",
    103         "source": "haiku"
    104       },
    105       "intended_contribution_clear": {
    106         "applies": true,
    107         "answer": true,
    108         "justification": "Two explicit contributions are stated: 'Empirical characterization of evolving AI-assisted workflows' and 'Reframing of AI's impact on effort and attention,' clearly distinguishing what the paper adds beyond prior short-term or self-report studies.",
    109         "source": "haiku"
    110       },
    111       "engagement_with_prior_work": {
    112         "applies": true,
    113         "answer": true,
    114         "justification": "Section 2 provides detailed engagement across five sub-topics, explicitly identifying the gap ('short-term experiments,' 'self-reported perceptions') and explaining how this study's longitudinal mixed-method approach differs from and builds on specific prior works.",
    115         "source": "haiku"
    116       }
    117     }
    118   },
    119   "type_checklist": {
    120     "empirical": {
    121       "artifacts": {
    122         "code_released": {
    123           "applies": true,
    124           "answer": false,
    125           "justification": "Supplementary materials include survey questionnaire, anonymized responses, interview script, and statistical outputs (at Zenodo), but no analysis source code is mentioned as released.",
    126           "source": "haiku"
    127         },
    128         "data_released": {
    129           "applies": true,
    130           "answer": false,
    131           "justification": "The paper explicitly states 'Raw IDE telemetry logs cannot be released due to confidentiality agreements with our industry partner.' Survey responses are partially available, but the primary behavioral dataset (151M events) is unavailable.",
    132           "source": "haiku"
    133         },
    134         "environment_specified": {
    135           "applies": true,
    136           "answer": false,
    137           "justification": "No requirements.txt, Dockerfile, or explicit environment specification is provided. References cite specific scipy and statsmodels functions but no versioned environment specs are given.",
    138           "source": "haiku"
    139         },
    140         "reproduction_instructions": {
    141           "applies": true,
    142           "answer": false,
    143           "justification": "No step-by-step reproduction instructions are provided. The primary telemetry dataset is unavailable and no code pipeline is released, making reproduction infeasible.",
    144           "source": "haiku"
    145         }
    146       },
    147       "statistical_methodology": {
    148         "confidence_intervals_or_error_bars": {
    149           "applies": true,
    150           "answer": true,
    151           "justification": "All five longitudinal figures show shaded regions representing ±1 standard deviation from the monthly mean for both AI user and non-user groups across the 24-month period.",
    152           "source": "haiku"
    153         },
    154         "significance_tests": {
    155           "applies": true,
    156           "answer": true,
    157           "justification": "Mixed Linear Model Regression is used for all five metrics with p < 0.05 threshold; normality (Kolmogorov-Smirnov) and heteroscedasticity (Bartlett's test) are verified to justify model choice. Full outputs in supplementary.",
    158           "source": "haiku"
    159         },
    160         "effect_sizes_reported": {
    161           "applies": true,
    162           "answer": true,
    163           "justification": "Quantitative monthly effect estimates are reported for all metrics: +587 vs +75 characters/month, +102 vs +7.6 deletions/month, +6.4 vs -7.6 IDE activations/month, allowing assessment of practical magnitude.",
    164           "source": "haiku"
    165         },
    166         "sample_size_justified": {
    167           "applies": true,
    168           "answer": false,
    169           "justification": "The samples of 800 developers (400 per group) and 62 survey respondents are described, but no power analysis or sample size justification is provided; sample sizes were determined by availability given the selection criteria.",
    170           "source": "haiku"
    171         },
    172         "variance_reported": {
    173           "applies": true,
    174           "answer": true,
    175           "justification": "Standard deviation bands shown in all longitudinal figures; the mixed-effects model also accounts for random intercepts per device to handle inter-device variability.",
    176           "source": "haiku"
    177         }
    178       },
    179       "evaluation_design": {
    180         "baselines_included": {
    181           "applies": true,
    182           "answer": true,
    183           "justification": "400 AI non-users serve as a comparison group, matched by having IDE activity at both temporal endpoints (October 2022 and October 2024), providing a behavioral baseline.",
    184           "source": "haiku"
    185         },
    186         "baselines_contemporary": {
    187           "applies": true,
    188           "answer": true,
    189           "justification": "AI non-users are drawn from the same two-year period (October 2022–October 2024) as AI users, making the comparison group fully contemporary.",
    190           "source": "haiku"
    191         },
    192         "ablation_study": {
    193           "applies": false,
    194           "answer": false,
    195           "justification": "This is an observational study of developer behavior, not a system with components to ablate.",
    196           "source": "haiku"
    197         },
    198         "multiple_metrics": {
    199           "applies": true,
    200           "answer": true,
    201           "justification": "Five workflow dimensions are studied, each with both a telemetry metric and survey responses, providing multi-faceted coverage across behavioral and perceptual data sources.",
    202           "source": "haiku"
    203         },
    204         "human_evaluation": {
    205           "applies": true,
    206           "answer": true,
    207           "justification": "62 developers completed a structured survey evaluating AI tool impact on their workflows, supplemented by 5 semi-structured in-depth interviews providing qualitative assessment.",
    208           "source": "haiku"
    209         },
    210         "held_out_test_set": {
    211           "applies": false,
    212           "answer": false,
    213           "justification": "This is an observational longitudinal study, not a prediction task. Held-out test sets are not applicable.",
    214           "source": "haiku"
    215         },
    216         "per_category_breakdown": {
    217           "applies": true,
    218           "answer": true,
    219           "justification": "Results are presented separately for each of the five RQs with dedicated figures and analysis, and Table 1 provides a structured overview comparing survey vs. telemetry findings per dimension.",
    220           "source": "haiku"
    221         },
    222         "failure_cases_discussed": {
    223           "applies": true,
    224           "answer": true,
    225           "justification": "Cases where AI increases burden are discussed (P33's time wasted prompting, P7's paranoia about AI code quality) and the finding that AI users show no improvement in debugging (unlike non-users who improve) is reported and discussed.",
    226           "source": "haiku"
    227         },
    228         "negative_results_reported": {
    229           "applies": true,
    230           "answer": true,
    231           "justification": "The paper reports that debugging activity does not improve for AI users (contra expectations), context switching increases rather than decreases, and perceptions fail to match behavioral changes — all framed as findings rather than minimized.",
    232           "source": "haiku"
    233         }
    234       },
    235       "setup_transparency": {
    236         "model_versions_specified": {
    237           "applies": true,
    238           "answer": false,
    239           "justification": "JetBrains AI Assistant is the product studied but no specific model version or snapshot date is reported; the paper states only that AI users were identified by their use of the assistant during April–October 2024, when it 'became widely available and stable.'",
    240           "source": "haiku"
    241         },
    242         "prompts_provided": {
    243           "applies": false,
    244           "answer": false,
    245           "justification": "This is an observational study of developer behavior interacting with a commercial product; no prompts were administered by the researchers.",
    246           "source": "haiku"
    247         },
    248         "hyperparameters_reported": {
    249           "applies": false,
    250           "answer": false,
    251           "justification": "No AI model was run by the researchers; this is a behavioral telemetry study. Hyperparameters are not applicable.",
    252           "source": "haiku"
    253         },
    254         "scaffolding_described": {
    255           "applies": false,
    256           "answer": false,
    257           "justification": "No agentic scaffolding was used; the study observes developers using a commercial IDE AI assistant as a black box.",
    258           "source": "haiku"
    259         },
    260         "data_preprocessing_documented": {
    261           "applies": true,
    262           "answer": true,
    263           "justification": "Processing pipeline documented: monthly aggregation of action counts per device, zero-filling for inactive months, normality testing (KS test), heteroscedasticity testing (Bartlett's) before mixed-effects model selection.",
    264           "source": "haiku"
    265         }
    266       },
    267       "data_integrity": {
    268         "raw_data_available": {
    269           "applies": true,
    270           "answer": false,
    271           "justification": "Explicitly stated: 'Raw IDE telemetry logs cannot be released due to confidentiality agreements with our industry partner.' Only aggregated statistical outputs and survey responses are publicly available.",
    272           "source": "haiku"
    273         },
    274         "data_collection_described": {
    275           "applies": true,
    276           "answer": true,
    277           "justification": "Data collection described in detail: four specific IDEs, device selection criteria (activity in both Oct 2022 and Oct 2024), AI user definition (monthly AI Assistant use from April 2024 onward), and five specific action types with definitions.",
    278           "source": "haiku"
    279         },
    280         "recruitment_methods_described": {
    281           "applies": true,
    282           "answer": true,
    283           "justification": "Survey recruitment is described: emails to 1,231 eligible participants from an internal JetBrains panel of prior-consenting AI tool users; 76 clicks, 67 completions, 62 final. Interview sampling criteria (experience, geography, AI satisfaction, role diversity) are stated.",
    284           "source": "haiku"
    285         },
    286         "data_pipeline_documented": {
    287           "applies": true,
    288           "answer": true,
    289           "justification": "Pipeline described: raw timestamped logs → monthly aggregation per device → zero-fill missing months → normality/homogeneity tests → mixed-effects linear models. Full statistical outputs available in supplementary materials at Zenodo.",
    290           "source": "haiku"
    291         }
    292       },
    293       "contamination": {
    294         "training_cutoff_stated": {
    295           "applies": false,
    296           "answer": false,
    297           "justification": "This study observes developer behavior in IDEs; it does not evaluate AI model capabilities on benchmarks. Training cutoff is not applicable.",
    298           "source": "haiku"
    299         },
    300         "train_test_overlap_discussed": {
    301           "applies": false,
    302           "answer": false,
    303           "justification": "Not applicable — observational behavioral study with no benchmark evaluation.",
    304           "source": "haiku"
    305         },
    306         "benchmark_contamination_addressed": {
    307           "applies": false,
    308           "answer": false,
    309           "justification": "No benchmarks are evaluated in this study; telemetry logs and developer surveys are the data sources.",
    310           "source": "haiku"
    311         }
    312       },
    313       "human_studies": {
    314         "pre_registered": {
    315           "applies": true,
    316           "answer": false,
    317           "justification": "No pre-registration is mentioned anywhere in the paper.",
    318           "source": "haiku"
    319         },
    320         "irb_or_ethics_approval": {
    321           "applies": true,
    322           "answer": true,
    323           "justification": "The paper states the survey 'was conducted in line with our institution's ethical standards, adhering to the values and guidelines outlined in the ICC/ESOMAR International Code,' constituting an ethics compliance statement.",
    324           "source": "haiku"
    325         },
    326         "demographics_reported": {
    327           "applies": true,
    328           "answer": true,
    329           "justification": "Survey demographics reported: professional roles (Developer, Team Lead, Architect, DevOps), years of coding experience (five categories from 1-2 to 16+ years), AI tool usage duration (five categories), and most-used AI tools.",
    330           "source": "haiku"
    331         },
    332         "inclusion_exclusion_criteria": {
    333           "applies": true,
    334           "answer": true,
    335           "justification": "Telemetry inclusion: activity in both Oct 2022 and Oct 2024; AI user definition: monthly JetBrains AI Assistant use April–October 2024; non-user: never used the assistant. Survey: excluded 5 respondents who had not used AI coding tools.",
    336           "source": "haiku"
    337         },
    338         "randomization_described": {
    339           "applies": false,
    340           "answer": false,
    341           "justification": "This is an observational study with self-selected groups; randomization was neither performed nor applicable.",
    342           "source": "haiku"
    343         },
    344         "blinding_described": {
    345           "applies": false,
    346           "answer": false,
    347           "justification": "Blinding is not feasible in this observational study where participants are defined by their own actual AI tool adoption behavior.",
    348           "source": "haiku"
    349         },
    350         "attrition_reported": {
    351           "applies": true,
    352           "answer": true,
    353           "justification": "Survey attrition documented: 1,231 invitations → 76 link clicks → 67 completions → 62 final (5 excluded). Telemetry sample was defined by persistence at both temporal endpoints, with attrition handled by design.",
    354           "source": "haiku"
    355         }
    356       },
    357       "cost_and_practicality": {
    358         "inference_cost_reported": {
    359           "applies": false,
    360           "answer": false,
    361           "justification": "This is an observational study; the researchers did not run model inference. Cost reporting is not applicable.",
    362           "source": "haiku"
    363         },
    364         "compute_budget_stated": {
    365           "applies": false,
    366           "answer": false,
    367           "justification": "No model training or heavy computation was performed by the researchers. Compute budget is not applicable.",
    368           "source": "haiku"
    369         }
    370       }
    371     }
    372   },
    373   "claims": [
    374     {
    375       "claim": "AI users increase typed characters at +587/month versus +75/month for non-users over a two-year period",
    376       "evidence": "Mixed-effects linear model on 151M telemetry events from 800 developers; both trends statistically significant; shown in Figure 1b with ±1 SD bands",
    377       "supported": "strong"
    378     },
    379     {
    380       "claim": "AI users increase code deletions at +102/month versus +7.6/month for non-users, suggesting increased iterative rework",
    381       "evidence": "Same mixed-effects model applied to deletion events (delete keystrokes, backspaces, undos); statistically significant difference in rate; Figure 3b",
    382       "supported": "strong"
    383     },
    384     {
    385       "claim": "AI users show no change in debugging activity while non-users show a declining trend (-0.46 sessions/month, p<0.001)",
    386       "evidence": "Mixed-effects model on debugging initiation events in telemetry; AI users: no statistically significant trend; non-users: significant decline; Figure 2b",
    387       "supported": "strong"
    388     },
    389     {
    390       "claim": "Context switching increases for AI users (+6.4 IDE activations/month) but decreases for non-users (-7.6/month), contrary to the promise that in-IDE AI reduces interruptions",
    391       "evidence": "Mixed-effects model on IDE window activation events; directionally opposite trends both statistically significant; Figure 5b",
    392       "supported": "strong"
    393     },
    394     {
    395       "claim": "Developer survey perceptions significantly underestimate actual behavioral changes observable in telemetry",
    396       "evidence": "82.3% report productivity gains but ~50% report no change in code quality, editing, reuse, and context switching — all dimensions showing measurable behavioral change in logs; discussed in Section 5.2",
    397       "supported": "moderate"
    398     },
    399     {
    400       "claim": "AI redistributes developer effort rather than reducing it, increasing workflow volume and fragmentation simultaneously",
    401       "evidence": "Simultaneous increase in typing, deletions, and context switches; Discussion Section 5.1 frames this as effort redistribution not reduction; however, interpretation is speculative given observational design",
    402       "supported": "moderate"
    403     },
    404     {
    405       "claim": "External code reuse (paste from external sources) increases faster for AI users (+1 paste/month) than non-users (+0.4/month)",
    406       "evidence": "Paste events not preceded by in-IDE copy used as proxy; statistically significant at p=0.03; Figure 4b; effect is small in absolute terms",
    407       "supported": "moderate"
    408     }
    409   ],
    410   "methodology_tags": [
    411     "observational",
    412     "qualitative"
    413   ],
    414   "key_findings": "A 2-year longitudinal study of 800 developers (400 AI users, 400 non-users) via 151M IDE telemetry events finds that AI coding tool adoption is associated with substantially increased code authoring (+587 characters/month vs +75) and deletion (+102/month vs +7.6), suggesting productivity gains come with increased iteration and rework rather than reduced effort. Contrary to AI's promise of reducing interruptions, context switching increases for AI users (+6.4 IDE activations/month) while decreasing for non-users (-7.6/month). Critically, a parallel survey of 62 developers reveals a persistent perceptual gap: while 82.3% report productivity gains, roughly half report no change in code quality, editing frequency, and context switching — dimensions where telemetry shows clear behavioral change, suggesting AI silently restructures workflows in ways developers do not consciously perceive.",
    415   "red_flags": [
    416     {
    417       "flag": "Self-selection bias unresolved",
    418       "detail": "AI users were self-selected based on actual tool adoption, not randomly assigned. The paper acknowledges 'early adopters may maintain elevated activity levels regardless,' making causal attribution to AI impossible. The activity difference may predate AI adoption."
    419     },
    420     {
    421       "flag": "Industry data and researcher conflict",
    422       "detail": "All data provided by JetBrains, multiple authors are JetBrains employees, and the study evaluates JetBrains AI Assistant. No competing interests statement is present despite this clear conflict."
    423     },
    424     {
    425       "flag": "Coarse proxy for productivity",
    426       "detail": "Typed characters as a productivity proxy captures volume including AI autocomplete insertions, not developer cognitive effort or output value. An AI-typed suggestion accepted whole appears identical to developer-typed code in this metric."
    427     },
    428     {
    429       "flag": "Small survey with 5% response rate",
    430       "detail": "62 final survey respondents from 1,231 invitations (~5% response rate) drawn from a JetBrains user panel who self-identified as AI users — severe selection bias toward satisfied users."
    431     },
    432     {
    433       "flag": "Primary dataset not reproducible",
    434       "detail": "Raw IDE telemetry logs cannot be released due to confidentiality agreements. Independent verification of the main dataset and analysis is impossible; only aggregated outputs are available."
    435     },
    436     {
    437       "flag": "No model version specified",
    438       "detail": "No version history or changelog for JetBrains AI Assistant is provided, so any changes to the assistant or its underlying models during the observation window cannot be accounted for. The 'AI' being studied may not be a stable intervention."
    439     }
    440   ],
    441   "cited_papers": [
    442     {
    443       "title": "The impact of AI on developer productivity: Evidence from GitHub Copilot",
    444       "relevance": "Key prior work on AI productivity using short-term controlled experiment; this paper's longitudinal observational design is positioned as addressing the temporal gap"
    445     },
    446     {
    447       "title": "Reading between the lines: Modeling user behavior and costs in AI-assisted programming",
    448       "relevance": "Documents that developers spend 50%+ of time evaluating/editing AI output and 18.16% of accepted code is later deleted — directly corroborated by this paper's deletion findings"
    449     },
    450     {
    451       "title": "A large-scale survey on the usability of AI programming assistants: Successes and challenges",
    452       "relevance": "Large self-report study on AI coding assistant adoption; this paper extends those self-report findings with behavioral telemetry to reveal perceptual gaps"
    453     },
    454     {
    455       "title": "Measuring the impact of early-2025 AI on experienced open-source developer productivity",
    456       "relevance": "Contemporaneous study finding AI increases task completion time by 19%, providing a contrasting productivity finding cited in related work"
    457     },
    458     {
    459       "title": "Expectation vs. experience: Evaluating the usability of code generation tools powered by large language models",
    460       "relevance": "Showed Copilot didn't consistently reduce task time, motivating investigation of the perception vs. behavioral gap that is central to this paper's contribution"
    461     },
    462     {
    463       "title": "Productivity assessment of neural code completion",
    464       "relevance": "Documents disconnect between perceived and actual productivity gains from AI coding tools — empirically grounded antecedent to this paper's main finding"
    465     },
    466     {
    467       "title": "Are large language models a threat to digital public goods? Evidence from activity on Stack Overflow",
    468       "relevance": "Reports 33% drop in StackOverflow posts post-ChatGPT, contextualizing the code reuse dimension findings about shifts in external knowledge sources"
    469     },
    470     {
    471       "title": "The impact of generative AI on collaborative open-source software development: Evidence from GitHub Copilot",
    472       "relevance": "Contemporaneous large-scale observational study on GitHub Copilot's impact on open-source development activity and code quality"
    473     }
    474   ],
    475   "engagement_factors": {
    476     "practical_relevance": {
    477       "score": 3,
    478       "justification": "Directly actionable for any developer or engineering organization evaluating AI coding tool adoption; provides 2-year empirical evidence about actual workflow changes."
    479     },
    480     "surprise_contrarian": {
    481       "score": 2,
    482       "justification": "The finding that AI increases context switching (not decreases) and dramatically increases deletions contradicts the dominant productivity narrative, and the perception-behavior gap is a striking methodological finding."
    483     },
    484     "fear_safety": {
    485       "score": 0,
    486       "justification": "No AI safety or harm concerns raised; focused on workflow efficiency and developer experience."
    487     },
    488     "drama_conflict": {
    489       "score": 1,
    490       "justification": "Mild tension between vendor-affiliated research (JetBrains evaluating its own AI product) and findings showing AI may increase developer burden and workflow fragmentation."
    491     },
    492     "demo_ability": {
    493       "score": 0,
    494       "justification": "Observational longitudinal study with proprietary data; nothing interactive or demonstrable."
    495     },
    496     "brand_recognition": {
    497       "score": 2,
    498       "justification": "JetBrains is a well-known IDE vendor used by millions of developers; ICSE is the top software engineering conference, lending credibility."
    499     }
    500   },
    501   "hn_data": {
    502     "threads": [
    503       {
    504         "hn_id": "46676395",
    505         "title": "Too Helpful to Be Safe: User-Mediated Attacks on Planning and Web-Use Agents",
    506         "points": 4,
    507         "comments": 0,
    508         "url": "https://news.ycombinator.com/item?id=46676395",
    509         "created_at": "2026-01-19T08:39:39Z"
    510       }
    511     ],
    512     "top_points": 4,
    513     "total_points": 4,
    514     "total_comments": 0
    515   }
    516 }

Impressum · Datenschutz