scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (21103B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "position",
      4   "paper": {
      5     "title": "Lumen: Developer Agency Through Transparent Context Control in AI-Assisted Programming",
      6     "authors": [
      7       "Nakul Goel",
      8       "Glaucia Melo"
      9     ],
     10     "year": 2025,
     11     "venue": "CASCON 2025",
     12     "arxiv_id": null,
     13     "doi": "10.1109/CASCON66301.2025.00024"
     14   },
     15   "checklist": {
     16     "claims_and_evidence": {
     17       "abstract_claims_supported": {
     18         "applies": true,
     19         "answer": false,
     20         "justification": "Claims about Lumen's features (double-copy, dependency graph, summaries) are supported. Claims about benefits (reducing overhead, maintaining transparency) are stated but depend on cognitive walkthrough analysis, not empirical user data. The paper later admits 'absence of an empirical user study' undermines productivity claims.",
     21         "source": "haiku"
     22       },
     23       "causal_claims_justified": {
     24         "applies": true,
     25         "answer": false,
     26         "justification": "Paper claims Lumen 'reduces context assembly overhead' and improves developer productivity/trust, but provides only cognitive walkthrough (analytical method), not RCT or ablation study. Section VII.E explicitly acknowledges this gap: 'key limitation is the absence of an empirical user study.'",
     27         "source": "haiku"
     28       },
     29       "generalization_bounded": {
     30         "applies": true,
     31         "answer": false,
     32         "justification": "Title and abstract make broad claims ('transparent context control in AI-assisted programming'), but evidence is limited to regex-based dependency detection (40+ languages), clipboard-level integration, and cognitive walkthroughs on hypothetical scenarios. Static analysis limitations (no runtime imports, conditional imports) and 500KB file limit are buried in Section VII.E.",
     33         "source": "haiku"
     34       },
     35       "alternative_explanations_discussed": {
     36         "applies": true,
     37         "answer": false,
     38         "justification": "Paper presents dichotomy (manual vs automatic) but doesn't explore alternative explanations for why senior developers use less AI assistance. Plausible alternatives include skill mismatch, security/liability concerns, distrust of AI logic, or a preference for hand-written code; the paper engages with none of them.",
     39         "source": "haiku"
     40       },
     41       "proxy_outcome_distinction": {
     42         "applies": true,
     43         "answer": false,
     44         "justification": "Paper measures 'number of manual operations' (15-20 vs 2-5) as proxy for productivity/developer experience, but never validates that fewer operations = faster development, better code, or higher satisfaction. Conflates interaction simplicity with actual developer outcomes.",
     45         "source": "haiku"
     46       }
     47     },
     48     "limitations_and_scope": {
     49       "limitations_section_present": {
     50         "applies": true,
     51         "answer": true,
     52         "justification": "Dedicated Section VII.E 'Limitations' discusses dynamic behavior understanding, semantic understanding, cross-repo dependencies, collaborative context, performance at scale, and empirical validation gaps.",
     53         "source": "haiku"
     54       },
     55       "threats_to_validity_specific": {
     56         "applies": true,
     57         "answer": true,
     58         "justification": "Specific threats listed: 'Current static analysis cannot capture runtime dependencies, configuration loading, or conditional imports' (not generic). 'Very large codebases (millions of files) challenge current indexing approaches.' 'Absence of empirical user study' with plans for 'observational protocols, think-aloud methods, post-task interviews.'",
     59         "source": "haiku"
     60       },
     61       "scope_boundaries_stated": {
     62         "applies": true,
     63         "answer": false,
     64         "justification": "Technical boundaries stated (500KB file limit, static analysis only, 40+ language patterns). But critical scope boundary—no user validation—is acknowledged only in limitations section. No upfront statement like 'this paper presents a design proposal, not empirical evidence of benefit.'",
     65         "source": "haiku"
     66       }
     67     },
     68     "conflicts_of_interest": {
     69       "funding_disclosed": {
     70         "applies": true,
     71         "answer": false,
     72         "justification": "No funding source mentioned. Paper lists affiliations as 'Computer Science Department, Toronto Metropolitan University' but does not disclose funding or grant support.",
     73         "source": "haiku"
     74       },
     75       "affiliations_disclosed": {
     76         "applies": true,
     77         "answer": false,
     78         "justification": "Author affiliations with Toronto Metropolitan University are stated, but no disclosure of whether authors have financial interests in Lumen (e.g., consulting, equity, commercial licensing).",
     79         "source": "haiku"
     80       },
     81       "funder_independent_of_outcome": {
     82         "applies": false,
     83         "answer": false,
     84         "justification": "No funder identified, so independence cannot be assessed.",
     85         "source": "haiku"
     86       },
     87       "financial_interests_declared": {
     88         "applies": true,
     89         "answer": false,
     90         "justification": "No competing interests or financial disclosure statement appears in the paper.",
     91         "source": "haiku"
     92       }
     93     },
     94     "scope_and_framing": {
     95       "key_terms_defined": {
     96         "applies": true,
     97         "answer": false,
     98         "justification": "Key terms used but not formally defined: 'developer control' (central theme), 'transparent context assembly' (explained through examples, not definition), 'agency,' 'cognitive load.' Terms are implicit through usage, not stated with precision.",
     99         "source": "haiku"
    100       },
    101       "intended_contribution_clear": {
    102         "applies": true,
    103         "answer": true,
    104         "justification": "Paper explicitly states three contributions: (1) double-copy interaction paradigm, (2) transparent context visualization, (3) open-source implementation. Research question clearly stated: 'How can we design AI-powered developer tools that enhance transparency and preserve agency without disrupting natural workflows?'",
    105         "source": "haiku"
    106       },
    107       "engagement_with_prior_work": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "Section II engages with prior work on AI-assisted development (Copilot, ChatGPT, Cursor) and trust/control challenges (Barke et al., Liang et al.). Paper positions itself: 'to the best of our knowledge, none have successfully integrated transparent, developer-controlled context assembly directly into the natural development workflow.'",
    111         "source": "haiku"
    112       }
    113     }
    114   },
    115   "type_checklist": {
    116     "position": {
    117       "argument_quality": {
    118         "argument_internally_consistent": {
    119           "applies": true,
    120           "answer": true,
    121           "justification": "Main argument flows logically: AI assistants lack transparency → developers lose control → trust deficit emerges → solution is transparent context assembly via double-copy paradigm. Premises support conclusion. One potential inconsistency: paper cites METR showing AI reduces senior developer productivity but attributes this to opacity without proving causation.",
    122           "source": "haiku"
    123         },
    124         "counterarguments_addressed": {
    125           "applies": true,
    126           "answer": false,
    127           "justification": "Paper contrasts against manual copy-paste (inefficient) and automatic repository access (opaque), but does not engage strongest opposing views: 'maybe developers should avoid AI for production code altogether' or 'maybe transparency doesn't actually improve outcomes' or 'maybe the problem is AI capability, not visibility.'",
    128           "source": "haiku"
    129         },
    130         "analogies_appropriate": {
    131           "applies": true,
    132           "answer": true,
    133           "justification": "Analogy of clipboard-based interaction as 'natural' developer behavior is appropriate; copy-paste is indeed universal. Black-box/transparent framing is standard but clear. No false equivalences detected.",
    134           "source": "haiku"
    135         },
    136         "prescriptions_proportional": {
    137           "applies": true,
    138           "answer": false,
    139           "justification": "Design recommendations (use double-copy, show context graph, allow adjustment) are proportional to evidence. But prescriptions about productivity ('reduces context assembly overhead') exceed evidence strength (cognitive walkthrough only, not user testing).",
    140           "source": "haiku"
    141         },
    142         "evidence_for_claims_cited": {
    143           "applies": true,
    144           "answer": true,
    145           "justification": "Factual claims are generally cited: METR [1] on senior developer productivity, Sergeyuk [7] on 481 programmers, Liang [9] on ICSE survey with 410 developers. Flow state research [3], Copilot evaluation [5], Cody context retrieval [6] all cited.",
    146           "source": "haiku"
    147         },
    148         "alternatives_discussed": {
    149           "applies": true,
    150           "answer": false,
    151           "justification": "Paper presents alternatives in practice (manual, automatic) but does not discuss alternative VIEWPOINTS: maybe developers don't care about transparency, maybe they prefer simplicity over control, maybe the real barrier is something else. No engagement with strongest opposing perspective.",
    152           "source": "haiku"
    153         },
    154         "historical_context_accurate": {
    155           "applies": true,
    156           "answer": true,
    157           "justification": "References to GitHub Copilot, ChatGPT, Claude, Cursor, Aider, and ICSE 2024 are accurate. No obvious historical errors or misrepresentations of prior work detected.",
    158           "source": "haiku"
    159         }
    160       },
    161       "clarity_and_scope": {
    162         "key_terms_defined_precisely": {
    163           "applies": true,
    164           "answer": false,
    165           "justification": "'Transparent context assembly' is explained through Figures 1-2 and Section III.D but not formally defined (e.g., 'the process of displaying to a developer all files available to an AI model, with rationale for inclusion, before query submission'). 'Developer agency' is central to title but never formally defined.",
    166           "source": "haiku"
    167         },
    168         "engages_with_existing_literature": {
    169           "applies": true,
    170           "answer": true,
    171           "justification": "Section II references Hartman et al. on two-stage retrieval, Sergeyuk et al. on perceptions of AI assistants, Liang et al. on usability barriers, and Barke et al. on control modes. Engagement is present but not deeply analytical; mostly citing findings rather than comparing methodologies.",
    172           "source": "haiku"
    173         },
    174         "intended_audience_clear": {
    175           "applies": true,
    176           "answer": true,
    177           "justification": "Paper assumes technical knowledge (imports, middleware, authentication endpoints, codebases). Clearly written for software developers, tool builders, and HCI researchers interested in AI-assisted development—not general public.",
    178           "source": "haiku"
    179         },
    180         "assumptions_stated": {
    181           "applies": true,
    182           "answer": false,
    183           "justification": "Paper assumes developers work in multi-file projects, value control, care about missed dependencies, and find clipboard interaction natural. These are mostly implicit. Critical assumption ('developers prefer transparency') is stated philosophically but not validated empirically.",
    184           "source": "haiku"
    185         },
    186         "scope_of_applicability_discussed": {
    187           "applies": true,
    188           "answer": false,
    189           "justification": "Technical scope is clear (40+ languages, any IDE via clipboard). But decision scope is absent: when should a developer choose Lumen vs manual context assembly vs automatic tools? Paper does not guide practitioners on appropriate use cases or discuss whether transparent UI helps beginners or only seniors.",
    190           "source": "haiku"
    191         }
    192       }
    193     }
    194   },
    195   "claims": [
    196     {
    197       "claim": "AI assistance can reduce the productivity of senior developers",
    198       "evidence": "Citation to METR study [1] showing experienced developers taking 19% longer with AI tools",
    199       "supported": "moderate"
    200     },
    201     {
    202       "claim": "Developers spend significant time verifying AI suggestions and crafting prompts to provide adequate context",
    203       "evidence": "Citation [2] to study showing 'multiple rounds of prompting, waiting, reviewing, and discarding'",
    204       "supported": "moderate"
    205     },
    206     {
    207       "claim": "Lumen reduces context assembly from 15-20 manual operations to 2-5 guided selections",
    208       "evidence": "Cognitive walkthrough analysis comparing traditional (bug fix: 15 ops), automatic (opaque), and Lumen (2-5 selections)",
    209       "supported": "weak"
    210     },
    211     {
    212       "claim": "Double-copy interaction preserves developer flow state while enabling AI assistance",
    213       "evidence": "Design philosophy argument; citation [3] to flow state interruption study (not applied to Lumen)",
    214       "supported": "weak"
    215     },
    216     {
    217       "claim": "Transparent context visibility reduces cognitive load",
    218       "evidence": "Claimed in cognitive walkthrough but not measured empirically",
    219       "supported": "weak"
    220     },
    221     {
    222       "claim": "Developers report feeling more confident using AI when they can see its inputs",
    223       "evidence": "Citations [7] and [9] to surveys indicating developers prefer visible context",
    224       "supported": "moderate"
    225     },
    226     {
    227       "claim": "Lumen enables dependency-aware context suggestion through static analysis of 40+ programming languages",
    228       "evidence": "Section IV.C describes regex-based import pattern matching for 40+ languages; GitHub implementation referenced",
    229       "supported": "strong"
    230     }
    231   ],
    232   "methodology_tags": [
    233     "position",
    234     "theoretical",
    235     "case-study"
    236   ],
    237   "key_findings": "Lumen presents a design approach for transparent AI-assisted development through a double-copy interaction paradigm that displays file dependencies and summaries before query submission. Cognitive walkthrough analysis suggests this reduces manual operations (15–20 to 2–5) compared to traditional copy-paste while maintaining visibility over automatic tools. The open-source implementation demonstrates that developer control is technically feasible alongside powerful AI capabilities. However, the paper lacks empirical validation; productivity and usability claims rest on analytical design inspection, not user studies.",
    238   "red_flags": [
    239     {
    240       "flag": "No empirical user validation",
    241       "detail": "All productivity and usability claims depend on cognitive walkthrough (a heuristic inspection method). Paper explicitly states: 'key limitation is the absence of an empirical user study' with plans for future user testing. Cognitive walkthroughs can identify usability issues but cannot prove actual benefit."
    242     },
    243     {
    244       "flag": "Causal claims not supported by the evaluation design",
    245       "detail": "Paper claims Lumen 'reduces context assembly overhead' and improves productivity/trust, but provides no RCT, ablation study, or user comparison data. No evidence that developers actually experience the predicted benefits."
    246     },
    247     {
    248       "flag": "Unvalidated core assumption",
    249       "detail": "Paper assumes developers care about transparency and control, but does not test this. Alternative hypothesis untested: developers may prefer simplicity (fully automatic) over control (transparent but complex)."
    250     },
    251     {
    252       "flag": "No code quality or security outcomes measured",
    253       "detail": "Despite framing around security (authentication middleware, rate limiting), paper measures only interaction complexity, not whether transparent context assembly leads to fewer bugs, better security, or higher code quality."
    254     },
    255     {
    256       "flag": "Double-copy timing unvalidated",
    257       "detail": "Interaction parameters (1-second single-copy timeout, 60-second extended timeout) are described as 'based on informal testing' with no systematic evaluation or user preference data."
    258     },
    259     {
    260       "flag": "Comparison limited to hypothetical scenarios",
    261       "detail": "Section VI compares Lumen against hypothetical 'without Lumen' workflows, not against actual tools (Cursor, Claude Code) with real users. No head-to-head empirical comparison."
    262     },
    263     {
    264       "flag": "Tool adoption unvalidated",
    265       "detail": "GitHub repository exists, but no data on actual usage, user satisfaction, retention, or whether developers adopt the tool after discovering it."
    266     },
    267     {
    268       "flag": "Scope boundaries underemphasized",
    269       "detail": "Technical limitations (static analysis only, no runtime imports, 500KB file limit, no cross-repo support) are critical but relegated to Section VII.E. No upfront statement that this is a design proposal pending empirical validation."
    270     }
    271   ],
    272   "cited_papers": [
    273     {
    274       "title": "Measuring the impact of early-2025 AI on experienced open-source developer productivity",
    275       "relevance": "Direct evidence that AI assistance paradoxically reduces senior developer productivity—motivating force for transparency argument",
    276       "authors": "METR",
    277       "year": 2025
    278     },
    279     {
    280       "title": "Using AI-based coding assistants in practice: State of affairs, perceptions, and ways forward",
    281       "relevance": "Survey of 481 programmers on challenges with AI assistants; context retrieval and workflow disruption themes align with Lumen's motivation",
    282       "authors": "Sergeyuk et al.",
    283       "year": 2024
    284     },
    285     {
    286       "title": "A large-scale survey on the usability of AI programming assistants: Successes and challenges",
    287       "relevance": "ICSE 2024 study with 410 developers identifying lack of contextual understanding as adoption barrier; cited as validation of core problem",
    288       "authors": "Liang et al.",
    289       "year": 2024
    290     },
    291     {
    292       "title": "Grounded Copilot: How programmers interact with code-generating models",
    293       "relevance": "Empirical study of developer interaction modes with Copilot; establishes dichotomy of maintain/lose control",
    294       "authors": "Barke et al.",
    295       "year": 2023
    296     },
    297     {
    298       "title": "Do users write more insecure code with AI assistants?",
    299       "relevance": "Security implications of AI-assisted development; supports motivation for transparent security-aware context (auth middleware, rate limiting examples)",
    300       "authors": "Perry et al.",
    301       "year": 2023
    302     },
    303     {
    304       "title": "AI-assisted coding with Cody: Lessons from context retrieval and evaluation for code recommendations",
    305       "relevance": "Directly related tool (Cody) implementing context retrieval; paper positions Lumen as solving transparency gap in such systems",
    306       "authors": "Hartman et al.",
    307       "year": 2024
    308     },
    309     {
    310       "title": "Cognitive dimensions of notations",
    311       "relevance": "Theoretical framework (Green, 1989) used to evaluate Lumen's interaction design; grounds evaluation method",
    312       "authors": "Green, T.R.",
    313       "year": 1989
    314     },
    315     {
    316       "title": "Reducing interruptions at work: A large-scale field study of FlowLight",
    317       "relevance": "Study on flow state interruption in software development; motivates Lumen's clipboard-based non-disruptive interaction",
    318       "authors": "Meyer et al.",
    319       "year": 2017
    320     }
    321   ],
    322   "engagement_factors": {
    323     "practical_relevance": {
    324       "score": 2,
    325       "justification": "Open-source tool addressing real workflow problem (context assembly for AI). Technically implementable. But lacks evidence that developers actually want this over simpler alternatives (automatic) or prefer it to manual approaches."
    326     },
    327     "surprise_contrarian": {
    328       "score": 2,
    329       "justification": "Challenges 'full automation is always better' narrative, but transparency-focused design is mainstream in 2025 (Apple privacy stance, regulatory pressure). Double-copy mechanism is specific but not revolutionary."
    330     },
    331     "fear_safety": {
    332       "score": 1,
    333       "justification": "Focuses on developer usability and trust, not fundamental AI safety. Security is a use-case example (authentication middleware), not the argument's core. No discussion of alignment, adversarial robustness, or long-term risk."
    334     },
    335     "drama_conflict": {
    336       "score": 0,
    337       "justification": "Constructive design paper with no controversial claims, polemical framing, or social conflict angle. Low attention-grabbing potential."
    338     },
    339     "demo_ability": {
    340       "score": 3,
    341       "justification": "Fully demoable. GitHub repo exists (lumenEngines/Core). Requires Python, PyQt5, API keys (Anthropic/Groq/OpenAI). Clear usage instructions in paper. Developer could prototype today."
    342     },
    343     "brand_recognition": {
    344       "score": 1,
    345       "justification": "Unknown authors. Toronto Metropolitan University is not a top-tier AI research institution. No affiliation with major labs (Anthropic, OpenAI, Google, Meta, DeepMind). No celebrity endorsement or famous developer quotes."
    346     }
    347   },
    348   "hn_data": {
    349     "threads": [],
    350     "top_points": 0,
    351     "total_points": 0,
    352     "total_comments": 0
    353   }
    354 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs