scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (18011B)
      1 {
      2   "paper": {
      3     "title": "Prompts First, Precision Later: Reviving the Vision of Natural Language Programming for Computing Education",
      4     "authors": ["Brent N. Reeves", "James Prather", "Paul Denny", "Juho Leinonen", "Stephen MacNeil", "Andrew Luxton-Reilly", "Sebastian Mateos Nicolajsen", "Claus Brabrand"],
      5     "year": 2025,
      6     "venue": "Koli Calling '25 (25th Koli Calling International Conference on Computing Education Research)",
      7     "doi": "10.1145/3769994.3770039"
      8   },
      9   "scan_version": 2,
     10   "active_modules": [],
     11   "methodology_tags": ["theoretical"],
     12   "key_findings": "This position paper argues that computing education should adopt a 'prompts first, precision later' pedagogy, where students begin programming via natural language prompts before learning formal syntax. The authors trace the history of programming abstraction from machine code to GenAI, arguing this trajectory was always headed toward natural language programming. They propose that traditional syntax-first instruction creates unnecessary barriers, and that GenAI enables students to engage in computational thinking without mastering formal language features first.",
     13   "checklist": {
     14     "artifacts": {
     15       "code_released": {
     16         "applies": false,
     17         "answer": false,
     18         "justification": "This is a position/theoretical paper with no code artifacts to release."
     19       },
     20       "data_released": {
     21         "applies": false,
     22         "answer": false,
     23         "justification": "No data was collected or analyzed in this position paper."
     24       },
     25       "environment_specified": {
     26         "applies": false,
     27         "answer": false,
     28         "justification": "No computational experiments were conducted."
     29       },
     30       "reproduction_instructions": {
     31         "applies": false,
     32         "answer": false,
     33         "justification": "No experiments to reproduce; this is a position paper presenting an argument."
     34       }
     35     },
     36     "statistical_methodology": {
     37       "confidence_intervals_or_error_bars": {
     38         "applies": false,
     39         "answer": false,
     40         "justification": "No quantitative results are reported in this position paper."
     41       },
     42       "significance_tests": {
     43         "applies": false,
     44         "answer": false,
     45         "justification": "No comparative claims based on data are made."
     46       },
     47       "effect_sizes_reported": {
     48         "applies": false,
     49         "answer": false,
     50         "justification": "No quantitative analysis is conducted."
     51       },
     52       "sample_size_justified": {
     53         "applies": false,
     54         "answer": false,
     55         "justification": "No samples or data collection in this theoretical paper."
     56       },
     57       "variance_reported": {
     58         "applies": false,
     59         "answer": false,
     60         "justification": "No experimental runs or quantitative results."
     61       }
     62     },
     63     "evaluation_design": {
     64       "baselines_included": {
     65         "applies": false,
     66         "answer": false,
     67         "justification": "No empirical evaluation is conducted; this is a position paper arguing for a pedagogical approach."
     68       },
     69       "baselines_contemporary": {
     70         "applies": false,
     71         "answer": false,
     72         "justification": "No evaluation with baselines; position paper."
     73       },
     74       "ablation_study": {
     75         "applies": false,
     76         "answer": false,
     77         "justification": "No system or method to ablate."
     78       },
     79       "multiple_metrics": {
     80         "applies": false,
     81         "answer": false,
     82         "justification": "No metrics measured."
     83       },
     84       "human_evaluation": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "No evaluation of any kind is conducted."
     88       },
     89       "held_out_test_set": {
     90         "applies": false,
     91         "answer": false,
     92         "justification": "No data or test sets used."
     93       },
     94       "per_category_breakdown": {
     95         "applies": false,
     96         "answer": false,
     97         "justification": "No results to break down."
     98       },
     99       "failure_cases_discussed": {
    100         "applies": true,
    101         "answer": true,
    102         "justification": "Section 6 ('Potential Risks') discusses technical risks (hallucinations, non-determinism, bias, security), learning risks (over-reliance, poor metacognition, notional machine understanding), and teaching risks (opportunity cost, assessment challenges)."
    103       },
    104       "negative_results_reported": {
    105         "applies": false,
    106         "answer": false,
    107         "justification": "No experiments produce results to report negatively."
    108       }
    109     },
    110     "claims_and_evidence": {
    111       "abstract_claims_supported": {
    112         "applies": true,
    113         "answer": true,
    114         "justification": "The abstract claims that computing history has trended toward natural language and that pedagogy should adopt 'prompts first.' The paper supports these through historical analysis (Sections 2-3) and pedagogical argument (Sections 4-5). As a position paper, the claims are argumentative rather than empirical, and the paper provides the historical and conceptual evidence to back them."
    115       },
    116       "causal_claims_justified": {
    117         "applies": false,
    118         "answer": false,
    119         "justification": "The paper makes no empirical causal claims. Its arguments are about what pedagogy *should* be, not empirical demonstrations that a particular approach *causes* better outcomes."
    120       },
    121       "generalization_bounded": {
    122         "applies": true,
    123         "answer": false,
    124         "justification": "The paper makes broad claims about computing education pedagogy ('New learners should start from the highest level of abstraction we have: prompts first') without bounding these to specific contexts, student populations, or course types. The title and abstract present this as a universal recommendation for computing education."
    125       },
    126       "alternative_explanations_discussed": {
    127         "applies": true,
    128         "answer": true,
    129         "justification": "Section 2 extensively presents Dijkstra's and McCarthy's opposing views that natural language programming is misguided. Section 6 discusses risks including that heavy GenAI use 'could lead to similar failures as seen with other top-down pedagogies' (ref [36]). The paper engages substantively with counterarguments."
    130       },
    131       "proxy_outcome_distinction": {
    132         "applies": false,
    133         "answer": false,
    134         "justification": "No measurements are taken; this is a theoretical position paper."
    135       }
    136     },
    137     "setup_transparency": {
    138       "model_versions_specified": {
    139         "applies": false,
    140         "answer": false,
    141         "justification": "No models are used in experiments."
    142       },
    143       "prompts_provided": {
    144         "applies": false,
    145         "answer": false,
    146         "justification": "No prompting experiments conducted."
    147       },
    148       "hyperparameters_reported": {
    149         "applies": false,
    150         "answer": false,
    151         "justification": "No experiments with hyperparameters."
    152       },
    153       "scaffolding_described": {
    154         "applies": false,
    155         "answer": false,
    156         "justification": "No agentic scaffolding used."
    157       },
    158       "data_preprocessing_documented": {
    159         "applies": false,
    160         "answer": false,
    161         "justification": "No data collected or preprocessed."
    162       }
    163     },
    164     "limitations_and_scope": {
    165       "limitations_section_present": {
    166         "applies": true,
    167         "answer": true,
    168         "justification": "Section 6 ('Potential Risks') serves as a limitations section, discussing technical, learning, and teaching risks across multiple paragraphs."
    169       },
    170       "threats_to_validity_specific": {
    171         "applies": true,
    172         "answer": true,
    173         "justification": "Section 6 raises specific threats: students may 'take the easy way out' with basic coding skills, GenAI 'may not function well as a tutor' (citing refs [7, 31]), over-reliance risks despite scaffolding, and the specific parallel to failures of top-down pedagogies (ref [36])."
    174       },
    175       "scope_boundaries_stated": {
    176         "applies": true,
    177         "answer": false,
    178         "justification": "The paper does not explicitly state what it is NOT claiming. It argues for a broad pedagogical shift without clearly bounding which educational contexts, student populations, or institutional settings the recommendation applies to."
    179       }
    180     },
    181     "data_integrity": {
    182       "raw_data_available": {
    183         "applies": false,
    184         "answer": false,
    185         "justification": "No data collected in this position paper."
    186       },
    187       "data_collection_described": {
    188         "applies": false,
    189         "answer": false,
    190         "justification": "No data collection occurred."
    191       },
    192       "recruitment_methods_described": {
    193         "applies": false,
    194         "answer": false,
    195         "justification": "No participants recruited."
    196       },
    197       "data_pipeline_documented": {
    198         "applies": false,
    199         "answer": false,
    200         "justification": "No data pipeline exists."
    201       }
    202     },
    203     "conflicts_of_interest": {
    204       "funding_disclosed": {
    205         "applies": true,
    206         "answer": true,
    207         "justification": "Acknowledgments section states: 'This work was supported by the Research Council of Finland grant #356114 and a Google GARA grant.'"
    208       },
    209       "affiliations_disclosed": {
    210         "applies": true,
    211         "answer": true,
    212         "justification": "All author affiliations are listed (Abilene Christian University, University of Auckland, Aalto University, Temple University, IT University of Copenhagen)."
    213       },
    214       "funder_independent_of_outcome": {
    215         "applies": true,
    216         "answer": false,
    217         "justification": "Google has a financial interest in promoting GenAI adoption in education (as a major LLM provider). The paper received a Google GARA grant and argues for increased GenAI use in CS education. This potential conflict is not acknowledged."
    218       },
    219       "financial_interests_declared": {
    220         "applies": true,
    221         "answer": false,
    222         "justification": "No competing interests statement is present in the paper."
    223       }
    224     },
    225     "contamination": {
    226       "training_cutoff_stated": {
    227         "applies": false,
    228         "answer": false,
    229         "justification": "No model evaluation on benchmarks; position paper."
    230       },
    231       "train_test_overlap_discussed": {
    232         "applies": false,
    233         "answer": false,
    234         "justification": "No model evaluation on benchmarks."
    235       },
    236       "benchmark_contamination_addressed": {
    237         "applies": false,
    238         "answer": false,
    239         "justification": "No benchmark evaluation conducted."
    240       }
    241     },
    242     "human_studies": {
    243       "pre_registered": {
    244         "applies": false,
    245         "answer": false,
    246         "justification": "No human participants in this position paper."
    247       },
    248       "irb_or_ethics_approval": {
    249         "applies": false,
    250         "answer": false,
    251         "justification": "No human participants."
    252       },
    253       "demographics_reported": {
    254         "applies": false,
    255         "answer": false,
    256         "justification": "No human participants."
    257       },
    258       "inclusion_exclusion_criteria": {
    259         "applies": false,
    260         "answer": false,
    261         "justification": "No human participants."
    262       },
    263       "randomization_described": {
    264         "applies": false,
    265         "answer": false,
    266         "justification": "No human participants."
    267       },
    268       "blinding_described": {
    269         "applies": false,
    270         "answer": false,
    271         "justification": "No human participants."
    272       },
    273       "attrition_reported": {
    274         "applies": false,
    275         "answer": false,
    276         "justification": "No human participants."
    277       }
    278     },
    279     "cost_and_practicality": {
    280       "inference_cost_reported": {
    281         "applies": false,
    282         "answer": false,
    283         "justification": "Position paper; no method with inference costs."
    284       },
    285       "compute_budget_stated": {
    286         "applies": false,
    287         "answer": false,
    288         "justification": "No computation performed."
    289       }
    290     }
    291   },
    292   "claims": [
    293     {
    294       "claim": "The history of computing has consistently trended toward higher levels of abstraction, culminating in natural language programming via GenAI.",
    295       "evidence": "Table 1 presents a chronological timeline from theoretical computing (1930s) through physical computing, machine code, programming languages, ADTs, OOP, DSLs, frameworks, components, SOA, reactive programming, to GenAI (2020s). Sections 2-3 trace this history with specific examples (FLOW-MATIC, COBOL, SQL, Action Semantics).",
    296       "supported": "moderate"
    297     },
    298     {
    299       "claim": "Computing education should adopt a 'prompts first, precision later' pedagogy where students begin with natural language prompts before learning formal syntax.",
    300       "evidence": "Sections 4-5 argue this by analogy to teaching writing (spelling/grammar come after storytelling) and by citing existing courses (CS1-LLM, ref [37]) and pedagogical frameworks. Section 4 cites Prather et al. finding students could use native languages (Arabic, Chinese, Portuguese) for prompting.",
    301       "supported": "weak"
    302     },
    303     {
    304       "claim": "GenAI enables students to use their native languages for programming, breaking the coupling between programming and English.",
    305       "evidence": "Section 4 cites Prather et al. [33] finding students 'were able to successfully use a variety of native languages (Arabic, Chinese, and Portuguese) to solve computational tasks through natural language prompting.'",
    306       "supported": "moderate"
    307     }
    308   ],
    309   "red_flags": [
    310     {
    311       "flag": "No empirical evidence for central claim",
    312       "detail": "The paper advocates a fundamental pedagogical shift ('prompts first') but provides no empirical evidence that this approach improves learning outcomes. The argument is entirely based on historical analogy and cited work, not original data."
    313     },
    314     {
    315       "flag": "Google funding not acknowledged as conflict",
    316       "detail": "The paper received a Google GARA grant and argues for increased GenAI adoption in education. Google is a major LLM provider with financial interest in this outcome. This conflict is not discussed."
    317     },
    318     {
    319       "flag": "Unbounded generalization",
    320       "detail": "The paper recommends 'prompts first' as a general approach for computing education without specifying which student populations, course types, or institutional contexts it applies to. No boundary conditions are stated."
    321     }
    322   ],
    323   "cited_papers": [
    324     {
    325       "title": "Measuring the Impact of Early-2025 AI on Experienced Open-Source Developer Productivity",
    326       "authors": ["Joel Becker", "Nate Rush", "Elizabeth Barnes", "David Rein"],
    327       "year": 2025,
    328       "arxiv_id": "2507.09089",
    329       "relevance": "RCT measuring AI coding tool impact on developer productivity."
    330     },
    331     {
    332       "title": "The Robots Are Coming: Exploring the Implications of OpenAI Codex on Introductory Programming",
    333       "authors": ["James Finnie-Ansley", "Paul Denny", "Brett A. Becker", "Andrew Luxton-Reilly", "James Prather"],
    334       "year": 2022,
    335       "doi": "10.1145/3511861.3511863",
    336       "relevance": "Early evaluation of LLM code generation capabilities on introductory programming tasks."
    337     },
    338     {
    339       "title": "Prompt Problems: A New Programming Exercise for the Generative AI Era",
    340       "authors": ["Paul Denny", "Juho Leinonen", "James Prather", "Andrew Luxton-Reilly"],
    341       "year": 2024,
    342       "relevance": "Proposes prompt-based programming exercises, directly relevant to AI-assisted code generation pedagogy."
    343     },
    344     {
    345       "title": "Breaking the Programming Language Barrier: Multilingual Prompting to Empower Non-Native English Learners",
    346       "authors": ["James Prather", "Brent Reeves", "Paul Denny", "Juho Leinonen", "Stephen MacNeil"],
    347       "year": 2025,
    348       "relevance": "Evaluates multilingual natural language prompting for code generation tasks."
    349     },
    350     {
    351       "title": "The Widening Gap: The Benefits and Harms of Generative AI for Novice Programmers",
    352       "authors": ["James Prather", "Brent Reeves", "Juho Leinonen", "Stephen MacNeil"],
    353       "year": 2024,
    354       "relevance": "Studies GenAI impact on novice programmers including over-reliance risks."
    355     },
    356     {
    357       "title": "Beyond the Hype: A Comprehensive Review of Current Trends in Generative AI Research, Teaching Practices, and Tools",
    358       "authors": ["James Prather", "Juho Leinonen", "Natalie Kiesler"],
    359       "year": 2025,
    360       "doi": "10.1145/3689187.3709614",
    361       "relevance": "Comprehensive review of GenAI in CS education research and teaching practices."
    362     },
    363     {
    364       "title": "CS1-LLM: Integrating LLMs into CS1 Instruction",
    365       "authors": ["Annapurna Vadaparty", "Daniel Zingaro", "David H Smith IV"],
    366       "year": 2024,
    367       "relevance": "Describes a CS1 course redesigned around LLM integration, directly relevant to prompts-first pedagogy."
    368     },
    369     {
    370       "title": "Refuting LLM-generated Code with Reactive Task Comprehension",
    371       "authors": ["Sannidhi V Hebbar", "Sasmita Harini S", "Viraj Kumar"],
    372       "year": 2025,
    373       "doi": "10.1145/3724363.3729100",
    374       "relevance": "Addresses verification of LLM-generated code in educational settings."
    375     },
    376     {
    377       "title": "Self-Regulation, Self-Efficacy, and Fear of Failure Interactions with How Novices Use LLMs to Solve Programming Problems",
    378       "authors": ["Lauren E. Margulieux", "James Prather", "Brent N. Reeves"],
    379       "year": 2024,
    380       "doi": "10.1145/3649217.3653621",
    381       "relevance": "Studies metacognitive and self-regulation aspects of novice LLM use for programming."
    382     },
    383     {
    384       "title": "Using Large Language Models to Enhance Programming Error Messages",
    385       "authors": ["Juho Leinonen", "Arto Hellas", "Sami Sarsa", "Brent Reeves", "Paul Denny", "James Prather", "Brett A. Becker"],
    386       "year": 2023,
    387       "doi": "10.1145/3545945.3569770",
    388       "relevance": "LLM application to improve programming education through better error messages."
    389     }
    390   ]
    391 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs