scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (15354B)
      1 {
      2   "scan_version": 2,
      3   "active_modules": [],
      4   "paper": {
      5     "title": "Implementing Grassroots Logic Programs with Multiagent Transition Systems and AI",
      6     "authors": ["Ehud Shapiro"],
      7     "year": 2026,
      8     "venue": "arXiv",
      9     "arxiv_id": "2602.06934"
     10   },
     11   "methodology_tags": ["theoretical"],
     12   "key_findings": "The paper presents dGLP and madGLP — implementation-ready deterministic operational semantics for single-agent and multiagent Grassroots Logic Programs — and proves them correct with respect to their nondeterministic specifications. The key technical insight is that disjoint substitution commutativity (from the single-occurrence invariant) enables binary atomic transactions to be correctly decomposed into sequences of unary transactions. The paper describes a three-layer development process (math → English+code spec → Dart code) where AI (Claude) served as the programmer, with back-and-forth refinement revealing gaps in the mathematical spec.",
     13   "checklist": {
     14     "artifacts": {
     15       "code_released": {
     16         "applies": true,
     17         "answer": false,
     18         "justification": "No repository URL or code archive is provided. The paper mentions Dart implementations exist but does not release them."
     19       },
     20       "data_released": {
     21         "applies": false,
     22         "answer": false,
     23         "justification": "This is a theoretical paper with mathematical proofs; there is no dataset to release."
     24       },
     25       "environment_specified": {
     26         "applies": false,
     27         "answer": false,
     28         "justification": "Purely theoretical paper with no computational experiments to reproduce."
     29       },
     30       "reproduction_instructions": {
     31         "applies": false,
     32         "answer": false,
     33         "justification": "No experiments to reproduce. The proofs are self-contained in the paper and appendices."
     34       }
     35     },
     36     "statistical_methodology": {
     37       "confidence_intervals_or_error_bars": {
     38         "applies": false,
     39         "answer": false,
     40         "justification": "Theoretical paper with no empirical results or statistical analysis."
     41       },
     42       "significance_tests": {
     43         "applies": false,
     44         "answer": false,
     45         "justification": "No empirical comparisons are made."
     46       },
     47       "effect_sizes_reported": {
     48         "applies": false,
     49         "answer": false,
     50         "justification": "No empirical results."
     51       },
     52       "sample_size_justified": {
     53         "applies": false,
     54         "answer": false,
     55         "justification": "Theoretical paper; no samples."
     56       },
     57       "variance_reported": {
     58         "applies": false,
     59         "answer": false,
     60         "justification": "No experimental runs."
     61       }
     62     },
     63     "evaluation_design": {
     64       "baselines_included": {
     65         "applies": false,
     66         "answer": false,
     67         "justification": "Theoretical paper with formal proofs, not empirical evaluation. There are no baselines to compare against."
     68       },
     69       "baselines_contemporary": {
     70         "applies": false,
     71         "answer": false,
     72         "justification": "No empirical evaluation."
     73       },
     74       "ablation_study": {
     75         "applies": false,
     76         "answer": false,
     77         "justification": "No system evaluation; the paper presents mathematical definitions and proofs."
     78       },
     79       "multiple_metrics": {
     80         "applies": false,
     81         "answer": false,
     82         "justification": "No empirical evaluation with metrics."
     83       },
     84       "human_evaluation": {
     85         "applies": false,
     86         "answer": false,
     87         "justification": "No system outputs to evaluate."
     88       },
     89       "held_out_test_set": {
     90         "applies": false,
     91         "answer": false,
     92         "justification": "No data-driven evaluation."
     93       },
     94       "per_category_breakdown": {
     95         "applies": false,
     96         "answer": false,
     97         "justification": "No empirical results to break down."
     98       },
     99       "failure_cases_discussed": {
    100         "applies": true,
    101         "answer": true,
    102         "justification": "Section 7 (Conclusion) discusses development history where initial designs failed: 'The initial design used routing tables to track variable location; when variables migrated between agents, these tables required complex updates that proved error-prone to implement.' This led to major redesigns."
    103       },
    104       "negative_results_reported": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "Section 7 reports that the initial madGLP design with routing tables and separate cold-call mechanisms proved error-prone and had to be redesigned twice."
    108       }
    109     },
    110     "claims_and_evidence": {
    111       "abstract_claims_supported": {
    112         "applies": true,
    113         "answer": true,
    114         "justification": "The abstract claims dGLP and madGLP are implementation-ready deterministic semantics proved correct with respect to their specifications. Theorem 3.24 proves dGLP correct, Theorem 5.7 proves madGLP correct, and Theorem D.10 proves madGLP is grassroots. All claims are supported by formal proofs."
    115       },
    116       "causal_claims_justified": {
    117         "applies": false,
    118         "answer": false,
    119         "justification": "The paper makes mathematical claims (correctness theorems) rather than causal empirical claims."
    120       },
    121       "generalization_bounded": {
    122         "applies": true,
    123         "answer": true,
    124         "justification": "The paper's claims are precisely bounded by the formal definitions. Theorems state exactly what is proved (e.g., 'dGLP correctly implements GLP' per Definition 2.3). The scope is clearly the GLP language family as defined."
    125       },
    126       "alternative_explanations_discussed": {
    127         "applies": false,
    128         "answer": false,
    129         "justification": "No empirical results requiring alternative explanations. The paper presents mathematical proofs which are either correct or not."
    130       },
    131       "proxy_outcome_distinction": {
    132         "applies": false,
    133         "answer": false,
    134         "justification": "Theoretical paper with no measurements or proxy metrics."
    135       }
    136     },
    137     "setup_transparency": {
    138       "model_versions_specified": {
    139         "applies": false,
    140         "answer": false,
    141         "justification": "The paper mentions AI (Claude) was used for implementation but does not evaluate any AI model's capability. The AI is a tool, not the subject of study."
    142       },
    143       "prompts_provided": {
    144         "applies": false,
    145         "answer": false,
    146         "justification": "No prompting-based experiments. The paper is about formal operational semantics, not AI evaluation."
    147       },
    148       "hyperparameters_reported": {
    149         "applies": false,
    150         "answer": false,
    151         "justification": "No computational experiments with hyperparameters."
    152       },
    153       "scaffolding_described": {
    154         "applies": false,
    155         "answer": false,
    156         "justification": "No agentic scaffolding used in evaluation."
    157       },
    158       "data_preprocessing_documented": {
    159         "applies": false,
    160         "answer": false,
    161         "justification": "No data to preprocess; purely theoretical work."
    162       }
    163     },
    164     "limitations_and_scope": {
    165       "limitations_section_present": {
    166         "applies": true,
    167         "answer": false,
    168         "justification": "No dedicated limitations section. The paper has a conclusion but no discussion of limitations of the formal framework or the development approach."
    169       },
    170       "threats_to_validity_specific": {
    171         "applies": true,
    172         "answer": false,
    173         "justification": "No threats to validity discussed. While the proofs are formal, the paper does not discuss limitations of the approach (e.g., scalability of the implementation, assumptions about fair message delivery)."
    174       },
    175       "scope_boundaries_stated": {
    176         "applies": true,
    177         "answer": false,
    178         "justification": "The paper does not explicitly state what is out of scope or what the framework does not handle, beyond briefly mentioning fair message delivery as a 'standard assumption.'"
    179       }
    180     },
    181     "data_integrity": {
    182       "raw_data_available": {
    183         "applies": false,
    184         "answer": false,
    185         "justification": "Theoretical paper; no data collected."
    186       },
    187       "data_collection_described": {
    188         "applies": false,
    189         "answer": false,
    190         "justification": "No data collection."
    191       },
    192       "recruitment_methods_described": {
    193         "applies": false,
    194         "answer": false,
    195         "justification": "No participants or samples recruited."
    196       },
    197       "data_pipeline_documented": {
    198         "applies": false,
    199         "answer": false,
    200         "justification": "No data pipeline."
    201       }
    202     },
    203     "conflicts_of_interest": {
    204       "funding_disclosed": {
    205         "applies": true,
    206         "answer": false,
    207         "justification": "No funding information is provided anywhere in the paper."
    208       },
    209       "affiliations_disclosed": {
    210         "applies": true,
    211         "answer": true,
    212         "justification": "Author affiliation is clearly stated: 'London School of Economics, UK, and Weizmann Institute of Science, Israel.'"
    213       },
    214       "funder_independent_of_outcome": {
    215         "applies": true,
    216         "answer": false,
    217         "justification": "No funding information disclosed, so independence cannot be assessed."
    218       },
    219       "financial_interests_declared": {
    220         "applies": true,
    221         "answer": false,
    222         "justification": "No competing interests statement is provided."
    223       }
    224     },
    225     "contamination": {
    226       "training_cutoff_stated": {
    227         "applies": false,
    228         "answer": false,
    229         "justification": "The paper does not evaluate any pre-trained model on a benchmark. AI (Claude) is used as a development tool, not as the subject of evaluation."
    230       },
    231       "train_test_overlap_discussed": {
    232         "applies": false,
    233         "answer": false,
    234         "justification": "No benchmark evaluation of any model."
    235       },
    236       "benchmark_contamination_addressed": {
    237         "applies": false,
    238         "answer": false,
    239         "justification": "No benchmark evaluation."
    240       }
    241     },
    242     "human_studies": {
    243       "pre_registered": {
    244         "applies": false,
    245         "answer": false,
    246         "justification": "No human participants."
    247       },
    248       "irb_or_ethics_approval": {
    249         "applies": false,
    250         "answer": false,
    251         "justification": "No human participants."
    252       },
    253       "demographics_reported": {
    254         "applies": false,
    255         "answer": false,
    256         "justification": "No human participants."
    257       },
    258       "inclusion_exclusion_criteria": {
    259         "applies": false,
    260         "answer": false,
    261         "justification": "No human participants."
    262       },
    263       "randomization_described": {
    264         "applies": false,
    265         "answer": false,
    266         "justification": "No human participants."
    267       },
    268       "blinding_described": {
    269         "applies": false,
    270         "answer": false,
    271         "justification": "No human participants."
    272       },
    273       "attrition_reported": {
    274         "applies": false,
    275         "answer": false,
    276         "justification": "No human participants."
    277       }
    278     },
    279     "cost_and_practicality": {
    280       "inference_cost_reported": {
    281         "applies": false,
    282         "answer": false,
    283         "justification": "Theoretical paper; no computational method whose cost needs reporting."
    284       },
    285       "compute_budget_stated": {
    286         "applies": false,
    287         "answer": false,
    288         "justification": "Theoretical paper with no computational experiments."
    289       }
    290     }
    291   },
    292   "claims": [
    293     {
    294       "claim": "dGLP correctly implements the Concurrent GLP operational semantics",
    295       "evidence": "Theorem 3.24, proved via Lemma 3.19 (liveness) and Lemma 3.23 (completeness), with supporting lemmas on disjoint substitution commutativity (3.20) and reduction set outcome determinism (3.21). Full proofs in Appendix A.",
    296       "supported": "strong"
    297     },
    298     {
    299       "claim": "madGLP correctly implements maGLP (multiagent GLP)",
    300       "evidence": "Theorem 5.7, proved via Lemma C.36 (liveness) and Lemma C.38 (completeness). Full proofs in Appendix C.",
    301       "supported": "strong"
    302     },
    303     {
    304       "claim": "madGLP is grassroots (oblivious and interactive)",
    305       "evidence": "Theorem D.10, proved by showing madGLP is transactions-based with interactive transactions (Appendix D).",
    306       "supported": "strong"
    307     },
    308     {
    309       "claim": "Binary atomic transactions can be correctly implemented by sequences of unary transactions",
    310       "evidence": "Propositions C.33-C.35 show Reduce, Communicate, and Cold-call implementations. The decomposition relies on persistence (Lemma 3.12) and disjoint substitution commutativity (Lemma 3.20).",
    311       "supported": "strong"
    312     },
    313     {
    314       "claim": "The three-layer development process (math → English+code spec → Dart) with AI revealed gaps in the mathematical specification",
    315       "evidence": "Section 7 describes two major redesigns of madGLP driven by implementation difficulties discovered during Dart programming, including elimination of variable migration and unification of cold-call mechanisms.",
    316       "supported": "moderate"
    317     }
    318   ],
    319   "red_flags": [
    320     {
    321       "flag": "No empirical evaluation of the AI development process",
    322       "detail": "The paper claims a three-layer AI development discipline (math → spec → code) but provides no empirical evidence of its effectiveness — no metrics on code correctness, development time, iteration counts, or comparison with human-only development. The claim about this methodology is anecdotal."
    323     },
    324     {
    325       "flag": "Implementation not released",
    326       "detail": "The Dart implementations that motivated this work are not released. The paper describes mathematical specs used by AI to generate code, but readers cannot verify the implementations exist or work correctly."
    327     }
    328   ],
    329   "cited_papers": [
    330     {
    331       "title": "Statically contextualizing large language models with typed holes",
    332       "authors": ["Andrew Blinn", "Xiang Li", "June Hyung Kim", "Cyrus Omar"],
    333       "year": 2024,
    334       "doi": "10.1145/3689746",
    335       "relevance": "Explores using type information to constrain LLM code generation, directly relevant to AI-assisted programming methodology."
    336     },
    337     {
    338       "title": "Type-constrained code generation with language models",
    339       "authors": ["Niels Mündler", "Rachid Guerraoui", "Martin Vechev"],
    340       "year": 2025,
    341       "relevance": "Addresses type-constrained AI code generation, relevant to formal specification-driven AI programming."
    342     },
    343     {
    344       "title": "Understanding spec-driven-development: Kiro, spec-kit, and tessl",
    345       "authors": ["Martin Fowler"],
    346       "year": 2025,
    347       "relevance": "Analyzes the shift toward formal specifications as the primary interface for AI code generation, directly related to this paper's methodology."
    348     },
    349     {
    350       "title": "AutoGen: Enabling next-gen LLM applications via multi-agent conversation",
    351       "authors": ["Qingyun Wu"],
    352       "year": 2023,
    353       "relevance": "Multi-agent AI framework relevant to the survey's coverage of agentic AI systems."
    354     },
    355     {
    356       "title": "Communicative agents for software development (ChatDev)",
    357       "authors": ["Chen Qian"],
    358       "year": 2023,
    359       "relevance": "Multi-agent system for software development, relevant to AI-assisted coding approaches."
    360     }
    361   ]
    362 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs