scan-v5.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v5.json (17997B)
      1 {
      2   "scan_version": 5,
      3   "paper_type": "theoretical",
      4   "paper": {
      5     "title": "Implementing Grassroots Logic Programs with Multiagent Transition Systems and AI",
      6     "authors": [
      7       "Ehud Shapiro"
      8     ],
      9     "year": 2026,
     10     "venue": "arXiv",
     11     "arxiv_id": "2602.06934",
     12     "doi": null
     13   },
     14   "checklist": {
     15     "claims_and_evidence": {
     16       "abstract_claims_supported": {
     17         "applies": true,
     18         "answer": false,
     19         "justification": "The mathematical correctness claims (Theorems 3.24, 5.7) are fully proved in the paper, but the abstract asserts 'AI developed a workstation-based implementation of GLP' and is 'developing a smartphone-based implementation'—neither implementation is shown, tested, or evidenced anywhere in the paper.",
     20         "source": "haiku"
     21       },
     22       "causal_claims_justified": {
     23         "applies": false,
     24         "answer": false,
     25         "justification": "The paper makes no empirical causal claims; all claims are formal mathematical theorems with proofs.",
     26         "source": "haiku"
     27       },
     28       "generalization_bounded": {
     29         "applies": true,
     30         "answer": true,
     31         "justification": "The correctness theorems are precisely bounded to the formally defined transition systems (dGLP implements GLP; madGLP implements maGLP). The advocacy for a three-layer development methodology is explicitly framed as 'emerging discipline we advocate' rather than a proven result.",
     32         "source": "haiku"
     33       },
     34       "alternative_explanations_discussed": {
     35         "applies": false,
     36         "answer": false,
     37         "justification": "This is a pure theoretical paper with formal proofs; alternative explanations are not applicable to mathematical correctness arguments.",
     38         "source": "haiku"
     39       },
     40       "proxy_outcome_distinction": {
     41         "applies": false,
     42         "answer": false,
     43         "justification": "No empirical measurements are made; all results are formal mathematical proofs.",
     44         "source": "haiku"
     45       }
     46     },
     47     "limitations_and_scope": {
     48       "limitations_section_present": {
     49         "applies": true,
     50         "answer": false,
     51         "justification": "There is no dedicated limitations or threats-to-validity section. Section 7 is a 'Conclusion' containing 'Development History' and 'Summary' subsections but no limitations discussion.",
     52         "source": "haiku"
     53       },
     54       "threats_to_validity_specific": {
     55         "applies": true,
     56         "answer": false,
     57         "justification": "No threats to validity are discussed; specifically, the adequacy of the formal models as abstractions of real distributed systems is not examined, and the assumption of fair message delivery is stated but not analyzed for real-world applicability.",
     58         "source": "haiku"
     59       },
     60       "scope_boundaries_stated": {
     61         "applies": true,
     62         "answer": false,
     63         "justification": "The paper defines what the formal results cover but never explicitly states what the results do not show—no discussion of whether the correctness guarantees transfer to the actual Dart implementation, or what real network properties are excluded from the model.",
     64         "source": "haiku"
     65       }
     66     },
     67     "conflicts_of_interest": {
     68       "funding_disclosed": {
     69         "applies": true,
     70         "answer": false,
     71         "justification": "No funding acknowledgment appears anywhere in the paper.",
     72         "source": "haiku"
     73       },
     74       "affiliations_disclosed": {
     75         "applies": true,
     76         "answer": true,
     77         "justification": "Author affiliations (London School of Economics, UK, and Weizmann Institute of Science, Israel) are disclosed on the title page.",
     78         "source": "haiku"
     79       },
     80       "funder_independent_of_outcome": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "No funding is disclosed, making this criterion not applicable.",
     84         "source": "haiku"
     85       },
     86       "financial_interests_declared": {
     87         "applies": true,
     88         "answer": false,
     89         "justification": "No competing interests or financial interests statement appears in the paper.",
     90         "source": "haiku"
     91       }
     92     },
     93     "scope_and_framing": {
     94       "key_terms_defined": {
     95         "applies": true,
     96         "answer": true,
     97         "justification": "All key terms are rigorously defined: GLP variables (Def. 3.1), terms and goals (Def. 3.2), SO invariant (Def. 3.3), GLP program (Def. 3.4), transition system (Def. 2.1), implementation (Def. 2.2), multiagent transition system (Def. 4.3), grassroots (Def. D.4), globalize/localize (Defs. C.12–C.13), etc.",
     98         "source": "haiku"
     99       },
    100       "intended_contribution_clear": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "The contribution is clearly stated: 'we describe the mathematics developed to facilitate the workstation- and smartphone-based implementations of GLP by AI'—specifically dGLP and madGLP with correctness proofs, and a formal-spec-driven human-AI development methodology.",
    104         "source": "haiku"
    105       },
    106       "engagement_with_prior_work": {
    107         "applies": true,
    108         "answer": true,
    109         "justification": "Section 6 provides substantive engagement: GLP is explicitly characterized as 'Flat Concurrent Prolog with the SRSW constraint added'; connections to CCP, linear logic, session types (Caires-Pfenning, Wadler), futures/promises, I/O automata (Lynch-Tuttle), CRDTs, and concurrent logic programming languages are all explicitly drawn with comparison of similarities and differences.",
    110         "source": "haiku"
    111       }
    112     }
    113   },
    114   "type_checklist": {
    115     "theoretical": {
    116       "formal_quality": {
    117         "assumptions_stated_explicitly": {
    118           "applies": true,
    119           "answer": false,
    120           "justification": "Several assumptions are deferred or left implicit: the notion of fair runs is 'assumed abstractly' with reference to [46] without restating it, and goal identity needed for liveness proofs is explicitly noted as 'left implicit' (Section 3.1, p. 5). Fair message delivery is stated but not analyzed.",
    121           "source": "haiku"
    122         },
    123         "proofs_complete_or_sketched": {
    124           "applies": true,
    125           "answer": true,
    126           "justification": "Main theorems (3.24, 5.7, D.10, D.11) have complete proofs in the main text referencing prior lemmas. Key lemmas (3.19–3.23, C.28–C.38, D.9–D.11) are proved with sufficient detail in Appendices A–D.",
    127           "source": "haiku"
    128         },
    129         "bounds_tight_or_discussed": {
    130           "applies": false,
    131           "answer": false,
    132           "justification": "The paper proves exact correctness (not bounds or approximations); there are no asymptotic or approximation bounds to assess for tightness.",
    133           "source": "haiku"
    134         },
    135         "counterexamples_explored": {
    136           "applies": true,
    137           "answer": true,
    138           "justification": "Appendix E provides three detailed example traces including the critical edge case of both ends of a variable pair exported to the same agent (E.3), and the friend-mediated introduction scenario where values transit through an intermediate relay agent (E.2).",
    139           "source": "haiku"
    140         },
    141         "notation_consistent": {
    142           "applies": true,
    143           "answer": true,
    144           "justification": "Notation is consistent throughout: writer variables (uppercase), reader variables (uppercase with ?), TS = (C, c0, T) for all transition systems, numbered definitions and lemmas referenced consistently across all sections and appendices.",
    145           "source": "haiku"
    146         },
    147         "constructive_vs_existence_noted": {
    148           "applies": true,
    149           "answer": true,
    150           "justification": "Implementation mappings σ (Def. 3.18) and π (Def. C.32) are explicitly constructed. Lemma 3.22 ('dGLP Performs Same Reductions') and Lemma C.37 prove constructively by induction on spawn order that the implementations can perform the same reductions as the specifications.",
    151           "source": "haiku"
    152         }
    153       },
    154       "connections": {
    155         "connection_to_practice_discussed": {
    156           "applies": true,
    157           "answer": true,
    158           "justification": "The practical motivation is explicit: implementing GLP in Dart on workstations and peer-to-peer smartphones via AI. The three-layer methodology (math→English+code spec→Dart code) is described as a practical discipline for human-AI co-development of formally verified software.",
    159           "source": "haiku"
    160         },
    161         "relationship_to_prior_work_clear": {
    162           "applies": true,
    163           "answer": true,
    164           "justification": "Section 6 clearly positions each contribution: GLP vs. Flat Concurrent Prolog (adds SRSW), GLP vs. CCP (inherits monotonicity), GLP vs. linear logic/session types (same resource sensitivity), correctness framework vs. Lynch-Tuttle I/O automata and Abadi-Lamport refinement mappings.",
    165           "source": "haiku"
    166         },
    167         "computational_complexity_discussed": {
    168           "applies": true,
    169           "answer": false,
    170           "justification": "No computational complexity analysis is provided for dGLP or madGLP; time/space complexity of FIFO-based goal scheduling, global writers table lookup, or message routing is not discussed.",
    171           "source": "haiku"
    172         },
    173         "limitations_of_formal_model_stated": {
    174           "applies": true,
    175           "answer": false,
    176           "justification": "The paper does not discuss what the formal models fail to capture—network failure, message loss, Byzantine agents, Dart runtime limitations, or the gap between the formal correctness proof and the actual Dart implementation's correctness.",
    177           "source": "haiku"
    178         }
    179       }
    180     }
    181   },
    182   "claims": [
    183     {
    184       "claim": "dGLP correctly implements GLP: every fair dGLP run maps to a fair GLP run, and every GLP outcome is achievable by a dGLP run",
    185       "evidence": "Theorem 3.24, proved via Lemma 3.19 (liveness) and Lemma 3.23 (completeness), relying on disjoint substitution commutativity (Lemma 3.20) and GLP persistence (Lemma 3.12)",
    186       "supported": "strong"
    187     },
    188     {
    189       "claim": "madGLP correctly implements maGLP, decomposing binary atomic transactions into sequences of unary transactions via global_send goals",
    190       "evidence": "Theorem 5.7, proved via Lemma C.36 (liveness) and Lemma C.38 (completeness); transaction correspondence shown by Propositions C.33–C.35",
    191       "supported": "strong"
    192     },
    193     {
    194       "claim": "madGLP is grassroots: any subset of agents forms a functioning subsystem independent of global resources",
    195       "evidence": "Theorem D.10, proved by showing madGLP is a transactions-based protocol with interactive transactions, applying Theorem D.9",
    196       "supported": "strong"
    197     },
    198     {
    199       "claim": "Local variable pairs connected by global links correctly implement maGLP shared variable pairs, including when both ends are exported to the same agent",
    200       "evidence": "Corollary 5.8, following from Theorem 5.7 and Lemmas C.28–C.29; verified with a complete formal trace in Appendix E.3",
    201       "supported": "strong"
    202     },
    203     {
    204       "claim": "AI (Claude) developed a working workstation-based GLP implementation in Dart using dGLP as formal specification",
    205       "evidence": "Asserted in the abstract and introduction; no implementation code, test results, or validation appears in the paper",
    206       "supported": "unsupported"
    207     },
    208     {
    209       "claim": "The three-layer methodology (math→English+code spec→Dart) is a productive discipline for human-AI co-development of verified software",
    210       "evidence": "Described qualitatively; the concrete outcome (major redesigns were required) is mentioned, but no systematic evaluation of the methodology's effectiveness is provided",
    211       "supported": "weak"
    212     }
    213   ],
    214   "methodology_tags": [
    215     "theoretical"
    216   ],
    217   "key_findings": "The paper presents dGLP and madGLP—deterministic, implementation-ready operational semantics for single-agent and multiagent Grassroots Logic Programs—and proves them formally correct with respect to their nondeterministic abstract specifications. The key mathematical insight is that disjoint substitution commutativity (from the single-occurrence invariant) combined with persistence allows FIFO-scheduled deterministic execution to achieve the same outcomes as arbitrary-order nondeterminism. The madGLP design replaces abstract shared variable pairs with local pairs connected by global links and global_send goals, decomposing binary atomic transactions into sequences of unary transactions while preserving full correctness. A general theorem (D.11) establishes that correct and complete implementations preserve the grassroots property, yielding an alternative proof path for specifications via their implementations.",
    218   "red_flags": [
    219     {
    220       "flag": "AI implementation claims unsupported",
    221       "detail": "The paper asserts AI (Claude) developed a working Dart implementation using dGLP/madGLP as formal specs, but no implementation, tests, or evidence of correctness beyond the mathematical framework appears anywhere in the paper."
    222     },
    223     {
    224       "flag": "Goal identity left implicit",
    225       "detail": "The paper acknowledges that goal identity—needed to track 'the same goal across configurations' for liveness proofs—'can be formalized' but is explicitly 'left implicit' (Section 3.1), creating an informal gap in the liveness argument."
    226     },
    227     {
    228       "flag": "Fair runs assumption deferred to external work",
    229       "detail": "The notion of fair runs is 'assumed abstractly' with reference to [46] without restating the definition, making the liveness framework partially opaque to readers without access to that paper."
    230     },
    231     {
    232       "flag": "No model limitations stated",
    233       "detail": "The paper does not discuss what real-world properties the formal model fails to capture (network failure, Byzantine agents, Dart runtime constraints), leaving a gap between the correctness proof and the claim of a working implementation."
    234     }
    235   ],
    236   "cited_papers": [
    237     {
    238       "title": "GLP: A Grassroots, Multiagent, Concurrent, Logic Programming Language (Shapiro 2025)",
    239       "relevance": "Foundation paper defining GLP syntax and abstract semantics that this paper implements with deterministic semantics"
    240     },
    241     {
    242       "title": "Multiagent Transition Systems: Protocol-Stack Mathematics for Distributed Computing (Shapiro 2021)",
    243       "relevance": "Defines the multiagent transition system framework and liveness/fairness definitions used throughout"
    244     },
    245     {
    246       "title": "Grassroots Distributed Systems: Concept, Examples, Implementation and Applications (Shapiro 2023)",
    247       "relevance": "Introduces the grassroots property proved for GLP in this paper"
    248     },
    249     {
    250       "title": "Semantic Foundations of Concurrent Constraint Programming (Saraswat, Rinard, Panangaden 1991)",
    251       "relevance": "CCP paradigm providing the monotonic constraint store template for GLP's single-assignment variable semantics"
    252     },
    253     {
    254       "title": "The Family of Concurrent Logic Programming Languages (Shapiro 1989)",
    255       "relevance": "Survey of the concurrent logic programming tradition to which GLP belongs; covers GHC, Concurrent Prolog, PARLOG"
    256     },
    257     {
    258       "title": "Session Types as Intuitionistic Linear Propositions (Caires, Pfenning 2010)",
    259       "relevance": "Curry-Howard correspondence connecting linear logic and session types, mirroring GLP's writer/reader duality"
    260     },
    261     {
    262       "title": "Hierarchical Correctness Proofs for Distributed Algorithms (Lynch, Tuttle 1987)",
    263       "relevance": "I/O automata and forward simulation methodology used as template for correctness proofs"
    264     },
    265     {
    266       "title": "The Existence of Refinement Mappings (Abadi, Lamport 1991)",
    267       "relevance": "Establishes that simulation mappings can always be found under suitable conditions; foundational for the correctness framework"
    268     },
    269     {
    270       "title": "Conflict-free Replicated Data Types (Shapiro et al. 2011)",
    271       "relevance": "CRDTs share monotonic state growth with GLP agents; comparison illuminates GLP's distributed execution model"
    272     },
    273     {
    274       "title": "Linearizability: A Correctness Condition for Concurrent Objects (Herlihy, Wing 1990)",
    275       "relevance": "Gold standard for concurrent correctness; paper's transaction decomposition achieves a related goal using different machinery"
    276     }
    277   ],
    278   "engagement_factors": {
    279     "practical_relevance": {
    280       "score": 1,
    281       "justification": "The formal-spec-as-AI-programming-interface methodology has practical interest, but the GLP smartphone system is highly specialized and no working implementation is demonstrated in the paper."
    282     },
    283     "surprise_contrarian": {
    284       "score": 1,
    285       "justification": "Using formal operational semantics as the primary interface between human designer and AI programmer is a distinctive framing, though not fully evidenced."
    286     },
    287     "fear_safety": {
    288       "score": 0,
    289       "justification": "No AI safety concerns; the paper is about programming language semantics and distributed systems correctness."
    290     },
    291     "drama_conflict": {
    292       "score": 0,
    293       "justification": "No controversy or conflict angle; a straightforward formal methods contribution."
    294     },
    295     "demo_ability": {
    296       "score": 1,
    297       "justification": "A Dart implementation is mentioned as existing but is not included or linked in the paper; the English+code spec is supplementary material but the runnable code is not accessible."
    298     },
    299     "brand_recognition": {
    300       "score": 1,
    301       "justification": "Ehud Shapiro is a well-known figure in logic programming (inventor of Concurrent Prolog, ACM fellow) but no famous lab or commercial product is associated."
    302     }
    303   },
    304   "hn_data": {
    305     "threads": [],
    306     "top_points": 0,
    307     "total_points": 0,
    308     "total_comments": 0
    309   }
    310 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs