scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (23116B)
      1 {
      2   "paper": {
      3     "title": "Review of Tools for Zero-Code LLM Based Application Development",
      4     "authors": [
      5       "Priyaranjan Pattnayak",
      6       "Hussain Bohra"
      7     ],
      8     "year": 2025,
      9     "venue": "arXiv.org",
     10     "arxiv_id": "2510.19747",
     11     "doi": "10.48550/arXiv.2510.19747"
     12   },
     13   "scan_version": 3,
     14   "active_modules": ["survey_methodology"],
     15   "methodology_tags": ["meta-analysis", "qualitative"],
     16   "key_findings": "This survey categorizes zero-code LLM platforms along four dimensions (interface type, LLM backend, output type, customization) and compares seven platforms (OpenAI GPTs, Bolt.new, Dust.tt, Flowise, Cognosys, Bubble, Glide). The authors find that while these platforms significantly lower the barrier to building AI-powered applications, they trade off fine-grained control, flexibility, scalability, and output reliability. The paper explicitly acknowledges that it does not follow a traditional review protocol; it instead covers what it calls 'representative, influential platforms', a selection that appears ad hoc.",
     17   "checklist": {
     18     "artifacts": {
     19       "code_released": {
     20         "applies": true,
     21         "answer": false,
     22         "justification": "No analysis code, comparison scripts, or structured data are released. No repository URL is provided anywhere in the paper."
     23       },
     24       "data_released": {
     25         "applies": true,
     26         "answer": false,
     27         "justification": "No structured dataset of platform features or comparison data is released. The comparison tables exist only within the paper text."
     28       },
     29       "environment_specified": {
     30         "applies": true,
     31         "answer": false,
     32         "justification": "No environment or tooling specifications are provided, as no computational experiments were conducted."
     33       },
     34       "reproduction_instructions": {
     35         "applies": true,
     36         "answer": false,
     37         "justification": "No instructions for reproducing the survey methodology or platform comparisons are provided."
     38       }
     39     },
     40     "statistical_methodology": {
     41       "confidence_intervals_or_error_bars": {
     42         "applies": false,
     43         "answer": false,
     44         "justification": "This is a qualitative survey with no quantitative experiments or statistical results."
     45       },
     46       "significance_tests": {
     47         "applies": false,
     48         "answer": false,
     49         "justification": "No statistical comparisons are made; the paper provides qualitative descriptions of platforms."
     50       },
     51       "effect_sizes_reported": {
     52         "applies": false,
     53         "answer": false,
     54         "justification": "No quantitative experiments are conducted, so effect sizes are not applicable."
     55       },
     56       "sample_size_justified": {
     57         "applies": false,
     58         "answer": false,
     59         "justification": "No quantitative sample is analyzed; the paper reviews a small number of platforms qualitatively."
     60       },
     61       "variance_reported": {
     62         "applies": false,
     63         "answer": false,
     64         "justification": "No experiments with multiple runs or repeated measurements are conducted."
     65       }
     66     },
     67     "evaluation_design": {
     68       "baselines_included": {
     69         "applies": true,
     70         "answer": false,
     71         "justification": "The survey does not compare against prior surveys of no-code LLM platforms. Section 6 compares zero-code platforms with traditional and low-code development paradigms, but no prior survey is used as a baseline for the review itself."
     72       },
     73       "baselines_contemporary": {
     74         "applies": false,
     75         "answer": false,
     76         "justification": "No experimental baselines are applicable to a qualitative survey paper."
     77       },
     78       "ablation_study": {
     79         "applies": false,
     80         "answer": false,
     81         "justification": "No system with components to ablate; this is a survey paper."
     82       },
     83       "multiple_metrics": {
     84         "applies": false,
     85         "answer": false,
     86         "justification": "No quantitative evaluation metrics are used; platform comparison is purely qualitative."
     87       },
     88       "human_evaluation": {
     89         "applies": false,
     90         "answer": false,
     91         "justification": "No system outputs to evaluate; the paper surveys existing platforms descriptively."
     92       },
     93       "held_out_test_set": {
     94         "applies": false,
     95         "answer": false,
     96         "justification": "No data splits or test sets are applicable to this survey."
     97       },
     98       "per_category_breakdown": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "Tables 1–4 break down platforms across multiple dimensions: interface type, LLM backend, output type, extensibility, agent support, memory, workflow logic, and specific feature support."
    102       },
    103       "failure_cases_discussed": {
    104         "applies": true,
    105         "answer": true,
    106         "justification": "Section 5 ('Trade-offs and Limitations') discusses platform failure modes including limited customizability, scalability problems, vendor lock-in, unreliable AI outputs, prompt engineering difficulties, and shallow learning."
    107       },
    108       "negative_results_reported": {
    109         "applies": true,
    110         "answer": true,
    111         "justification": "Section 5 reports negative findings about the platforms: quality/reliability issues with AI outputs (Section 5.4), scalability constraints (Section 5.2), and the persistent need for prompt engineering skills despite 'no-code' branding (Section 5.5)."
    112       }
    113     },
    114     "claims_and_evidence": {
    115       "abstract_claims_supported": {
    116         "applies": true,
    117         "answer": true,
    118         "justification": "The abstract's main claims — that zero-code LLM platforms lower the barrier to app creation but face challenges in flexibility and reliability — are qualitatively supported by the platform descriptions (Sections 3–4) and trade-off analysis (Section 5). The claims are appropriately hedged."
    119       },
    120       "causal_claims_justified": {
    121         "applies": true,
    122         "answer": false,
    123         "justification": "The paper uses causal language throughout: 'LLMs are transforming software creation by enabling zero-code development' (abstract), 'significantly lowered the barrier' (Section 1). These causal claims are not supported by any controlled study or formal evidence; they are asserted based on platform descriptions and vendor documentation."
    124       },
    125       "generalization_bounded": {
    126         "applies": true,
    127         "answer": false,
    128         "justification": "The title claims to review 'Tools for Zero-Code LLM Based Application Development' broadly, but only 7 platforms are examined in depth. Section 1 notes it 'focuses on representative, influential platforms available publicly' but the paper's conclusions generalize to 'the landscape' without adequately bounding this to the small sample of platforms reviewed."
    129       },
    130       "alternative_explanations_discussed": {
    131         "applies": false,
    132         "answer": false,
    133         "justification": "This is a pure survey/taxonomy paper with no empirical results for which alternative explanations would be relevant."
    134       },
    135       "proxy_outcome_distinction": {
    136         "applies": false,
    137         "answer": false,
    138         "justification": "This is a qualitative survey with no quantitative measurements, so there is no proxy-outcome gap to address."
    139       }
    140     },
    141     "setup_transparency": {
    142       "model_versions_specified": {
    143         "applies": false,
    144         "answer": false,
    145         "justification": "No LLM models were used in the research process; this is a survey paper reviewing platforms."
    146       },
    147       "prompts_provided": {
    148         "applies": false,
    149         "answer": false,
    150         "justification": "No prompting was used in the research methodology."
    151       },
    152       "hyperparameters_reported": {
    153         "applies": false,
    154         "answer": false,
    155         "justification": "No experiments with hyperparameters were conducted."
    156       },
    157       "scaffolding_described": {
    158         "applies": false,
    159         "answer": false,
    160         "justification": "No agentic scaffolding was used in the research."
    161       },
    162       "data_preprocessing_documented": {
    163         "applies": true,
    164         "answer": false,
    165         "justification": "The paper does not describe how platforms were selected for inclusion. No search strategy, selection criteria, or filtering pipeline is documented. Section 1 only states the survey 'focuses on representative, influential platforms available publicly' without defining what counts as representative or influential."
    166       }
    167     },
    168     "limitations_and_scope": {
    169       "limitations_section_present": {
    170         "applies": true,
    171         "answer": false,
    172         "justification": "Section 5 ('Trade-offs and Limitations') discusses limitations of the reviewed platforms, not limitations of the survey methodology itself. There is no section discussing the survey's own methodological limitations, potential biases, or gaps."
    173       },
    174       "threats_to_validity_specific": {
    175         "applies": true,
    176         "answer": false,
    177         "justification": "No threats to the survey's own validity are discussed. The paper does not address potential selection bias in platform choice, reliance on vendor documentation, or the risk of platform descriptions being outdated."
    178       },
    179       "scope_boundaries_stated": {
    180         "applies": true,
    181         "answer": true,
    182         "justification": "Section 1 states: 'While comprehensive, this survey does not follow a traditional review protocol but focuses on representative, influential platforms available publicly.' The paper also states its goal: 'to inform practitioners, researchers, and prospective no-code developers about the current landscape, its benefits, and its limitations.'"
    183       }
    184     },
    185     "data_integrity": {
    186       "raw_data_available": {
    187         "applies": true,
    188         "answer": false,
    189         "justification": "No raw data or structured dataset is made available. The comparison tables cannot be independently verified beyond checking the cited platform documentation."
    190       },
    191       "data_collection_described": {
    192         "applies": true,
    193         "answer": false,
    194         "justification": "The paper does not describe how platforms were identified, selected, or evaluated. The authors appear to have selected platforms ad hoc without documenting the process."
    195       },
    196       "recruitment_methods_described": {
    197         "applies": false,
    198         "answer": false,
    199         "justification": "No human participants were recruited; the study reviews software platforms."
    200       },
    201       "data_pipeline_documented": {
    202         "applies": true,
    203         "answer": false,
    204         "justification": "No data pipeline is described. The process from platform identification to the comparison tables is entirely undocumented."
    205       }
    206     },
    207     "conflicts_of_interest": {
    208       "funding_disclosed": {
    209         "applies": true,
    210         "answer": false,
    211         "justification": "No funding or acknowledgments section is present in the paper."
    212       },
    213       "affiliations_disclosed": {
    214         "applies": true,
    215         "answer": true,
    216         "justification": "Author affiliations are clearly listed: University of Washington (Pattnayak) and SVKM's Narsee Monjee Institute of Management Studies (Bohra). Neither appears to be affiliated with any of the platforms reviewed."
    217       },
    218       "funder_independent_of_outcome": {
    219         "applies": false,
    220         "answer": false,
    221         "justification": "No funding is disclosed; the work appears to be unfunded academic research."
    222       },
    223       "financial_interests_declared": {
    224         "applies": true,
    225         "answer": false,
    226         "justification": "No competing interests or financial interests statement is provided."
    227       }
    228     },
    229     "contamination": {
    230       "training_cutoff_stated": {
    231         "applies": false,
    232         "answer": false,
    233         "justification": "This survey does not evaluate any pre-trained model's capability on any benchmark."
    234       },
    235       "train_test_overlap_discussed": {
    236         "applies": false,
    237         "answer": false,
    238         "justification": "No model evaluation on benchmarks is conducted."
    239       },
    240       "benchmark_contamination_addressed": {
    241         "applies": false,
    242         "answer": false,
    243         "justification": "No benchmark evaluation is conducted."
    244       }
    245     },
    246     "human_studies": {
    247       "pre_registered": {
    248         "applies": false,
    249         "answer": false,
    250         "justification": "No human participants are involved in this survey."
    251       },
    252       "irb_or_ethics_approval": {
    253         "applies": false,
    254         "answer": false,
    255         "justification": "No human participants are involved."
    256       },
    257       "demographics_reported": {
    258         "applies": false,
    259         "answer": false,
    260         "justification": "No human participants are involved."
    261       },
    262       "inclusion_exclusion_criteria": {
    263         "applies": false,
    264         "answer": false,
    265         "justification": "No human participants are involved."
    266       },
    267       "randomization_described": {
    268         "applies": false,
    269         "answer": false,
    270         "justification": "No human participants or experimental conditions."
    271       },
    272       "blinding_described": {
    273         "applies": false,
    274         "answer": false,
    275         "justification": "No human participants or experimental conditions."
    276       },
    277       "attrition_reported": {
    278         "applies": false,
    279         "answer": false,
    280         "justification": "No human participants are involved."
    281       }
    282     },
    283     "cost_and_practicality": {
    284       "inference_cost_reported": {
    285         "applies": false,
    286         "answer": false,
    287         "justification": "This is a survey paper with no computational method of its own."
    288       },
    289       "compute_budget_stated": {
    290         "applies": false,
    291         "answer": false,
    292         "justification": "This is a survey paper with no computational experiments."
    293       }
    294     },
    295     "survey_methodology": {
    296       "prisma_or_structured_protocol": {
    297         "applies": true,
    298         "answer": false,
    299         "justification": "The paper explicitly states in Section 1: 'this survey does not follow a traditional review protocol but focuses on representative, influential platforms available publicly.' No PRISMA flow diagram, search strategy, or systematic protocol is used."
    300       },
    301       "quality_assessment_of_sources": {
    302         "applies": true,
    303         "answer": false,
    304         "justification": "The survey does not assess the quality or reliability of its source materials. Platform capabilities are described from vendor documentation and blog posts without independent verification or quality scoring."
    305       },
    306       "publication_bias_discussed": {
    307         "applies": true,
    308         "answer": false,
    309         "justification": "The paper does not discuss publication bias, selection bias in platform choice, or whether the surveyed platforms represent a biased sample of the ecosystem."
    310       }
    311     }
    312   },
    313   "claims": [
    314     {
    315       "claim": "Zero-code LLM platforms greatly lower the barrier to creating AI-powered applications.",
    316       "evidence": "Qualitative descriptions of 7 platforms in Sections 3–4 showing that platforms like OpenAI GPTs, Bolt.new, and Cognosys enable non-programmers to build AI applications through natural language or visual interfaces.",
    317       "supported": "moderate"
    318     },
    319     {
    320       "claim": "Zero-code LLM platforms face challenges in flexibility and reliability.",
    321       "evidence": "Section 5 discusses six categories of limitations: limited customizability (5.1), scalability/performance constraints (5.2), vendor lock-in (5.3), quality/reliability of AI outputs (5.4), need for prompt engineering (5.5), and shallow learning (5.6).",
    322       "supported": "moderate"
    323     },
    324     {
    325       "claim": "The ecosystem spans from specialized chat-based agent builders to general app builders embedding AI, each serving different user needs.",
    326       "evidence": "Section 2 taxonomy and Tables 1–4 categorize platforms along interface type, LLM backend, output type, and customization dimensions.",
    327       "supported": "moderate"
    328     },
    329     {
    330       "claim": "Zero-code LLM platforms complement rather than replace traditional development.",
    331       "evidence": "Section 6 compares with traditional and low-code approaches, arguing in Section 6.3 that LLMs are being incorporated into traditional development (e.g., GitHub Copilot) and that hybrid workflows where domain experts prototype and engineers refine are emerging.",
    332       "supported": "weak"
    333     },
    334     {
    335       "claim": "Future platforms will incorporate multimodal interfaces, on-device LLMs, better orchestration, and collaborative features.",
    336       "evidence": "Section 7 discusses these trends speculatively, citing emerging technologies like multimodal models, model compression, and multi-agent orchestration. No empirical evidence is provided for these predictions.",
    337       "supported": "weak"
    338     }
    339   ],
    340   "red_flags": [
    341     {
    342       "flag": "No systematic review methodology",
    343       "detail": "The paper explicitly admits it does not follow a traditional review protocol (Section 1). Platform selection appears ad hoc with no documented inclusion/exclusion criteria, search strategy, or selection rationale. The 7 platforms chosen could reflect availability bias rather than representativeness."
    344     },
    345     {
    346       "flag": "Reliance on vendor documentation as evidence",
    347       "detail": "Platform capabilities described in Tables 2–4 and Sections 3–4 are primarily sourced from vendor documentation, blog posts, and product websites (refs 3, 8–12, 14). No independent testing or verification of claimed features was conducted. Vendor descriptions are treated as ground truth."
    348     },
    349     {
    350       "flag": "Excessive self-citation",
    351       "detail": "References 13, 17, 19, 20, 21, 22, 23, 24, and 25 (9 of 45 references, 20%) appear to be self-citations by the authors and their frequent co-authors (Pattnayak, Patel, Agarwal, Kumar, Panda). Most are tangential to the survey's topic of zero-code LLM platforms (e.g., barcode generation, NER for Indic languages, clinical QA)."
    352     },
    353     {
    354       "flag": "Mismatched keywords",
    355       "detail": "The paper's keywords ('Citation Generation, Retrieval-Augmented Generation, AI Ethics, Multimodal LLMs, Evaluation Metrics') do not match its actual content about zero-code LLM application development platforms. This suggests the keywords may have been copied from another paper."
    356     },
    357     {
    358       "flag": "Duplicated text within the paper",
    359       "detail": "The 'Form & Template Configuration' interface type description (Section 2) contains text that is nearly identical to the 'Visual Programming' description. The two paragraphs describe Flowise's visual flow builder in the same words, suggesting a copy-paste error."
    360     },
    361     {
    362       "flag": "No quality assessment of surveyed sources",
    363       "detail": "The survey treats all platforms and cited sources equally without assessing their reliability, maturity, or evidence base. This obscures real differences in evidence quality: a well-documented open-source project (Flowise) is compared alongside a closed-source product with minimal public documentation (Cognosys) using the same framework."
    364     }
    365   ],
    366   "cited_papers": [
    367     {
    368       "title": "Evaluating the use of github copilot for code generation tasks",
    369       "authors": ["M. Chen"],
    370       "year": 2021,
    371       "arxiv_id": "2107.03374",
    372       "relevance": "Evaluates LLM-based code generation assistance (GitHub Copilot), directly relevant to AI-assisted software development."
    373     },
    374     {
    375       "title": "Survey of large language models and prompt engineering",
    376       "authors": ["P. Liu"],
    377       "year": 2023,
    378       "arxiv_id": "2304.13712",
    379       "relevance": "Surveys LLMs and prompt engineering techniques that underpin zero-code development platforms."
    380     },
    381     {
    382       "title": "Llama: Open and efficient foundation language models",
    383       "authors": ["H. Touvron"],
    384       "year": 2023,
    385       "arxiv_id": "2302.13971",
    386       "relevance": "Describes open-weight foundation models that enable model-agnostic and on-device deployment in no-code platforms."
    387     },
    388     {
    389       "title": "Training language models to follow instructions with human feedback",
    390       "authors": ["L. Ouyang"],
    391       "year": 2022,
    392       "arxiv_id": "2203.02155",
    393       "relevance": "Foundational work on instruction-following LLMs that enable natural language-driven application development."
    394     },
    395     {
    396       "title": "Retrieval-augmented generation for knowledge-intensive nlp tasks",
    397       "authors": ["P. Lewis"],
    398       "year": 2020,
    399       "relevance": "Introduces RAG, a core technique used by zero-code platforms (Dust.tt, Flowise) for knowledge integration."
    400     },
    401     {
    402       "title": "ToolLLM: Facilitating tool use for LLMs in reasoning tasks",
    403       "authors": ["X. Fan"],
    404       "year": 2023,
    405       "arxiv_id": "2310.07704",
    406       "relevance": "Addresses LLM tool use capabilities that are central to agent-based zero-code platforms."
    407     },
    408     {
    409       "title": "Why johnny can't prompt: How non-experts struggle with LLMs",
    410       "authors": ["S. Zhang"],
    411       "year": 2023,
    412       "relevance": "Studies the gap between LLM capabilities and non-expert users' ability to leverage them, directly relevant to no-code platform usability."
    413     },
    414     {
    415       "title": "Comparing traditional, low-code, and no-code development: Productivity, flexibility, and tradeoffs",
    416       "authors": ["A. Miller", "L. Zhou"],
    417       "year": 2022,
    418       "relevance": "Compares development paradigms that zero-code LLM platforms are positioned to disrupt."
    419     },
    420     {
    421       "title": "Team agents: Collaborative multi-agent systems with LLMs",
    422       "authors": ["J. Park"],
    423       "year": 2024,
    424       "relevance": "Explores multi-agent orchestration relevant to future directions of zero-code LLM platforms."
    425     },
    426     {
    427       "title": "Collaborative workflows with LLM builders and developers",
    428       "authors": ["P. Kaur", "W. Huang"],
    429       "year": 2024,
    430       "relevance": "Studies collaborative workflows between AI-assisted builders and traditional developers, relevant to hybrid development approaches."
    431     }
    432   ],
    433   "engagement_factors": {
    434     "practical_relevance": {
    435       "score": 2,
    436       "justification": "Practitioners evaluating no-code LLM platforms could use this as a starting comparison guide, though it lacks depth for serious tool selection."
    437     },
    438     "surprise_contrarian": {
    439       "score": 0,
    440       "justification": "Confirms widely held expectations that no-code LLM platforms trade flexibility for accessibility; no surprising findings."
    441     },
    442     "fear_safety": {
    443       "score": 0,
    444       "justification": "No AI safety or security concerns are raised beyond generic reliability issues."
    445     },
    446     "drama_conflict": {
    447       "score": 0,
    448       "justification": "No controversial claims or conflicts with established findings."
    449     },
    450     "demo_ability": {
    451       "score": 0,
    452       "justification": "No code, demo, or installable tool is provided."
    453     },
    454     "brand_recognition": {
    455       "score": 1,
    456       "justification": "Mentions well-known products (OpenAI GPTs, Bubble) but the paper itself is from non-prominent authors and institutions."
    457     }
    458   }
    459 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs