scan-v4.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan-v4.json (18643B)
      1 {
      2   "scan_version": 4,
      3   "paper_type": "survey",
      4   "paper": {
      5     "title": "Generative AI in the Construction Industry: A State-of-the-art Analysis",
      6     "authors": [
      7       "R. Taiwo",
      8       "I. T. Bello",
      9       "S. Abdulai",
     10       "Abdul-Mugis Yussif",
     11       "B. Salami"
     12     ],
     13     "year": 2024,
     14     "venue": "arXiv.org",
     15     "arxiv_id": "2402.09939",
     16     "doi": "10.48550/arXiv.2402.09939"
     17   },
     18   "checklist": {
     19     "claims_and_evidence": {
     20       "abstract_claims_supported": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "The abstract's specific claims are supported: the 5.2%, 9.4%, and 4.8% improvements are verified in Table 21; the review of opportunities and challenges is presented in Section 4; the framework is in Section 5; the case study is in Section 6.",
     24         "source": "opus"
     25       },
     26       "causal_claims_justified": {
     27         "applies": true,
     28         "answer": true,
     29         "justification": "The paper's main causal claim is that RAG improves GPT-4's performance. This is tested via controlled single-variable manipulation: the same GPT-4 model with and without RAG, evaluated on the same 20 questions by the same raters. This design is adequate for the causal claim, despite the small sample.",
     30         "source": "opus"
     31       },
     32       "generalization_bounded": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "The paper frames itself as 'a comprehensive analysis and practical framework' and 'a state-of-the-art analysis' of generative AI in construction. However, the evidence base is thin: only 6 papers found in the systematic review, 11 experts surveyed, and 1 case study on a single contract document with a single model. The broad framing significantly exceeds the narrow evidence.",
     36         "source": "opus"
     37       },
     38       "alternative_explanations_discussed": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "No alternative explanations are discussed for the RAG improvement. Possible confounds such as evaluator bias (experts knew which system was which), question selection effects, or small-sample variability are not considered.",
     42         "source": "opus"
     43       },
     44       "proxy_outcome_distinction": {
     45         "applies": true,
     46         "answer": false,
     47         "justification": "The paper measures subjective 5-point expert ratings of quality, relevance, and reproducibility and frames these as evidence of improved 'information retrieval and knowledge discovery' (Section 6). The gap between subjective ratings by 3 experts and actual information extraction quality is not acknowledged.",
     48         "source": "opus"
     49       }
     50     },
     51     "limitations_and_scope": {
     52       "limitations_section_present": {
     53         "applies": true,
     54         "answer": true,
     55         "justification": "Section 6.3 ('Model limitation') is a dedicated subsection discussing case study limitations. Section 7 (Conclusion) contains a substantive paragraph on overall study limitations covering database scope, panel size, and model choice.",
     56         "source": "opus"
     57       },
     58       "threats_to_validity_specific": {
     59         "applies": true,
     60         "answer": true,
     61         "justification": "Specific threats are discussed: the RAG model failed on 2 questions due to chunking strategy limitations (Section 6.3); the literature review was confined to 3 databases despite snowball searching; the Delphi panel was restricted in size; only a single LLM and embedding technique were used due to API costs (Section 7).",
     62         "source": "opus"
     63       },
     64       "scope_boundaries_stated": {
     65         "applies": true,
     66         "answer": true,
     67         "justification": "Section 7 states specific boundaries: 'confined to three databases - Scopus, Web of Science, and ScienceDirect'; 'only a single base large language model and embedding technique were utilized due to API access costs'; model 'trained on just a single contract document and may fail to transfer to new projects' (Section 6.3).",
     68         "source": "opus"
     69       }
     70     },
     71     "conflicts_of_interest": {
     72       "funding_disclosed": {
     73         "applies": true,
     74         "answer": true,
     75         "justification": "The Acknowledgment section states: 'This research is supported by the Department of Building and Real Estate, The Hong Kong Polytechnic University, and the Centre for Advances in Reliability and Safety (CAiRS).'",
     76         "source": "opus"
     77       },
     78       "affiliations_disclosed": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "All author affiliations are listed: Hong Kong Polytechnic University, CAiRS, Cardiff Metropolitan University, and Leeds Beckett University. The paper does not evaluate a product from any of these institutions.",
     82         "source": "opus"
     83       },
     84       "funder_independent_of_outcome": {
     85         "applies": true,
     86         "answer": true,
     87         "justification": "Funding is from a university department and research center with no apparent financial interest in whether generative AI succeeds or fails in the construction industry.",
     88         "source": "opus"
     89       },
     90       "financial_interests_declared": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "No competing interests or financial interests statement is included in the paper.",
     94         "source": "opus"
     95       }
     96     },
     97     "scope_and_framing": {
     98       "key_terms_defined": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "Generative AI is explicitly defined in Section 1 and Section 2 ('a branch of AI that aims to create novel and realistic data or content... based on some input or prior knowledge'). LLMs, GANs, RAG, and other key terms are defined with technical detail.",
    102         "source": "haiku"
    103       },
    104       "intended_contribution_clear": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "Three explicit objectives are listed in Section 1: (1) review and categorize opportunities/challenges, (2) propose implementation framework, (3) demonstrate via case study. The contribution is unambiguous.",
    108         "source": "haiku"
    109       },
    110       "engagement_with_prior_work": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "The paper engages with the six identified prior works (Saka et al., Ghimire et al., Liao et al., Zheng & Fischer, etc.) and explicitly positions its contribution relative to them, noting prior work focuses on specific applications without holistic coverage.",
    114         "source": "haiku"
    115       }
    116     }
    117   },
    118   "type_checklist": {
    119     "survey": {
    120       "search_and_selection": {
    121         "search_strategy_reproducible": {
    122           "applies": true,
    123           "answer": true,
    124           "justification": "The complete Boolean search string is provided verbatim in Section 3, Phase 1, including all construction and GenAI terms with OR/AND operators. The three databases are named. A reader could re-run this search.",
    125           "source": "haiku"
    126         },
    127         "inclusion_exclusion_explicit": {
    128           "applies": true,
    129           "answer": false,
    130           "justification": "The paper describes the screening process (title/abstract screening, full-text review, English-only, peer-reviewed) but never formally states explicit inclusion and exclusion criteria as a distinct list. What counted as 'relevant' is not operationalized.",
    131           "source": "haiku"
    132         },
    133         "prisma_or_structured_protocol": {
    134           "applies": true,
    135           "answer": false,
    136           "justification": "No mention of PRISMA or any other structured review protocol. The paper calls Phase 1 'systematic literature retrieval and review' but does not follow a named methodology.",
    137           "source": "haiku"
    138         },
    139         "search_terms_provided": {
    140           "applies": true,
    141           "answer": true,
    142           "justification": "The full search string is quoted directly in Section 3: ['Construction industry' OR 'AEC industry' OR ...] AND ['Generative AI' OR 'GPT' OR 'ChatGPT' OR ...].",
    143           "source": "haiku"
    144         },
    145         "databases_listed": {
    146           "applies": true,
    147           "answer": true,
    148           "justification": "Scopus, Web of Science, and ScienceDirect are explicitly listed and their selection is justified by 'broad coverage and rigorous indexing.'",
    149           "source": "haiku"
    150         },
    151         "screening_process_documented": {
    152           "applies": true,
    153           "answer": true,
    154           "justification": "Section 3 documents counts at each stage: 79 initial results → 10 after title/abstract screening → 6 after full-text review (4 original research, 2 review) → 6 peer-reviewed after snowball searching.",
    155           "source": "haiku"
    156         },
    157         "review_scope_justified": {
    158           "applies": true,
    159           "answer": false,
    160           "justification": "Database selection is briefly justified but no date range for the search is specified, no justification is given for excluding other databases (e.g., ACM DL, IEEE Xplore), and the topical boundary between generative and non-generative AI papers is not operationalized.",
    161           "source": "haiku"
    162         }
    163       },
    164       "synthesis_quality": {
    165         "conflicting_findings_acknowledged": {
    166           "applies": true,
    167           "answer": false,
    168           "justification": "With only 6 reviewed papers, the synthesis presents a uniformly positive narrative about potential. No conflicting findings or contradictory evidence from the reviewed literature is identified or discussed.",
    169           "source": "haiku"
    170         },
    171         "quality_assessment_of_sources": {
    172           "applies": true,
    173           "answer": false,
    174           "justification": "No quality rubric, risk-of-bias assessment, or structured evaluation of the 6 source papers is applied. Papers are summarized in Table 9 by objective, methods, and contributions without methodological critique.",
    175           "source": "haiku"
    176         },
    177         "publication_bias_discussed": {
    178           "applies": true,
    179           "answer": false,
    180           "justification": "Publication bias is never mentioned. The paper does not acknowledge that the 6 identified papers may skew toward successful applications, nor that the expert panel's optimistic outlook may reflect selection bias.",
    181           "source": "haiku"
    182         },
    183         "quantitative_synthesis_present": {
    184           "applies": true,
    185           "answer": false,
    186           "justification": "The synthesis of the 6 reviewed papers is entirely narrative (Table 9 summary). Quantitative results appear only for the original case study, not as aggregated synthesis of reviewed evidence.",
    187           "source": "haiku"
    188         },
    189         "recommendations_supported_by_evidence": {
    190           "applies": true,
    191           "answer": false,
    192           "justification": "The 70+ 'potential opportunities' enumerated in Tables 11-20 derive from an 11-person Delphi panel, not from the reviewed empirical literature. The framework in Section 5 is presented as validated guidance but rests on speculative expert opinion layered on a review of only 6 papers.",
    193           "source": "haiku"
    194         }
    195       }
    196     }
    197   },
    198   "claims": [
    199     {
    200       "claim": "RAG improves GPT-4 performance on construction contract queries by 5.2%, 9.4%, and 4.8% in quality, relevance, and reproducibility respectively.",
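    201       "evidence": "Table 21 comparison: GPT-4 baseline scores 3.87/4.01/4.53 vs. GPT-4+RAG scores 4.13/4.48/4.77 across 3 raters on 20 questions from a single contract document. The stated percentages equal the score differences as a share of the 5-point scale (e.g., (4.13 - 3.87) / 5 = 5.2%), not relative gains over the baseline.",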
    202       "supported": "moderate"
    203     },
    204     {
    205       "claim": "Generative AI adoption in construction is at very early stages, with only 6 peer-reviewed papers found at the intersection of generative AI and construction.",
    206       "evidence": "Systematic literature search across Scopus, Web of Science, and ScienceDirect returned 79 results, narrowed to 6 peer-reviewed papers after screening and snowball searching.",
    207       "supported": "strong"
    208     },
    209     {
    210       "claim": "The global construction industry has average annual productivity growth of only 1%, far below the global economy's 2.8% and manufacturing's 3.6%.",
    211       "evidence": "Cited to McKinsey Global Institute report (Barbosa et al., 2017), reference [1] in the paper.",
    212       "supported": "moderate"
    213     },
    214     {
    215       "claim": "Expert Delphi panel identified at least 7 generative AI opportunity applications for each of text, image, and video modalities in construction.",
    216       "evidence": "11 of 15 invited experts participated; thematic analysis of qualitative responses produced Tables 11–20 enumerating opportunities per modality and project phase.",
    217       "supported": "weak"
    218     },
    219     {
    220       "claim": "RAG reduces hallucination in GPT-4 responses to construction contract queries by grounding outputs in retrieved document passages.",
    221       "evidence": "Figure 11 screenshots show GPT-4 hallucinating GCC Clause 44 details absent from the contract while GPT-4+RAG correctly cites the price adjustment formula. GPT-4 answer rate 100% vs RAG 90% (RAG declines to answer 2 questions it cannot ground).",
    222       "supported": "moderate"
    223     }
    224   ],
    225   "methodology_tags": [
    226     "qualitative",
    227     "case-study"
    228   ],
    229   "key_findings": "The systematic literature review found only 6 peer-reviewed papers on generative AI in construction, confirming the field is nascent. An 11-expert Delphi study identified numerous speculative opportunities across text, image, and video modalities organized by construction project phase. A RAG-augmented GPT-4 system outperformed baseline GPT-4 on querying a single construction contract document, improving quality, relevance, and reproducibility by 5.2%, 9.4%, and 4.8% respectively, while reducing hallucination at the cost of a lower answer rate (90% vs 100%). The paper proposes a 5-step framework for construction firms to develop custom generative AI models but does not validate it beyond the single contract case study.",
    230   "red_flags": [
    231     {
    232       "flag": "Evidence base of 6 papers for broad framework",
    233       "detail": "70+ 'potential opportunities' are enumerated across Tables 11–20, but the underlying systematic review identified only 6 peer-reviewed papers. The bulk of the paper's content rests on speculative expert opinion, not reviewed evidence."
    234     },
    235     {
    236       "flag": "Single contract case study generalizability",
    237       "detail": "The quantitative evaluation uses one contract document from one project, 3 expert raters, and 20 questions with no inter-rater reliability reported. Performance conclusions cannot generalize to other contract types, projects, or domains."
    238     },
    239     {
    240       "flag": "No inter-rater reliability for Delphi or evaluation metrics",
    241       "detail": "Neither the Delphi thematic coding nor the expert quality/relevance/reproducibility ratings report inter-rater agreement statistics, making both subjective assessments unverifiable."
    242     },
    243     {
    244       "flag": "No year range in systematic search",
    245       "detail": "Section 3 does not specify any date range for the literature search, making the scope of 'state-of-the-art' undefined and the search non-reproducible in terms of temporal coverage."
    246     },
    247     {
    248       "flag": "Speculative content presented as validated",
    249       "detail": "Tables 11–20 present 'potential opportunities' from the expert panel as structured findings without acknowledging that none have been empirically demonstrated in construction contexts."
    250     }
    251   ],
    252   "cited_papers": [
    253     {
    254       "title": "GPT models in construction industry: Opportunities, limitations, and a use case validation",
    255       "relevance": "Direct predecessor on LLM applications in construction from overlapping author team; most closely related prior work."
    256     },
    257     {
    258       "title": "Opportunities and Challenges of Generative AI in Construction Industry: Focusing on Adoption of Text-Based Models",
    259       "relevance": "Prior survey on similar topic that this paper aims to extend with broader modality coverage and a framework."
    260     },
    261     {
    262       "title": "Generative AI design for building structures",
    263       "relevance": "One of the 6 reviewed papers; structural design application of generative AI in construction."
    264     },
    265     {
    266       "title": "Dynamic prompt-based virtual assistant framework for BIM information search",
    267       "relevance": "One of the 6 reviewed papers; GPT-based information retrieval in construction BIM context."
    268     },
    269     {
    270       "title": "Harnessing the Power of LLMs in Practice: A Survey on ChatGPT and Beyond",
    271       "relevance": "General LLM survey providing foundational context for construction-specific applications."
    272     },
    273     {
    274       "title": "Conversational artificial intelligence in the AEC industry: A review of present status, challenges and opportunities",
    275       "relevance": "Related review of conversational AI (non-generative) in AEC, providing context for the generative AI gap this paper addresses."
    276     },
    277     {
    278       "title": "Active Retrieval Augmented Generation",
    279       "relevance": "Technical foundation for the RAG pipeline implemented in the case study."
    280     }
    281   ],
    282   "engagement_factors": {
    283     "practical_relevance": {
    284       "score": 2,
    285       "justification": "Proposes a framework and demonstrates a RAG case study for construction contract querying, but no code or tool is released for practitioners to use."
    286     },
    287     "surprise_contrarian": {
    288       "score": 0,
    289       "justification": "Confirms the expected narrative that generative AI has potential in construction but adoption is early — no surprising or contrarian findings."
    290     },
    291     "fear_safety": {
    292       "score": 1,
    293       "justification": "Discusses hallucination risks and safety concerns of using generative AI in safety-critical construction tasks, but does not demonstrate novel attacks or existential risks."
    294     },
    295     "drama_conflict": {
    296       "score": 0,
    297       "justification": "No controversy or conflict — the paper presents a balanced view of opportunities and challenges."
    298     },
    299     "demo_ability": {
    300       "score": 0,
    301       "justification": "No code, demo, or tool is released despite describing a Streamlit-based interface."
    302     },
    303     "brand_recognition": {
    304       "score": 1,
    305       "justification": "Uses GPT-4 (OpenAI) as the base model, but the paper is from academic authors at Hong Kong Polytechnic University, not a famous AI lab."
    306     }
    307   },
    308   "hn_data": {
    309     "threads": [
    310       {
    311         "hn_id": "39453382",
    312         "title": "UFO: A UI-Focused Agent for Windows OS Interaction",
    313         "points": 1,
    314         "comments": 0,
    315         "url": "https://news.ycombinator.com/item?id=39453382"
    316       }
    317     ],
    318     "top_points": 1,
    319     "total_points": 1,
    320     "total_comments": 0
    321   }
    322 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs