ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan-v5.json (18124B)


      1 {
      2   "scan_version": 5,
      3   "paper_type": "survey",
      4   "paper": {
      5     "title": "Generative AI in the Construction Industry: A State-of-the-art Analysis",
      6     "authors": [
      7       "R. Taiwo",
      8       "I. T. Bello",
      9       "S. Abdulai",
     10       "Abdul-Mugis Yussif",
     11       "B. Salami"
     12     ],
     13     "year": 2024,
     14     "venue": "arXiv.org",
     15     "arxiv_id": "2402.09939",
     16     "doi": "10.48550/arXiv.2402.09939"
     17   },
     18   "checklist": {
     19     "claims_and_evidence": {
     20       "abstract_claims_supported": {
     21         "applies": true,
     22         "answer": true,
     23         "justification": "The abstract's headline claim — RAG outperforms GPT-4 by 5.2%, 9.4%, and 4.8% on quality, relevance, and reproducibility — is supported by Table 21 data. The claim of a literature gap is supported by the search returning only 6 peer-reviewed papers.",
     24         "source": "haiku"
     25       },
     26       "causal_claims_justified": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "The paper claims RAG 'improves' GPT-4, framing it causally, but the case study uses a single contract document, three expert raters, and 20 questions with no statistical significance testing or confidence intervals — insufficient to establish a causal improvement.",
     30         "source": "haiku"
     31       },
     32       "generalization_bounded": {
     33         "applies": true,
     34         "answer": false,
     35         "justification": "Sections 4.2.1–4.2.9 make sweeping claims about generative AI 'revolutionizing' and 'transforming' construction across 9 modalities, based almost entirely on speculative expert opinion and a single-document case study; these are not bounded to the tested setting.",
     36         "source": "haiku"
     37       },
     38       "alternative_explanations_discussed": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "For the RAG vs. GPT-4 comparison, no alternative explanations are considered (e.g., chunking strategy effects, rater familiarity with GPT-4 outputs, question selection bias favoring retrieval-based approaches).",
     42         "source": "haiku"
     43       },
     44       "proxy_outcome_distinction": {
     45         "applies": true,
     46         "answer": false,
     47         "justification": "Expert ratings of quality, relevance, and reproducibility on a 5-point scale are used as proxies for real-world utility, but the paper does not discuss the gap between these ratings and actual practitioner productivity or decision quality.",
     48         "source": "haiku"
     49       }
     50     },
     51     "limitations_and_scope": {
     52       "limitations_section_present": {
     53         "applies": true,
     54         "answer": true,
     55         "justification": "Section 6.3 'Model limitation' and the conclusion (Section 7) both contain dedicated discussion of limitations, including the single contract document, small expert panel, and restricted database coverage.",
     56         "source": "haiku"
     57       },
     58       "threats_to_validity_specific": {
     59         "applies": true,
     60         "answer": true,
     61         "justification": "Specific threats are named: literature review confined to three databases (possibly missing relevant articles), expert panel restricted by resource constraints, case study using only one contract and one base LLM due to API costs.",
     62         "source": "haiku"
     63       },
     64       "scope_boundaries_stated": {
     65         "applies": true,
     66         "answer": true,
     67         "justification": "The paper explicitly states the review is bounded to Scopus, Web of Science, and ScienceDirect; the case study generalizes only to the single contract document used; and the expert panel is limited to 11 professionals.",
     68         "source": "haiku"
     69       }
     70     },
     71     "conflicts_of_interest": {
     72       "funding_disclosed": {
     73         "applies": true,
     74         "answer": true,
     75         "justification": "The acknowledgment section states: 'This research is supported by the Department of Building and Real Estate, The Hong Kong Polytechnic University, and the Centre for Advances in Reliability and Safety (CAiRS).'",
     76         "source": "haiku"
     77       },
     78       "affiliations_disclosed": {
     79         "applies": true,
     80         "answer": true,
     81         "justification": "All author affiliations (Hong Kong Polytechnic University, CAiRS, Cardiff Metropolitan University, Leeds Beckett University) are disclosed in the paper header.",
     82         "source": "haiku"
     83       },
     84       "funder_independent_of_outcome": {
     85         "applies": true,
     86         "answer": true,
     87         "justification": "Funders are academic institutions (PolyU and CAiRS research centre), not commercial AI vendors or construction firms with financial interest in the outcome of generative AI adoption claims.",
     88         "source": "haiku"
     89       },
     90       "financial_interests_declared": {
     91         "applies": true,
     92         "answer": false,
     93         "justification": "There is no competing interests statement, no declaration of patents, equity, or consulting arrangements — only a funding acknowledgment.",
     94         "source": "haiku"
     95       }
     96     },
     97     "scope_and_framing": {
     98       "key_terms_defined": {
     99         "applies": true,
    100         "answer": true,
    101         "justification": "Section 2 provides explicit definitions of generative AI, LLMs, and the seven foundational algorithm families (GANs, VAEs, transformers, diffusion models, etc.) with technical descriptions and examples.",
    102         "source": "haiku"
    103       },
    104       "intended_contribution_clear": {
    105         "applies": true,
    106         "answer": true,
    107         "justification": "Three explicit objectives are stated in the introduction: (1) review/categorize opportunities and challenges, (2) propose a framework for building custom LGMs, and (3) demonstrate the framework via a case study.",
    108         "source": "haiku"
    109       },
    110       "engagement_with_prior_work": {
    111         "applies": true,
    112         "answer": true,
    113         "justification": "The paper builds directly on Saka et al. (2024) and Ghimire et al. (2024) on GPT models in construction, and situates its contribution relative to each of the 6 identified papers in Table 9, showing how this work extends beyond prior partial reviews.",
    114         "source": "haiku"
    115       }
    116     }
    117   },
    118   "type_checklist": {
    119     "survey": {
    120       "search_and_selection": {
    121         "search_strategy_reproducible": {
    122           "applies": true,
    123           "answer": true,
    124           "justification": "The exact Boolean search string is provided in Section 3, including all OR-terms for both construction and generative AI sides, enabling full reproduction of the initial search.",
    125           "source": "haiku"
    126         },
    127         "inclusion_exclusion_explicit": {
    128           "applies": true,
    129           "answer": false,
    130           "justification": "Criteria are described narratively (title/abstract screening, English-only, full-text availability) but are not presented as a formal, explicitly labelled inclusion/exclusion list applied consistently across all screening stages.",
    131           "source": "haiku"
    132         },
    133         "prisma_or_structured_protocol": {
    134           "applies": true,
    135           "answer": false,
    136           "justification": "The paper follows a four-phase approach but does not reference PRISMA or any other structured review protocol, and no PRISMA flow diagram is provided.",
    137           "source": "haiku"
    138         },
    139         "search_terms_provided": {
    140           "applies": true,
    141           "answer": true,
    142           "justification": "The complete search string with all Boolean operators and terms is reproduced verbatim in Section 3, Phase 1.",
    143           "source": "haiku"
    144         },
    145         "databases_listed": {
    146           "applies": true,
    147           "answer": true,
    148           "justification": "Scopus, Web of Science, and ScienceDirect are explicitly named with rationale ('broad coverage and rigorous indexing of peer-reviewed publications').",
    149           "source": "haiku"
    150         },
    151         "screening_process_documented": {
    152           "applies": true,
    153           "answer": true,
    154           "justification": "The paper documents counts at successive stages: 79 initial results → 10 after title/abstract screening → 6 original/review articles after full-text review and snowball searching.",
    155           "source": "haiku"
    156         },
    157         "review_scope_justified": {
    158           "applies": true,
    159           "answer": false,
    160           "justification": "The choice of three databases is weakly justified by 'broad coverage'; however, no year range is stated or justified, and the exclusion of Google Scholar, IEEE Xplore, or ACM Digital Library — more relevant for AI research — is not addressed.",
    161           "source": "haiku"
    162         }
    163       },
    164       "synthesis_quality": {
    165         "conflicting_findings_acknowledged": {
    166           "applies": true,
    167           "answer": false,
    168           "justification": "With only 6 papers reviewed, no conflicting empirical findings are acknowledged; the synthesis presents a uniformly positive picture of generative AI potential without noting disagreements between reviewed studies.",
    169           "source": "haiku"
    170         },
    171         "quality_assessment_of_sources": {
    172           "applies": true,
    173           "answer": false,
    174           "justification": "The 6 reviewed papers are summarized in Table 9 by objective, method, and contribution, but no quality rubric, risk-of-bias assessment, or methodological rating is applied to any source paper.",
    175           "source": "haiku"
    176         },
    177         "publication_bias_discussed": {
    178           "applies": true,
    179           "answer": false,
    180           "justification": "Publication bias is never mentioned; the paper does not acknowledge that the 6 identified papers likely reflect positive outcomes and that null or negative results would be less likely to appear in those databases.",
    181           "source": "haiku"
    182         },
    183         "quantitative_synthesis_present": {
    184           "applies": true,
    185           "answer": false,
    186           "justification": "The survey component contains only narrative synthesis of 6 papers; the quantitative results in Table 21 are from the paper's own case study experiment, not aggregation across reviewed sources.",
    187           "source": "haiku"
    188         },
    189         "recommendations_supported_by_evidence": {
    190           "applies": true,
    191           "answer": false,
    192           "justification": "The five-step LGM deployment framework and the opportunity tables (50+ entries across Tables 11–19) are driven primarily by expert opinion from an 11-person Delphi panel and speculative reasoning, not by empirical evidence from the reviewed literature.",
    193           "source": "haiku"
    194         }
    195       }
    196     }
    197   },
    198   "claims": [
    199     {
    200       "claim": "RAG improves baseline GPT-4 by 5.2%, 9.4%, and 4.8% on quality, relevance, and reproducibility respectively for construction contract querying.",
    201       "evidence": "Table 21 comparing GPT-4 (3.87/4.01/4.53) vs GPT-4+RAG (4.13/4.48/4.77) across 20 expert-validated questions evaluated by 3 raters.",
    202       "supported": "moderate"
    203     },
    204     {
    205       "claim": "Generative AI adoption in the construction industry is in very early stages, with only 6 peer-reviewed articles identified at the intersection.",
    206       "evidence": "Systematic search of Scopus, Web of Science, and ScienceDirect returned 79 results, narrowed to 10, yielding only 6 relevant papers after full-text screening and snowball search.",
    207       "supported": "strong"
    208     },
    209     {
    210       "claim": "Baseline GPT-4 hallucinates content not present in the contract document when queried directly.",
    211       "evidence": "Figure 11 and Table 21 show GPT-4 fabricating details about GCC Clause 44 on question 15; quality scores as low as 1.0/5.0 on specific questions.",
    212       "supported": "strong"
    213     },
    214     {
    215       "claim": "The construction industry has annual productivity growth of only 1%, far below the global economy (2.8%) and manufacturing (3.6%).",
    216       "evidence": "Cited from McKinsey Global Institute (2017) report; not original data from this paper.",
    217       "supported": "moderate"
    218     },
    219     {
    220       "claim": "At least 7 generative AI opportunities exist for each of 9 input-output modalities across the construction project lifecycle.",
    221       "evidence": "Tables 11–19 enumerate opportunities sourced from expert Delphi discussions with 11 professionals; no empirical validation of feasibility or impact.",
    222       "supported": "weak"
    223     }
    224   ],
    225   "methodology_tags": [
    226     "qualitative",
    227     "case-study",
    228     "survey"
    229   ],
    230   "key_findings": "A systematic search of three databases identified only 6 peer-reviewed papers applying generative AI to construction, confirming extremely early adoption. An 11-expert Delphi panel identified numerous speculative opportunities across text, image, and video modalities organized by project phase, plus challenges spanning domain knowledge gaps, data scarcity, adoption resistance, and ethical concerns. A RAG-augmented GPT-4 system received higher expert ratings than baseline GPT-4 on contract document querying (5.2%, 9.4%, and 4.8% improvements in quality, relevance, and reproducibility, respectively), though based on a single contract, three raters, and 20 questions without statistical testing. A five-step framework for construction firms to build custom LGMs is proposed but validated only through this minimal case study.",
    231   "red_flags": [
    232     {
    233       "flag": "Trivially small literature corpus",
    234       "detail": "The systematic review identified only 6 papers, making the 'state-of-the-art analysis' framing misleading — this is closer to a gap analysis than a survey with sufficient literature for synthesis."
    235     },
    236     {
    237       "flag": "Underpowered case study",
    238       "detail": "The RAG vs. GPT-4 comparison uses one contract document, three raters, and 20 questions with no statistical significance testing — too thin to support the percentage improvement claims made in the abstract and conclusion."
    239     },
    240     {
    241       "flag": "Speculative opportunity tables without empirical grounding",
    242       "detail": "Sections 4.2.1–4.2.9 present 50+ generative AI 'opportunities' derived from expert opinion, with no empirical evidence that any of these applications are currently feasible or have been validated."
    243     },
    244     {
    245       "flag": "No PRISMA protocol or formal inclusion criteria",
    246       "detail": "The literature review process is described narratively without a PRISMA flowchart, formal inclusion/exclusion criteria, or inter-rater reliability for screening decisions."
    247     },
    248     {
    249       "flag": "Missing competing interests declaration",
    250       "detail": "Despite using proprietary OpenAI APIs and citing commercial tools extensively, no competing interests statement is provided."
    251     }
    252   ],
    253   "cited_papers": [
    254     {
    255       "title": "GPT Models in Construction Industry: Opportunities, Limitations, and a Use Case Validation",
    256       "relevance": "Direct predecessor paper by overlapping authors covering GPT models specifically in construction; this survey extends it to all generative AI modalities"
    257     },
    258     {
    259       "title": "Opportunities and Challenges of Generative AI in Construction Industry: Focusing on Adoption of Text-Based Models",
    260       "relevance": "Most closely related prior survey; this paper claims to extend it with broader modality coverage and a deployment framework"
    261     },
    262     {
    263       "title": "Generative AI Design for Building Structures",
    264       "relevance": "One of the 6 empirical papers found in the systematic review; covers GAN-based structural design generation"
    265     },
    266     {
    267       "title": "Dynamic Prompt-Based Virtual Assistant Framework for BIM Information Search",
    268       "relevance": "One of the 6 systematic review papers; demonstrates GPT-BIM integration for NL-based information retrieval"
    269     },
    270     {
    271       "title": "Automated Detection of Contractual Risk Clauses from Construction Specifications Using BERT",
    272       "relevance": "One of the 6 systematic review papers; directly relevant to the paper's case study on contract document analysis"
    273     },
    274     {
    275       "title": "Transformer Machine Learning Language Model for Auto-Alignment of Long-Term and Short-Term Plans in Construction",
    276       "relevance": "One of the 6 systematic review papers; uses GPT-2 for construction schedule planning alignment"
    277     },
    278     {
    279       "title": "Active Retrieval Augmented Generation",
    280       "relevance": "Technical foundation for the RAG pipeline implemented in the case study"
    281     },
    282     {
    283       "title": "Conversational Artificial Intelligence in the AEC Industry: A Review",
    284       "relevance": "Broader review of AI assistants in construction that contextualizes generative AI adoption"
    285     }
    286   ],
    287   "engagement_factors": {
    288     "practical_relevance": {
    289       "score": 3,
    290       "justification": "Provides a concrete five-step framework and working RAG demo that construction professionals can directly reference for GenAI adoption."
    291     },
    292     "surprise_contrarian": {
    293       "score": 1,
    294       "justification": "Uniformly promotional framing with no contrarian findings; the only mild surprise is the extremely small existing literature base (6 papers)."
    295     },
    296     "fear_safety": {
    297       "score": 1,
    298       "justification": "Discusses hallucination risks and potential for AI-generated structurally flawed designs, but these are framed as manageable challenges rather than serious safety concerns."
    299     },
    300     "drama_conflict": {
    301       "score": 0,
    302       "justification": "No controversy, conflicting claims, or adversarial framing — entirely constructive and promotional in tone."
    303     },
    304     "demo_ability": {
    305       "score": 2,
    306       "justification": "A working RAG system was built with Streamlit interface using GPT-4 and LangChain; replication requires API access but the architecture is described in sufficient detail."
    307     },
    308     "brand_recognition": {
    309       "score": 2,
    310       "justification": "Evaluates GPT-4 and Gemini Pro directly; uses ChatGPT screenshots; these brand associations increase discoverability among practitioners."
    311     }
    312   },
    313   "hn_data": {
    314     "threads": [
    315       {
    316         "hn_id": "39453382",
    317         "title": "UFO: A UI-Focused Agent for Windows OS Interaction",
    318         "points": 1,
    319         "comments": 0,
    320         "url": "https://news.ycombinator.com/item?id=39453382"
    321       }
    322     ],
    323     "top_points": 1,
    324     "total_points": 1,
    325     "total_comments": 0
    326   }
    327 }

Impressum · Datenschutz