scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (18824B)
      1 {
      2   "paper": {
      3     "title": "Artificial intelligence assistance in foresight research: Enhancing technology assessment through data-driven methods",
      4     "authors": ["Ewa Chodakowska", "Wojciech Danilczuk", "Joanicjusz Nazarko"],
      5     "year": 2026,
      6     "venue": "Advances in Science and Technology Research Journal",
      7     "doi": "10.12913/22998624/211285"
      8   },
      9   "checklist": {
     10     "artifacts": {
     11       "code_released": {
     12         "applies": true,
     13         "answer": false,
     14         "justification": "No code repository or archive link is provided. The paper mentions using Python code generated with GPT models but does not release it."
     15       },
     16       "data_released": {
     17         "applies": true,
     18         "answer": false,
     19         "justification": "The NT FOR Podlaskie 2020 dataset used in the case study is not released. Appendices list technology and criteria names but not the raw evaluation data."
     20       },
     21       "environment_specified": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "No environment specifications, library versions, or dependency files are provided. The paper mentions Python but gives no further detail."
     25       },
     26       "reproduction_instructions": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "No step-by-step reproduction instructions are included. The methodological formulations section describes algorithms abstractly but provides no runnable instructions."
     30       }
     31     },
     32     "statistical_methodology": {
     33       "confidence_intervals_or_error_bars": {
     34         "applies": true,
     35         "answer": false,
     36         "justification": "No confidence intervals or error bars are reported for any clustering or correlation results."
     37       },
     38       "significance_tests": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "No statistical significance tests are used. Correlations between expert knowledge and ratings are reported without p-values or significance tests."
     42       },
     43       "effect_sizes_reported": {
     44         "applies": true,
     45         "answer": false,
     46         "justification": "Correlation coefficients are described as 'negative and moderate' but no specific coefficient values or effect size measures are reported in the text."
     47       },
     48       "sample_size_justified": {
     49         "applies": true,
     50         "answer": false,
     51         "justification": "The study uses 19 experts and 57 technologies with no justification for why these numbers are adequate for the claims made."
     52       },
     53       "variance_reported": {
     54         "applies": true,
     55         "answer": false,
     56         "justification": "No variance, standard deviation, or spread measures are reported for the clustering or evaluation results."
     57       }
     58     },
     59     "evaluation_design": {
     60       "baselines_included": {
     61         "applies": true,
     62         "answer": true,
     63         "justification": "The traditional four-cluster approach based on mean scores from the NT FOR project serves as a baseline, and AI-assisted clustering methods are compared against it (Table 3 vs Tables 4-6)."
     64       },
     65       "baselines_contemporary": {
     66         "applies": true,
     67         "answer": true,
     68         "justification": "The baseline is the actual method used in the original NT FOR Podlaskie 2020 project, which is the appropriate comparison for this case study."
     69       },
     70       "ablation_study": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "Multiple clustering methods are applied (hierarchical, two-dimensional, co-clustering) but no ablation study isolates specific component contributions."
     74       },
     75       "multiple_metrics": {
     76         "applies": true,
     77         "answer": false,
     78         "justification": "Only the silhouette score is mentioned as a validation metric for clustering. No other evaluation metrics are reported."
     79       },
     80       "human_evaluation": {
     81         "applies": true,
     82         "answer": false,
     83         "justification": "The paper claims AI-assisted clustering provides 'deeper insights' than traditional averaging, but no human evaluation validates whether the clusters are more useful to decision-makers."
     84       },
     85       "held_out_test_set": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "This is an exploratory clustering study, not a predictive modeling task. Held-out test sets are not applicable."
     89       },
     90       "per_category_breakdown": {
     91         "applies": true,
     92         "answer": true,
     93         "justification": "Results are broken down by attractiveness and feasibility dimensions separately, with per-cluster technology assignments shown in Tables 3-6."
     94       },
     95       "failure_cases_discussed": {
     96         "applies": true,
     97         "answer": true,
     98         "justification": "The paper acknowledges that AI-assisted clustering 'does not significantly change the overall distribution from the original results' and that GenAI suggestions 'remained within conventional frameworks,' which are effectively negative findings."
     99       },
    100       "negative_results_reported": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "The paper honestly reports that 'the diversity of the proposed analyses demonstrates various interpretation possibilities but does not fundamentally influence the achievement of the main goal' and that AI suggestions were 'unexceptional and rather modest.'"
    104       }
    105     },
    106     "claims_and_evidence": {
    107       "abstract_claims_supported": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "The abstract's claims are moderate and hedged: AI provides 'a scalable alternative' and the value 'lies more in a posteriori analysis.' These are supported by the case study results."
    111       },
    112       "causal_claims_justified": {
    113         "applies": true,
    114         "answer": false,
    115         "justification": "The paper claims 'higher expert knowledge is associated with lower evaluations' based on correlations, but uses causal language ('likely due to greater awareness of risks'). No causal identification strategy is used."
    116       },
    117       "generalization_bounded": {
    118         "applies": true,
    119         "answer": false,
    120         "justification": "The paper draws general conclusions about AI in foresight ('AI undoubtedly brings more benefits than risks') from a single case study on 57 nanotechnologies with 19 experts in one Polish region."
    121       },
    122       "alternative_explanations_discussed": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "No alternative explanations are discussed for the clustering results or expert behavior patterns. The negative correlation between knowledge and ratings is attributed to awareness of risks without considering other factors."
    126       }
    127     },
    128     "setup_transparency": {
    129       "model_versions_specified": {
    130         "applies": true,
    131         "answer": false,
    132         "justification": "The paper mentions 'ChatGPT 4o' and 'M365 Copilot' without specific version identifiers, snapshot dates, or API versions."
    133       },
    134       "prompts_provided": {
    135         "applies": true,
    136         "answer": true,
    137         "justification": "The actual prompt used for the GenAI consultation is provided in full in the paper (the detailed dataset description prompt asking for ML technique recommendations)."
    138       },
    139       "hyperparameters_reported": {
    140         "applies": true,
    141         "answer": false,
    142         "justification": "No hyperparameters are reported for the LLM queries (temperature, etc.) or for the clustering algorithms (linkage method details, or how the number of clusters was selected beyond a mention of the silhouette score)."
    143       },
    144       "scaffolding_described": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No agentic scaffolding is used. The AI tools are used as simple prompt-response systems."
    148       },
    149       "data_preprocessing_documented": {
    150         "applies": true,
    151         "answer": false,
    152         "justification": "The paper does not describe how the raw expert evaluation data was preprocessed before clustering. It is unclear whether data was normalized, how missing values were handled, or what transformations were applied."
    153       }
    154     },
    155     "limitations_and_scope": {
    156       "limitations_section_present": {
    157         "applies": true,
    158         "answer": true,
    159         "justification": "The 'Opportunities and Considerations' section and Table 7 discuss limitations including hallucination risks, need for human oversight, robustness issues, and limited AI creativity."
    160       },
    161       "threats_to_validity_specific": {
    162         "applies": true,
    163         "answer": true,
    164         "justification": "The paper identifies specific threats: 'the original project under review did not fully address the issue of attribute weighting,' the variability of results depending on method/experts/attributes, and the temporal limitation of AI-based assessment relying on different information than original experts."
    165       },
    166       "scope_boundaries_stated": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "No explicit scope boundaries are stated about what the results do NOT show. The paper does not clearly delimit the generalizability of findings from one regional foresight project."
    170       }
    171     },
    172     "data_integrity": {
    173       "raw_data_available": {
    174         "applies": true,
    175         "answer": false,
    176         "justification": "The raw expert evaluation data from the NT FOR project is not made available for verification."
    177       },
    178       "data_collection_described": {
    179         "applies": true,
    180         "answer": true,
    181         "justification": "The literature review search queries, databases (Scopus, IEEE Xplore, Web of Science), time window (2017-2025), and result counts are documented in Table 1. The case study data collection is described by reference to the NT FOR Podlaskie 2020 project."
    182       },
    183       "recruitment_methods_described": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "The experts were recruited for the original NT FOR project, not for this study. This paper reanalyzes existing project data. No new human participants were recruited."
    187       },
    188       "data_pipeline_documented": {
    189         "applies": true,
    190         "answer": false,
    191         "justification": "For the literature review, the filtering pipeline is minimal: 259 records remain after deduplication, but no inclusion/exclusion criteria beyond the search query are stated. For the case study, the pipeline from raw evaluations to clustering input is not documented."
    192       }
    193     },
    194     "conflicts_of_interest": {
    195       "funding_disclosed": {
    196         "applies": true,
    197         "answer": true,
    198         "justification": "Funding is disclosed in the Acknowledgments section: 'VIA CARPATIA Universities of Technology Network' under contract no. MEiN/2022/DPI/2577."
    199       },
    200       "affiliations_disclosed": {
    201         "applies": true,
    202         "answer": true,
    203         "justification": "Author affiliations at Bialystok University of Technology and Lublin University of Technology are clearly stated."
    204       },
    205       "funder_independent_of_outcome": {
    206         "applies": true,
    207         "answer": true,
    208         "justification": "The funder (Polish Minister of Science, inter-university research program) has no apparent financial interest in the study's outcomes regarding AI in foresight."
    209       },
    210       "financial_interests_declared": {
    211         "applies": true,
    212         "answer": false,
    213         "justification": "No competing interests or financial interests statement is present in the paper."
    214       }
    215     },
    216     "contamination": {
    217       "training_cutoff_stated": {
    218         "applies": false,
    219         "answer": false,
    220         "justification": "The paper does not evaluate model capability on a benchmark. It uses GenAI to suggest analytical methods and generate code, not to measure model performance."
    221       },
    222       "train_test_overlap_discussed": {
    223         "applies": false,
    224         "answer": false,
    225         "justification": "No benchmark evaluation is performed. Contamination is not applicable."
    226       },
    227       "benchmark_contamination_addressed": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "No benchmark evaluation is performed."
    231       }
    232     },
    233     "human_studies": {
    234       "pre_registered": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "This paper reanalyzes existing data from the NT FOR project. No new human participants were recruited for this study."
    238       },
    239       "irb_or_ethics_approval": {
    240         "applies": false,
    241         "answer": false,
    242         "justification": "No new human subjects research was conducted. The paper analyzes pre-existing project data."
    243       },
    244       "demographics_reported": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No new human participants. The 19 experts were from the original NT FOR project, not recruited for this study."
    248       },
    249       "inclusion_exclusion_criteria": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No new human participants were recruited."
    253       },
    254       "randomization_described": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "Not an experimental study with human participants."
    258       },
    259       "blinding_described": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "Not an experimental study with human participants."
    263       },
    264       "attrition_reported": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No new human participants were recruited."
    268       }
    269     },
    270     "cost_and_practicality": {
    271       "inference_cost_reported": {
    272         "applies": true,
    273         "answer": false,
    274         "justification": "No API costs, token counts, or computational costs are reported for the GenAI queries or clustering analyses."
    275       },
    276       "compute_budget_stated": {
    277         "applies": true,
    278         "answer": false,
    279         "justification": "No computational budget, hardware specifications, or processing time is reported."
    280       }
    281     }
    282   },
    283   "claims": [
    284     {
    285       "claim": "AI-generated analytical suggestions remained within conventional frameworks and did not go beyond the analytical canon.",
    286       "evidence": "Table 2 shows ChatGPT and Copilot both suggested standard methods (PCA, k-means, hierarchical clustering, etc.). Authors note suggestions were 'unexceptional and rather modest.'",
    287       "supported": "strong"
    288     },
    289     {
    290       "claim": "AI-assisted clustering provides deeper insights than traditional averaging methods for technology assessment.",
    291       "evidence": "Comparison of Tables 3-6 showing traditional four-cluster approach vs hierarchical clustering, two-dimensional clustering, and co-clustering. However, the paper also states the clustering 'does not significantly change the overall distribution.'",
    292       "supported": "weak"
    293     },
    294     {
    295       "claim": "Higher expert knowledge is associated with lower evaluations of both attractiveness and feasibility.",
    296       "evidence": "Pearson and Spearman correlations reported as 'negative and moderate' with domain-level correlation heatmaps in Figures 10-11, but no specific coefficient values are given in the text.",
    297       "supported": "moderate"
    298     },
    299     {
    300       "claim": "Expert similarity network analysis identified a consensus core and distinct outlier experts.",
    301       "evidence": "Figure 12 shows the network. Experts E_4, E_8, E_10 identified as outliers; E_7, E_15 as partially aligned. But no quantitative threshold for outlier classification is stated.",
    302       "supported": "moderate"
    303     }
    304   ],
    305   "methodology_tags": ["case-study", "meta-analysis"],
    306   "key_findings": "This paper combines a literature review of AI in foresight research with a case study applying AI-suggested clustering methods to data from the NT FOR Podlaskie 2020 technology assessment project. The key finding is that GenAI tools (ChatGPT 4o, Copilot) suggest only conventional analytical methods, and while AI-assisted clustering provides multiple analytical perspectives on the data, it does not fundamentally change the identification of key technologies compared to simple averaging. Expert network analysis revealed a consensus core among evaluators with identifiable outliers, and correlation analysis showed a negative association between expert knowledge level and technology ratings.",
    307   "red_flags": [
    308     {
    309       "flag": "No quantitative validation of claimed benefits",
    310       "detail": "The paper claims AI-assisted analysis provides 'deeper insights' but offers no quantitative measure comparing the utility of AI-suggested clustering vs traditional methods. The paper itself acknowledges the results do not significantly change outcomes."
    311     },
    312     {
    313       "flag": "Overgeneralization from single case study",
    314       "detail": "Broad claims about AI in foresight ('AI undoubtedly brings more benefits than risks') are made based on a single regional nanotechnology foresight project with 19 experts."
    315     },
    316     {
    317       "flag": "Missing statistical rigor",
    318       "detail": "Correlation coefficients are described qualitatively ('negative and moderate') without reporting actual values, p-values, or confidence intervals. Clustering validation is limited to a single mention of silhouette score."
    319     }
    320   ],
    321   "cited_papers": [
    322     {
    323       "title": "Attention Is All You Need",
    324       "authors": ["Vaswani A."],
    325       "year": 2017,
    326       "relevance": "Foundational transformer architecture paper underlying modern LLMs used in this study."
    327     },
    328     {
    329       "title": "AI-Driven Strategic Foresight: Anticipating Future Trends and Modelling Business Strategies",
    330       "authors": ["Qader Ismail Alnajem A.A."],
    331       "year": 2024,
    332       "doi": "10.1109/DASA63652.2024.10836619",
    333       "relevance": "Directly addresses AI-driven strategic foresight methods and business strategy modeling."
    334     },
    335     {
    336       "title": "Enhancing SME resilience through artificial intelligence and strategic foresight: A framework for sustainable competitiveness",
    337       "authors": ["Carayannis E.G."],
    338       "year": 2025,
    339       "doi": "10.1016/j.techsoc.2025.102835",
    340       "relevance": "Framework combining AI and foresight for organizational resilience and competitiveness."
    341     },
    342     {
    343       "title": "Artificial Intelligence in Strategic Foresight – Current Practices and Future Application",
    344       "authors": ["Brandtner P.", "Mates M."],
    345       "relevance": "Survey of current AI practices in strategic foresight, directly relevant to AI-assisted technology assessment."
    346     },
    347     {
    348       "title": "New technology foresight method based on intelligent knowledge management",
    349       "authors": ["Zhang L.", "Huang S."],
    350       "year": 2020,
    351       "doi": "10.1007/s42524-019-0062-z",
    352       "relevance": "Proposes AI-based knowledge management methods for technology foresight."
    353     }
    354   ]
    355 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs