ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

scan.json (25864B)


      1 {
      2   "paper": {
      3     "title": "Empowering Business Transformation: The Positive Impact and Ethical Considerations of Generative AI in Software Product Management - A Systematic Literature Review",
      4     "authors": ["Nishant A. Parikh"],
      5     "year": 2023,
      6     "venue": "arXiv.org",
      7     "arxiv_id": "2306.04605",
      8     "doi": "10.48550/arXiv.2306.04605"
      9   },
     10   "scan_version": 3,
     11   "active_modules": ["survey_methodology"],
     12   "methodology_tags": ["meta-analysis", "qualitative"],
     13   "key_findings": "This systematic literature review maps generative AI applications to the ISPMA software product management framework, identifying use cases across market research, customer insights, requirements engineering, code generation, UX design, and decision-making. The review finds that GenAI can enhance productivity (e.g., Copilot users completed tasks 55.8% faster), improve customer support, and automate documentation. The paper also catalogs ethical considerations including bias, transparency, data privacy, legal risks, and the 'black box' problem, framing them through the Responsible Innovation framework and GDPR principles.",
     14   "checklist": {
     15     "artifacts": {
     16       "code_released": {
     17         "applies": true,
     18         "answer": false,
     19         "justification": "No analysis code, search scripts, or data extraction tools are released. No repository URL is provided anywhere in the paper."
     20       },
     21       "data_released": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "The corpus of reviewed papers is not released as a structured dataset. No supplementary data files are provided."
     25       },
     26       "environment_specified": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "No computational environment is described. The survey could have released analysis tools with environment specifications but did not."
     30       },
     31       "reproduction_instructions": {
     32         "applies": true,
     33         "answer": false,
     34         "justification": "No step-by-step instructions for reproducing the literature search or analysis. While databases and keywords are listed (Section 5.1), the instructions are not specific enough to reproduce the exact corpus."
     35       }
     36     },
     37     "statistical_methodology": {
     38       "confidence_intervals_or_error_bars": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "This is a qualitative literature review with no statistical analysis or experiments."
     42       },
     43       "significance_tests": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No comparative quantitative claims are made by the survey itself; it is a qualitative synthesis."
     47       },
     48       "effect_sizes_reported": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No experiments are conducted. The survey reports effect sizes from reviewed papers (e.g., 55.8% faster) but does not generate its own."
     52       },
     53       "sample_size_justified": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "Qualitative literature review with no experimental sample."
     57       },
     58       "variance_reported": {
     59         "applies": false,
     60         "answer": false,
     61         "justification": "No experiments or quantitative aggregation performed by this survey."
     62       }
     63     },
     64     "evaluation_design": {
     65       "baselines_included": {
     66         "applies": true,
     67         "answer": false,
     68         "justification": "The survey does not compare itself against prior surveys or reviews of GenAI in product management. No baseline coverage analysis is provided."
     69       },
     70       "baselines_contemporary": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "No prior surveys are included as baselines for comparison, so contemporaneity cannot be assessed."
     74       },
     75       "ablation_study": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "This is a survey paper with no system or components to ablate."
     79       },
     80       "multiple_metrics": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "No experiments are conducted; no metrics are used to evaluate a system."
     84       },
     85       "human_evaluation": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "No system outputs to evaluate. This is a literature review."
     89       },
     90       "held_out_test_set": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "No experiments are conducted; no test sets are used."
     94       },
     95       "per_category_breakdown": {
     96         "applies": true,
     97         "answer": true,
     98         "justification": "The survey organizes findings by ISPMA framework categories (market analysis, positioning, customer insights, requirements engineering, development execution, UX design, decision-making) in Table 3 and Figure 1."
     99       },
    100       "failure_cases_discussed": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "The survey discusses limitations and failure modes of GenAI including hallucinations (Brand et al., 2023), black box problems (Cao et al., 2023), legal risks (Appel et al., 2023), and bias concerns (Sections 3.2, 5.3.3)."
    104       },
    105       "negative_results_reported": {
    106         "applies": true,
    107         "answer": true,
    108         "justification": "Ethical implications, legal risks, hallucination problems, and limitations of GenAI are discussed throughout Section 5.3.3 and the Results section on ethical implications, including concerns about accuracy, transparency, and job displacement."
    109       }
    110     },
    111     "claims_and_evidence": {
    112       "abstract_claims_supported": {
    113         "applies": true,
    114         "answer": true,
    115         "justification": "The abstract's claims use hedged language ('can assist', 'can help') and are broadly supported by the reviewed literature in Sections 3.1 and 6, Table 3. Each application area is backed by at least one cited study."
    116       },
    117       "causal_claims_justified": {
    118         "applies": true,
    119         "answer": false,
    120         "justification": "The title claims 'Positive Impact' and the paper states GenAI 'can significantly improve software product management activities.' These causal-sounding claims are passed through from individual studies without critically evaluating the causal designs of those studies or acknowledging confounding factors."
    121       },
    122       "generalization_bounded": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "The title 'Empowering Business Transformation' and conclusion claims like 'generative AI's practical application can significantly improve software product management activities' are far broader than the evidence supports. The review appears to cover only ~15-20 papers (the total is never explicitly stated), many from narrow domains (e.g., COVID-19 dialogue, e-commerce titles), yet generalizes to all of 'software product management.'"
    126       },
    127       "alternative_explanations_discussed": {
    128         "applies": true,
    129         "answer": false,
    130         "justification": "The survey presents the reviewed findings at face value without considering alternative explanations for reported effects. For example, the 55.8% Copilot productivity claim is reported without discussing confounds like task selection or participant skill matching."
    131       },
    132       "proxy_outcome_distinction": {
    133         "applies": true,
    134         "answer": false,
    135         "justification": "The paper freely equates proxy measures with outcomes — task completion time is treated as 'productivity,' chatbot interaction as 'customer experience improvement,' and F1 scores as 'effectiveness' — without discussing the gap between proxies and actual business outcomes."
    136       }
    137     },
    138     "setup_transparency": {
    139       "model_versions_specified": {
    140         "applies": false,
    141         "answer": false,
    142         "justification": "This is a literature review that does not use any AI models itself."
    143       },
    144       "prompts_provided": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No prompting is used; this is a literature review."
    148       },
    149       "hyperparameters_reported": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "No experiments or model usage; this is a literature review."
    153       },
    154       "scaffolding_described": {
    155         "applies": false,
    156         "answer": false,
    157         "justification": "No agentic scaffolding is used; this is a literature review."
    158       },
    159       "data_preprocessing_documented": {
    160         "applies": true,
    161         "answer": false,
    162         "justification": "Section 5.1 lists databases and keywords, and Table 2 lists inclusion/exclusion criteria per database. However, no counts are provided at any stage — the paper never states how many papers were initially found, how many were screened, or how many were ultimately included. The actual filtering criteria are vague (e.g., 'full text' and 'peer-reviewed' but no content-based screening criteria)."
    163       }
    164     },
    165     "limitations_and_scope": {
    166       "limitations_section_present": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "There is no dedicated limitations section. The conclusion briefly mentions that 'it is essential to acknowledge and address the potential limitations' of GenAI itself, but this discusses the technology's limitations, not the study's own methodological limitations."
    170       },
    171       "threats_to_validity_specific": {
    172         "applies": true,
    173         "answer": false,
    174         "justification": "No threats to validity are discussed. The paper does not acknowledge its small corpus size, lack of quality assessment, potential selection bias, or other methodological weaknesses."
    175       },
    176       "scope_boundaries_stated": {
    177         "applies": true,
    178         "answer": false,
    179         "justification": "The paper states it covers 2016-2023 and uses the ISPMA framework, but does not explicitly state what the review does NOT cover, what types of papers were excluded, or what claims the authors are NOT making. The broad title suggests comprehensive coverage that is not delivered."
    180       }
    181     },
    182     "data_integrity": {
    183       "raw_data_available": {
    184         "applies": true,
    185         "answer": false,
    186         "justification": "The list of reviewed papers is not provided as a downloadable dataset. No supplementary materials or data files are available."
    187       },
    188       "data_collection_described": {
    189         "applies": true,
    190         "answer": true,
    191         "justification": "Section 5.1 describes the search strategy including databases (IEEE Xplore, ACM Digital Library, Google Scholar, EBSCOhost, ProQuest Central), keywords used, and time period (2016-2023). Table 2 provides per-database inclusion/exclusion criteria."
    192       },
    193       "recruitment_methods_described": {
    194         "applies": true,
    195         "answer": true,
    196         "justification": "For a survey, the 'sample' is the selected papers. The paper describes which databases were searched and what keywords were used (Section 5.1), and what inclusion/exclusion criteria were applied per database (Table 2), constituting a description of how the sample was assembled."
    197       },
    198       "data_pipeline_documented": {
    199         "applies": true,
    200         "answer": false,
    201         "justification": "No PRISMA flow diagram or stage-by-stage counts are provided. The paper goes from describing search databases to presenting synthesized results with no documentation of how many papers were found, screened, or excluded at each stage. The total number of papers reviewed is never explicitly stated."
    202       }
    203     },
    204     "conflicts_of_interest": {
    205       "funding_disclosed": {
    206         "applies": true,
    207         "answer": false,
    208         "justification": "The acknowledgments section thanks the chair and university but does not disclose any funding sources, grants, or sponsorships."
    209       },
    210       "affiliations_disclosed": {
    211         "applies": true,
    212         "answer": true,
    213         "justification": "The author's affiliation with Capitol Technology University is clearly stated on the first page."
    214       },
    215       "funder_independent_of_outcome": {
    216         "applies": false,
    217         "answer": false,
    218         "justification": "This appears to be unfunded academic thesis work (acknowledgments thank 'Dr. Burrell (my chair)' suggesting a thesis/dissertation). No external funder is disclosed."
    219       },
    220       "financial_interests_declared": {
    221         "applies": true,
    222         "answer": false,
    223         "justification": "No competing interests or financial interests statement is included anywhere in the paper."
    224       }
    225     },
    226     "contamination": {
    227       "training_cutoff_stated": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "This is a survey paper that does not evaluate any pre-trained model on any benchmark."
    231       },
    232       "train_test_overlap_discussed": {
    233         "applies": false,
    234         "answer": false,
    235         "justification": "This is a survey paper that does not evaluate any pre-trained model on any benchmark."
    236       },
    237       "benchmark_contamination_addressed": {
    238         "applies": false,
    239         "answer": false,
    240         "justification": "This is a survey paper that does not evaluate any pre-trained model on any benchmark."
    241       }
    242     },
    243     "human_studies": {
    244       "pre_registered": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants; this is a literature review."
    248       },
    249       "irb_or_ethics_approval": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants; this is a literature review."
    253       },
    254       "demographics_reported": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants; this is a literature review."
    258       },
    259       "inclusion_exclusion_criteria": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants; this is a literature review."
    263       },
    264       "randomization_described": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants; this is a literature review."
    268       },
    269       "blinding_described": {
    270         "applies": false,
    271         "answer": false,
    272         "justification": "No human participants; this is a literature review."
    273       },
    274       "attrition_reported": {
    275         "applies": false,
    276         "answer": false,
    277         "justification": "No human participants; this is a literature review."
    278       }
    279     },
    280     "cost_and_practicality": {
    281       "inference_cost_reported": {
    282         "applies": false,
    283         "answer": false,
    284         "justification": "This is a survey paper with no method that incurs computational cost."
    285       },
    286       "compute_budget_stated": {
    287         "applies": false,
    288         "answer": false,
    289         "justification": "This is a survey paper with no computational experiments."
    290       }
    291     },
    292     "survey_methodology": {
    293       "prisma_or_structured_protocol": {
    294         "applies": true,
    295         "answer": false,
    296         "justification": "Despite calling itself a 'systematic literature review,' the paper does not follow PRISMA or any structured review protocol. There is no PRISMA flow diagram, no protocol registration, and no reproducible search queries. The search strategy in Section 5.1 is ad-hoc — keywords are listed but exact queries and Boolean combinations are not documented."
    297       },
    298       "quality_assessment_of_sources": {
    299         "applies": true,
    300         "answer": false,
    301         "justification": "The survey does not assess the methodological quality of any reviewed paper. All sources are treated equally regardless of study design, sample size, or rigor. For example, a single-company pilot study (Davenport & Mittal, 2022) and a large-scale field experiment (Brynjolfsson et al., 2023) are given equal weight."
    302       },
    303       "publication_bias_discussed": {
    304         "applies": true,
    305         "answer": false,
    306         "justification": "Publication bias is never discussed. The survey does not consider whether published studies on GenAI applications skew positive, despite the overwhelmingly positive framing of the reviewed literature."
    307       }
    308     }
    309   },
    310   "claims": [
    311     {
    312       "claim": "Generative AI can assist in idea generation, market research, customer insights, product requirements engineering, and product development in software product management.",
    313       "evidence": "Mapped to ISPMA framework categories with supporting references: Karim et al. (2022) for idea generation, Brand et al. (2023) for market research, Siggelkow & Terwiesch (2023) for customer insights, Malik et al. (2022) for requirements engineering, Peng et al. (2023) for development. Summarized in Table 3 and Figure 1.",
    314       "supported": "moderate"
    315     },
    316     {
    317       "claim": "GitHub Copilot users completed tasks 55.8% faster than non-users, providing empirical evidence of AI's productivity impact.",
    318       "evidence": "Citing Peng et al. (2023) in Section 3.1.6. This is a pass-through claim from a single cited study; the survey itself does not verify or critically evaluate this finding.",
    319       "supported": "moderate"
    320     },
    321     {
    322       "claim": "A generative AI conversational assistant increased worker productivity, enhanced customer sentiment, and decreased employee turnover across 5,179 customer support agents.",
    323       "evidence": "Citing Brynjolfsson et al. (2023) in Section 3.1.3. Pass-through claim from a single study. The survey notes it was 'particularly beneficial for newer and less-skilled workers.'",
    324       "supported": "moderate"
    325     },
    326     {
    327       "claim": "Ethical implications of GenAI including fairness, data privacy, accountability, transparency, robustness, and legal risks require rigorous guidelines and regulatory measures.",
    328       "evidence": "Synthesized from Brand et al. (2023), Cao et al. (2023), Appel et al. (2023), Dwivedi et al. (2023) in Section 5.3.3 and Results. Framed through the Responsible Innovation framework (Owen et al., 2013) and GDPR principles.",
    329       "supported": "moderate"
    330     },
    331     {
    332       "claim": "The global market for generative AI is expected to reach $109 billion by 2030.",
    333       "evidence": "Citing Grand View Research (2023) industry report in the Problem Statement section. This is a market projection from an industry analyst firm, not peer-reviewed research.",
    334       "supported": "weak"
    335     }
    336   ],
    337   "red_flags": [
    338     {
    339       "flag": "No quality assessment of sources",
    340       "detail": "The survey treats all reviewed papers equally without assessing their methodological quality, rigor, or risk of bias. A single-company pilot study with 55 developers is given equal weight to a large-scale field experiment with 5,179 agents. Treating them as equivalent hides large differences in evidence quality across the source literature."
    341     },
    342     {
    343       "flag": "Very thin evidence base for a systematic review",
    344       "detail": "Most application categories are supported by only 1-2 papers. The total number of reviewed papers appears to be approximately 15-20, yet the conclusions make broad claims about 'empowering business transformation.' The paper never explicitly states the total number of included studies."
    345     },
    346     {
    347       "flag": "Overwhelmingly positive framing",
    348       "detail": "The title claims 'Positive Impact' before presenting evidence. The survey frames GenAI primarily as beneficial with ethical concerns as secondary caveats, rather than presenting a balanced critical analysis. This one-sided framing is inconsistent with the 'systematic review' label."
    349     },
    350     {
    351       "flag": "Systematic review label without systematic methodology",
    352       "detail": "The paper calls itself a 'systematic literature review' but lacks fundamental systematic review elements: no PRISMA protocol, no flow diagram, no paper counts at screening stages, no quality assessment rubric, no publication bias analysis, and no explicit total of included studies."
    353     },
    354     {
    355       "flag": "Claims outrun evidence",
    356       "detail": "The paper generalizes from narrow studies to broad claims. For example, a GPT-based COVID-19 dialogue study (Karim et al., 2022) is used to support GenAI's utility for 'idea generation' across all of product management. An e-commerce product title generation study supports claims about 'positioning and product definition' generally."
    357     },
    358     {
    359       "flag": "No limitations section for the review itself",
    360       "detail": "The paper discusses GenAI's limitations but never acknowledges the limitations of its own review methodology — small corpus, potential selection bias, lack of quality assessment, narrow database coverage, or the inability to draw causal conclusions from a narrative review."
    361     }
    362   ],
    363   "cited_papers": [
    364     {
    365       "title": "The impact of AI on developer productivity: Evidence from GitHub Copilot",
    366       "authors": ["S. Peng", "E. Kalliamvakou", "P. Cihon", "M. Demirer"],
    367       "year": 2023,
    368       "arxiv_id": "2302.06590",
    369       "relevance": "Empirical study of GitHub Copilot's impact on developer productivity, reporting 55.8% faster task completion — directly relevant to AI coding tool evaluation."
    370     },
    371     {
    372       "title": "Generative AI at Work",
    373       "authors": ["E. Brynjolfsson", "D. Li", "L. R. Raymond"],
    374       "year": 2023,
    375       "relevance": "Large-scale field study (5,179 agents) of GenAI conversational assistant impact on customer support productivity, relevant to AI-assisted work evaluation."
    376     },
    377     {
    378       "title": "Automatic code documentation generation using GPT-3",
    379       "authors": ["J. Y. Khan", "G. Uddin"],
    380       "year": 2022,
    381       "relevance": "Evaluates GPT-3 Codex for automated documentation generation in software engineering, relevant to LLM code generation capabilities."
    382     },
    383     {
    384       "title": "A comprehensive survey of AI-generated content (AIGC): A history of generative AI from GAN to ChatGPT",
    385       "authors": ["Y. Cao", "S. Li", "Y. Liu", "Z. Yan", "Y. Dai", "P. S. Yu", "L. Sun"],
    386       "year": 2023,
    387       "arxiv_id": "2303.04226",
    388       "relevance": "Comprehensive survey of AI-generated content including trust, interpretability, and societal concerns — relevant survey methodology comparison."
    389     },
    390     {
    391       "title": "GPT2SP: A transformer-based agile story point estimation approach",
    392       "authors": ["M. Fu", "C. Tantithamthavorn"],
    393       "year": 2022,
    394       "relevance": "Uses GPT-2 for Agile story point estimation across 23,000+ issues, relevant to LLM-based software engineering tooling."
    395     },
    396     {
    397       "title": "Towards effective AI-powered agile project management",
    398       "authors": ["H. K. Dam", "T. Tran", "J. Grundy", "A. Ghose", "Y. Kamei"],
    399       "year": 2019,
    400       "relevance": "Proposes AI framework for Agile project management including descriptive analytics, relevant to AI-assisted software development."
    401     },
    402     {
    403       "title": "\"So what if ChatGPT wrote it?\" Multidisciplinary perspectives on opportunities, challenges and implications of generative conversational AI for research, practice and policy",
    404       "authors": ["Y. K. Dwivedi", "N. Kshetri", "L. Hughes"],
    405       "year": 2023,
    406       "relevance": "Multidisciplinary analysis of ChatGPT's impact including ethical concerns, transparency, bias, and job displacement — relevant to AI safety and governance."
    407     },
    408     {
    409       "title": "Regulating ChatGPT and other large generative AI models",
    410       "authors": ["P. Hacker", "A. Engel", "M. Mauer"],
    411       "year": 2023,
    412       "arxiv_id": "2302.02337",
    413       "relevance": "Analyzes EU AI Act's adequacy for regulating large generative models, relevant to AI governance and safety frameworks."
    414     },
    415     {
    416       "title": "ALSI-Transformer: Transformer-based code comment generation with aligned lexical and syntactic information",
    417       "authors": ["Y. Park", "A. Park", "C. Kim"],
    418       "year": 2023,
    419       "relevance": "Transformer-based code comment generation model for software development, relevant to AI-assisted code understanding."
    420     },
    421     {
    422       "title": "Identifying the requirement conflicts in SRS documents using transformer-based sentence embeddings",
    423       "authors": ["G. Malik", "M. Cevik", "D. Parikh", "A. Basar"],
    424       "year": 2022,
    425       "arxiv_id": "2206.13690",
    426       "relevance": "Uses transformer embeddings for automated conflict detection in software requirements, relevant to LLM applications in software engineering."
    427     },
    428     {
    429       "title": "Opportunities for generative AI in UX modernization",
    430       "authors": ["S. Houde", "S. I. Ross", "M. Muller"],
    431       "year": 2022,
    432       "relevance": "Explores generative AI applications in UX design modernization, identifying pain points and proposing AI-driven solutions for software development."
    433     }
    434   ],
    435   "engagement_factors": {
    436     "practical_relevance": {
    437       "score": 1,
    438       "justification": "Maps GenAI applications to the ISPMA framework, giving practitioners a conceptual catalog, but provides no usable tools, templates, or actionable methods."
    439     },
    440     "surprise_contrarian": {
    441       "score": 0,
    442       "justification": "Confirms widely-held expectations that GenAI can help with various product management tasks; no surprising or contrarian findings."
    443     },
    444     "fear_safety": {
    445       "score": 1,
    446       "justification": "Discusses ethical concerns (bias, privacy, legal risks, hallucinations) but these are well-known issues presented without novel analysis."
    447     },
    448     "drama_conflict": {
    449       "score": 0,
    450       "justification": "No controversy, no critique of specific companies or claims; straightforward positive review."
    451     },
    452     "demo_ability": {
    453       "score": 0,
    454       "justification": "No code, tools, demos, or artifacts of any kind are provided."
    455     },
    456     "brand_recognition": {
    457       "score": 1,
    458       "justification": "Mentions ChatGPT, Copilot, GPT-3, and DALL-E 2, but the paper itself is from an unknown university and a solo author."
    459     }
    460   }
    461 }

Impressum · Datenschutz