scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (18403B)
      1 {
      2   "paper": {
      3     "title": "Curious, Critical Thinker, Empathetic, and Ethically Responsible: Essential Soft Skills for Data Scientists in Software Engineering",
      4     "authors": ["Matheus de Morais Leça", "Ronnie de Souza Santos"],
      5     "year": 2025,
      6     "venue": "Unknown (appears to be a conference/journal submission)",
      7     "doi": null
      8   },
      9   "checklist": {
     10     "artifacts": {
     11       "code_released": {
     12         "applies": true,
     13         "answer": false,
     14         "justification": "No source code or analysis scripts are released. The paper mentions a Figshare link for job postings and interview quotations but no code."
     15       },
     16       "data_released": {
     17         "applies": true,
     18         "answer": true,
     19         "justification": "The paper provides a Figshare link (https://figshare.com/s/4fda5ee8e36e1482db61) with job postings and safe quotations from interviews (Section VII)."
     20       },
     21       "environment_specified": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "No environment or dependency specifications are provided. The paper mentions using a Python script for LinkedIn scraping and ChatGPT for qualitative (thematic) coding but provides no versions or setup details."
     25       },
     26       "reproduction_instructions": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "No step-by-step reproduction instructions are provided. The methodology section describes the process narratively but does not give reproducible steps."
     30       }
     31     },
     32     "statistical_methodology": {
     33       "confidence_intervals_or_error_bars": {
     34         "applies": false,
     35         "answer": false,
     36         "justification": "This is a qualitative thematic analysis study; no statistical inference is performed."
     37       },
     38       "significance_tests": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "No comparative statistical claims are made; this is qualitative research."
     42       },
     43       "effect_sizes_reported": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No effect sizes are relevant to a qualitative thematic analysis."
     47       },
     48       "sample_size_justified": {
     49         "applies": true,
     50         "answer": false,
     51         "justification": "The sample of 11 interviewees and 87 job postings is not justified with any formal reasoning. The paper acknowledges the small sample in the threats to validity but does not justify why these numbers are sufficient (e.g., no saturation analysis)."
     52       },
     53       "variance_reported": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No quantitative variance measures are relevant to this qualitative study."
     57       }
     58     },
     59     "evaluation_design": {
     60       "baselines_included": {
     61         "applies": true,
     62         "answer": true,
     63         "justification": "The paper compares its findings with prior work on soft skills in software engineering (e.g., Matturro et al. 2019, Ahmed et al. 2012) in the discussion section."
     64       },
     65       "baselines_contemporary": {
     66         "applies": true,
     67         "answer": true,
     68         "justification": "The related work cited includes recent studies from 2019-2024 on soft skills in SE."
     69       },
     70       "ablation_study": {
     71         "applies": false,
     72         "answer": false,
     73         "justification": "No system with components to ablate; this is a qualitative study."
     74       },
     75       "multiple_metrics": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "No quantitative metrics are used; findings are thematic categories."
     79       },
     80       "human_evaluation": {
     81         "applies": true,
     82         "answer": false,
     83         "justification": "The paper used ChatGPT for initial open coding and manually reviewed 20% of extracted data, but there is no inter-rater reliability assessment or independent human evaluation of the thematic coding quality."
     84       },
     85       "held_out_test_set": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "Not applicable to qualitative thematic analysis."
     89       },
     90       "per_category_breakdown": {
     91         "applies": true,
     92         "answer": true,
     93         "justification": "Table I provides per-skill mention counts across 12 soft skills, and Table II provides definitions and quotations per category."
     94       },
     95       "failure_cases_discussed": {
     96         "applies": true,
     97         "answer": true,
     98         "justification": "Section III.C discusses failures in the ChatGPT-assisted coding process: hallucinations in initial prompts, missed citations in the second iteration, and overly narrow results with predefined lists."
     99       },
    100       "negative_results_reported": {
    101         "applies": true,
    102         "answer": true,
    103         "justification": "The paper reports the divergence between job postings (emphasizing general SE skills) and interviews (emphasizing innovation/ethics), noting that social responsibility skills were mentioned only 3 times in job postings despite being prominent in interviews."
    104       }
    105     },
    106     "claims_and_evidence": {
    107       "abstract_claims_supported": {
    108         "applies": true,
    109         "answer": true,
    110         "justification": "The abstract claims about curiosity, critical thinking, empathy, and ethical awareness being essential are supported by the thematic analysis results in Section IV and Tables I-II."
    111       },
    112       "causal_claims_justified": {
    113         "applies": true,
    114         "answer": false,
    115         "justification": "The paper uses causal language like 'These skills are essential for addressing the ethical and societal implications of AI' without establishing causal evidence. The study design (thematic analysis of job postings and interviews) can only identify perceived importance, not causal relationships."
    116       },
    117       "generalization_bounded": {
    118         "applies": true,
    119         "answer": true,
    120         "justification": "The paper explicitly states in Section V.C and the conclusion: 'we do not claim statistical generalizations' and that findings are 'transferable to different contexts using analytical strategies' but 'should not be assumed to apply across all settings.'"
    121       },
    122       "alternative_explanations_discussed": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "The paper does not discuss alternative explanations for why innovation and social responsibility skills emerged as important. For instance, it does not consider whether the interview questions themselves primed participants toward ethical responses, or whether the LinkedIn keyword search biased results."
    126       }
    127     },
    128     "setup_transparency": {
    129       "model_versions_specified": {
    130         "applies": true,
    131         "answer": false,
    132         "justification": "The paper mentions using 'an advanced natural language processing model from the ChatGPT platform' (Section III.C) without specifying which model version (GPT-3.5, GPT-4, etc.)."
    133       },
    134       "prompts_provided": {
    135         "applies": true,
    136         "answer": true,
    137         "justification": "Full prompt text for both job posting analysis and interview analysis is provided in Section III.C."
    138       },
    139       "hyperparameters_reported": {
    140         "applies": true,
    141         "answer": false,
    142         "justification": "No ChatGPT hyperparameters (temperature, etc.) are reported."
    143       },
    144       "scaffolding_described": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No agentic scaffolding is used; ChatGPT is prompted directly as a qualitative-coding aid, with prompts refined manually across iterations (Section III.C)."
    148       },
    149       "data_preprocessing_documented": {
    150         "applies": true,
    151         "answer": true,
    152         "justification": "Section III.A documents the filtering pipeline: 1,600 initial job postings → filtered by job title keywords → 87 retained. The filtering criteria (specific title terms) are stated."
    153       }
    154     },
    155     "limitations_and_scope": {
    156       "limitations_section_present": {
    157         "applies": true,
    158         "answer": true,
    159         "justification": "Section V.C 'Threats to Validity' provides a substantive multi-paragraph discussion of limitations."
    160       },
    161       "threats_to_validity_specific": {
    162         "applies": true,
    163         "answer": true,
    164         "justification": "The threats section identifies specific issues: English-language-only job postings missing non-English markets, LinkedIn not being the dominant platform in all regions, geographic homogeneity of interviewees (mostly Western companies), and small sample size limiting statistical generalization."
    165       },
    166       "scope_boundaries_stated": {
    167         "applies": true,
    168         "answer": true,
    169         "justification": "The paper explicitly states it does not claim statistical generalization, that findings reflect Western practices, and that 'our conclusions can be applied to comparable studies or contexts but should not be assumed to apply across all settings' (Section V.C)."
    170       }
    171     },
    172     "data_integrity": {
    173       "raw_data_available": {
    174         "applies": true,
    175         "answer": true,
    176         "justification": "Job postings and safe interview quotations are available via Figshare (Section VII). Some quotations were removed for privacy."
    177       },
    178       "data_collection_described": {
    179         "applies": true,
    180         "answer": true,
    181         "justification": "Section III.A describes LinkedIn data collection (keywords, 12 countries, filtering process) and Section III.B describes interview data collection (dates June 1 - July 5, 2024, semi-structured format, 23-42 minute duration)."
    182       },
    183       "recruitment_methods_described": {
    184         "applies": true,
    185         "answer": true,
    186         "justification": "Section III.B describes convenience sampling with an open invitation, snowball sampling via participant referrals, and theoretical sampling to target specific demographics."
    187       },
    188       "data_pipeline_documented": {
    189         "applies": true,
    190         "answer": true,
    191         "justification": "The pipeline from collection through ChatGPT-assisted open coding, manual 20% verification, to thematic grouping is described in Sections III.A-C and Figure 1."
    192       }
    193     },
    194     "conflicts_of_interest": {
    195       "funding_disclosed": {
    196         "applies": true,
    197         "answer": false,
    198         "justification": "No funding source is mentioned anywhere in the paper."
    199       },
    200       "affiliations_disclosed": {
    201         "applies": true,
    202         "answer": true,
    203         "justification": "Both authors are listed as University of Calgary affiliates."
    204       },
    205       "funder_independent_of_outcome": {
    206         "applies": false,
    207         "answer": false,
    208         "justification": "No funding is disclosed, so funder independence cannot be assessed; the work may be unfunded academic research."
    209       },
    210       "financial_interests_declared": {
    211         "applies": true,
    212         "answer": false,
    213         "justification": "No competing interests statement is present in the paper."
    214       }
    215     },
    216     "contamination": {
    217       "training_cutoff_stated": {
    218         "applies": false,
    219         "answer": false,
    220         "justification": "The paper does not evaluate a pre-trained model on a benchmark; ChatGPT is used as a qualitative-coding tool, not as the subject of evaluation."
    221       },
    222       "train_test_overlap_discussed": {
    223         "applies": false,
    224         "answer": false,
    225         "justification": "No benchmark evaluation of model capability is performed."
    226       },
    227       "benchmark_contamination_addressed": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "No benchmark evaluation is performed."
    231       }
    232     },
    233     "human_studies": {
    234       "pre_registered": {
    235         "applies": true,
    236         "answer": false,
    237         "justification": "No pre-registration is mentioned for this study involving 11 interview participants."
    238       },
    239       "irb_or_ethics_approval": {
    240         "applies": true,
    241         "answer": false,
    242         "justification": "Section III.D discusses ethical standards and informed consent but does not mention IRB or ethics board approval."
    243       },
    244       "demographics_reported": {
    245         "applies": true,
    246         "answer": true,
    247         "justification": "Section IV reports demographics: 36% non-male, 18% non-white, 18% LGBTQIA+, 9% neurodivergent, 54% with 5+ years experience."
    248       },
    249       "inclusion_exclusion_criteria": {
    250         "applies": true,
    251         "answer": true,
    252         "justification": "Section III.B states participants were professionals actively engaged in developing AI-powered systems, and describes the sampling approach (convenience, snowball, theoretical) with diversity criteria."
    253       },
    254       "randomization_described": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "This is not an experimental study with treatment conditions; it is a qualitative interview study."
    258       },
    259       "blinding_described": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "Blinding is not applicable to a qualitative interview study."
    263       },
    264       "attrition_reported": {
    265         "applies": true,
    266         "answer": false,
    267         "justification": "No information about how many people were invited versus how many participated, or whether any participants dropped out."
    268       }
    269     },
    270     "cost_and_practicality": {
    271       "inference_cost_reported": {
    272         "applies": false,
    273         "answer": false,
    274         "justification": "This is a qualitative study, not a system or method proposal. ChatGPT cost is incidental."
    275       },
    276       "compute_budget_stated": {
    277         "applies": false,
    278         "answer": false,
    279         "justification": "This is a qualitative study with no significant compute requirements."
    280       }
    281     }
    282   },
    283   "claims": [
    284     {
    285       "claim": "Data scientists require soft skills beyond traditional SE skills, particularly in innovation (curiosity, critical thinking) and social responsibility (empathy, ethical awareness).",
    286       "evidence": "Thematic analysis of 87 job postings and 11 interviews yielded 12 soft skills across 5 categories. Innovation and social responsibility emerged as distinct from general SE skills (Tables I-II, Section IV).",
    287       "supported": "moderate"
    288     },
    289     {
    290       "claim": "Job postings emphasize coordination and management skills while practitioners emphasize innovation and social responsibility skills.",
    291       "evidence": "Table I shows coordination skills had 142 mentions vs. innovation (5) and social responsibility (3) in job postings, while interviews highlighted curiosity, ethical awareness, and empathy (Section IV.B-C).",
    292       "supported": "moderate"
    293     },
    294     {
    295       "claim": "ChatGPT-assisted open coding with manual 20% verification produced accurate results with no false positives.",
    296       "evidence": "Section III.C states 'The manual review confirmed that all retrieved data was accurate, and no false positives were detected during this process.'",
    297       "supported": "weak"
    298     }
    299   ],
    300   "methodology_tags": ["qualitative"],
    301   "key_findings": "Through thematic analysis of 87 LinkedIn job postings and 11 practitioner interviews, the study identifies 12 soft skills for data scientists grouped into five categories: coordination, engineering, management, innovation, and social responsibility. The key finding is that while traditional SE soft skills (communication, collaboration) remain important, emerging skills like curiosity, critical thinking, empathy, and ethical awareness are increasingly valued, particularly by practitioners working on AI systems that impact society. Job postings and practitioner perspectives diverge, with employers emphasizing general team skills while practitioners emphasize ethics and innovation.",
    302   "red_flags": [
    303     {
    304       "flag": "LLM-assisted coding without inter-rater reliability",
    305       "detail": "ChatGPT was used for initial open coding of qualitative data. Only 20% was manually verified, and no inter-rater reliability (e.g., Cohen's kappa) was computed. The claim of 'no false positives' in a 20% sample does not validate the remaining 80%."
    306     },
    307     {
    308       "flag": "Unspecified ChatGPT version",
    309       "detail": "The paper uses 'an advanced natural language processing model from the ChatGPT platform' without specifying which model or version, making the coding process non-reproducible."
    310     },
    311     {
    312       "flag": "Very small interview sample",
    313       "detail": "11 interviews with no saturation analysis or justification for stopping at that number. Convenience and snowball sampling introduce significant selection bias."
    314     },
    315     {
    316       "flag": "Search keyword bias in job postings",
    317       "detail": "Searching LinkedIn with 'Soft Skills' as a keyword may have biased the sample toward postings that explicitly mention soft skills, missing postings that value them without using that term."
    318     }
    319   ],
    320   "cited_papers": [
    321     {
    322       "title": "Future of software development with generative AI",
    323       "authors": ["J. Sauvola", "S. Tarkoma", "M. Klemettinen", "J. Riekki", "D. Doermann"],
    324       "year": 2024,
    325       "relevance": "Directly addresses how generative AI is changing software development practices."
    326     },
    327     {
    328       "title": "How does machine learning change software development practices?",
    329       "authors": ["Z. Wan", "X. Xia", "D. Lo", "G. C. Murphy"],
    330       "year": 2019,
    331       "relevance": "Empirical study on ML's impact on SE practices, relevant to understanding workforce skill changes."
    332     },
    333     {
    334       "title": "A survey on bias and fairness in machine learning",
    335       "authors": ["N. Mehrabi", "F. Morstatter", "N. Saxena", "K. Lerman", "A. Galstyan"],
    336       "year": 2021,
    337       "relevance": "Comprehensive survey on AI bias relevant to understanding why ethical awareness is needed."
    338     },
    339     {
    340       "title": "A systematic mapping study on soft skills in software engineering",
    341       "authors": ["G. Matturro", "F. Raschetti", "C. Fontán"],
    342       "year": 2019,
    343       "relevance": "Key baseline for soft skills in SE that this paper extends to data scientists."
    344     },
    345     {
    346       "title": "Sampling in software engineering research: A critical review and guidelines",
    347       "authors": ["S. Baltes", "P. Ralph"],
    348       "year": 2022,
    349       "relevance": "Methodological guidance on sampling in SE research, relevant to evaluating study quality."
    350     },
    351     {
    352       "title": "Artificial intelligence and bias: Challenges, implications, and remedies",
    353       "authors": ["A. Min"],
    354       "year": 2023,
    355       "relevance": "Discusses AI bias challenges that motivate the need for ethical soft skills."
    356     }
    357   ]
    358 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log \| Files \| Refs