scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (16755B)
      1 {
      2   "paper": {
      3     "title": "AI-Powered Solutions in Computer Science: A Comprehensive COPRAS Evaluation",
      4     "authors": ["Srinivasa Rao Kolusu"],
      5     "year": 2024,
      6     "venue": "REST Journal on Data Analytics and Artificial Intelligence",
      7     "doi": "https://doi.org/10.46632/jdaai/3/1/9"
      8   },
      9   "checklist": {
     10     "artifacts": {
     11       "code_released": {
     12         "applies": true,
     13         "answer": false,
     14         "justification": "No code repository or archive is mentioned anywhere in the paper."
     15       },
     16       "data_released": {
     17         "applies": true,
     18         "answer": false,
     19         "justification": "The dataset of 5 tools with 4 metrics appears only in the paper tables. No external data download or source is provided. The origin of the data values (e.g., accuracy 92%, efficiency 15 hrs/week) is never explained."
     20       },
     21       "environment_specified": {
     22         "applies": true,
     23         "answer": false,
     24         "justification": "No computational environment or software tools are described. The COPRAS calculations appear to be manual arithmetic but no tool or environment is specified."
     25       },
     26       "reproduction_instructions": {
     27         "applies": true,
     28         "answer": false,
     29         "justification": "No reproduction instructions are provided. The COPRAS method is described generically but there are no step-by-step instructions to reproduce the specific calculations."
     30       }
     31     },
     32     "statistical_methodology": {
     33       "confidence_intervals_or_error_bars": {
     34         "applies": true,
     35         "answer": false,
     36         "justification": "No confidence intervals or error bars are reported. All values are single point estimates with no uncertainty quantification."
     37       },
     38       "significance_tests": {
     39         "applies": true,
     40         "answer": false,
     41         "justification": "The paper ranks tools and makes comparative claims (e.g., NLP-Powered Chatbot is 'most optimal') but uses no statistical significance tests. Rankings are based solely on COPRAS arithmetic."
     42       },
     43       "effect_sizes_reported": {
     44         "applies": true,
     45         "answer": false,
     46         "justification": "No effect sizes are reported. Differences between tools are presented as raw COPRAS scores without any measure of effect magnitude or practical significance."
     47       },
     48       "sample_size_justified": {
     49         "applies": true,
     50         "answer": false,
     51         "justification": "The study evaluates only 5 tools across 4 metrics. No justification is given for why these 5 tools were selected or why 4 criteria are sufficient."
     52       },
     53       "variance_reported": {
     54         "applies": true,
     55         "answer": false,
     56         "justification": "No variance or spread measures are reported. Each tool has a single value per metric with no indication of measurement uncertainty or variability."
     57       }
     58     },
     59     "evaluation_design": {
     60       "baselines_included": {
     61         "applies": true,
     62         "answer": false,
     63         "justification": "No baselines or alternative evaluation methods are compared against COPRAS. The paper does not compare its rankings to those from TOPSIS, VIKOR, or other MCDM methods despite citing them in references."
     64       },
     65       "baselines_contemporary": {
     66         "applies": true,
     67         "answer": false,
     68         "justification": "No baselines are included at all, so whether they are contemporary cannot be assessed."
     69       },
     70       "ablation_study": {
     71         "applies": true,
     72         "answer": false,
     73         "justification": "No ablation study is performed. The paper uses equal weights (0.25 each) but never tests how different weight assignments would affect rankings."
     74       },
     75       "multiple_metrics": {
     76         "applies": true,
     77         "answer": false,
     78         "justification": "The final evaluation produces a single ranking via COPRAS (Qi/Ui values). While the input data has 4 criteria, the evaluation output is a single composite score."
     79       },
     80       "human_evaluation": {
     81         "applies": true,
     82         "answer": false,
     83         "justification": "No human evaluation is included. The input metrics (accuracy, efficiency, innovation, resource usage) appear to be assumed values with no explanation of how they were measured or validated."
     84       },
     85       "held_out_test_set": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "This is an MCDM evaluation paper, not a machine learning study. There is no train/test split concept applicable here."
     89       },
     90       "per_category_breakdown": {
     91         "applies": true,
     92         "answer": true,
     93         "justification": "Tables 1-5 provide per-tool breakdowns across all four criteria (accuracy, efficiency, innovation, resource usage) as well as intermediate COPRAS calculations (Bi, Ci, Min(Ci)/Ci)."
     94       },
     95       "failure_cases_discussed": {
     96         "applies": true,
     97         "answer": false,
     98         "justification": "No failure cases or limitations of the COPRAS rankings are discussed. The paper does not examine scenarios where COPRAS might produce misleading rankings."
     99       },
    100       "negative_results_reported": {
    101         "applies": true,
    102         "answer": false,
    103         "justification": "No negative results are reported. Every tool is presented positively with no discussion of scenarios where the method fails or produces counterintuitive results."
    104       }
    105     },
    106     "claims_and_evidence": {
    107       "abstract_claims_supported": {
    108         "applies": true,
    109         "answer": false,
    110         "justification": "The abstract claims the study 'rigorously assessed' solutions and 'reveals significant insights into AI's potential.' The assessment uses fabricated-looking input data with no provenance, and the insights are straightforward COPRAS arithmetic, not rigorous analysis."
    111       },
    112       "causal_claims_justified": {
    113         "applies": true,
    114         "answer": false,
    115         "justification": "The paper implicitly makes causal claims by ranking tools as 'most optimal' and recommending them for specific use cases, but there is no causal analysis. The COPRAS method is a ranking technique, not a causal inference method."
    116       },
    117       "generalization_bounded": {
    118         "applies": true,
    119         "answer": false,
    120         "justification": "The title claims 'Comprehensive' evaluation but only examines 5 unnamed/generic tools with 4 metrics of unknown provenance. No boundaries on generalization are stated."
    121       },
    122       "alternative_explanations_discussed": {
    123         "applies": true,
    124         "answer": false,
    125         "justification": "No alternative explanations for the rankings are discussed. The paper does not consider whether different weight schemes, different criteria, or different MCDM methods would yield different conclusions."
    126       }
    127     },
    128     "setup_transparency": {
    129       "model_versions_specified": {
    130         "applies": false,
    131         "answer": false,
    132         "justification": "This paper does not use or evaluate any AI/ML models directly. It applies the COPRAS mathematical method to a table of metrics."
    133       },
    134       "prompts_provided": {
    135         "applies": false,
    136         "answer": false,
    137         "justification": "No prompting or LLMs are used in the methodology."
    138       },
    139       "hyperparameters_reported": {
    140         "applies": true,
    141         "answer": true,
    142         "justification": "The COPRAS weights are reported in Table 3 (all 0.25). This is the primary hyperparameter of the method."
    143       },
    144       "scaffolding_described": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No agentic scaffolding is used. This is a manual MCDM calculation."
    148       },
    149       "data_preprocessing_documented": {
    150         "applies": true,
    151         "answer": false,
    152         "justification": "The source of the input data (Table 1) is completely undocumented. There is no explanation of how accuracy percentages, efficiency hours, innovation scores, or resource usage values were obtained or measured."
    153       }
    154     },
    155     "limitations_and_scope": {
    156       "limitations_section_present": {
    157         "applies": true,
    158         "answer": false,
    159         "justification": "There is no limitations section. The conclusion mentions future research directions but does not discuss limitations of the current study."
    160       },
    161       "threats_to_validity_specific": {
    162         "applies": true,
    163         "answer": false,
    164         "justification": "No threats to validity are discussed anywhere in the paper."
    165       },
    166       "scope_boundaries_stated": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "No scope boundaries are stated. The paper does not clarify what the results do NOT show or what settings are excluded."
    170       }
    171     },
    172     "data_integrity": {
    173       "raw_data_available": {
    174         "applies": true,
    175         "answer": false,
    176         "justification": "The raw data is presented in Table 1 but its provenance is completely unknown. There is no way to verify whether these numbers are real measurements, estimates, or fabricated values."
    177       },
    178       "data_collection_described": {
    179         "applies": true,
    180         "answer": false,
    181         "justification": "No data collection procedure is described. The paper never explains how accuracy, efficiency, innovation, or resource usage values were obtained for the 5 tools."
    182       },
    183       "recruitment_methods_described": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "No human participants are involved. The study applies COPRAS to a table of tool metrics."
    187       },
    188       "data_pipeline_documented": {
    189         "applies": true,
    190         "answer": false,
    191         "justification": "The COPRAS calculation pipeline (normalization → weighting → Bi/Ci → ranking) is described, but the pipeline from real-world measurement to Table 1 input data is completely undocumented."
    192       }
    193     },
    194     "conflicts_of_interest": {
    195       "funding_disclosed": {
    196         "applies": true,
    197         "answer": false,
    198         "justification": "No funding information is disclosed anywhere in the paper."
    199       },
    200       "affiliations_disclosed": {
    201         "applies": true,
    202         "answer": true,
    203         "justification": "The author is identified as 'Sr. Technical Account Manager, Dallas, Texas, USA' with an IEEE email address."
    204       },
    205       "funder_independent_of_outcome": {
    206         "applies": true,
    207         "answer": false,
    208         "justification": "No funding is disclosed, so independence cannot be assessed. The absence of a funding disclosure is a NO per the schema."
    209       },
    210       "financial_interests_declared": {
    211         "applies": true,
    212         "answer": false,
    213         "justification": "No competing interests or financial interests statement is present in the paper."
    214       }
    215     },
    216     "contamination": {
    217       "training_cutoff_stated": {
    218         "applies": false,
    219         "answer": false,
    220         "justification": "This paper does not evaluate any pre-trained model on a benchmark. It applies a mathematical MCDM method to a table of metrics."
    221       },
    222       "train_test_overlap_discussed": {
    223         "applies": false,
    224         "answer": false,
    225         "justification": "No pre-trained models or benchmarks are evaluated."
    226       },
    227       "benchmark_contamination_addressed": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "No pre-trained models or benchmarks are evaluated."
    231       }
    232     },
    233     "human_studies": {
    234       "pre_registered": {
    235         "applies": false,
    236         "answer": false,
    237         "justification": "No human participants are involved in this study."
    238       },
    239       "irb_or_ethics_approval": {
    240         "applies": false,
    241         "answer": false,
    242         "justification": "No human participants are involved in this study."
    243       },
    244       "demographics_reported": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human participants are involved in this study."
    248       },
    249       "inclusion_exclusion_criteria": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants are involved in this study."
    253       },
    254       "randomization_described": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants are involved in this study."
    258       },
    259       "blinding_described": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No human participants are involved in this study."
    263       },
    264       "attrition_reported": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No human participants are involved in this study."
    268       }
    269     },
    270     "cost_and_practicality": {
    271       "inference_cost_reported": {
    272         "applies": false,
    273         "answer": false,
    274         "justification": "This is an MCDM evaluation paper, not a system that incurs computational costs. The COPRAS calculations are simple arithmetic."
    275       },
    276       "compute_budget_stated": {
    277         "applies": false,
    278         "answer": false,
    279         "justification": "No significant computation is involved. COPRAS is simple matrix arithmetic."
    280       }
    281     }
    282   },
    283   "claims": [
    284     {
    285       "claim": "The NLP-Powered Chatbot for IT Support is the most optimal AI-powered solution, ranking first with a utility score of 100.0000.",
    286       "evidence": "Table 6 shows Qi=0.220, Ui=100.0000, Rank=1 for the NLP-Powered Chatbot.",
    287       "supported": "weak"
    288     },
    289     {
    290       "claim": "The Automated Code Review Tool ranks second with a utility score of 98.4424.",
    291       "evidence": "Table 6 shows Qi=0.217, Ui=98.4424, Rank=2.",
    292       "supported": "weak"
    293     },
    294     {
    295       "claim": "The COPRAS method provides a structured and systematic approach to evaluating and prioritizing AI-powered tools.",
    296       "evidence": "The paper demonstrates the COPRAS calculation steps in Tables 2-6.",
    297       "supported": "moderate"
    298     }
    299   ],
    300   "methodology_tags": ["case-study"],
    301   "key_findings": "The paper applies the COPRAS multi-criteria decision-making method to rank 5 AI-powered tools (code review, bug tracking, software testing, predictive maintenance, NLP chatbot) across accuracy, efficiency, innovation, and resource usage. The NLP-Powered Chatbot ranks first and Predictive Maintenance ranks last. However, the input data has no documented provenance — the metrics appear to be assumed rather than measured, making the rankings meaningless as empirical findings.",
    302   "red_flags": [
    303     {
    304       "flag": "Data provenance unknown",
    305       "detail": "The input data in Table 1 (accuracy percentages, efficiency hours, innovation scores, resource usage) has no documented source. There is no explanation of how these values were measured, estimated, or obtained. They appear to be fabricated examples rather than empirical measurements."
    306     },
    307     {
    308       "flag": "No limitations or threats to validity",
    309       "detail": "The paper has no limitations section, no threats to validity, and no discussion of the many assumptions underlying the analysis (equal weights, choice of criteria, source of data)."
    310     },
    311     {
    312       "flag": "Circular reasoning",
    313       "detail": "The paper feeds assumed numbers into a deterministic formula and presents the output as a 'finding.' The COPRAS ranking is entirely determined by the input values, which are not validated against any ground truth."
    314     },
    315     {
    316       "flag": "Overclaimed rigor",
    317       "detail": "The abstract claims solutions are 'rigorously assessed' and the study 'reveals significant insights,' but the analysis is straightforward arithmetic on unvalidated input data."
    318     },
    319     {
    320       "flag": "Generic tools with no specificity",
    321       "detail": "The 5 'AI-powered solutions' evaluated are generic categories (e.g., 'Automated Code Review Tool') rather than specific, identifiable products. No specific tools are named or evaluated."
    322     }
    323   ],
    324   "cited_papers": [
    325     {
    326       "title": "AI-powered vulnerability detection for secure source code development",
    327       "authors": ["Sampath Rajapaksha", "Janaka Senanayake", "Harsha Kalutarage", "Mhd Omar Al-Kadri"],
    328       "year": 2022,
    329       "relevance": "Directly addresses AI-powered code security analysis, relevant to the survey's scope on AI in software engineering."
    330     },
    331     {
    332       "title": "AI-Powered Cyber Threats: A Systematic Review",
    333       "authors": ["Mafaz Alanezi", "Ruah Mouad Alyas AL-Azzawi"],
    334       "year": 2024,
    335       "relevance": "Systematic review of AI-powered cyber threats, relevant to AI safety and security concerns in the survey."
    336     },
    337     {
    338       "title": "AI-Powered Cloud Security: A Study on the Integration of Artificial Intelligence and Machine Learning for Improved Threat Detection and Prevention",
    339       "authors": ["Thamer Abdel-Wahid"],
    340       "year": 2024,
    341       "relevance": "Covers AI/ML integration for security, relevant to AI-powered software quality and safety."
    342     }
    343   ]
    344 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs