scan.json - ai-research-survey - Systematic scan of agentic development research. What's signal, what's noise.

scan.json (18675B)
      1 {
      2   "paper": {
      3     "title": "Best Practices for Using AI Tools as an Author, Peer Reviewer, or Editor",
      4     "authors": [
      5       "Tiffany I Leung",
      6       "Taiane de Azevedo Cardoso",
      7       "Amaryllis Mavragani",
      8       "Gunther Eysenbach"
      9     ],
     10     "year": 2023,
     11     "venue": "Journal of Medical Internet Research",
     12     "doi": "10.2196/51584"
     13   },
     14   "checklist": {
     15     "artifacts": {
     16       "code_released": {
     17         "applies": false,
     18         "answer": false,
     19         "justification": "This is an editorial/policy paper with no code or software artifact. There is no analysis code to release."
     20       },
     21       "data_released": {
     22         "applies": false,
     23         "answer": false,
     24         "justification": "This editorial does not collect or analyze data. It presents editorial policies and recommendations with no underlying dataset."
     25       },
     26       "environment_specified": {
     27         "applies": false,
     28         "answer": false,
     29         "justification": "No computational environment is relevant; this is a policy editorial with no experiments or analysis."
     30       },
     31       "reproduction_instructions": {
     32         "applies": false,
     33         "answer": false,
     34         "justification": "There is nothing to reproduce; the paper is an editorial stating publisher policies and recommendations."
     35       }
     36     },
     37     "statistical_methodology": {
     38       "confidence_intervals_or_error_bars": {
     39         "applies": false,
     40         "answer": false,
     41         "justification": "No empirical data or statistical analysis is presented. This is a policy editorial."
     42       },
     43       "significance_tests": {
     44         "applies": false,
     45         "answer": false,
     46         "justification": "No comparative empirical claims are made that would require significance tests."
     47       },
     48       "effect_sizes_reported": {
     49         "applies": false,
     50         "answer": false,
     51         "justification": "No empirical effects are measured. This is a policy editorial."
     52       },
     53       "sample_size_justified": {
     54         "applies": false,
     55         "answer": false,
     56         "justification": "No sample or data collection exists in this editorial."
     57       },
     58       "variance_reported": {
     59         "applies": false,
     60         "answer": false,
     61         "justification": "No experimental runs or quantitative results are reported."
     62       }
     63     },
     64     "evaluation_design": {
     65       "baselines_included": {
     66         "applies": false,
     67         "answer": false,
     68         "justification": "This is a policy editorial, not a study with an evaluation. There are no baselines to compare against."
     69       },
     70       "baselines_contemporary": {
     71         "applies": false,
     72         "answer": false,
     73         "justification": "No baselines are relevant to a policy editorial."
     74       },
     75       "ablation_study": {
     76         "applies": false,
     77         "answer": false,
     78         "justification": "No system or method is proposed that would have components to ablate."
     79       },
     80       "multiple_metrics": {
     81         "applies": false,
     82         "answer": false,
     83         "justification": "No evaluation metrics are used; this is a policy editorial."
     84       },
     85       "human_evaluation": {
     86         "applies": false,
     87         "answer": false,
     88         "justification": "No system outputs are produced or evaluated. This is a policy editorial."
     89       },
     90       "held_out_test_set": {
     91         "applies": false,
     92         "answer": false,
     93         "justification": "No test sets are relevant to this policy editorial."
     94       },
     95       "per_category_breakdown": {
     96         "applies": false,
     97         "answer": false,
     98         "justification": "No quantitative results exist to break down by category."
     99       },
    100       "failure_cases_discussed": {
    101         "applies": false,
    102         "answer": false,
    103         "justification": "No system or method is evaluated, so there are no failure cases in the empirical sense."
    104       },
    105       "negative_results_reported": {
    106         "applies": false,
    107         "answer": false,
    108         "justification": "No experiments are conducted, so there are no negative results to report."
    109       }
    110     },
    111     "claims_and_evidence": {
    112       "abstract_claims_supported": {
    113         "applies": true,
    114         "answer": true,
    115         "justification": "The abstract states the paper will 'outline the current state of editorial policies on generative AI' and 'provide JMIR Publications' editorial policies.' The body delivers on both claims with detailed policy tables and discussion for authors, peer reviewers, and editors."
    116       },
    117       "causal_claims_justified": {
    118         "applies": false,
    119         "answer": false,
    120         "justification": "The paper makes no causal claims. It presents policy recommendations and describes existing guidance from various organizations."
    121       },
    122       "generalization_bounded": {
    123         "applies": true,
    124         "answer": true,
    125         "justification": "The paper explicitly scopes its recommendations to JMIR Publications' policies and the scientific publishing context. It references specific organizations (COPE, WAME, NIH) and does not claim its policies apply universally beyond JMIR journals."
    126       },
    127       "alternative_explanations_discussed": {
    128         "applies": false,
    129         "answer": false,
    130         "justification": "This is a policy editorial presenting recommendations, not empirical results. There are no empirical findings for which alternative explanations would be relevant."
    131       }
    132     },
    133     "setup_transparency": {
    134       "model_versions_specified": {
    135         "applies": false,
    136         "answer": false,
    137         "justification": "No models are used in experiments. The paper discusses AI tools conceptually and references specific terms of service documents (e.g., OpenAI March 14, 2023 Terms of Use; Anthropic July 8, 2023 Terms of Service), but does not run any model."
    138       },
    139       "prompts_provided": {
    140         "applies": false,
    141         "answer": false,
    142         "justification": "No prompting is used. This is a policy editorial, not an empirical study using LLMs."
    143       },
    144       "hyperparameters_reported": {
    145         "applies": false,
    146         "answer": false,
    147         "justification": "No experiments are conducted, so no hyperparameters are relevant."
    148       },
    149       "scaffolding_described": {
    150         "applies": false,
    151         "answer": false,
    152         "justification": "No agentic scaffolding or system is proposed or evaluated."
    153       },
    154       "data_preprocessing_documented": {
    155         "applies": false,
    156         "answer": false,
    157         "justification": "No data is collected or preprocessed. This is a policy editorial."
    158       }
    159     },
    160     "limitations_and_scope": {
    161       "limitations_section_present": {
    162         "applies": true,
    163         "answer": false,
    164         "justification": "No limitations section or equivalent discussion is present in this editorial. The 'Closing Comments' section is brief and does not discuss limitations of the policies or recommendations."
    165       },
    166       "threats_to_validity_specific": {
    167         "applies": true,
    168         "answer": false,
    169         "justification": "No threats to validity are discussed. The editorial does not acknowledge potential weaknesses of its recommendations, such as enforceability challenges or the rapidly changing nature of AI terms of service."
    170       },
    171       "scope_boundaries_stated": {
    172         "applies": true,
    173         "answer": true,
    174         "justification": "The paper's scope is bounded to JMIR Publications' editorial policies: it explicitly states these are 'JMIR Publications' editorial policies on these issues' and references specific JMIR knowledge base articles. The Closing Comments note the 'rapidly evolving nature of AI technologies', suggesting these policies are time-bound."
    175       }
    176     },
    177     "data_integrity": {
    178       "raw_data_available": {
    179         "applies": false,
    180         "answer": false,
    181         "justification": "No data is collected. This is a policy editorial with no underlying dataset."
    182       },
    183       "data_collection_described": {
    184         "applies": false,
    185         "answer": false,
    186         "justification": "No data collection takes place. The paper synthesizes existing policies and organizational guidance."
    187       },
    188       "recruitment_methods_described": {
    189         "applies": false,
    190         "answer": false,
    191         "justification": "No participants are recruited. This is a policy editorial."
    192       },
    193       "data_pipeline_documented": {
    194         "applies": false,
    195         "answer": false,
    196         "justification": "No data pipeline exists. This is a policy editorial."
    197       }
    198     },
    199     "conflicts_of_interest": {
    200       "funding_disclosed": {
    201         "applies": true,
    202         "answer": false,
    203         "justification": "No funding source is mentioned. The Acknowledgments section states 'This manuscript was produced as a result of discussion among JMIR Publications staff and managers' but does not disclose funding."
    204       },
    205       "affiliations_disclosed": {
    206         "applies": true,
    207         "answer": true,
    208         "justification": "Author affiliations are clearly disclosed. All four authors are affiliated with JMIR Publications, and this is stated both in the author list and in the Conflicts of Interest section."
    209       },
    210       "funder_independent_of_outcome": {
    211         "applies": true,
    212         "answer": false,
    213         "justification": "The paper is written by JMIR Publications staff advocating for JMIR Publications' editorial policies. The funder/employer (JMIR Publications) has a direct interest in the policies being presented favorably. GE is disclosed as founder, CEO, and executive editor who 'receives a salary and owns equity.'"
    214       },
    215       "financial_interests_declared": {
    216         "applies": true,
    217         "answer": true,
    218         "justification": "The Conflicts of Interest section clearly declares that TIL is scientific editorial director, TdAC and AM are scientific editors, and GE is founder, CEO, and executive editor of JMIR Publications who 'receives a salary and owns equity.'"
    219       }
    220     },
    221     "contamination": {
    222       "training_cutoff_stated": {
    223         "applies": false,
    224         "answer": false,
    225         "justification": "No pre-trained model is evaluated on any benchmark. This is a policy editorial."
    226       },
    227       "train_test_overlap_discussed": {
    228         "applies": false,
    229         "answer": false,
    230         "justification": "No model evaluation is performed. This is a policy editorial."
    231       },
    232       "benchmark_contamination_addressed": {
    233         "applies": false,
    234         "answer": false,
    235         "justification": "No benchmarks are used. This is a policy editorial."
    236       }
    237     },
    238     "human_studies": {
    239       "pre_registered": {
    240         "applies": false,
    241         "answer": false,
    242         "justification": "No human participants are involved. This is a policy editorial."
    243       },
    244       "irb_or_ethics_approval": {
    245         "applies": false,
    246         "answer": false,
    247         "justification": "No human subjects study is conducted."
    248       },
    249       "demographics_reported": {
    250         "applies": false,
    251         "answer": false,
    252         "justification": "No human participants are involved."
    253       },
    254       "inclusion_exclusion_criteria": {
    255         "applies": false,
    256         "answer": false,
    257         "justification": "No human participants are recruited."
    258       },
    259       "randomization_described": {
    260         "applies": false,
    261         "answer": false,
    262         "justification": "No experimental study with human participants is conducted."
    263       },
    264       "blinding_described": {
    265         "applies": false,
    266         "answer": false,
    267         "justification": "No experimental study is conducted."
    268       },
    269       "attrition_reported": {
    270         "applies": false,
    271         "answer": false,
    272         "justification": "No human participants are involved."
    273       }
    274     },
    275     "cost_and_practicality": {
    276       "inference_cost_reported": {
    277         "applies": false,
    278         "answer": false,
    279         "justification": "This is a policy editorial with no computational method whose cost would be reported."
    280       },
    281       "compute_budget_stated": {
    282         "applies": false,
    283         "answer": false,
    284         "justification": "No computation is performed. This is a policy editorial."
    285       }
    286     }
    287   },
    288   "claims": [
    289     {
    290       "claim": "AI cannot be a listed coauthor on a manuscript because of inability to be accountable for content written.",
    291       "evidence": "Cites COPE guidance, WAME recommendations, and an examination of ChatGPT against CRediT authorship criteria (Section: For Authors, references 2, 10-16).",
    292       "supported": "moderate"
    293     },
    294     {
    295       "claim": "The free version of ChatGPT should not be used for assisting with peer review due to potential information leakage based on its March 2023 Terms of Use.",
    296       "evidence": "The paper reproduces relevant excerpts from OpenAI's Terms of Use (Section: For Peer Reviewers, Figure 1, Multimedia Appendix 1) showing that input may be reused for service improvement.",
    297       "supported": "moderate"
    298     },
    299     {
    300       "claim": "Anthropic's Claude free version should not be used for peer review because reviewers do not have 'all rights' to the manuscript content per Claude's Terms of Service.",
    301       "evidence": "The paper quotes Anthropic's July 8, 2023 Terms of Service (Section: For Peer Reviewers, Figure 1, Multimedia Appendix 2).",
    302       "supported": "moderate"
    303     },
    304     {
    305       "claim": "GPTZero poses a risk of information leakage or loss of confidentiality based on its terms of use.",
    306       "evidence": "The paper references GPTZero's terms of use (Multimedia Appendix 3) but does not reproduce or quote the specific problematic clauses.",
    307       "supported": "weak"
    308     }
    309   ],
    310   "methodology_tags": [
    311     "qualitative"
    312   ],
    313   "key_findings": "This editorial outlines JMIR Publications' policies on generative AI use in scientific publishing, organized around three stakeholder groups: authors, peer reviewers, and editors. The core principles are accountability (humans must take responsibility for AI-generated content), transparency (AI use must be disclosed), and confidentiality (AI tools must not leak manuscript content). The paper argues against banning AI in peer review but provides detailed guidance on when specific tools (ChatGPT free version, Claude free version) should not be used based on their terms of service.",
    314   "red_flags": [
    315     {
    316       "flag": "Self-serving editorial by publisher staff",
    317       "detail": "All four authors are JMIR Publications employees, including the founder/CEO who owns equity. The editorial promotes JMIR Publications' own policies as best practices without independent assessment. One author (T. Leung) also served as editor of the piece, which is noted as 'non-peer-reviewed.'"
    318     },
    319     {
    320       "flag": "Non-peer-reviewed",
    321       "detail": "The paper explicitly states 'this is a non-peer-reviewed article' (submitted 28.08.23, accepted 28.08.23, published 31.08.23 — same-day acceptance). No external review process validated the recommendations."
    322     },
    323     {
    324       "flag": "No systematic methodology for policy comparison",
    325       "detail": "The editorial references policies from COPE, WAME, NIH, and various publishers but does not systematically survey or compare these policies. The selection of cited policies appears unsystematic and potentially cherry-picked to support JMIR's position."
    326     },
    327     {
    328       "flag": "Terms of service analysis is shallow and time-bound",
    329       "detail": "The paper's analysis of ChatGPT, Claude, and GPTZero terms of service is based on specific dated versions (March 2023, July 2023, January 2023) that are likely already outdated. The analysis cites brief excerpts without thorough legal analysis."
    330     }
    331   ],
    332   "cited_papers": [
    333     {
    334       "title": "The imperative for regulatory oversight of large language models (or generative AI) in healthcare",
    335       "authors": ["Bertalan Meskó", "Eric J Topol"],
    336       "year": 2023,
    337       "doi": "10.1038/s41746-023-00873-0",
    338       "relevance": "Discusses regulatory frameworks for LLMs in healthcare, relevant to AI governance and safety."
    339     },
    340     {
    341       "title": "GPT-4 technical report",
    342       "authors": ["OpenAI"],
    343       "year": 2023,
    344       "relevance": "Core reference for GPT-4 capabilities and acknowledged biases/limitations in the technical report."
    345     },
    346     {
    347       "title": "Artificial intelligence can generate fraudulent but authentic-looking scientific medical articles: Pandora's box has been opened",
    348       "authors": ["Martin Májovský", "Martin Černý", "Martin Kasal", "Martin Komarc", "David Netuka"],
    349       "year": 2023,
    350       "doi": "10.2196/46924",
    351       "relevance": "Empirical study showing ChatGPT can generate convincing fraudulent scientific articles, directly relevant to AI safety in research."
    352     },
    353     {
    354       "title": "Survey of hallucination in natural language generation",
    355       "authors": ["Ziwei Ji", "Nayeon Lee", "Rita Frieske"],
    356       "year": 2023,
    357       "doi": "10.1145/3571730",
    358       "relevance": "Comprehensive survey of LLM hallucination, relevant to understanding failure modes of AI code and text generation."
    359     },
    360     {
    361       "title": "The ethics of disclosing the use of artificial intelligence tools in writing scholarly manuscripts",
    362       "authors": ["Mohamad Hosseini", "David B Resnik", "Kristi Holmes"],
    363       "year": 2023,
    364       "doi": "10.1177/17470161231180449",
    365       "relevance": "Discusses ethics of AI disclosure in scholarly work, relevant to methodology transparency in AI-assisted research."
    366     },
    367     {
    368       "title": "Fighting reviewer fatigue or amplifying bias? Considerations and recommendations for use of ChatGPT and other large language models in scholarly peer review",
    369       "authors": ["Mohamad Hosseini", "Serge P J M Horbach"],
    370       "year": 2023,
    371       "doi": "10.1186/s41073-023-00133-5",
    372       "relevance": "Examines risks and benefits of LLMs in peer review, directly relevant to AI-assisted evaluation of research quality."
    373     },
    374     {
    375       "title": "ChatGPT and large language models in academia: opportunities and challenges",
    376       "authors": ["Joel G Meyer", "Ryan J Urbanowicz", "Patrick C N Martin"],
    377       "year": 2023,
    378       "doi": "10.1186/s13040-023-00339-9",
    379       "relevance": "Broad survey of LLM opportunities and challenges in academic settings, relevant to AI productivity and risk assessment."
    380     },
    381     {
    382       "title": "Comparing scientific abstracts generated by ChatGPT to real abstracts with detectors and blinded human reviewers",
    383       "authors": ["Catherine A Gao", "Frederick M Howard", "Nikolay S Markov"],
    384       "year": 2023,
    385       "doi": "10.1038/s41746-023-00819-6",
    386       "relevance": "Evaluates AI-generated text detection methods, relevant to AI safety and capability assessment methodologies."
    387     }
    388   ]
    389 }
	ai-research-survey Systematic scan of agentic development research. What's signal, what's noise.
	git clone https://git.shiptheloop.com/ai-research-survey.git
	Log | Files | Refs