validate-scan.py (9909B)
#!/usr/bin/env python3
"""
Validate scan.json files against the scan schema.

Usage:
    python scripts/validate-scan.py papers/<slug>/scan.json   # Validate one file
    python scripts/validate-scan.py --all                     # Validate all scan.json files
    python scripts/validate-scan.py --all --quiet             # Only print failures

Exit 0 if all valid, exit 1 if any failures.
"""

import json
import sys
from pathlib import Path

ROOT = Path(__file__).resolve().parent.parent
# NOTE: SCHEMA_PATH is not read anywhere in this script; the checks below are
# hand-coded. It is kept as a module-level name for anything importing it.
SCHEMA_PATH = ROOT / "schema" / "scan.schema.json"
PAPERS_DIR = ROOT / "papers"

USAGE = "Usage: python scripts/validate-scan.py <path> | --all [--quiet]"

# Base 50 questions: category -> list of required question keys
BASE_QUESTIONS = {
    "artifacts": ["code_released", "data_released", "environment_specified", "reproduction_instructions"],
    "statistical_methodology": ["confidence_intervals_or_error_bars", "significance_tests", "effect_sizes_reported", "sample_size_justified", "variance_reported"],
    "evaluation_design": ["baselines_included", "baselines_contemporary", "ablation_study", "multiple_metrics", "human_evaluation", "held_out_test_set", "per_category_breakdown", "failure_cases_discussed", "negative_results_reported"],
    "claims_and_evidence": ["abstract_claims_supported", "causal_claims_justified", "generalization_bounded", "alternative_explanations_discussed"],
    "setup_transparency": ["model_versions_specified", "prompts_provided", "hyperparameters_reported", "scaffolding_described", "data_preprocessing_documented"],
    "limitations_and_scope": ["limitations_section_present", "threats_to_validity_specific", "scope_boundaries_stated"],
    "data_integrity": ["raw_data_available", "data_collection_described", "recruitment_methods_described", "data_pipeline_documented"],
    "conflicts_of_interest": ["funding_disclosed", "affiliations_disclosed", "funder_independent_of_outcome", "financial_interests_declared"],
    "contamination": ["training_cutoff_stated", "train_test_overlap_discussed", "benchmark_contamination_addressed"],
    "human_studies": ["pre_registered", "irb_or_ethics_approval", "demographics_reported", "inclusion_exclusion_criteria", "randomization_described", "blinding_described", "attrition_reported"],
    "cost_and_practicality": ["inference_cost_reported", "compute_budget_stated"],
}

# Optional questions added mid-v2 (validate if present, don't require)
OPTIONAL_BASE_QUESTIONS = {
    "claims_and_evidence": ["proxy_outcome_distinction"],
    "experimental_rigor": ["scaffold_confound_addressed"],
}

# V2 conditional categories (optional)
CONDITIONAL_QUESTIONS = {
    "experimental_rigor": ["seed_sensitivity_reported", "number_of_runs_stated", "hyperparameter_search_budget", "best_config_selection_justified", "multiple_comparison_correction", "self_comparison_bias_addressed", "compute_budget_vs_performance", "benchmark_construct_validity"],
    "data_leakage": ["temporal_leakage_addressed", "feature_leakage_addressed", "non_independence_addressed", "leakage_detection_method"],
    "survey_methodology": ["prisma_or_structured_protocol", "quality_assessment_of_sources", "publication_bias_discussed"],
}

VALID_METHODOLOGY_TAGS = {"rct", "observational", "benchmark-eval", "case-study", "meta-analysis", "theoretical", "qualitative"}
VALID_SUPPORT_LEVELS = {"strong", "moderate", "weak", "unsupported"}


def validate_checklist_item(item, path):
    """Validate a single checklist item. Returns list of error strings.

    An item must be an object with boolean ``applies`` and ``answer`` fields
    and a non-blank string ``justification``. *path* is the dotted location
    used as the prefix of every error message.
    """
    if not isinstance(item, dict):
        return [f"{path}: not an object"]

    errors = [
        f"{path}: missing '{field}'"
        for field in ("applies", "answer", "justification")
        if field not in item
    ]

    if "applies" in item and not isinstance(item["applies"], bool):
        errors.append(f"{path}.applies: not a boolean")
    if "answer" in item and not isinstance(item["answer"], bool):
        errors.append(f"{path}.answer: not a boolean")
    if "justification" in item:
        if not isinstance(item["justification"], str):
            errors.append(f"{path}.justification: not a string")
        elif not item["justification"].strip():
            errors.append(f"{path}.justification: empty")

    # Constraint: applies=false implies answer=false
    if item.get("applies") is False and item.get("answer") is True:
        errors.append(f"{path}: applies=false but answer=true (invalid)")

    return errors


def _missing_fields(obj, required, label):
    """Return one "missing" error per key of *required* absent from dict *obj*."""
    return [f"{label}: missing '{field}'" for field in required if field not in obj]


def _validate_category(cat_obj, cat, questions, prefix, missing_suffix=""):
    """Validate that category *cat* is an object holding every listed question."""
    if not isinstance(cat_obj, dict):
        return [f"{prefix}checklist.{cat}: not an object"]
    errors = []
    for q in questions:
        if q not in cat_obj:
            errors.append(f"{prefix}checklist.{cat}: missing question '{q}'{missing_suffix}")
        else:
            errors.extend(validate_checklist_item(cat_obj[q], f"{prefix}checklist.{cat}.{q}"))
    return errors


def _validate_checklist(checklist, prefix):
    """Validate the base, optional and conditional parts of a checklist dict."""
    errors = []

    # Base categories (required)
    for cat, questions in BASE_QUESTIONS.items():
        if cat not in checklist:
            errors.append(f"{prefix}checklist: missing category '{cat}'")
            continue
        errors.extend(_validate_category(checklist[cat], cat, questions, prefix))

    # Optional questions (validate if present, don't require)
    for cat, questions in OPTIONAL_BASE_QUESTIONS.items():
        cat_obj = checklist.get(cat)
        if not isinstance(cat_obj, dict):
            # Absent, or malformed; a malformed category is already reported
            # by the base/conditional pass, so don't duplicate the error.
            continue
        for q in questions:
            if q in cat_obj:
                errors.extend(validate_checklist_item(cat_obj[q], f"{prefix}checklist.{cat}.{q}"))

    # Conditional categories (validate if present)
    for cat, questions in CONDITIONAL_QUESTIONS.items():
        if cat in checklist:
            errors.extend(
                _validate_category(
                    checklist[cat], cat, questions, prefix,
                    " (category present but incomplete)",
                )
            )

    return errors


def _validate_object_array(data, key, required, prefix):
    """Validate that ``data[key]`` (if present) is a list of objects with *required* keys."""
    entries = data.get(key, [])
    if not isinstance(entries, list):
        return [f"{prefix}{key}: not an array"]
    errors = []
    for i, entry in enumerate(entries):
        label = f"{prefix}{key}[{i}]"
        if not isinstance(entry, dict):
            errors.append(f"{label}: not an object")
            continue
        errors.extend(_missing_fields(entry, required, label))
    return errors


def validate_scan(data, filepath=""):
    """Validate a scan.json object. Returns list of error strings.

    *data* is the parsed JSON value. *filepath*, when non-empty, is prepended
    (followed by ``": "``) to every error message. Malformed input of any
    shape is reported as an error string; this function does not raise on
    wrongly-typed values.
    """
    errors = []
    prefix = filepath + ": " if filepath else ""

    # Everything below indexes into a dict; a top-level array/scalar cannot
    # be inspected any further.
    if not isinstance(data, dict):
        return [f"{prefix}top level: not an object"]

    # Top-level required fields
    for field in ("paper", "checklist", "claims", "methodology_tags", "key_findings", "red_flags", "cited_papers"):
        if field not in data:
            errors.append(f"{prefix}missing required field '{field}'")

    # Paper metadata
    paper = data.get("paper", {})
    if not isinstance(paper, dict):
        errors.append(f"{prefix}paper: not an object")
    else:
        errors.extend(_missing_fields(paper, ("title", "authors", "year"), f"{prefix}paper"))

    # Checklist
    checklist = data.get("checklist", {})
    if not isinstance(checklist, dict):
        errors.append(f"{prefix}checklist: not an object")
    else:
        errors.extend(_validate_checklist(checklist, prefix))

    # Methodology tags
    tags = data.get("methodology_tags", [])
    if not isinstance(tags, list):
        errors.append(f"{prefix}methodology_tags: not an array")
    else:
        for tag in tags:
            # isinstance first: an unhashable tag (list/dict) would make the
            # set membership test raise TypeError.
            if not isinstance(tag, str) or tag not in VALID_METHODOLOGY_TAGS:
                errors.append(f"{prefix}methodology_tags: invalid tag '{tag}'")

    # Claims
    claims = data.get("claims", [])
    if not isinstance(claims, list):
        errors.append(f"{prefix}claims: not an array")
    else:
        for i, claim in enumerate(claims):
            label = f"{prefix}claims[{i}]"
            if not isinstance(claim, dict):
                errors.append(f"{label}: not an object")
                continue
            errors.extend(_missing_fields(claim, ("claim", "evidence", "supported"), label))
            # Check whenever the key is present, so falsy values ("" / null /
            # false) are rejected just like any other non-enum value.
            if "supported" in claim:
                level = claim["supported"]
                if not isinstance(level, str) or level not in VALID_SUPPORT_LEVELS:
                    errors.append(f"{label}.supported: invalid value '{level}'")

    # key_findings
    if "key_findings" in data and not isinstance(data["key_findings"], str):
        errors.append(f"{prefix}key_findings: not a string")

    # red_flags
    errors.extend(_validate_object_array(data, "red_flags", ("flag", "detail"), prefix))

    # cited_papers
    errors.extend(_validate_object_array(data, "cited_papers", ("title", "relevance"), prefix))

    # scan_version (optional, integer if present)
    if "scan_version" in data:
        version = data["scan_version"]
        # bool is a subclass of int, so JSON true/false must be excluded explicitly.
        if isinstance(version, bool) or not isinstance(version, int):
            errors.append(f"{prefix}scan_version: not an integer")

    # active_modules (optional, array of strings if present)
    if "active_modules" in data:
        modules = data["active_modules"]
        if not isinstance(modules, list):
            errors.append(f"{prefix}active_modules: not an array")
        else:
            for i, module in enumerate(modules):
                if not isinstance(module, str):
                    errors.append(f"{prefix}active_modules[{i}]: not a string")

    return errors


def main():
    """Command-line entry point: validate the requested files and exit.

    Exits with status 1 when called without a file or ``--all``, or when any
    file fails to load or validate; otherwise exits with status 0.
    """
    args = sys.argv[1:]
    quiet = "--quiet" in args
    args = [a for a in args if a != "--quiet"]

    if not args:
        print(USAGE)
        sys.exit(1)

    if args[0] == "--all":
        files = sorted(PAPERS_DIR.glob("*/scan.json"))
    else:
        files = [Path(a) for a in args if not a.startswith("--")]
        if not files:
            # Only unrecognised flags were given (e.g. a typo of --all);
            # don't report success for having validated nothing.
            print(USAGE)
            sys.exit(1)

    total = 0
    failed = 0
    for f in files:
        total += 1
        try:
            data = json.loads(f.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
            # OSError covers missing files, directories and permission
            # problems; one unreadable file must not abort the whole run.
            print(f"FAIL  {f}: {e}")
            failed += 1
            continue

        errors = validate_scan(data, str(f))
        if errors:
            failed += 1
            print(f"FAIL  {f}:")
            for err in errors:
                print(f"  {err}")
        elif not quiet:
            print(f"OK    {f}")

    print(f"\n{total} files checked, {total - failed} passed, {failed} failed")
    sys.exit(1 if failed else 0)


if __name__ == "__main__":
    main()