deep-eval.schema.json (2913B)
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "deep-eval.schema.json",
  "title": "Deep Evaluation Result",
  "description": "Schema for optional deep evaluation of a paper. Produced by the deep-eval agent for papers selected for closer scrutiny.",
  "type": "object",
  "required": [
    "paper_id",
    "code_runs",
    "results_reproduce",
    "benchmark_contamination_check",
    "additional_findings"
  ],
  "properties": {
    "paper_id": {
      "type": "string",
      "description": "Registry ID of the paper being evaluated."
    },
    "code_runs": {
      "type": "object",
      "description": "Whether the released code runs successfully.",
      "required": ["attempted", "success", "details"],
      "properties": {
        "attempted": {
          "type": "boolean",
          "description": "Whether code execution was attempted (false if no code released)."
        },
        "success": {
          "type": ["boolean", "null"],
          "description": "Whether the code ran successfully. Null if not attempted."
        },
        "details": {
          "type": "string",
          "description": "Description of what happened: environment setup, errors encountered, workarounds needed."
        }
      },
      "if": {
        "$comment": "Enforces the documented rule that success is null when attempted is false. The reverse direction is deliberately left unconstrained.",
        "required": ["attempted"],
        "properties": {
          "attempted": { "const": false }
        }
      },
      "then": {
        "properties": {
          "success": { "type": "null" }
        }
      }
    },
    "results_reproduce": {
      "type": "object",
      "description": "Whether key results from the paper can be reproduced.",
      "required": ["attempted", "success", "details"],
      "properties": {
        "attempted": {
          "type": "boolean",
          "description": "Whether reproduction was attempted."
        },
        "success": {
          "type": ["boolean", "null"],
          "description": "Whether results were reproduced within reasonable tolerance. Null if not attempted."
        },
        "details": {
          "type": "string",
          "description": "What was attempted, what matched, what diverged, and by how much."
        }
      },
      "if": {
        "$comment": "Enforces the documented rule that success is null when attempted is false. The reverse direction is deliberately left unconstrained.",
        "required": ["attempted"],
        "properties": {
          "attempted": { "const": false }
        }
      },
      "then": {
        "properties": {
          "success": { "type": "null" }
        }
      }
    },
    "benchmark_contamination_check": {
      "type": "object",
      "description": "Check for potential benchmark contamination or data leakage.",
      "required": ["checked", "concerns"],
      "properties": {
        "checked": {
          "type": "boolean",
          "description": "Whether contamination was checked."
        },
        "concerns": {
          "type": "string",
          "description": "Any contamination concerns found, or 'none' if clean."
        }
      }
    },
    "additional_findings": {
      "type": "array",
      "description": "Any other notable findings from deep evaluation.",
      "items": {
        "type": "object",
        "required": ["finding", "detail"],
        "properties": {
          "finding": {
            "type": "string",
            "description": "Short label for the finding."
          },
          "detail": {
            "type": "string",
            "description": "Detailed explanation."
          }
        }
      }
    }
  }
}