registry.schema.json (2189B)
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "registry.schema.json",
  "title": "Registry Entry",
  "description": "Schema for a single line in registry.jsonl. Each line represents one paper in the survey.",
  "type": "object",
  "required": ["id", "title", "authors", "year", "source", "status", "added"],
  "properties": {
    "id": {
      "type": "string",
      "pattern": "^[a-z0-9-]+$",
      "description": "URL-safe slug identifying this paper (e.g., 'metr-rct-2025')."
    },
    "title": {
      "type": "string",
      "description": "Full paper title."
    },
    "authors": {
      "type": "array",
      "items": { "type": "string" },
      "description": "List of author names."
    },
    "year": {
      "type": "integer",
      "description": "Publication year."
    },
    "venue": {
      "type": "string",
      "description": "Publication venue (journal, conference, preprint server)."
    },
    "source_url": {
      "type": "string",
      "format": "uri",
      "description": "Primary URL where the paper can be found."
    },
    "doi": {
      "type": "string",
      "description": "Digital Object Identifier, if available."
    },
    "arxiv_id": {
      "$comment": "Uses [0-9] instead of \\d so the pattern matches ASCII digits only and means the same thing in every regex dialect.",
      "type": "string",
      "pattern": "^[0-9]{4}\\.[0-9]{4,5}$",
      "description": "arXiv identifier (e.g., '2507.09089')."
    },
    "source": {
      "type": "string",
      "enum": ["manual", "arxiv", "huggingface", "semantic_scholar", "inbox"],
      "description": "How this paper was discovered."
    },
    "status": {
      "type": "string",
      "enum": ["queued", "downloaded", "scanned", "deep_eval", "excluded"],
      "description": "Current pipeline status."
    },
    "tags": {
      "type": "array",
      "items": { "type": "string" },
      "description": "Topic tags for categorization."
    },
    "directory": {
      "type": "string",
      "description": "Relative path to paper directory under papers/."
    },
    "added": {
      "$comment": "'format' is annotation-only unless the validator enables format assertion, so the pattern enforces the YYYY-MM-DD shape on its own; 'format' still performs full calendar validation where it is asserted.",
      "type": "string",
      "format": "date",
      "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$",
      "description": "Date this entry was added (YYYY-MM-DD)."
    },
    "notes": {
      "type": "string",
      "description": "Free-text notes about this paper."
    }
  }
}