claim.py (6237B)
#!/usr/bin/env python3
"""
Paper claim system for parallel scanning. Prevents two agents from
working on the same paper. Claims expire after 1 hour.

Claims are stored as empty files: papers/<slug>/.claimed_<timestamp>

Usage:
    python scripts/claim.py list                 # List unclaimed papers ready to scan
    python scripts/claim.py list --limit 10      # First 10 unclaimed
    python scripts/claim.py list --rescan-v1     # Also list papers whose scan.json is v1 or broken
    python scripts/claim.py take <slug>          # Claim a paper (prints "ok" or "taken")
    python scripts/claim.py take-next            # Find + claim next available (prints slug or "none")
    python scripts/claim.py take-next --limit 5  # Claim next from first 5 available
    python scripts/claim.py done <slug>          # Mark scan complete, remove claim
    python scripts/claim.py fail <slug>          # Release claim without completing
    python scripts/claim.py status               # Show claim summary
"""

import json
import sys
import time
from pathlib import Path

ROOT = Path(__file__).resolve().parent.parent
PAPERS_DIR = ROOT / "papers"

CLAIM_PREFIX = ".claimed_"
CLAIM_EXPIRY_SECONDS = 3600  # 1 hour


def _remove_quietly(path):
    """Best-effort delete of a stale claim file.

    Errors are ignored on purpose: another agent may have removed the file
    first, and a stale file is treated as inactive whether or not the
    delete succeeds.
    """
    try:
        path.unlink()
    except OSError:
        pass


def _active_claims(paper_dir):
    """Return the non-expired claim files in paper_dir, sorted by name.

    Expired claim files and files whose suffix is not a valid timestamp
    are deleted along the way.
    """
    now = time.time()
    active = []
    for claim_file in paper_dir.glob(f"{CLAIM_PREFIX}*"):
        try:
            stamp = float(claim_file.name[len(CLAIM_PREFIX):])
        except ValueError:
            # Malformed name -- cannot be a real claim, clean up.
            _remove_quietly(claim_file)
            continue
        if now - stamp < CLAIM_EXPIRY_SECONDS:
            active.append(claim_file)
        else:
            # Expired -- clean up.
            _remove_quietly(claim_file)
    return sorted(active)


def get_claim_file(slug):
    """Find an active (non-expired) claim file for a slug.

    Returns the claim file's Path, or None when the paper directory does
    not exist or holds no active claim.
    """
    paper_dir = PAPERS_DIR / slug
    if not paper_dir.is_dir():
        return None
    active = _active_claims(paper_dir)
    return active[0] if active else None


def is_claimed(slug):
    """Return True if the slug currently has an active claim."""
    return get_claim_file(slug) is not None


def claim(slug):
    """Try to claim a paper.

    Returns True if claimed, False if already taken or if the paper
    directory does not exist.

    The claim file is created exclusively and the directory is then
    re-checked: if any other active claim is visible, this claim is
    withdrawn. Each contender creates its file before it looks, so two
    contenders can never both see only their own file. Under simultaneous
    contention both may back off, which leaves the paper available to a
    later call.
    """
    paper_dir = PAPERS_DIR / slug
    if not paper_dir.is_dir() or _active_claims(paper_dir):
        return False

    claim_file = paper_dir / f"{CLAIM_PREFIX}{time.time():.3f}"
    try:
        # Exclusive create: a contender with the same millisecond timestamp
        # would otherwise silently share this file.
        claim_file.touch(exist_ok=False)
    except FileExistsError:
        return False

    rivals = [f for f in _active_claims(paper_dir) if f.name != claim_file.name]
    if rivals:
        _remove_quietly(claim_file)
        return False
    return True


def release(slug):
    """Release a claim by deleting every claim file for the slug.

    A missing paper directory or an already-removed claim file is not an
    error; other OS errors (e.g. permissions) propagate.
    """
    paper_dir = PAPERS_DIR / slug
    for claim_file in paper_dir.glob(f"{CLAIM_PREFIX}*"):
        try:
            claim_file.unlink()
        except FileNotFoundError:
            pass  # Removed concurrently -- already released.


def _needs_scan(paper_dir, rescan_v1):
    """Return True if the paper in paper_dir still has to be scanned.

    A paper without scan.json always needs a scan. With rescan_v1, a paper
    also needs one when its scan.json has scan_version < 2 (a missing field
    counts as 1) or cannot be read as a JSON object with a numeric version.
    """
    scan = paper_dir / "scan.json"
    if not scan.exists():
        return True
    if not rescan_v1:
        return False
    try:
        data = json.loads(scan.read_text(encoding="utf-8"))
        return not data.get("scan_version", 1) >= 2
    except (OSError, ValueError, AttributeError, TypeError):
        # Broken scan.json (unreadable, invalid JSON, not an object, or a
        # non-numeric version): include for rescan.
        return True


def _iter_ready(rescan_v1):
    """Yield, in sorted order, slugs with paper.txt that need a scan and are unclaimed."""
    for txt in sorted(PAPERS_DIR.glob("*/paper.txt")):
        paper_dir = txt.parent
        if _needs_scan(paper_dir, rescan_v1) and not is_claimed(paper_dir.name):
            yield paper_dir.name


def list_ready(limit=None, rescan_v1=False):
    """List paper slugs that have paper.txt, no scan.json, and no active claim.

    If rescan_v1=True, also include papers with a v1 scan.json (no
    scan_version field) or a broken scan.json. A falsy limit means no limit.
    """
    ready = []
    for slug in _iter_ready(rescan_v1):
        ready.append(slug)
        if limit and len(ready) >= limit:
            break
    return ready


def status():
    """Print a summary of claims and scan progress."""
    total_txt = 0
    total_scanned = 0
    total_claimed = 0
    total_unclaimed = 0

    for txt in PAPERS_DIR.glob("*/paper.txt"):
        total_txt += 1
        if (txt.parent / "scan.json").exists():
            total_scanned += 1
        elif is_claimed(txt.parent.name):
            total_claimed += 1
        else:
            total_unclaimed += 1

    print(f"Papers with text: {total_txt}")
    print(f" Scanned: {total_scanned}")
    print(f" Claimed: {total_claimed}")
    print(f" Available: {total_unclaimed}")


def take_next(limit=None, rescan_v1=False):
    """Find the next unclaimed paper and claim it.

    Only the first `limit` available papers are tried (a falsy limit means
    all of them). Returns the claimed slug, or None if nothing could be
    claimed.
    """
    for attempts, slug in enumerate(_iter_ready(rescan_v1), start=1):
        if claim(slug):
            return slug
        # Lost the claim to another agent; move on, but honour the limit.
        if limit and attempts >= limit:
            break
    return None


def _parse_limit(args):
    """Return the integer following the last --limit flag, or None.

    A --limit flag with no following value is ignored. A non-integer value
    prints an error to stderr (stdout is reserved for results) and exits
    with status 1.
    """
    limit = None
    for i, arg in enumerate(args):
        if arg == "--limit" and i + 1 < len(args):
            try:
                limit = int(args[i + 1])
            except ValueError:
                print(f"Invalid --limit value: {args[i + 1]}", file=sys.stderr)
                sys.exit(1)
    return limit


def main():
    """Command-line entry point; dispatches on the first argument."""
    args = sys.argv[1:]
    if not args:
        print("Usage: python scripts/claim.py [list|take|take-next|done|fail|status]")
        sys.exit(1)

    cmd = args[0]

    if cmd == "list":
        rescan_v1 = "--rescan-v1" in args
        for slug in list_ready(_parse_limit(args), rescan_v1=rescan_v1):
            print(slug)

    elif cmd == "take":
        if len(args) < 2:
            print("Usage: python scripts/claim.py take <slug>")
            sys.exit(1)
        if claim(args[1]):
            print("ok")
        else:
            print("taken")
            sys.exit(1)

    elif cmd == "take-next":
        rescan_v1 = "--rescan-v1" in args
        slug = take_next(_parse_limit(args), rescan_v1=rescan_v1)
        if slug:
            print(slug)
        else:
            print("none")
            sys.exit(1)

    elif cmd == "done":
        if len(args) < 2:
            print("Usage: python scripts/claim.py done <slug>")
            sys.exit(1)
        release(args[1])
        print("ok")

    elif cmd == "fail":
        if len(args) < 2:
            print("Usage: python scripts/claim.py fail <slug>")
            sys.exit(1)
        release(args[1])
        print("released")

    elif cmd == "status":
        status()

    else:
        print(f"Unknown command: {cmd}")
        sys.exit(1)


if __name__ == "__main__":
    main()