ai-research-survey

Systematic scan of agentic development research. What's signal, what's noise.
git clone https://git.shiptheloop.com/ai-research-survey.git
Log | Files | Refs

claim.py (6237B)


      1 #!/usr/bin/env python3
      2 """
      3 Paper claim system for parallel scanning. Prevents two agents from
      4 working on the same paper. Claims expire after 1 hour.
      5 
      6 Claims are stored as empty files: papers/<slug>/.claimed_<timestamp>
      7 
      8 Usage:
      9     python scripts/claim.py list                  # List unclaimed papers ready to scan
     10     python scripts/claim.py list --limit 10       # First 10 unclaimed
     11     python scripts/claim.py take <slug>            # Claim a paper (prints "ok" or "taken")
     12     python scripts/claim.py take-next              # Atomically list + claim next available (prints slug or "none")
     13     python scripts/claim.py take-next --limit 5    # Claim next from first 5 available
     14     python scripts/claim.py done <slug>            # Mark scan complete, remove claim
     15     python scripts/claim.py fail <slug>            # Release claim without completing
     16     python scripts/claim.py status                 # Show claim summary
     17 """
     18 
     19 import sys
     20 import time
     21 from pathlib import Path
     22 
     23 ROOT = Path(__file__).resolve().parent.parent
     24 PAPERS_DIR = ROOT / "papers"
     25 
     26 CLAIM_PREFIX = ".claimed_"
     27 CLAIM_EXPIRY_SECONDS = 3600  # 1 hour
     28 
     29 
     30 def get_claim_file(slug):
     31     """Find an active (non-expired) claim file for a slug."""
     32     paper_dir = PAPERS_DIR / slug
     33     if not paper_dir.exists():
     34         return None
     35     for f in paper_dir.glob(f"{CLAIM_PREFIX}*"):
     36         try:
     37             ts = float(f.name[len(CLAIM_PREFIX):])
     38             if time.time() - ts < CLAIM_EXPIRY_SECONDS:
     39                 return f
     40             else:
     41                 # Expired — clean up
     42                 f.unlink()
     43         except (ValueError, OSError):
     44             f.unlink()
     45     return None
     46 
     47 
     48 def is_claimed(slug):
     49     return get_claim_file(slug) is not None
     50 
     51 
     52 def claim(slug):
     53     """Try to claim a paper. Returns True if claimed, False if already taken."""
     54     if is_claimed(slug):
     55         return False
     56     paper_dir = PAPERS_DIR / slug
     57     if not paper_dir.exists():
     58         return False
     59     claim_file = paper_dir / f"{CLAIM_PREFIX}{time.time():.3f}"
     60     claim_file.touch()
     61     return True
     62 
     63 
     64 def release(slug):
     65     """Release a claim."""
     66     paper_dir = PAPERS_DIR / slug
     67     for f in paper_dir.glob(f"{CLAIM_PREFIX}*"):
     68         f.unlink()
     69 
     70 
     71 def list_ready(limit=None, rescan_v1=False):
     72     """List paper slugs that have paper.txt, no scan.json, and no active claim.
     73     If rescan_v1=True, also include papers with v1 scan.json (no scan_version field)."""
     74     ready = []
     75     for txt in sorted(PAPERS_DIR.glob("*/paper.txt")):
     76         slug = txt.parent.name
     77         scan = txt.parent / "scan.json"
     78         if scan.exists():
     79             if rescan_v1:
     80                 try:
     81                     import json
     82                     data = json.loads(scan.read_text())
     83                     if data.get("scan_version", 1) >= 2:
     84                         continue  # Already v2, skip
     85                 except (json.JSONDecodeError, KeyError):
     86                     pass  # Broken scan.json, include for rescan
     87             else:
     88                 continue
     89         if is_claimed(slug):
     90             continue
     91         ready.append(slug)
     92         if limit and len(ready) >= limit:
     93             break
     94     return ready
     95 
     96 
     97 def status():
     98     """Show summary of claims and scan progress."""
     99     total_txt = 0
    100     total_scanned = 0
    101     total_claimed = 0
    102     total_unclaimed = 0
    103 
    104     for txt in PAPERS_DIR.glob("*/paper.txt"):
    105         total_txt += 1
    106         slug = txt.parent.name
    107         scan = txt.parent / "scan.json"
    108         if scan.exists():
    109             total_scanned += 1
    110         elif is_claimed(slug):
    111             total_claimed += 1
    112         else:
    113             total_unclaimed += 1
    114 
    115     print(f"Papers with text: {total_txt}")
    116     print(f"  Scanned:   {total_scanned}")
    117     print(f"  Claimed:   {total_claimed}")
    118     print(f"  Available: {total_unclaimed}")
    119 
    120 
    121 def take_next(limit=None, rescan_v1=False):
    122     """Atomically find the next unclaimed paper and claim it. Returns slug or None."""
    123     import json as _json
    124     for txt in sorted(PAPERS_DIR.glob("*/paper.txt")):
    125         slug = txt.parent.name
    126         scan = txt.parent / "scan.json"
    127         if scan.exists():
    128             if rescan_v1:
    129                 try:
    130                     data = _json.loads(scan.read_text())
    131                     if data.get("scan_version", 1) >= 2:
    132                         continue
    133                 except (ValueError, KeyError):
    134                     pass
    135             else:
    136                 continue
    137         if is_claimed(slug):
    138             continue
    139         if claim(slug):
    140             return slug
    141         continue
    142     return None
    143 
    144 
    145 def main():
    146     args = sys.argv[1:]
    147     if not args:
    148         print("Usage: python scripts/claim.py [list|take|take-next|done|fail|status]")
    149         sys.exit(1)
    150 
    151     cmd = args[0]
    152 
    153     if cmd == "list":
    154         limit = None
    155         rescan_v1 = "--rescan-v1" in args
    156         for i, arg in enumerate(args):
    157             if arg == "--limit" and i + 1 < len(args):
    158                 limit = int(args[i + 1])
    159         ready = list_ready(limit, rescan_v1=rescan_v1)
    160         for slug in ready:
    161             print(slug)
    162 
    163     elif cmd == "take":
    164         if len(args) < 2:
    165             print("Usage: python scripts/claim.py take <slug>")
    166             sys.exit(1)
    167         slug = args[1]
    168         if claim(slug):
    169             print("ok")
    170         else:
    171             print("taken")
    172             sys.exit(1)
    173 
    174     elif cmd == "take-next":
    175         limit = None
    176         rescan_v1 = "--rescan-v1" in args
    177         for i, arg in enumerate(args):
    178             if arg == "--limit" and i + 1 < len(args):
    179                 limit = int(args[i + 1])
    180         slug = take_next(limit, rescan_v1=rescan_v1)
    181         if slug:
    182             print(slug)
    183         else:
    184             print("none")
    185             sys.exit(1)
    186 
    187     elif cmd == "done":
    188         if len(args) < 2:
    189             print("Usage: python scripts/claim.py done <slug>")
    190             sys.exit(1)
    191         release(args[1])
    192         print("ok")
    193 
    194     elif cmd == "fail":
    195         if len(args) < 2:
    196             print("Usage: python scripts/claim.py fail <slug>")
    197             sys.exit(1)
    198         release(args[1])
    199         print("released")
    200 
    201     elif cmd == "status":
    202         status()
    203 
    204     else:
    205         print(f"Unknown command: {cmd}")
    206         sys.exit(1)
    207 
    208 
    209 if __name__ == "__main__":
    210     main()

Impressum · Datenschutz