backfill_short_ids.py (1822B)
#!/usr/bin/env python3
"""Backfill short_id and short_cell_id into all existing meta.json files,
and rebuild results/index.jsonl with the new fields."""

import hashlib
import json
import sys
from pathlib import Path
from typing import Optional

RESULTS_DIR = Path(__file__).resolve().parent.parent / "results"
RUNS_DIR = RESULTS_DIR / "runs"


def short_hash(s: str) -> str:
    """Return the first 8 hex characters of the SHA-256 digest of *s*."""
    return hashlib.sha256(s.encode()).hexdigest()[:8]


def _load_meta(meta_path: Path) -> Optional[dict]:
    """Return the JSON object stored at *meta_path*, or None if unusable.

    A warning is printed to stderr when the file cannot be read, is not
    valid JSON, or does not contain a JSON object at the top level.
    """
    try:
        # JSON files are UTF-8; do not depend on the platform's locale.
        meta = json.loads(meta_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Skipping {meta_path}: {exc}", file=sys.stderr)
        return None
    if not isinstance(meta, dict):
        print(f"Skipping {meta_path}: top-level value is not an object",
              file=sys.stderr)
        return None
    return meta


def main(results_dir: Optional[Path] = None) -> None:
    """Add short ids to every run's meta.json and rebuild index.jsonl.

    Each directory under ``<results>/runs`` that holds a ``meta.json`` with
    non-empty ``run_id`` and ``cell_id`` gets ``short_id`` and
    ``short_cell_id`` written back into that file. ``<results>/index.jsonl``
    is then overwritten with one line per updated run.

    Args:
        results_dir: Results directory to process. When None, the
            module-level RESULTS_DIR / RUNS_DIR are used.

    Unreadable or malformed meta.json files are reported on stderr and
    skipped, so one bad file does not leave the index un-rebuilt.
    """
    if results_dir is None:
        results_root, runs_dir = RESULTS_DIR, RUNS_DIR
    else:
        results_root = Path(results_dir)
        runs_dir = results_root / "runs"

    if not runs_dir.exists():
        print("No runs directory found")
        return

    updated = 0
    index_entries = []

    # Sorted so the rebuilt index has a stable, reproducible order.
    for run_dir in sorted(d for d in runs_dir.iterdir() if d.is_dir()):
        meta_path = run_dir / "meta.json"
        if not meta_path.exists():
            continue

        meta = _load_meta(meta_path)
        if meta is None:
            continue

        run_id = meta.get("run_id", "")
        cell_id = meta.get("cell_id", "")
        if not run_id or not cell_id:
            continue
        if not isinstance(run_id, str) or not isinstance(cell_id, str):
            # short_hash() needs strings; a non-string id cannot be hashed.
            print(f"Skipping {meta_path}: run_id/cell_id is not a string",
                  file=sys.stderr)
            continue

        meta["short_id"] = short_hash(run_id)
        meta["short_cell_id"] = short_hash(cell_id)
        meta_path.write_text(json.dumps(meta, indent=2) + "\n",
                             encoding="utf-8")
        updated += 1

        # Build index entry
        index_entries.append({
            "run_id": run_id,
            "task": meta.get("task", ""),
            "model": meta.get("model", ""),
            "cell_id": cell_id,
            "short_id": meta["short_id"],
            "short_cell_id": meta["short_cell_id"],
            "completed_at": meta.get("completed_at", ""),
        })

    # Rebuild index.jsonl
    index_path = results_root / "index.jsonl"
    with open(index_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(entry) + "\n" for entry in index_entries)

    print(f"Updated {updated} meta.json files")
    print(f"Rebuilt {index_path} with {len(index_entries)} entries")


if __name__ == "__main__":
    main()