migrate-run-ids.py - loop-benchmarking - Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.

migrate-run-ids.py (5963B)
      1 #!/usr/bin/env python3
      2 """Migrate legacy run IDs to new abbreviated format.
      3 
      4 Renames results/runs/ directories, artifacts/ directories,
      5 updates meta.json with new IDs, and rebuilds results/index.jsonl.
      6 
      7 Also normalizes sub_agents -> strategy and playwright on -> available.
      8 
      9 Usage:
     10     python3 harness/migrate-run-ids.py [--dry-run]
     11 """
     12 
     13 import json
     14 import sys
     15 from pathlib import Path
     16 
     17 PROJECT_DIR = Path(__file__).resolve().parent.parent
     18 
     19 sys.path.insert(0, str(PROJECT_DIR / "harness" / "lib"))
     20 from compute_grid import AXIS_ABBREV, VALUE_ABBREV
     21 
     22 
     23 def compute_new_cell_id(meta: dict) -> str:
     24     """Compute the new abbreviated cell_id from meta fields."""
     25     task = meta.get("task", "tetris")
     26     # All axis names that should appear in cell_id
     27     skip_keys = {
     28         "task", "cell_id", "run_id", "run_number", "runs_per_cell",
     29         "max_budget_usd", "timeout_seconds", "base_tools",
     30         "started_at", "completed_at", "wall_time_seconds", "exit_code",
     31         "claude_version", "short_id", "short_cell_id", "actual_model",
     32     }
     33     axis_names = sorted(k for k in meta.keys() if k not in skip_keys)
     34     actual_model = meta.get("actual_model", meta.get("model", ""))
     35     parts = [task]
     36     for k in axis_names:
     37         val = meta[k]
     38         if k == "model":
     39             val = actual_model  # use resolved model name
     40         abbrev_key = AXIS_ABBREV.get(k, k)
     41         abbrev_val = VALUE_ABBREV.get(str(val), str(val))
     42         parts.append(f"{abbrev_key}={abbrev_val}")
     43     return "_".join(parts)
     44 
     45 
     46 def normalize_meta(meta: dict) -> dict:
     47     """Normalize old schema fields to new format."""
     48     # sub_agents -> strategy
     49     if "sub_agents" in meta and "strategy" not in meta:
     50         meta["strategy"] = "use_subagents" if meta["sub_agents"] == "on" else "none"
     51         del meta["sub_agents"]
     52 
     53     # playwright on -> available
     54     if meta.get("playwright") == "on":
     55         meta["playwright"] = "available"
     56 
     57     # Rename legacy model names to versioned
     58     model_rename = {"haiku": "haiku-4.5", "sonnet": "sonnet-4.6", "opus": "opus-4.6"}
     59     if meta.get("model") in model_rename:
     60         meta["model"] = model_rename[meta["model"]]
     61     if meta.get("actual_model") in model_rename:
     62         meta["actual_model"] = model_rename[meta["actual_model"]]
     63 
     64     # Add defaults for new axes
     65     defaults = {
     66         "tests_provided": "none",
     67         "strategy": "none",
     68         "design_guidance": "none",
     69         "architecture": "none",
     70         "error_checking": "none",
     71         "context_noise": "clean",
     72         "renderer": "none",
     73     }
     74     for key, default in defaults.items():
     75         if key not in meta:
     76             meta[key] = default
     77 
     78     return meta
     79 
     80 
     81 def main():
     82     dry_run = "--dry-run" in sys.argv
     83 
     84     runs_dir = PROJECT_DIR / "results" / "runs"
     85     artifacts_dir = PROJECT_DIR / "artifacts"
     86 
     87     if not runs_dir.exists():
     88         print("No results/runs/ directory found")
     89         return
     90 
     91     renames = []
     92     for run_dir in sorted(runs_dir.iterdir()):
     93         if not run_dir.is_dir():
     94             continue
     95 
     96         meta_path = run_dir / "meta.json"
     97         if not meta_path.exists():
     98             continue
     99 
    100         meta = json.loads(meta_path.read_text())
    101         old_run_id = meta.get("run_id", run_dir.name)
    102 
    103         # Extract run number from directory name
    104         run_num = ""
    105         if "_run" in run_dir.name:
    106             run_num = "_run" + run_dir.name.rsplit("_run", 1)[1]
    107 
    108         # Normalize meta
    109         meta = normalize_meta(meta)
    110 
    111         # Compute new cell_id
    112         new_cell_id = compute_new_cell_id(meta)
    113         new_run_id = new_cell_id + run_num
    114 
    115         if new_run_id == run_dir.name:
    116             continue  # Already in new format
    117 
    118         new_run_dir = runs_dir / new_run_id
    119         if new_run_dir.exists():
    120             print(f"  SKIP (target exists): {run_dir.name} -> {new_run_id}")
    121             continue
    122 
    123         renames.append({
    124             "old_dir": run_dir,
    125             "new_dir": new_run_dir,
    126             "old_run_id": old_run_id,
    127             "new_run_id": new_run_id,
    128             "new_cell_id": new_cell_id,
    129             "meta": meta,
    130             "meta_path": meta_path,
    131         })
    132 
    133     print(f"Found {len(renames)} runs to migrate")
    134 
    135     if dry_run:
    136         for r in renames[:5]:
    137             print(f"  {r['old_dir'].name}")
    138             print(f"  -> {r['new_run_id']}")
    139             print()
    140         if len(renames) > 5:
    141             print(f"  ... and {len(renames) - 5} more")
    142         return
    143 
    144     migrated = 0
    145     for r in renames:
    146         # Update meta.json
    147         meta = r["meta"]
    148         meta["cell_id"] = r["new_cell_id"]
    149         meta["run_id"] = r["new_run_id"]
    150         r["meta_path"].write_text(json.dumps(meta, indent=2))
    151 
    152         # Rename run directory
    153         r["old_dir"].rename(r["new_dir"])
    154 
    155         # Rename artifact directory if it exists
    156         old_artifact = artifacts_dir / r["old_run_id"]
    157         if old_artifact.exists():
    158             new_artifact = artifacts_dir / r["new_run_id"]
    159             if not new_artifact.exists():
    160                 old_artifact.rename(new_artifact)
    161 
    162         migrated += 1
    163 
    164     print(f"Migrated {migrated} runs")
    165 
    166     # Rebuild index
    167     index_path = PROJECT_DIR / "results" / "index.jsonl"
    168     count = 0
    169     with open(index_path, "w") as f:
    170         for run_dir in sorted(runs_dir.iterdir()):
    171             meta_path = run_dir / "meta.json"
    172             eval_path = run_dir / "eval_results.json"
    173             if meta_path.exists() and eval_path.exists():
    174                 meta = json.loads(meta_path.read_text())
    175                 entry = {
    176                     "run_id": meta.get("run_id", run_dir.name),
    177                     "task": meta.get("task"),
    178                     "model": meta.get("model"),
    179                     "cell_id": meta.get("cell_id"),
    180                     "completed_at": meta.get("completed_at"),
    181                 }
    182                 f.write(json.dumps(entry) + "\n")
    183                 count += 1
    184     print(f"Rebuilt index with {count} entries")
    185 
    186 
    187 if __name__ == "__main__":
    188     main()
	loop-benchmarking Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
	git clone https://git.shiptheloop.com/loop-benchmarking.git
	Log \| Files \| Refs \| README