Add --commit-every N flag for periodic analyze+push - loop-benchmarking - Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.

commit 7b4564c84f68f63d9b0de348c5fa77c396fe7650
parent 9151010083c5ed6be0381775a8dd0c8c3f55a792
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Mon,  6 Apr 2026 20:03:37 +0200

Add --commit-every N flag for periodic analyze+push

Runs analysis and pushes results every N completed runs.
Useful for long sweeps where you want to see results incrementally.

Usage: python3 harness/run.py grid.yaml main_effects --provider zai -j 4 --commit-every 20

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M harness/run.py  | 34 ++++++++++++++++++++++++++++++++++

1 file changed, 34 insertions(+), 0 deletions(-)
diff --git a/harness/run.py b/harness/run.py
@@ -747,6 +747,7 @@ def main():
     baseline_model = None
     provider_filter = None
     max_runs = None
+    commit_every = None
     grid_file = str(PROJECT_DIR / "grid.yaml")
     profile = "smoke"
 
@@ -768,6 +769,9 @@ def main():
         elif args[i] in ("-n", "--max-runs") and i + 1 < len(args):
             max_runs = int(args[i + 1])
             i += 2
+        elif args[i] == "--commit-every" and i + 1 < len(args):
+            commit_every = int(args[i + 1])
+            i += 2
         elif args[i] == "--reeval":
             do_reeval = True
             i += 1
@@ -870,6 +874,34 @@ def main():
         print(f"Total jobs:  {len(jobs)}")
     print()
 
+    # Periodic checkpoint helper for --commit-every: analyze, commit, push.
+    _last_commit_count = [0]  # one-element list so the closure below can mutate it
+
+    def periodic_commit(completed_so_far):
+        """Once `commit_every` more runs have completed, re-run analysis and push results."""
+        if not commit_every or completed_so_far - _last_commit_count[0] < commit_every:
+            return
+        _last_commit_count[0] = completed_so_far
+        log(f"  --- Checkpoint: analyzing and pushing {completed_so_far} completed runs ---")
+        analysis_dir = results_dir / "analysis"
+        analysis_dir.mkdir(exist_ok=True)
+        for metric in ["score", "cost", "turns", "wall_time", "gameplay", "sonarqube", "code_quality", "structural", "transcript", "build_quality"]:
+            try:
+                effects = analyze_main_effects(str(results_dir), metric)
+                (analysis_dir / f"main_effects_{metric}.json").write_text(json.dumps(effects, indent=2))
+            except Exception as e:  # best-effort: report the metric that failed and keep going
+                log(f"  --- Checkpoint analysis failed for {metric}: {e} ---")
+        # git commit is left unchecked (it exits non-zero when nothing is staged); only push is checked.
+        try:
+            subprocess.run(["git", "add", "-A", "results/", "artifacts/"], cwd=str(PROJECT_DIR), capture_output=True, timeout=30)
+            total_runs = len(list((results_dir / "runs").iterdir()))
+            msg = f"Checkpoint: {completed_so_far} runs ({total_runs} total)"
+            subprocess.run(["git", "commit", "-m", msg], cwd=str(PROJECT_DIR), capture_output=True, timeout=30)
+            subprocess.run(["git", "push"], cwd=str(PROJECT_DIR), capture_output=True, timeout=60, check=True)
+            log(f"  --- Pushed checkpoint ---")
+        except Exception as e:
+            log(f"  --- Checkpoint push failed: {e} ---")
+
     # Start auth keepalive in background (refreshes OAuth token every 5 min)
     auth_keepalive = subprocess.Popen(
         ["bash", str(SCRIPT_DIR / "lib" / "keep-auth-alive.sh"), "300"],
@@ -892,6 +924,7 @@ def main():
                 skipped += 1
             else:
                 failed += 1
+            periodic_commit(completed)
     else:
         # Parallel with rolling concurrency
         with ThreadPoolExecutor(max_workers=parallel) as executor:
@@ -919,6 +952,7 @@ def main():
 
                     total_done = completed + skipped + failed
                     log(f"  Progress: {total_done}/{len(jobs)} ({completed} completed, {skipped} skipped, {failed} failed)")
+                    periodic_commit(completed)
 
     # Stop auth keepalive
     auth_keepalive.terminate()

	loop-benchmarking Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
	git clone https://git.shiptheloop.com/loop-benchmarking.git
	Log \| Files \| Refs \| README