loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

commit 9151010083c5ed6be0381775a8dd0c8c3f55a792
parent 96ae9ecef7c6590632967d2aa49cf464c5c2a30f
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Mon,  6 Apr 2026 19:36:24 +0200

Add -n/--max-runs flag to limit total runs

Usage: python3 harness/run.py grid.yaml main_effects --provider zai -n 10 -j 4

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M harness/run.py | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/harness/run.py b/harness/run.py
@@ -746,6 +746,7 @@ def main():
     parallel = 1
     baseline_model = None
     provider_filter = None
+    max_runs = None
     grid_file = str(PROJECT_DIR / "grid.yaml")
     profile = "smoke"
 
@@ -764,6 +765,9 @@ def main():
         elif args[i] == "--provider" and i + 1 < len(args):
             provider_filter = args[i + 1]
             i += 2
+        elif args[i] in ("-n", "--max-runs") and i + 1 < len(args):
+            max_runs = int(args[i + 1])
+            i += 2
         elif args[i] == "--reeval":
             do_reeval = True
             i += 1
@@ -859,7 +863,11 @@ def main():
         for run_num in range(1, runs_per_cell + 1):
             jobs.append((cell, run_num))
 
-    print(f"Total jobs: {len(jobs)}")
+    if max_runs and len(jobs) > max_runs:
+        jobs = jobs[:max_runs]
+        print(f"Total jobs: {len(jobs)} (limited by -n {max_runs})")
+    else:
+        print(f"Total jobs: {len(jobs)}")
     print()
 
     # Start auth keepalive in background (refreshes OAuth token every 5 min)

Impressum · Datenschutz