loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

commit 2d012184ea2e786f842637b1f6501ccc3dd0802b
parent 2f6ed75dc4e056efff1ea561d015f298ff63592b
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Tue,  7 Apr 2026 18:04:07 +0200

Add --runs-per-cell flag to override runs_per_cell from grid.yaml

Usage: --runs-per-cell 1 for broad coverage, backfill later with 3.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M harness/run.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/harness/run.py b/harness/run.py
@@ -747,6 +747,7 @@ def main():
     provider_filter = None
     max_runs = None
     commit_every = None
+    override_runs_per_cell = None
     grid_file = str(PROJECT_DIR / "grid.yaml")
     profile = "smoke"
@@ -771,6 +772,9 @@ def main():
         elif args[i] == "--commit-every" and i + 1 < len(args):
             commit_every = int(args[i + 1])
             i += 2
+        elif args[i] == "--runs-per-cell" and i + 1 < len(args):
+            override_runs_per_cell = int(args[i + 1])
+            i += 2
         elif args[i] == "--reeval":
             do_reeval = True
             i += 1
@@ -864,7 +868,7 @@ def main():
     # Build the full list of (cell, run_num) jobs
     jobs = []
     for cell in cells:
-        runs_per_cell = cell.get("runs_per_cell", 3)
+        runs_per_cell = override_runs_per_cell or cell.get("runs_per_cell", 3)
         for run_num in range(1, runs_per_cell + 1):
             jobs.append((cell, run_num))

Impressum · Datenschutz