Fix compute_grid OOM: fail on unknown profile, stream via generator, dispatch DOE designs - loop-benchmarking - Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.

commit e82be6aca0708fad30ff11975bd16e5be13f53ff
parent 6678831b7fac8cd35467d4539afc9ce70d68d388
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Wed, 15 Apr 2026 13:47:11 +0200

Fix compute_grid OOM: fail on unknown profile, stream via generator, dispatch DOE designs

Three fixes in one pass:

1. get_axes() used to silently fall back to the full top-level grid when
   given an unknown profile name. With 23 axes this expands to ~40B
   cartesian combinations, and the process OOMed the host (7.6GB+ before
   it swap-stormed into D-state). get_axes() now raises ValueError
   listing the known profiles.

2. compute_cells() accumulated every cell in a list before returning.
   Even though itertools.product is lazy, building the intermediate
   list defeats that laziness. compute_cells() is now a generator
   yielding one cell at a time.
   Streaming the 'full' profile now peaks at ~12MB RSS instead of
   unbounded growth. The only in-repo consumer (harness/run.py) already
   materializes via a list comprehension, so the change is transparent
   there.

3. compute_grid.py now recognizes the DOE design names (main_effects,
   plackett_burman, interaction_hunt) and dispatches to
   experiment_design.py. Previously 'compute_grid.py grid.yaml
   main_effects' triggered the silent fallback (bug #1) because
   main_effects is a design, not a profile. Now it produces the
   expected one-at-a-time sweep.

Unknown names now print the full list of valid profiles and designs
instead of silently misbehaving.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M harness/lib/compute_grid.py  | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------

1 file changed, 51 insertions(+), 28 deletions(-)
diff --git a/harness/lib/compute_grid.py b/harness/lib/compute_grid.py
@@ -95,14 +95,18 @@ def load_grid(path):
 
 
 def get_axes(grid, profile_name):
-    """Get axis values for a given profile, falling back to top-level axes."""
+    """Get axis values for a given profile. Raises if the profile is unknown."""
     top_axes = {name: spec["values"] for name, spec in grid["axes"].items()}
+    profiles = grid.get("profiles", {})
 
-    if profile_name not in grid.get("profiles", {}):
-        return top_axes
+    if profile_name not in profiles:
+        raise ValueError(
+            f"unknown profile '{profile_name}'. Known profiles: {sorted(profiles.keys())}"
+        )
 
-    profile = grid["profiles"][profile_name]
+    profile = profiles[profile_name]
     if "axes" not in profile:
+        # Profile intentionally omits axes (e.g. 'full') to use the full top-level grid.
         return top_axes
 
     # Profile axes override top-level axes
@@ -141,14 +145,17 @@ def apply_task_overrides(axes, task, grid):
 
 
 def compute_cells(grid, profile_name):
+    """Yield one cell dict at a time.
+
+    Streams the cartesian product so peak memory stays at O(1 cell) regardless
+    of profile size. Callers that need a list should wrap with list(...).
+    """
     base_axes = get_axes(grid, profile_name)
     runs_per_cell = get_runs_per_cell(grid, profile_name)
     exclusions = grid.get("exclusions", [])
     tasks = grid["tasks"]
     defaults = grid["defaults"]
 
-    cells = []
-
     for task in tasks:
         axes = apply_task_overrides(base_axes, task, grid)
         axis_names = sorted(axes.keys())
@@ -157,13 +164,7 @@ def compute_cells(grid, profile_name):
         for combo in product(*axis_values):
             cell = dict(zip(axis_names, combo))
 
-            # Check exclusions
-            excluded = False
-            for exclusion in exclusions:
-                if matches_exclusion(cell, exclusion):
-                    excluded = True
-                    break
-            if excluded:
+            if any(matches_exclusion(cell, e) for e in exclusions):
                 continue
 
             # actual_model = model (no mapping needed, models are their real names)
@@ -171,36 +172,58 @@ def compute_cells(grid, profile_name):
 
             # Build cell ID from task + abbreviated axis values (deterministic, filename-safe)
             cell_id_parts = [task] + [f"{AXIS_ABBREV.get(k, k)}={VALUE_ABBREV.get(str(cell[k]), cell[k])}" for k in axis_names]
-            cell_id = "_".join(cell_id_parts)
-
-            # Resolve budget value
-            budget_key = cell.get("max_budget", "low")
-            budget_usd = defaults["budget"].get(budget_key, 0.50)
+            cell["cell_id"] = "_".join(cell_id_parts)
 
             cell["task"] = task
-            cell["cell_id"] = cell_id
             cell["runs_per_cell"] = runs_per_cell
-            cell["max_budget_usd"] = budget_usd
+            cell["max_budget_usd"] = defaults["budget"].get(cell.get("max_budget", "low"), 0.50)
             cell["timeout_seconds"] = defaults["timeout_seconds"]
 
-            cells.append(cell)
+            yield cell
+
 
-    return cells
+DESIGNS = ("main_effects", "plackett_burman", "interaction_hunt")
 
 
 def main():
     if len(sys.argv) < 2:
-        print("Usage: compute_grid.py <grid_file> [profile]", file=sys.stderr)
+        print("Usage: compute_grid.py <grid_file> [profile|design] [design_args]", file=sys.stderr)
+        print("  interaction_hunt takes a 3rd arg: comma-separated axis names", file=sys.stderr)
         sys.exit(1)
 
     grid_file = sys.argv[1]
-    profile = sys.argv[2] if len(sys.argv) > 2 else "smoke"
+    name = sys.argv[2] if len(sys.argv) > 2 else "smoke"
 
     grid = load_grid(grid_file)
-    cells = compute_cells(grid, profile)
-
-    for cell in cells:
-        print(json.dumps(cell))
+    profiles = grid.get("profiles", {})
+
+    if name in profiles:
+        for cell in compute_cells(grid, name):
+            print(json.dumps(cell))
+        return
+
+    if name in DESIGNS:
+        from experiment_design import (
+            main_effects_plan,
+            plackett_burman_plan,
+            interaction_hunt_plan,
+        )
+        if name == "main_effects":
+            cells = main_effects_plan(grid)
+        elif name == "plackett_burman":
+            cells = plackett_burman_plan(grid)
+        else:  # interaction_hunt
+            if len(sys.argv) < 4:
+                print("ERROR: interaction_hunt requires comma-separated axis names as 3rd arg", file=sys.stderr)
+                sys.exit(1)
+            cells = interaction_hunt_plan(grid, sys.argv[3].split(","))
+        for cell in cells:
+            print(json.dumps(cell))
+        return
+
+    known = sorted(profiles.keys()) + list(DESIGNS)
+    print(f"ERROR: unknown profile or design '{name}'. Known: {known}", file=sys.stderr)
+    sys.exit(1)
 
 
 if __name__ == "__main__":

	loop-benchmarking Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
	git clone https://git.shiptheloop.com/loop-benchmarking.git
	Log \| Files \| Refs \| README