loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

commit 625d14b3b226e882d25a00909c6d47ab82d0080b
parent e59ff443edb659c9d21d3fef8d708bd29176f827
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Wed,  8 Apr 2026 07:17:24 +0200

Fix argument list too long for noise cells

Large prompts (>100KB from context noise) exceeded OS arg limit.
Now writes prompt to temp file and uses bash -c with cat for large prompts.
Also deleted 20 gemma runs with 403 auth errors.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M harness/run.py | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/harness/run.py b/harness/run.py
@@ -23,6 +23,7 @@ Usage:
 import hashlib
 import json
 import os
+import shlex
 import signal
 import shutil
 import subprocess
@@ -241,10 +242,16 @@ def invoke_claude(cell: dict, workspace: Path, run_dir: Path, project_dir: Path,
     # Auth helper for --bare mode
     auth_helper = str(SCRIPT_DIR / "lib" / "get-oauth-token.sh")
 
-    cmd = [
+    # For large prompts (noise cells), write to temp file and read via shell
+    prompt_file = None
+    if len(prompt) > 100000:
+        prompt_file = Path(tempfile.mktemp(suffix=".txt", prefix="prompt-"))
+        prompt_file.write_text(prompt)
+
+    # Build base command (prompt added separately for large prompts)
+    cmd_base = [
         "claude",
         "--bare",
-        "-p", prompt,
         "--model", cli_model,
         "--output-format", "stream-json",
         "--verbose",
@@ -255,13 +262,22 @@ def invoke_claude(cell: dict, workspace: Path, run_dir: Path, project_dir: Path,
     ]
 
     if effort:
-        cmd.extend(["--effort", effort])
+        cmd_base.extend(["--effort", effort])
 
     # Context file
     if cell.get("context_file") == "provided":
         ctx_file = project_dir / "tasks" / cell["task"] / "context.md"
         if ctx_file.exists():
-            cmd.extend(["--append-system-prompt", ctx_file.read_text()])
+            cmd_base.extend(["--append-system-prompt", ctx_file.read_text()])
+
+    # Build final command: for large prompts, use shell to read from file
+    if prompt_file:
+        # Use shell to cat the prompt file into -p to avoid arg list limit
+        cmd_str = " ".join(shlex.quote(c) for c in cmd_base)
+        cmd = ["bash", "-c", f'{cmd_str} -p "$(cat {shlex.quote(str(prompt_file))})"']
+        use_shell = False  # already wrapped in bash -c
+    else:
+        cmd = [*cmd_base, "-p", prompt]
 
     # Run claude
     transcript_path = run_dir / "transcript.jsonl"

Impressum · Datenschutz