commit d42b0c8c388e6c37c066c61580ddc83ca243222d
parent 9a92d93df24f576c70bd662ec11a72aa97951b93
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Tue, 7 Apr 2026 12:34:56 +0200
Stop deleting turns=1 and timeout runs as invalid
GLM models legitimately complete in 1 turn. Timeouts can produce
valid work. Only reject: turns=0 (no work), missing output, invalid API key.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
2 files changed, 32 insertions(+), 48 deletions(-)
diff --git a/harness/clean-and-reeval.py b/harness/clean-and-reeval.py
@@ -48,12 +48,10 @@ def clean_bad_runs():
exit_code = meta.get("exit_code")
result_text = output.get("result", "")
- if cost in (None, 0):
- bad, reason = True, f"cost={cost}"
- elif turns in (None, 1):
- bad, reason = True, f"turns={turns}"
- elif exit_code == 124:
- bad, reason = True, "timeout"
+ if cost is None and turns in (None, 0):
+ bad, reason = True, "no cost and no turns"
+ elif turns in (None, 0) and (cost is None or cost == 0):
+ bad, reason = True, f"turns={turns}, cost={cost}"
elif "Invalid API key" in str(result_text):
bad, reason = True, "invalid API key"
diff --git a/harness/run.py b/harness/run.py
@@ -591,58 +591,44 @@ def log(msg: str):
def is_valid_run(run_dir: Path) -> bool:
"""Check whether a completed run directory contains valid results.
- Returns False (invalid) if any of these are true:
- - claude_output.json has total_cost_usd of 0, null, or missing
- - claude_output.json has num_turns of 1, null, or missing (real runs always have >1)
- - meta.json has exit_code of 124 (timeout)
- - claude_output.json contains "Invalid API key" in the result field
- - transcript.jsonl has fewer than 5 lines (too short to be a real session)
+ Returns False (invalid) only for unambiguous failures:
+ - claude_output.json missing, unreadable, or not valid JSON
+ - num_turns is 0 or null (no work done at all)
+ - "Invalid API key" in the result field
+ - transcript.jsonl missing, unreadable, or fewer than 3 lines
+
+ Does NOT reject: turns=1 (GLM models complete in 1 turn),
+ timeouts (may have produced valid work), cost=0 with turns>0.
"""
- # Check meta.json for timeout
- meta_path = run_dir / "meta.json"
- if meta_path.exists():
- try:
- meta = json.loads(meta_path.read_text())
- if meta.get("exit_code") == 124:
- return False
- except (json.JSONDecodeError, OSError):
- return False
-
- # Check transcript.jsonl line count
+ # Check transcript exists and has at least 3 lines
transcript_path = run_dir / "transcript.jsonl"
- if transcript_path.exists():
- try:
- lines = transcript_path.read_text().strip().split("\n")
- if len(lines) < 5:
- return False
- except OSError:
+ if not transcript_path.exists():
+ return False
+ try:
+ lines = transcript_path.read_text().strip().split("\n")
+ if len(lines) < 3:
return False
- else:
+ except OSError:
return False
# Check claude_output.json
output_path = run_dir / "claude_output.json"
- if output_path.exists():
- try:
- output = json.loads(output_path.read_text())
- except (json.JSONDecodeError, OSError):
- return False
+ if not output_path.exists():
+ return False
- # total_cost_usd: 0, null, or missing
- cost = output.get("total_cost_usd")
- if not cost: # catches None, 0, 0.0, and missing (None from .get)
- return False
+ try:
+ output = json.loads(output_path.read_text())
+ except (json.JSONDecodeError, OSError):
+ return False
- # num_turns: 1, null, or missing
- num_turns = output.get("num_turns")
- if num_turns is None or num_turns <= 1:
- return False
+ # num_turns: 0 or null = no work done
+ num_turns = output.get("num_turns")
+ if num_turns is None or num_turns == 0:
+ return False
- # "Invalid API key" in result field
- result_text = output.get("result", "")
- if isinstance(result_text, str) and "Invalid API key" in result_text:
- return False
- else:
+ # "Invalid API key" in result field
+ result_text = output.get("result", "")
+ if isinstance(result_text, str) and "Invalid API key" in result_text:
return False
return True