commit 0a40a42daa93a0cd9f1f7824fcd49ed4b7ae45a6
parent c025b7ca4c96967759955589f80afbfe941906ab
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Sat, 4 Apr 2026 10:46:47 +0200
Fix pipeline: reeval only when explicitly requested, auto-analyze on new runs
- --reeval: only runs when you pass the flag (for changed eval scripts)
- Analysis runs automatically whenever any new runs have completed (no flag needed)
- Normal sweep: run experiments -> analyze -> commit. No wasted reeval.
- --full-pipeline: reeval everything + analyze (for eval script changes)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/harness/run.py b/harness/run.py
@@ -749,10 +749,10 @@ def main():
print(f"Completed: {completed} | Skipped: {skipped} | Failed: {failed}")
print("=" * 40)
- # Re-evaluate all runs with latest eval scripts
- if do_reeval or (completed > 0 and do_analyze):
+ # Re-evaluate all runs with latest eval scripts (only when explicitly requested)
+ if do_reeval:
print()
- print("Re-evaluating all runs with latest eval scripts...")
+ print("Re-evaluating ALL runs with latest eval scripts...")
reeval_result = subprocess.run(
["python3", str(SCRIPT_DIR / "reeval.py"), str(results_dir), "-j", str(max(parallel, 4))],
cwd=str(PROJECT_DIR),
@@ -763,7 +763,7 @@ def main():
print("Re-evaluation had errors (continuing).")
# Run analysis and save results
- if do_analyze:
+ if do_analyze or completed > 0:
print()
print("Running analysis...")
analysis_dir = results_dir / "analysis"