commit fec57ee83892b809897124d42887597de29fa9b8
parent fe686981c1fb42be99b4b1a078c818496970dad6
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Mon, 6 Apr 2026 15:54:34 +0200
Fix serve process leak in gameplay bot eval
The gameplay bot starts an HTTP server (npx serve) for each eval run. If Playwright
times out or crashes, the afterAll cleanup never runs and the serve processes
accumulate. Found 684 orphaned serve processes consuming ~24 GB of memory.
Fix: use subprocess.Popen with start_new_session=True so that Playwright and its
child serve processes share a process group. Kill the entire group via os.killpg()
in a finally block, ensuring cleanup on success, failure, and timeout.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
| M | harness/run.py | | | 64 | +++++++++++++++++++++++++++++++++++++++++----------------------- |
1 file changed, 41 insertions(+), 23 deletions(-)
diff --git a/harness/run.py b/harness/run.py
@@ -21,6 +21,7 @@ Usage:
import json
import os
+import signal
import shutil
import subprocess
import sys
@@ -324,44 +325,61 @@ def evaluate(task_dir: Path, workspace: Path, cell: dict, run_dir: Path):
bot_env = os.environ.copy()
bot_env["WORKSPACE_PATH"] = str(workspace)
bot_env["REPORT_OUTPUT_PATH"] = str(report_path)
- bot_result = subprocess.run(
+ bot_proc = subprocess.Popen(
["npx", "playwright", "test", "--config", str(playwright_config)],
cwd=str(PROJECT_DIR),
- capture_output=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
text=True,
- timeout=180,
env=bot_env,
+ start_new_session=True,
)
- if report_path.exists():
- report_data = json.loads(report_path.read_text())
- summary = report_data.get("summary", {})
- results["gameplay_bot"] = {
- "pass": summary.get("failed", 1) == 0,
- "score": summary.get("score", 0),
- "total": summary.get("total", 0),
- "passed": summary.get("passed", 0),
- "failed": summary.get("failed", 0),
- "report": report_data,
- }
- else:
+ try:
+ stdout, stderr = bot_proc.communicate(timeout=180)
+ except subprocess.TimeoutExpired:
+ # Kill entire process group (playwright + child serve processes)
+ try:
+ os.killpg(os.getpgid(bot_proc.pid), signal.SIGTERM)
+ except Exception:
+ pass
+ bot_proc.kill()
+ bot_proc.wait()
results["gameplay_bot"] = {
"pass": False,
"score": 0,
- "error": f"Report file not created. Exit code: {bot_result.returncode}. "
- f"stderr: {bot_result.stderr[:1000]}",
+ "error": "Gameplay bot timed out after 180 seconds",
}
+ else:
+ if report_path.exists():
+ report_data = json.loads(report_path.read_text())
+ summary = report_data.get("summary", {})
+ results["gameplay_bot"] = {
+ "pass": summary.get("failed", 1) == 0,
+ "score": summary.get("score", 0),
+ "total": summary.get("total", 0),
+ "passed": summary.get("passed", 0),
+ "failed": summary.get("failed", 0),
+ "report": report_data,
+ }
+ else:
+ results["gameplay_bot"] = {
+ "pass": False,
+ "score": 0,
+ "error": f"Report file not created. Exit code: {bot_proc.returncode}. "
+ f"stderr: {stderr[:1000] if stderr else ''}",
+ }
+ finally:
+ # Always clean up the process group to prevent orphaned serve processes
+ try:
+ os.killpg(os.getpgid(bot_proc.pid), signal.SIGTERM)
+ except Exception:
+ pass
except FileNotFoundError:
results["gameplay_bot"] = {
"pass": False,
"score": 0,
"error": "Playwright (npx) not found. Install with: npm install -D @playwright/test",
}
- except subprocess.TimeoutExpired:
- results["gameplay_bot"] = {
- "pass": False,
- "score": 0,
- "error": "Gameplay bot timed out after 180 seconds",
- }
except Exception as e:
results["gameplay_bot"] = {
"pass": False,