loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

commit 23bc276e52ad4b72483d00e988f165fe956483fb
parent 507ce236a8d395f65614eb65e9b10ab32c6438d2
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Tue,  7 Apr 2026 13:56:36 +0200

Increase gameplay bot timeout to 300s (was 180s)

Falling piece detector needs more time to try multiple start triggers.
Playwright timeout 120s -> 240s, harness timeout 180s -> 300s.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M harness/run.py                        | 4 ++--
M tasks/tetris/eval/playwright.config.ts | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/harness/run.py b/harness/run.py
@@ -446,7 +446,7 @@ def evaluate(task_dir: Path, workspace: Path, cell: dict, run_dir: Path):
         start_new_session=True,
     )
     try:
-        stdout, stderr = bot_proc.communicate(timeout=180)
+        stdout, stderr = bot_proc.communicate(timeout=300)
     except subprocess.TimeoutExpired:
         # Kill entire process group (playwright + child serve processes)
         try:
@@ -458,7 +458,7 @@ def evaluate(task_dir: Path, workspace: Path, cell: dict, run_dir: Path):
         results["gameplay_bot"] = {
             "pass": False,
             "score": 0,
-            "error": "Gameplay bot timed out after 180 seconds",
+            "error": "Gameplay bot timed out after 300 seconds",
         }
     else:
         if report_path.exists():
diff --git a/tasks/tetris/eval/playwright.config.ts b/tasks/tetris/eval/playwright.config.ts
@@ -3,7 +3,7 @@ import { defineConfig } from "@playwright/test";
 export default defineConfig({
   testDir: "./gameplay-bot",
   testMatch: "index.ts",
-  timeout: 120_000, // 2 minutes per test
+  timeout: 240_000, // 4 minutes per test
   retries: 0,
   workers: 1, // sequential -- only one game at a time
   reporter: [["list"]],

Impressum · Datenschutz