commit 7fbe88ce2a1febb0954305d10f4e1878570e0f14
parent 53c719fefdf6f437deb0b34eb1b8dbff56d06643
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Thu, 9 Apr 2026 11:48:33 +0200
Verify game interactivity via DOM + screenshot after start detection
Start detection now requires the game to respond to gameplay inputs
(ArrowLeft/Right/Down) before confirming a mechanism worked. Checks
both screenshot changes AND DOM state changes (class names, styles on
grid children). This catches:
- False starts from Space (visual change but not interactive)
- Games that rebuild DOM via innerHTML (screenshot identical but DOM differs)
Spanish game e2e04e75 now correctly starts via "Iniciar Juego" button
(was falsely starting via Space). Score went from 18% to 75%.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
1 file changed, 109 insertions(+), 12 deletions(-)
diff --git a/tasks/tetris/eval/gameplay-bot/calibrate.ts b/tasks/tetris/eval/gameplay-bot/calibrate.ts
@@ -314,6 +314,75 @@ async function detectVisualChange(
}
/**
+ * Verify that the game is actually interactive -- gameplay inputs cause
+ * visible state changes. Intended to distinguish a truly started game from
+ * animations, overlays, or other false positives.
+ *
+ * Presses ArrowLeft, ArrowRight, then ArrowDown; after each key, returns true if the
+ * screenshot or DOM snapshot differs from the pre-input baseline. Otherwise (or on error) false.
+ */
+async function verifyInteractivity(page: Page): Promise<boolean> {
+ try {
+ // Pause briefly (waitForTimeout(200)) so the page can render before the baseline is captured
+ await page.waitForTimeout(200);
+
+ // Capture the pre-input baseline: a screenshot plus a DOM snapshot string
+ const baseline = await page.screenshot();
+ const domBefore = await page.evaluate(() => {
+ // For each board/grid/field-like element (or table), serialize its direct children's className + inline style; keep the longest result
+ const candidates = document.querySelectorAll(
+ '[class*="board"], [class*="grid"], [class*="field"], [id*="board"], [id*="grid"], table'
+ );
+ let best = "";
+ for (const el of candidates) {
+ const snap = Array.from(el.children).map(c =>
+ (c as HTMLElement).className + (c as HTMLElement).style.cssText
+ ).join("|");
+ if (snap.length > best.length) best = snap;
+ }
+ // Fallback when nothing matched or every snapshot was empty: the first 5000 chars of body.innerHTML (raw markup, not a hash)
+ if (!best) best = document.body.innerHTML.substring(0, 5000);
+ return best;
+ });
+
+ // Press each key in turn and stop at the first one that produces an observable change
+ for (const key of ["ArrowLeft", "ArrowRight", "ArrowDown"]) {
+ await page.keyboard.press(key);
+ await page.waitForTimeout(200);
+
+ // Screenshot differs from the baseline -> treat as responsive. NOTE(review): a change not caused by the key (e.g. a running animation) also passes
+ const after = await page.screenshot();
+ if (!baseline.equals(after)) {
+ return true;
+ }
+
+ // Re-run the same DOM snapshot as domBefore (catches games where the screenshot is
+ // identical but child classes/styles changed -- e.g. innerHTML-rebuilt grids)
+ const domAfter = await page.evaluate(() => {
+ const candidates = document.querySelectorAll(
+ '[class*="board"], [class*="grid"], [class*="field"], [id*="board"], [id*="grid"], table'
+ );
+ let best = "";
+ for (const el of candidates) {
+ const snap = Array.from(el.children).map(c =>
+ (c as HTMLElement).className + (c as HTMLElement).style.cssText
+ ).join("|");
+ if (snap.length > best.length) best = snap;
+ }
+ if (!best) best = document.body.innerHTML.substring(0, 5000);
+ return best;
+ });
+ if (domAfter !== domBefore) {
+ return true;
+ }
+ }
+ return false; // no key changed the screenshot or the DOM snapshot
+ } catch {
+ return false; // any thrown error is reported as "not interactive"
+ }
+}
+
+/**
* Cluster adjacent points using flood fill.
* Two points are adjacent if they differ by at most 1 in both row and column.
*/
@@ -395,7 +464,11 @@ async function detectStartMechanism(page: Page): Promise<StartDetectionResult> {
const result = await detectVisualChange(page, { frames: 6, intervalMs: 200 });
log(`Phase 1 result: changed=${result.changed}`);
if (result.changed) {
- return { mechanism: "auto" };
+ const interactive = await verifyInteractivity(page);
+ if (interactive) {
+ return { mechanism: "auto" };
+ }
+ log("Phase 1: visual change detected but game not interactive (animation?)");
}
}
@@ -577,6 +650,17 @@ async function tryDomButtons(
const result = await detectVisualChange(page, { frames: 3, intervalMs: 100, before });
console.log(`[start-detect] After click "${info.text}": changed=${result.changed}`);
if (result.changed) {
+ // Wait for the game to fully initialize after button click
+ await page.waitForTimeout(300);
+ // Verify the game is actually interactive after clicking this button
+ const interactive = await verifyInteractivity(page);
+ if (!interactive) {
+ console.log(`[start-detect] Button "${info.text}" caused visual change but game not interactive, continuing...`);
+ // Try pressing Escape to undo and continue
+ try { await page.keyboard.press("Escape"); await page.waitForTimeout(50); } catch {}
+ continue;
+ }
+
// Check if the element disappeared after clicking
const disappeared = await page.evaluate(
({ selector }) => {
@@ -676,15 +760,19 @@ async function tryCanvasClicks(
const result = await detectVisualChange(page, { frames: 3, intervalMs: 100, before });
if (result.changed) {
- return {
- mechanism: "click_canvas",
- startButton: {
- selector: "canvas",
- text: `canvas click at ${pos.label}`,
- disappeared: false,
- position: { x: Math.round(pos.x), y: Math.round(pos.y) },
- },
- };
+ const interactive = await verifyInteractivity(page);
+ if (interactive) {
+ return {
+ mechanism: "click_canvas",
+ startButton: {
+ selector: "canvas",
+ text: `canvas click at ${pos.label}`,
+ disappeared: false,
+ position: { x: Math.round(pos.x), y: Math.round(pos.y) },
+ },
+ };
+ }
+ console.log(`[start-detect] Canvas click at ${pos.label} caused change but not interactive`);
}
} catch { /* continue */ }
}
@@ -719,7 +807,12 @@ async function tryKeyboardTriggers(
const result = await detectVisualChange(page, { frames: 3, intervalMs: 100, before });
if (result.changed) {
- return { mechanism: mechanismMap[key] };
+ // Verify the game is actually interactive, not just an animation
+ const interactive = await verifyInteractivity(page);
+ if (interactive) {
+ return { mechanism: mechanismMap[key] };
+ }
+ console.log(`[start-detect] ${key} caused visual change but game not interactive, continuing...`);
}
} catch { /* continue */ }
}
@@ -745,7 +838,11 @@ async function tryKeyboardTriggers(
const result = await detectVisualChange(page, { frames: 3, intervalMs: 100, before });
if (result.changed) {
- return { mechanism: mechanismMap[key] };
+ const interactive = await verifyInteractivity(page);
+ if (interactive) {
+ return { mechanism: mechanismMap[key] };
+ }
+ console.log(`[start-detect] ${key}+click caused visual change but game not interactive, continuing...`);
}
} catch { /* continue */ }
}