commit 8dc9ec566791cf32913b7ea8f3ba37a789ef0b86
parent 14d5747dc2f92d77eaa7655145f2fe71bcde4d0a
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Fri, 10 Apr 2026 18:58:30 +0200
V2 fix: handle absolute-positioned active piece overlays
Game 8fe72fce uses absolute-positioned div overlays for the falling
piece, separate from the 200 grid cells. The grid reader was missing
the active piece because it only read the first 200 children.
Fix:
- Added refreshGridDetection() in driver: re-detects grid without
full re-calibration, called by verifyGameStarted() after start clicks
- readDomGrid() now reads overlay children (>200 children) and computes
which grid cell each absolute-positioned overlay falls into
- Widened child-count ranges from 180-220 to 180-230 to accommodate overlays
- Added screenshotGridArea() and captureGridDomFingerprint() as fallback
signals for verifyGameStarted when grid-based detection misses
Results: 8fe72fce 0% -> 95% (matches human's 20/20).
Overall V2 vs human: 82% -> 86% agreement.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
3 files changed, 305 insertions(+), 10 deletions(-)
diff --git a/tasks/tetris/eval/gameplay-bot-v2/bot.ts b/tasks/tetris/eval/gameplay-bot-v2/bot.ts
@@ -1915,8 +1915,15 @@ async function verifyGameStarted(driver: TetrisDriver): Promise<{
}
} catch { /* continue */ }
- // 2. tryStartMechanism() populated a minimal calibration for us. If it
- // couldn't find a grid, the candidate is not a real start.
+ // 2. tryStartMechanism() populated a minimal calibration for us, but some
+ // games create their grid cells dynamically inside an animation frame
+ // after the start button is clicked -- the initial detectGrid() can run
+ // before the grid is fully populated. Refresh grid detection now so we
+ // pick up any cells that appeared in the meantime.
+ try {
+ await driver.refreshGridDetection();
+ } catch { /* leave whatever tryStartMechanism populated */ }
+
let cal;
try {
cal = driver.getCalibration();
@@ -1947,26 +1954,55 @@ async function verifyGameStarted(driver: TetrisDriver): Promise<{
}
// 4. Evidence: press ArrowLeft and see if the grid changes (movement works).
+ // Capture pixel + DOM fingerprint snapshots at each step so we can fall
+ // back to either signal for games that render the active piece outside
+ // the cell layout (e.g. absolute-positioned divs floating over the grid).
+ // Pixel diff is clipped to the grid area; DOM fingerprint catches changes
+ // even when the piece is currently off-screen.
let movementSeen = false;
+ let movementPixelsChanged = false;
+ let movementDomChanged = false;
try {
+ const pxBefore = await driver.screenshotGridArea();
+ const fpBefore = await driver.captureGridDomFingerprint();
const before = await driver.readGrid();
await driver.pressKey("left");
await driver.wait(250);
const after = await driver.readGrid();
+ const pxAfter = await driver.screenshotGridArea();
+ const fpAfter = await driver.captureGridDomFingerprint();
if (before.grid && after.grid && driver.gridsAreDifferent(before.grid, after.grid)) {
movementSeen = true;
}
+ if (pxBefore && pxAfter && !pxBefore.equals(pxAfter)) {
+ movementPixelsChanged = true;
+ }
+ if (fpBefore && fpAfter && fpBefore !== fpAfter) {
+ movementDomChanged = true;
+ }
} catch { /* fall through to auto-drop check */ }
// 5. Evidence: wait 1.1s and see if the grid changes on its own (auto-drop).
let autoDropSeen = false;
+ let autoDropPixelsChanged = false;
+ let autoDropDomChanged = false;
try {
+ const pxBefore = await driver.screenshotGridArea();
+ const fpBefore = await driver.captureGridDomFingerprint();
const before = await driver.readGrid();
await driver.wait(1100);
const after = await driver.readGrid();
+ const pxAfter = await driver.screenshotGridArea();
+ const fpAfter = await driver.captureGridDomFingerprint();
if (before.grid && after.grid && driver.gridsAreDifferent(before.grid, after.grid)) {
autoDropSeen = true;
}
+ if (pxBefore && pxAfter && !pxBefore.equals(pxAfter)) {
+ autoDropPixelsChanged = true;
+ }
+ if (fpBefore && fpAfter && fpBefore !== fpAfter) {
+ autoDropDomChanged = true;
+ }
} catch { /* fall through */ }
// 6. Second chance at game-over after interaction.
@@ -1987,6 +2023,32 @@ async function verifyGameStarted(driver: TetrisDriver): Promise<{
return { ok: true, reason: "grid changes on its own (auto-drop)" };
}
+ // 6b. Pixel-based fallback: if the grid reader can't see movement but the
+ // grid-area pixels changed both on key press AND during auto-drop, we're
+ // almost certainly looking at a running Tetris game that renders its
+ // active piece outside the cell layout (absolute divs, canvas overlay,
+ // etc). Require BOTH signals to avoid accepting spurious animations
+ // (cursor blink, score tick) as gameplay.
+ if (movementPixelsChanged && autoDropPixelsChanged) {
+ return {
+ ok: true,
+ reason: "grid-area pixels change on key press and on auto-drop (piece rendered outside cells)",
+ };
+ }
+
+ // 6c. DOM-fingerprint fallback: when the active piece is an absolute-
+ // positioned overlay that happens to be off-screen in the current
+ // viewport (tall sidebars that push the grid out of frame), pixel diff
+ // can come back clean while the DOM still reflects the moving piece.
+ // Require BOTH a key-press-driven change AND an auto-drop-driven change
+ // so static pages with idle timers don't slip through.
+ if (movementDomChanged && autoDropDomChanged) {
+ return {
+ ok: true,
+ reason: "grid container DOM changes on key press and on auto-drop (piece rendered outside cells)",
+ };
+ }
+
// 7. Weaker fallback: if the grid is populated in a plausible range
// (some pieces visible somewhere) and there's no game over, accept it
// provisionally. The downstream phases will weed out dead starts.
diff --git a/tasks/tetris/eval/gameplay-bot-v2/driver.ts b/tasks/tetris/eval/gameplay-bot-v2/driver.ts
@@ -349,7 +349,7 @@ export class PlaywrightDriver implements TetrisDriver {
for (const container of containers) {
const ch = container.children;
if (
- (ch.length >= 180 && ch.length <= 220) ||
+ (ch.length >= 180 && ch.length <= 230) ||
(ch.length >= 18 && ch.length <= 22 &&
ch[0]?.children.length >= 8 && ch[0]?.children.length <= 12)
) {
@@ -372,7 +372,7 @@ export class PlaywrightDriver implements TetrisDriver {
const allElements = document.querySelectorAll("div, section, main, article");
for (const el of allElements) {
const ch = el.children;
- if (ch.length >= 180 && ch.length <= 220) {
+ if (ch.length >= 180 && ch.length <= 230) {
const firstChild = ch[0] as HTMLElement;
if (!firstChild) continue;
const firstRect = firstChild.getBoundingClientRect();
@@ -725,6 +725,62 @@ export class PlaywrightDriver implements TetrisDriver {
return this.cal!;
}
+ /**
+ * Lightweight grid re-detection that does not interact with the page:
+ * this method itself issues no clicks or key presses and does not call
+ * detectStartMechanism(). It waits 200 ms, re-runs detectGrid(), and,
+ * only if grid bounds were found, overwrites the grid fields of this.cal
+ * (or creates a minimal calibration with default controls when this.cal
+ * is unset) and marks gridDetectedAt "after_start". If no bounds are
+ * found, this.cal is left untouched. Used by verifyGameStarted().
+ */
+ async refreshGridDetection(): Promise<void> {
+ // Short settle delay: some games build their grid inside the first few
+ // animation frames after startGame() runs, so the initial detectGrid()
+ // inside tryStartMechanism() may have fired before the DOM was ready.
+ await this.page.waitForTimeout(200);
+ const gridDetection = await this.detectGrid();
+ if (!gridDetection.gridBounds) return; // no grid found: keep this.cal as-is
+
+ const backgroundColor =
+ gridDetection.renderer === "canvas"
+ ? await this.sampleBackgroundColor(
+ gridDetection.gridBounds,
+ gridDetection.cellWidth,
+ gridDetection.cellHeight
+ )
+ : null; // background is only sampled for canvas renderers
+
+ if (this.cal) { // existing calibration: keep its other fields, replace the grid fields
+ this.cal = {
+ ...this.cal,
+ renderer: gridDetection.renderer,
+ gridDetected: true,
+ gridBounds: gridDetection.gridBounds,
+ cellWidth: gridDetection.cellWidth,
+ cellHeight: gridDetection.cellHeight,
+ backgroundColor,
+ gridDetectedAt: "after_start",
+ };
+ } else { // no prior calibration: build a minimal one around the detected grid
+ this.cal = {
+ renderer: gridDetection.renderer,
+ gridDetected: true,
+ gridBounds: gridDetection.gridBounds,
+ cellWidth: gridDetection.cellWidth,
+ cellHeight: gridDetection.cellHeight,
+ controls: { ...DEFAULT_CONTROLS },
+ startMechanism: "unknown",
+ scoreElementSelector: null,
+ levelElementSelector: null,
+ backgroundColor,
+ consoleErrors: [...this.consoleErrors],
+ gridConfidence: 0,
+ gridDetectedAt: "after_start",
+ };
+ }
+ }
+
getCalibration(): DriverCalibration {
if (!this.cal) throw new Error("calibrate() must be called before getCalibration()");
return this.cal;
@@ -1330,6 +1386,108 @@ export class PlaywrightDriver implements TetrisDriver {
return await this.page.screenshot();
}
+ async screenshotGridArea(): Promise<Buffer | null> {
+ const cal = this.cal;
+ if (!cal || !cal.gridBounds) return null; // no grid detected yet: nothing to clip to
+ const b = cal.gridBounds;
+ // For DOM renderers, gridBounds are viewport coordinates and are clipped
+ // directly. For canvas renderers they are internal canvas coordinates, so the
+ // clip is the first <canvas>'s on-page box (whole canvas, not just the grid).
+ try {
+ if (cal.renderer === "canvas") {
+ const boundingBox = await this.page.locator("canvas").first().boundingBox();
+ if (!boundingBox) return null;
+ return await this.page.screenshot({
+ clip: {
+ x: Math.max(0, Math.round(boundingBox.x)),
+ y: Math.max(0, Math.round(boundingBox.y)),
+ width: Math.max(1, Math.round(boundingBox.width)),
+ height: Math.max(1, Math.round(boundingBox.height)),
+ },
+ });
+ }
+ return await this.page.screenshot({
+ clip: {
+ x: Math.max(0, Math.round(b.x)), // origin clamped to >= 0; width/height are not shrunk to match
+ y: Math.max(0, Math.round(b.y)),
+ width: Math.max(1, Math.round(b.width)), // at least 1px so the clip is never empty
+ height: Math.max(1, Math.round(b.height)),
+ },
+ });
+ } catch { // any locator/screenshot failure is reported as "no snapshot"
+ return null;
+ }
+ }
+
+ async captureGridDomFingerprint(): Promise<string> {
+ try {
+ return await this.page.evaluate(() => {
+ // Locate the most plausible grid container: a table with >= 18 rows of
+ // 8-12 cells, then a board/grid/field-named element, then any element with
+ // 180-230 children. Standalone (no this.cal); meant to mirror detectGrid().
+ const findContainer = (): Element | null => {
+ const tables = document.querySelectorAll("table");
+ for (const table of tables) {
+ const rows = table.querySelectorAll("tr");
+ if (rows.length >= 18) {
+ const firstRow = rows[0].querySelectorAll("td");
+ if (firstRow.length >= 8 && firstRow.length <= 12) return table;
+ }
+ }
+ const namedCandidates = document.querySelectorAll(
+ '[class*="board"], [class*="grid"], [class*="field"], ' +
+ '[id*="board"], [id*="grid"], [id*="field"]'
+ );
+ for (const c of namedCandidates) {
+ const ch = c.children;
+ if (ch.length >= 180 && ch.length <= 230) return c;
+ if (
+ ch.length >= 18 && ch.length <= 22 &&
+ ch[0] && ch[0].children.length >= 8 && ch[0].children.length <= 12
+ ) {
+ return c;
+ }
+ }
+ // Heuristic scan: first div/section/main/article with 180-230 children.
+ const allElements = document.querySelectorAll("div, section, main, article");
+ for (const el of allElements) {
+ const ch = el.children;
+ if (ch.length >= 180 && ch.length <= 230) return el;
+ }
+ return null;
+ };
+
+ const container = findContainer();
+ if (!container) return ""; // no container found: empty fingerprint
+
+ const parts: string[] = [];
+ parts.push(`count=${container.children.length}`);
+
+ // Serialize each direct child: class, inline left/top, inline background
+ // color, and the inline --color custom property. Inline styles only (not
+ // computed) to avoid getComputedStyle() on 200+ elements per fingerprint;
+ // this assumes piece overlays set top/left/background inline.
+ let i = 0;
+ for (const child of container.children) {
+ if (i >= 260) break; // hard cap to bound work
+ const el = child as HTMLElement;
+ const cls = el.className || "";
+ const style = el.style;
+ const left = style.left || "";
+ const top = style.top || "";
+ const bg = style.backgroundColor || "";
+ const color = style.getPropertyValue("--color") || "";
+ parts.push(`${i}:${cls}:${left}:${top}:${bg}:${color}`);
+ i++;
+ }
+
+ return parts.join("|");
+ });
+ } catch { // page.evaluate() failed: report an empty fingerprint
+ return "";
+ }
+ }
+
async measureDropInterval(): Promise<number> {
try {
const intervals: number[] = [];
@@ -1482,16 +1640,61 @@ export class PlaywrightDriver implements TetrisDriver {
}
}
+ // Overlay detection: some games render the active piece as absolute-
+ // positioned sibling divs inside the grid container (so they float
+ // over the static cell grid). Children from index cellsConsumed onward
+ // that are absolute/fixed are mapped to the grid cell containing their
+ // center, and that cell is set to true. Cells are never cleared here.
+ function applyOverlayPieces(
+ container: Element, cellGrid: boolean[][], cellsConsumed: number,
+ actualRows: number, actualCols: number
+ ): void {
+ const containerRect = container.getBoundingClientRect();
+ if (containerRect.width <= 0 || containerRect.height <= 0) return;
+ const cellW = containerRect.width / actualCols; // assumes cells tile the container rect evenly
+ const cellH = containerRect.height / actualRows;
+ if (cellW < 5 || cellH < 5) return; // implausibly small cells: skip overlay mapping
+
+ const allChildren = container.children;
+ for (let i = cellsConsumed; i < allChildren.length; i++) {
+ const el = allChildren[i] as HTMLElement;
+ const style = window.getComputedStyle(el);
+ // Skip siblings that are not absolute/fixed positioned -- we only want
+ // pieces that float over the grid.
+ if (style.position !== "absolute" && style.position !== "fixed") continue;
+ const rect = el.getBoundingClientRect();
+ if (rect.width <= 0 || rect.height <= 0) continue;
+ // Center of the overlay element, relative to the container's top-left
+ const cx = rect.left + rect.width / 2 - containerRect.left;
+ const cy = rect.top + rect.height / 2 - containerRect.top;
+ const col = Math.floor(cx / cellW);
+ const row = Math.floor(cy / cellH);
+ if (row < 0 || row >= actualRows || col < 0 || col >= actualCols) continue;
+ cellGrid[row][col] = true;
+ }
+ }
+
// Strategy 2: named grid containers
const containers = document.querySelectorAll(
'[class*="board"], [class*="grid"], [class*="field"], [id*="board"], [id*="grid"], [id*="field"]'
);
for (const container of containers) {
const children = container.children;
- if (children.length >= rows * cols - 10 && children.length <= rows * cols + 10) {
+ // Container has rows*cols (+/- 10) static cells, optionally followed
+ // by a handful of absolute-positioned children that act as piece
+ // overlays (e.g. the active piece rendered on top of the static grid).
+ // Accept up to 30 extra children beyond the cell count.
+ if (children.length >= rows * cols - 10 && children.length <= rows * cols + 30) {
const actualCols = cols;
- const actualRows = Math.round(children.length / actualCols);
- const allCells = Array.from(children).slice(0, actualRows * actualCols) as HTMLElement[];
+ // With at most 4 children beyond rows*cols, keep the old behaviour
+ // and derive actualRows from the child count. Otherwise assume the
+ // extras are overlays and use the full grid dimensions.
+ const isShortGrid = children.length <= rows * cols + 4;
+ const actualRows = isShortGrid
+ ? Math.round(children.length / actualCols)
+ : rows;
+ const cellsConsumed = actualRows * actualCols;
+ const allCells = Array.from(children).slice(0, cellsConsumed) as HTMLElement[];
const emptyBg = detectEmptyBg(allCells);
const result: boolean[][] = [];
for (let r = 0; r < actualRows; r++) {
@@ -1502,6 +1705,9 @@ export class PlaywrightDriver implements TetrisDriver {
}
result.push(rowData);
}
+ // Overlay detection is a no-op when there are no extra children
+ // past the static cell grid, so it's safe to always call.
+ applyOverlayPieces(container, result, cellsConsumed, actualRows, actualCols);
return result;
}
if (children.length >= rows - 2 && children.length <= rows + 2) {
@@ -1533,7 +1739,7 @@ export class PlaywrightDriver implements TetrisDriver {
const allElements = document.querySelectorAll("div, section, main, article");
for (const el of allElements) {
const ch = el.children;
- if (ch.length >= 180 && ch.length <= 220) {
+ if (ch.length >= 180 && ch.length <= 230) {
const firstChild = ch[0] as HTMLElement;
if (!firstChild) continue;
const firstRect = firstChild.getBoundingClientRect();
@@ -2141,7 +2347,7 @@ export class PlaywrightDriver implements TetrisDriver {
);
for (const container of containers) {
const children = container.children;
- if (children.length >= 180 && children.length <= 220) {
+ if (children.length >= 180 && children.length <= 230) {
const rect = container.getBoundingClientRect();
return { type: "dom" as const, bounds: { x: rect.x, y: rect.y, width: rect.width, height: rect.height }, rows: Math.round(children.length / 10), cols: 10 };
}
@@ -2158,7 +2364,7 @@ export class PlaywrightDriver implements TetrisDriver {
const allElements = document.querySelectorAll("div, section, main, article");
for (const el of allElements) {
const ch = el.children;
- if (ch.length >= 180 && ch.length <= 220) {
+ if (ch.length >= 180 && ch.length <= 230) {
const firstChild = ch[0] as HTMLElement;
if (!firstChild) continue;
const firstRect = firstChild.getBoundingClientRect();
diff --git a/tasks/tetris/eval/gameplay-bot-v2/types.ts b/tasks/tetris/eval/gameplay-bot-v2/types.ts
@@ -137,6 +137,14 @@ export interface TetrisDriver {
surveyPage(): Promise<SurveyData>;
calibrate(): Promise<DriverCalibration>;
recalibrate(): Promise<DriverCalibration>;
+ /**
+ * Lightweight grid re-detection that does not interact with the page: it
+ * does NOT click, press keys, or run start-mechanism detection. If the page
+ * has since spawned its grid (common with DOM games that build cells in
+ * requestAnimationFrame after a start button click), the cached calibration
+ * is updated; otherwise the current calibration is left untouched.
+ */
+ refreshGridDetection(): Promise<void>;
getCalibration(): DriverCalibration;
// -- Start mechanism discovery/verification bridge --
@@ -179,6 +187,25 @@ export interface TetrisDriver {
// -- Screenshots --
screenshot(): Promise<Buffer>;
+ /**
+ * Screenshot clipped to the grid area: the calibration's gridBounds for DOM
+ * renderers, or the first <canvas>'s bounding box for canvas renderers.
+ * Returns null if no grid has been detected yet or the capture fails. A
+ * fallback for verifyGameStarted when the grid reader can't see active pieces
+ * rendered outside the cell layout (e.g. absolute-positioned overlay divs).
+ */
+ screenshotGridArea(): Promise<Buffer | null>;
+ /**
+ * Compute a string fingerprint of the grid container's DOM state: child
+ * count plus each direct child's class name and inline left/top/background
+ * (and --color) styles. Used by the start verification fallback to detect
+ * piece movement for games that render the active piece as absolute-
+ * positioned divs outside the cell layout -- changes the grid reader cannot
+ * see and that pixel diffs may miss when the piece is off-screen.
+ *
+ * Returns an empty string if no grid container is found or evaluation fails.
+ */
+ captureGridDomFingerprint(): Promise<string>;
measureDropInterval(): Promise<number>;
}