commit d90ff0c861644921c16ac1de3ee07a4cad53ed23
parent 9417cc444a07fdb588a0e10d992ca2dc3fe67c1e
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Sun, 5 Apr 2026 23:34:40 +0200
Improve gameplay bot calibration with fallbacks and DOM grid detection
Calibration:
- 2s pre-wait for DOM to settle before detection
- Broadened button text: restart, reset, new (not just start/play)
- Click any button as fallback when named button not found
- Click body to catch games that start on any click
- Press 'a' key for games that start on any keydown
- recalibrateWithRetry(): when first calibration fails, tries all
start mechanisms again with 1.5s waits, re-scanning grid each time
Grid detection:
- Heuristic DOM scan: finds containers with 180-220 uniformly-sized
children (flat grid) or 18-22 rows of 8-12 cells (row-based grid)
- detectEmptyBg(): tallies the background color of every candidate cell and
uses the most common one as the "empty" reference (only if it covers >60% of cells)
- Expanded filled-cell detection: checks occupied/locked classes,
data-type attribute, background color comparison
- Relaxed count matching for non-standard grid sizes
- DOM fallback in readGrid() when renderer is unknown
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
3 files changed, 385 insertions(+), 50 deletions(-)
diff --git a/tasks/tetris/eval/gameplay-bot/calibrate.ts b/tasks/tetris/eval/gameplay-bot/calibrate.ts
@@ -24,12 +24,33 @@ export async function calibrate(page: Page): Promise<CalibrationResult> {
const consoleErrors: string[] = [];
page.on("pageerror", (err) => consoleErrors.push(err.message));
- const startMechanism = await detectStartMechanism(page);
- const { renderer, gridBounds, cellWidth, cellHeight } = await detectGrid(page);
- const backgroundColor =
+ // Wait for DOM to fully settle (scripts, animations, timers)
+ await page.waitForTimeout(2000);
+
+ let startMechanism = await detectStartMechanism(page);
+ let { renderer, gridBounds, cellWidth, cellHeight } = await detectGrid(page);
+ let backgroundColor =
renderer === "canvas" && gridBounds
? await sampleBackgroundColor(page, gridBounds, cellWidth, cellHeight)
: null;
+
+ // Re-calibration fallback: if start or grid detection failed, retry with
+ // longer waits and re-scan after each start attempt
+ if (startMechanism === "unknown" || gridBounds === null) {
+ const retry = await recalibrateWithRetry(page, startMechanism, gridBounds);
+ if (retry.startMechanism !== "unknown") startMechanism = retry.startMechanism;
+ if (retry.gridBounds) {
+ renderer = retry.renderer;
+ gridBounds = retry.gridBounds;
+ cellWidth = retry.cellWidth;
+ cellHeight = retry.cellHeight;
+ backgroundColor =
+ renderer === "canvas" && gridBounds
+ ? await sampleBackgroundColor(page, gridBounds, cellWidth, cellHeight)
+ : null;
+ }
+ }
+
const controls = await detectControls(page);
const scoreElementSelector = await detectScoreElement(page);
@@ -112,10 +133,21 @@ async function detectStartMechanism(page: Page): Promise<StartMechanism> {
}
prevShot = newShot;
- // 5. Look for a start/play button
+ // 5. Click the body/document (some games start on any click)
+ try {
+ await page.locator("body").click({ position: { x: 100, y: 100 } });
+ await page.waitForTimeout(500);
+ newShot = await page.screenshot();
+ if (!Buffer.from(prevShot).equals(Buffer.from(newShot))) {
+ return "click_canvas";
+ }
+ prevShot = newShot;
+ } catch { /* continue */ }
+
+ // 6. Look for a start/play/restart button
try {
const button = page.locator("button, a, [role='button']").filter({
- hasText: /start|play|begin|new game/i,
+ hasText: /start|play|begin|new game|restart|reset|new/i,
}).first();
if ((await button.count()) > 0) {
await button.click();
@@ -131,7 +163,7 @@ async function detectStartMechanism(page: Page): Promise<StartMechanism> {
// Also try elements that aren't buttons but have matching text
try {
const textMatch = page.locator(
- ':text-matches("start|play|begin|new.game", "i")'
+ ':text-matches("start|play|begin|new.game|restart|reset", "i")'
).first();
if ((await textMatch.count()) > 0) {
await textMatch.click();
@@ -144,8 +176,22 @@ async function detectStartMechanism(page: Page): Promise<StartMechanism> {
}
} catch { /* continue */ }
- // 6. Press any key (try a few)
- for (const key of ["p", "s", "n", "Escape"]) {
+ // 7. Try clicking any <button> element regardless of text
+ try {
+ const anyButton = page.locator("button").first();
+ if ((await anyButton.count()) > 0) {
+ await anyButton.click();
+ await page.waitForTimeout(500);
+ newShot = await page.screenshot();
+ if (!Buffer.from(prevShot).equals(Buffer.from(newShot))) {
+ return "button";
+ }
+ prevShot = newShot;
+ }
+ } catch { /* continue */ }
+
+ // 8. Press any key (try a few -- catches games that start on any keydown)
+ for (const key of ["a", "p", "s", "n", "Escape"]) {
await page.keyboard.press(key);
await page.waitForTimeout(300);
newShot = await page.screenshot();
@@ -158,6 +204,107 @@ async function detectStartMechanism(page: Page): Promise<StartMechanism> {
return "unknown";
}
+/**
+ * Re-calibration fallback: replay every known start mechanism with a longer
+ * (1.5s) settle time, re-scanning for the grid after each attempt. Used when
+ * the first calibration pass failed to detect the start mechanism or the grid.
+ *
+ * @param page - Page hosting the game (driven via locator/keyboard/screenshot).
+ * @param currentStart - Start mechanism from the first pass; it is only
+ *   replaced when it is "unknown".
+ * @param currentGrid - Grid bounds from the first pass, or null if none was
+ *   found. When non-null, no grid re-scan is performed.
+ * @returns The start mechanism (possibly still "unknown") plus the grid
+ *   detection made DURING this retry. `gridBounds` is null when no new grid
+ *   was detected -- including the case where `currentGrid` was already known --
+ *   so a caller that guards on `gridBounds` never replaces a good first-pass
+ *   renderer/cell size with placeholder values.
+ */
+async function recalibrateWithRetry(
+  page: Page,
+  currentStart: StartMechanism,
+  currentGrid: GridBounds | null
+): Promise<GridDetection & { startMechanism: StartMechanism }> {
+  let startMechanism: StartMechanism = currentStart;
+
+  // Placeholder returned when this retry did not detect a grid itself.
+  const noNewGrid: GridDetection = {
+    renderer: "unknown",
+    gridBounds: null,
+    cellWidth: 0,
+    cellHeight: 0,
+  };
+  let detectedGrid: GridDetection | null = null;
+  // True once a grid is known, either from the first pass or from this retry.
+  let gridKnown = currentGrid !== null;
+
+  // Ordered list of start attempts. Each action resolves to true only if it
+  // actually interacted with the page, so a mechanism whose target does not
+  // exist is never credited for an unrelated screen change.
+  const attempts: Array<{ name: StartMechanism; action: () => Promise<boolean> }> = [
+    {
+      name: "click_canvas",
+      action: async () => {
+        const canvas = page.locator("canvas").first();
+        if ((await canvas.count()) === 0) return false;
+        await canvas.click();
+        return true;
+      },
+    },
+    {
+      name: "click_canvas",
+      action: async () => {
+        await page.locator("body").click({ position: { x: 200, y: 200 } });
+        return true;
+      },
+    },
+    {
+      name: "enter",
+      action: async () => {
+        await page.keyboard.press("Enter");
+        return true;
+      },
+    },
+    {
+      name: "space",
+      action: async () => {
+        await page.keyboard.press("Space");
+        return true;
+      },
+    },
+    {
+      name: "button",
+      action: async () => {
+        const btn = page.locator("button").first();
+        if ((await btn.count()) === 0) return false;
+        await btn.click();
+        return true;
+      },
+    },
+    {
+      name: "button",
+      action: async () => {
+        const btn = page.locator("button, a, [role='button']").filter({
+          hasText: /start|play|begin|restart|reset|new/i,
+        }).first();
+        if ((await btn.count()) === 0) return false;
+        await btn.click();
+        return true;
+      },
+    },
+    {
+      name: "anykey",
+      action: async () => {
+        await page.keyboard.press("a");
+        return true;
+      },
+    },
+    {
+      name: "anykey",
+      action: async () => {
+        await page.keyboard.press("ArrowDown");
+        return true;
+      },
+    },
+  ];
+
+  let prevShot = await page.screenshot();
+
+  for (const attempt of attempts) {
+    try {
+      const performed = await attempt.action();
+      await page.waitForTimeout(1500);
+
+      const newShot = await page.screenshot();
+      const changed = !Buffer.from(prevShot).equals(Buffer.from(newShot));
+      // Advance the baseline immediately so a failure in the grid scan below
+      // cannot leave a stale screenshot for the next attempt's comparison.
+      prevShot = newShot;
+
+      if (performed && changed && startMechanism === "unknown") {
+        startMechanism = attempt.name;
+      }
+
+      // Re-scan for the grid after each attempt until one is known
+      if (!gridKnown) {
+        const detected = await detectGrid(page);
+        if (detected.gridBounds) {
+          detectedGrid = detected;
+          gridKnown = true;
+        }
+      }
+
+      // If we have both, stop early
+      if (startMechanism !== "unknown" && gridKnown) {
+        break;
+      }
+    } catch { /* best effort: a failed attempt must not abort the others */ }
+  }
+
+  const grid: GridDetection = detectedGrid ?? noNewGrid;
+  return { ...grid, startMechanism };
+}
+
interface GridDetection {
renderer: RendererType;
gridBounds: GridBounds | null;
@@ -278,6 +425,54 @@ async function detectGrid(page: Page): Promise<GridDetection> {
}
}
+ // Heuristic scan: look for ANY container with many same-sized children
+ // arranged in a grid pattern, even without specific class/id naming
+ const allElements = document.querySelectorAll("div, section, main, article");
+ for (const el of allElements) {
+ const ch = el.children;
+ // Flat list of ~200 cells (10x20)
+ if (ch.length >= 180 && ch.length <= 220) {
+ const firstChild = ch[0] as HTMLElement;
+ if (!firstChild) continue;
+ const firstRect = firstChild.getBoundingClientRect();
+ if (firstRect.width < 5 || firstRect.height < 5) continue;
+ let uniform = true;
+ for (let i = 1; i < Math.min(10, ch.length); i++) {
+ const r = (ch[i] as HTMLElement).getBoundingClientRect();
+ if (Math.abs(r.width - firstRect.width) > 2 || Math.abs(r.height - firstRect.height) > 2) {
+ uniform = false;
+ break;
+ }
+ }
+ if (uniform) {
+ const cols = 10;
+ const rows = Math.round(ch.length / cols);
+ const rect = el.getBoundingClientRect();
+ return {
+ type: "dom" as const,
+ bounds: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
+ rows,
+ cols,
+ };
+ }
+ }
+ // Container with ~20 row children, each having ~10 cell children
+ if (ch.length >= 18 && ch.length <= 22) {
+ const firstRowCells = ch[0].children;
+ if (firstRowCells.length >= 8 && firstRowCells.length <= 12) {
+ const rect = el.getBoundingClientRect();
+ if (rect.width > 50 && rect.height > 100) {
+ return {
+ type: "dom" as const,
+ bounds: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
+ rows: ch.length,
+ cols: firstRowCells.length,
+ };
+ }
+ }
+ }
+ }
+
return null;
});
diff --git a/tasks/tetris/eval/gameplay-bot/grid-reader.ts b/tasks/tetris/eval/gameplay-bot/grid-reader.ts
@@ -24,6 +24,10 @@ export async function readGrid(
if (cal.gridBounds) {
return await readCanvasGrid(page, cal.gridBounds, cal.cellWidth, cal.cellHeight, cal.backgroundColor);
}
+ // Last resort: try DOM reader even if renderer is unknown
+ // (the grid may have appeared after calibration)
+ const domGrid = await readDomGrid(page);
+ if (domGrid) return domGrid;
return null;
} catch {
return null;
@@ -117,6 +121,64 @@ async function readDomGrid(page: Page): Promise<Grid | null> {
}
}
+    // Helper: decide whether a cell element is "filled" from its class
+    // names, data attributes and computed background color.
+    //
+    // cell    - the candidate cell element.
+    // emptyBg - optional computed background color known to mean "empty";
+    //           lets us handle games whose empty cells are not
+    //           transparent/black (e.g. dark gray).
+    // Returns true when the cell looks occupied.
+    function isCellFilled(cell: HTMLElement, emptyBg?: string): boolean {
+      const bg = window.getComputedStyle(cell).backgroundColor;
+      const cls = cell.className.toLowerCase();
+
+      // A data-type attribute only counts as a hint when it names something:
+      // a blank value or an explicit "empty"/"none" marks an unoccupied cell
+      // in boards that tag every cell, so those fall through to the
+      // background checks instead of being reported as filled.
+      const dataType = cell.dataset.type;
+      const hasTypeHint =
+        dataType !== undefined &&
+        !["", "empty", "none"].includes(dataType.trim().toLowerCase());
+
+      // Class/data attribute hints always win
+      if (
+        cls.includes("filled") ||
+        cls.includes("active") ||
+        cls.includes("block") ||
+        cls.includes("piece") ||
+        cls.includes("occupied") ||
+        cls.includes("locked") ||
+        cell.dataset.filled === "true" ||
+        hasTypeHint
+      ) {
+        return true;
+      }
+
+      // If we have a known empty background, compare against it
+      if (emptyBg && bg === emptyBg) return false;
+
+      // Default: non-transparent, non-black background = filled
+      return (
+        bg !== "" &&
+        bg !== "rgba(0, 0, 0, 0)" &&
+        bg !== "transparent" &&
+        bg !== "rgb(0, 0, 0)"
+      );
+    }
+
+    // Work out which background color means "empty": tally the computed
+    // background of every cell passed in and take the most frequent one.
+    // Returns undefined when no single color clearly dominates.
+    function detectEmptyBg(cells: HTMLElement[]): string | undefined {
+      const tally = new Map<string, number>();
+      for (const el of cells) {
+        const color = window.getComputedStyle(el).backgroundColor;
+        tally.set(color, (tally.get(color) || 0) + 1);
+      }
+
+      // First color to reach the highest count wins (ties keep the earlier one)
+      let dominant: string | undefined;
+      let dominantCount = 0;
+      for (const [color, seen] of tally) {
+        if (seen > dominantCount) {
+          dominant = color;
+          dominantCount = seen;
+        }
+      }
+
+      // Trust it only when it covers more than 60% of the cells, since most
+      // of the board is expected to be empty
+      const threshold = cells.length * 0.6;
+      if (!dominant || dominantCount <= threshold) return undefined;
+      return dominant;
+    }
+
// Strategy 2: look for a grid/flex container with child cells
const containers = document.querySelectorAll(
'[class*="board"], [class*="grid"], [class*="field"], [id*="board"], [id*="grid"], [id*="field"]'
@@ -124,54 +186,120 @@ async function readDomGrid(page: Page): Promise<Grid | null> {
for (const container of containers) {
const children = container.children;
// Could be a flat list of 200 cells (10x20) or 20 rows of 10 cells
- if (children.length === rows * cols) {
+ if (children.length >= rows * cols - 10 && children.length <= rows * cols + 10) {
+ const actualCols = cols;
+ const actualRows = Math.round(children.length / actualCols);
+ const allCells = Array.from(children).slice(0, actualRows * actualCols) as HTMLElement[];
+ const emptyBg = detectEmptyBg(allCells);
const result: boolean[][] = [];
- for (let r = 0; r < rows; r++) {
+ for (let r = 0; r < actualRows; r++) {
const rowData: boolean[] = [];
- for (let c = 0; c < cols; c++) {
- const cell = children[r * cols + c] as HTMLElement;
- const style = window.getComputedStyle(cell);
- const bg = style.backgroundColor;
- const cls = cell.className.toLowerCase();
- const isFilled =
- (bg !== "" && bg !== "rgba(0, 0, 0, 0)" && bg !== "transparent" && bg !== "rgb(0, 0, 0)") ||
- cls.includes("filled") ||
- cls.includes("active") ||
- cls.includes("block") ||
- cls.includes("piece") ||
- cell.dataset.filled === "true";
- rowData.push(isFilled);
+ for (let c = 0; c < actualCols; c++) {
+ const cell = allCells[r * actualCols + c];
+ rowData.push(cell ? isCellFilled(cell, emptyBg) : false);
}
result.push(rowData);
}
return result;
}
// Could be 20 row containers each with 10 cells
- if (children.length === rows) {
- let valid = true;
- const result: boolean[][] = [];
- for (let r = 0; r < rows; r++) {
- const rowEl = children[r];
- const cells = rowEl.children;
- if (cells.length < cols) { valid = false; break; }
- const rowData: boolean[] = [];
- for (let c = 0; c < cols; c++) {
- const cell = cells[c] as HTMLElement;
- const style = window.getComputedStyle(cell);
- const bg = style.backgroundColor;
- const cls = cell.className.toLowerCase();
- const isFilled =
- (bg !== "" && bg !== "rgba(0, 0, 0, 0)" && bg !== "transparent" && bg !== "rgb(0, 0, 0)") ||
- cls.includes("filled") ||
- cls.includes("active") ||
- cls.includes("block") ||
- cls.includes("piece") ||
- cell.dataset.filled === "true";
- rowData.push(isFilled);
+ if (children.length >= rows - 2 && children.length <= rows + 2) {
+ const firstRowCells = children[0]?.children;
+ if (firstRowCells && firstRowCells.length >= cols - 2 && firstRowCells.length <= cols + 2) {
+ const actualRows = children.length;
+ const actualCols = firstRowCells.length;
+ // Collect all cells for empty-bg detection
+ const allCells: HTMLElement[] = [];
+ for (let r = 0; r < actualRows; r++) {
+ const cells = children[r].children;
+ for (let c = 0; c < Math.min(actualCols, cells.length); c++) {
+ allCells.push(cells[c] as HTMLElement);
+ }
+ }
+ const emptyBg = detectEmptyBg(allCells);
+ let valid = true;
+ const result: boolean[][] = [];
+ for (let r = 0; r < actualRows; r++) {
+ const rowEl = children[r];
+ const cells = rowEl.children;
+ if (cells.length < actualCols) { valid = false; break; }
+ const rowData: boolean[] = [];
+ for (let c = 0; c < actualCols; c++) {
+ rowData.push(isCellFilled(cells[c] as HTMLElement, emptyBg));
+ }
+ result.push(rowData);
+ }
+ if (valid) return result;
+ }
+ }
+ }
+
+ // Strategy 3: heuristic scan for ANY container with many same-sized
+ // children arranged in a grid pattern (no class/id naming required)
+ const allElements = document.querySelectorAll("div, section, main, article");
+ for (const el of allElements) {
+ const ch = el.children;
+ // Flat list of ~200 cells
+ if (ch.length >= 180 && ch.length <= 220) {
+ const firstChild = ch[0] as HTMLElement;
+ if (!firstChild) continue;
+ const firstRect = firstChild.getBoundingClientRect();
+ if (firstRect.width < 5 || firstRect.height < 5) continue;
+ let uniform = true;
+ for (let i = 1; i < Math.min(10, ch.length); i++) {
+ const r = (ch[i] as HTMLElement).getBoundingClientRect();
+ if (Math.abs(r.width - firstRect.width) > 2 || Math.abs(r.height - firstRect.height) > 2) {
+ uniform = false;
+ break;
+ }
+ }
+ if (uniform) {
+ const actualCols = cols;
+ const actualRows = Math.round(ch.length / actualCols);
+ const allCells = Array.from(ch).slice(0, actualRows * actualCols) as HTMLElement[];
+ const emptyBg = detectEmptyBg(allCells);
+ const result: boolean[][] = [];
+ for (let r = 0; r < actualRows; r++) {
+ const rowData: boolean[] = [];
+ for (let c = 0; c < actualCols; c++) {
+ const cell = allCells[r * actualCols + c];
+ rowData.push(cell ? isCellFilled(cell, emptyBg) : false);
+ }
+ result.push(rowData);
+ }
+ return result;
+ }
+ }
+ // Container with ~20 row children each having ~10 cell children
+ if (ch.length >= 18 && ch.length <= 22) {
+ const firstRowCells = ch[0]?.children;
+ if (firstRowCells && firstRowCells.length >= 8 && firstRowCells.length <= 12) {
+ const rect = el.getBoundingClientRect();
+ if (rect.width > 50 && rect.height > 100) {
+ const actualRows = ch.length;
+ const actualCols = firstRowCells.length;
+ const allCells: HTMLElement[] = [];
+ for (let r = 0; r < actualRows; r++) {
+ const cells = ch[r].children;
+ for (let c = 0; c < Math.min(actualCols, cells.length); c++) {
+ allCells.push(cells[c] as HTMLElement);
+ }
+ }
+ const emptyBg = detectEmptyBg(allCells);
+ let valid = true;
+ const result: boolean[][] = [];
+ for (let r = 0; r < actualRows; r++) {
+ const cells = ch[r].children;
+ if (cells.length < actualCols) { valid = false; break; }
+ const rowData: boolean[] = [];
+ for (let c = 0; c < actualCols; c++) {
+ rowData.push(isCellFilled(cells[c] as HTMLElement, emptyBg));
+ }
+ result.push(rowData);
+ }
+ if (valid) return result;
}
- result.push(rowData);
}
- if (valid) return result;
}
}
diff --git a/tasks/tetris/eval/gameplay-bot/tests.ts b/tasks/tetris/eval/gameplay-bot/tests.ts
@@ -458,16 +458,28 @@ async function testAllPiecesRotate(
// Start the game (use camelCase startMechanism from CalibrationResult)
if (cal.startMechanism === "button") {
- const btn = page.locator("button").filter({ hasText: /start|play|begin|new/i }).first();
- if (await btn.count() > 0) await btn.click();
+ const btn = page.locator("button, a, [role='button']").filter({ hasText: /start|play|begin|new|restart|reset/i }).first();
+ if (await btn.count() > 0) {
+ await btn.click();
+ } else {
+ // Fall back to clicking any button
+ const anyBtn = page.locator("button").first();
+ if (await anyBtn.count() > 0) await anyBtn.click();
+ }
} else if (cal.startMechanism === "space") {
await page.keyboard.press("Space");
} else if (cal.startMechanism === "enter") {
await page.keyboard.press("Enter");
} else if (cal.startMechanism === "click_canvas") {
- await page.locator("canvas, [class*='game'], [id*='game']").first().click({ force: true });
+ try {
+ await page.locator("canvas, [class*='game'], [id*='game']").first().click({ force: true });
+ } catch {
+ await page.locator("body").click({ position: { x: 200, y: 200 } });
+ }
+ } else if (cal.startMechanism === "anykey") {
+ await page.keyboard.press("a");
}
- await page.waitForTimeout(1000);
+ await page.waitForTimeout(1500);
const rotatedPieces = new Set<string>();
const failedPieces = new Set<string>();