commit 43fb9fa4943a04511bffd96ccd1ba7e925d1ef15
parent 69173d2750e5cab2d6a94d1c152116be336341c2
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Wed, 8 Apr 2026 09:59:16 +0200
Rewrite start detection: 5-phase, language-agnostic, visual change
Phase 1: auto-start (10 frames at 100ms, no input)
Phase 2: DOM buttons by visual prominence (no text matching)
Phase 3: canvas click grid (center, upper, lower, 3x3)
Phase 4: keyboard triggers with combos
Phase 5: retry phases 2-4 (some games need two interactions)
detectVisualChange: Level 1 (any change) + Level 2 (gameplay pattern)
30-second total budget. Stateful button recording.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
3 files changed, 499 insertions(+), 292 deletions(-)
diff --git a/tasks/tetris/eval/gameplay-bot/calibrate.ts b/tasks/tetris/eval/gameplay-bot/calibrate.ts
@@ -27,7 +27,9 @@ export async function calibrate(page: Page): Promise<CalibrationResult> {
// Wait for DOM to fully settle (scripts, animations, timers)
await page.waitForTimeout(2000);
- let startMechanism = await detectStartMechanism(page);
+ let startResult = await detectStartMechanism(page);
+ let startMechanism: StartMechanism = startResult.mechanism;
+ let startButton = startResult.startButton;
let { renderer, gridBounds, cellWidth, cellHeight } = await detectGrid(page);
let backgroundColor =
renderer === "canvas" && gridBounds
@@ -69,7 +71,7 @@ export async function calibrate(page: Page): Promise<CalibrationResult> {
gridConfidence: 0,
});
- return {
+ const result: CalibrationResult = {
renderer,
gridDetected: gridBounds !== null,
gridBounds,
@@ -82,6 +84,12 @@ export async function calibrate(page: Page): Promise<CalibrationResult> {
consoleErrors,
gridConfidence,
};
+
+ if (startButton) {
+ result.startButton = startButton;
+ }
+
+ return result;
}
/**
@@ -158,194 +166,154 @@ async function measureGridConfidence(
}
/**
- * Detect a falling piece by taking 5 screenshots ~200ms apart and looking
- * for a cluster of colored pixels that moved downward between frames.
+ * Take a screenshot and sample it into a grid of "colored" (true) / "background" (false)
+ * values. Reusable building block for visual change detection.
+ */
+async function sampleScreenshot(
+ page: Page,
+ sampleCols: number,
+ sampleRows: number,
+ colorThreshold: number = 40
+): Promise<boolean[][]> {
+ const shot = await page.screenshot();
+ const base64 = shot.toString("base64");
+ const grid = await page.evaluate(
+ async ({ base64, sampleCols, sampleRows, colorThreshold }) => {
+ const img = new Image();
+ const loaded = new Promise<void>((resolve, reject) => {
+ img.onload = () => resolve();
+ img.onerror = () => reject(new Error("image decode failed"));
+ });
+ img.src = `data:image/png;base64,${base64}`;
+ await loaded;
+
+ const canvas = document.createElement("canvas");
+ canvas.width = img.width;
+ canvas.height = img.height;
+ const ctx = canvas.getContext("2d")!;
+ ctx.drawImage(img, 0, 0);
+
+ const stepX = img.width / sampleCols;
+ const stepY = img.height / sampleRows;
+
+ const colors: number[][] = [];
+ for (let r = 0; r < sampleRows; r++) {
+ const row: number[] = [];
+ for (let c = 0; c < sampleCols; c++) {
+ const px = Math.floor(c * stepX + stepX / 2);
+ const py = Math.floor(r * stepY + stepY / 2);
+ const pixel = ctx.getImageData(px, py, 1, 1).data;
+ row.push(pixel[0] * 1000000 + pixel[1] * 1000 + pixel[2]);
+ }
+ colors.push(row);
+ }
+
+ const colorCounts = new Map<number, number>();
+ for (const row of colors) {
+ for (const c of row) {
+ colorCounts.set(c, (colorCounts.get(c) || 0) + 1);
+ }
+ }
+ let bgColor = 0;
+ let bgCount = 0;
+ for (const [color, count] of colorCounts) {
+ if (count > bgCount) {
+ bgCount = count;
+ bgColor = color;
+ }
+ }
+ const bgR = Math.floor(bgColor / 1000000);
+ const bgG = Math.floor((bgColor % 1000000) / 1000);
+ const bgB = bgColor % 1000;
+
+ const result: boolean[][] = [];
+ for (let r = 0; r < sampleRows; r++) {
+ const row: boolean[] = [];
+ for (let c = 0; c < sampleCols; c++) {
+ const v = colors[r][c];
+ const pR = Math.floor(v / 1000000);
+ const pG = Math.floor((v % 1000000) / 1000);
+ const pB = v % 1000;
+ const dist = Math.sqrt(
+ (pR - bgR) ** 2 + (pG - bgG) ** 2 + (pB - bgB) ** 2
+ );
+ row.push(dist > colorThreshold);
+ }
+ result.push(row);
+ }
+ return result;
+ },
+ { base64, sampleCols, sampleRows, colorThreshold }
+ );
+ return grid;
+}
+
+/**
+ * Detect visual change by taking multiple screenshots at fast intervals.
*
- * This works for canvas, DOM, SVG, WebGL -- any rendering approach.
- * It does NOT require the grid reader or calibrated grid bounds.
+ * Two-level detection:
+ *   Level 1: did >5% of sample points flip between colored and background in any two consecutive frames?
+ * Level 2: did something move downward (gameplay pattern)?
*
- * Implementation: divide the visible page into a grid of sample points
- * (~20 columns x ~40 rows). Read pixel colors at each point via screenshot
- * buffer. Between consecutive frames, look for a group of colored
- * (non-background) points that disappeared from one position and appeared
- * lower -- a "falling cluster" of roughly 4 cells (2x2 to 4x1 bounding box).
+ * Level 1 alone is enough to confirm the game responded to input.
+ * Level 2 confirms actual gameplay (piece falling).
*/
-async function detectFallingPiece(page: Page): Promise<boolean> {
+async function detectVisualChange(
+ page: Page,
+ options?: { frames?: number; intervalMs?: number }
+): Promise<{ changed: boolean; gameplayDetected: boolean }> {
+ const FRAMES = options?.frames ?? 10;
+ const INTERVAL = options?.intervalMs ?? 100;
const SAMPLE_COLS = 20;
const SAMPLE_ROWS = 40;
- const SCREENSHOTS = 5;
- const INTERVAL_MS = 200;
- // Minimum downward shift in sample-grid rows to count as "falling"
- const MIN_DOWN_SHIFT = 1;
- // Cluster size bounds (roughly a tetromino: 3-6 sample points)
- const MIN_CLUSTER = 3;
- const MAX_CLUSTER = 12;
- // Color distance threshold to distinguish filled from background
- const COLOR_THRESHOLD = 40;
-
- // Take screenshots
- const shots: Buffer[] = [];
- for (let i = 0; i < SCREENSHOTS; i++) {
- shots.push(await page.screenshot());
- if (i < SCREENSHOTS - 1) await page.waitForTimeout(INTERVAL_MS);
- }
+ const CHANGE_THRESHOLD = 0.05;
- // Parse each screenshot into a grid of "colored" (true) / "background" (false)
- // by sampling pixel colors at evenly spaced points.
- // We use page.evaluate to decode the PNG in the browser via canvas.
const grids: boolean[][][] = [];
-
- for (const shot of shots) {
- const base64 = shot.toString("base64");
- const grid = await page.evaluate(
- async ({ base64, sampleCols, sampleRows, colorThreshold }) => {
- // Decode the screenshot PNG into pixel data
- const img = new Image();
- const loaded = new Promise<void>((resolve, reject) => {
- img.onload = () => resolve();
- img.onerror = () => reject(new Error("image decode failed"));
- });
- img.src = `data:image/png;base64,${base64}`;
- await loaded;
-
- const canvas = document.createElement("canvas");
- canvas.width = img.width;
- canvas.height = img.height;
- const ctx = canvas.getContext("2d")!;
- ctx.drawImage(img, 0, 0);
-
- const stepX = img.width / sampleCols;
- const stepY = img.height / sampleRows;
-
- // First pass: sample all pixel colors
- const colors: number[][] = [];
- for (let r = 0; r < sampleRows; r++) {
- const row: number[] = [];
- for (let c = 0; c < sampleCols; c++) {
- const px = Math.floor(c * stepX + stepX / 2);
- const py = Math.floor(r * stepY + stepY / 2);
- const pixel = ctx.getImageData(px, py, 1, 1).data;
- // Store as a single luminance-like value for quick background detection
- // and the full RGB for distance checks
- row.push(pixel[0] * 1000000 + pixel[1] * 1000 + pixel[2]);
- }
- colors.push(row);
- }
-
- // Determine background color: the most common color in the sample grid
- const colorCounts = new Map<number, number>();
- for (const row of colors) {
- for (const c of row) {
- colorCounts.set(c, (colorCounts.get(c) || 0) + 1);
- }
- }
- let bgColor = 0;
- let bgCount = 0;
- for (const [color, count] of colorCounts) {
- if (count > bgCount) {
- bgCount = count;
- bgColor = color;
- }
- }
- const bgR = Math.floor(bgColor / 1000000);
- const bgG = Math.floor((bgColor % 1000000) / 1000);
- const bgB = bgColor % 1000;
-
- // Second pass: mark cells as "colored" if they differ from background
- const result: boolean[][] = [];
- for (let r = 0; r < sampleRows; r++) {
- const row: boolean[] = [];
- for (let c = 0; c < sampleCols; c++) {
- const v = colors[r][c];
- const pR = Math.floor(v / 1000000);
- const pG = Math.floor((v % 1000000) / 1000);
- const pB = v % 1000;
- const dist = Math.sqrt(
- (pR - bgR) ** 2 + (pG - bgG) ** 2 + (pB - bgB) ** 2
- );
- row.push(dist > colorThreshold);
- }
- result.push(row);
- }
- return result;
- },
- { base64, sampleCols: SAMPLE_COLS, sampleRows: SAMPLE_ROWS, colorThreshold: COLOR_THRESHOLD }
- );
+ for (let i = 0; i < FRAMES; i++) {
+ const grid = await sampleScreenshot(page, SAMPLE_COLS, SAMPLE_ROWS);
grids.push(grid);
+ if (i < FRAMES - 1) await page.waitForTimeout(INTERVAL);
}
- // Compare consecutive frame pairs to find downward-moving clusters
+ // Level 1: check for any significant change between consecutive frames
+ let changed = false;
for (let f = 0; f < grids.length - 1; f++) {
- const prev = grids[f];
- const curr = grids[f + 1];
-
- // Find cells that were colored in prev but not in curr ("disappeared")
- const disappeared: [number, number][] = [];
- // Find cells that are colored in curr but not in prev ("appeared")
- const appeared: [number, number][] = [];
-
+ let diffs = 0;
for (let r = 0; r < SAMPLE_ROWS; r++) {
for (let c = 0; c < SAMPLE_COLS; c++) {
- if (prev[r][c] && !curr[r][c]) disappeared.push([r, c]);
- if (!prev[r][c] && curr[r][c]) appeared.push([r, c]);
+ if (grids[f][r][c] !== grids[f + 1][r][c]) diffs++;
}
}
+ if (diffs / (SAMPLE_ROWS * SAMPLE_COLS) > CHANGE_THRESHOLD) {
+ changed = true;
+ break;
+ }
+ }
- // If nothing changed, no falling piece in this frame pair
- if (disappeared.length === 0 || appeared.length === 0) continue;
-
- // Cluster the disappeared points using simple flood fill
- const disappearedClusters = clusterPoints(disappeared);
- const appearedClusters = clusterPoints(appeared);
-
- // For each disappeared cluster, look for a matching appeared cluster
- // that is shifted downward (same rough column range, higher row numbers)
- for (const dCluster of disappearedClusters) {
- if (dCluster.length < MIN_CLUSTER || dCluster.length > MAX_CLUSTER) continue;
-
- const dMinCol = Math.min(...dCluster.map(([, c]) => c));
- const dMaxCol = Math.max(...dCluster.map(([, c]) => c));
- const dMinRow = Math.min(...dCluster.map(([r]) => r));
- const dCenterCol = (dMinCol + dMaxCol) / 2;
-
- for (const aCluster of appearedClusters) {
- if (aCluster.length < MIN_CLUSTER || aCluster.length > MAX_CLUSTER) continue;
-
- const aMinCol = Math.min(...aCluster.map(([, c]) => c));
- const aMaxCol = Math.max(...aCluster.map(([, c]) => c));
- const aMinRow = Math.min(...aCluster.map(([r]) => r));
- const aCenterCol = (aMinCol + aMaxCol) / 2;
-
- // Check: appeared cluster is below disappeared cluster
- // and in roughly the same column range
- const colOverlap = Math.abs(dCenterCol - aCenterCol) <= 3;
- const movedDown = aMinRow > dMinRow && (aMinRow - dMinRow) >= MIN_DOWN_SHIFT;
+ if (!changed) return { changed: false, gameplayDetected: false };
- if (colOverlap && movedDown) {
- return true;
- }
+ // Level 2: check for downward movement pattern
+ let gameplayDetected = false;
+ for (let f = 0; f < grids.length - 1; f++) {
+ const disappeared: [number, number][] = [];
+ const appeared: [number, number][] = [];
+ for (let r = 0; r < SAMPLE_ROWS; r++) {
+ for (let c = 0; c < SAMPLE_COLS; c++) {
+ if (grids[f][r][c] && !grids[f + 1][r][c]) disappeared.push([r, c]);
+ if (!grids[f][r][c] && grids[f + 1][r][c]) appeared.push([r, c]);
}
}
-
- // Also check if the overall set of colored points shifted down
- // (handles cases where clusters partially overlap between frames)
- if (disappeared.length >= MIN_CLUSTER && appeared.length >= MIN_CLUSTER) {
- const dAvgRow = disappeared.reduce((s, [r]) => s + r, 0) / disappeared.length;
- const aAvgRow = appeared.reduce((s, [r]) => s + r, 0) / appeared.length;
- const dAvgCol = disappeared.reduce((s, [, c]) => s + c, 0) / disappeared.length;
- const aAvgCol = appeared.reduce((s, [, c]) => s + c, 0) / appeared.length;
-
- if (
- aAvgRow > dAvgRow + MIN_DOWN_SHIFT &&
- Math.abs(aAvgCol - dAvgCol) <= 3 &&
- Math.abs(disappeared.length - appeared.length) <= 4
- ) {
- return true;
+ if (disappeared.length >= 3 && appeared.length >= 3) {
+ const avgDisRow = disappeared.reduce((s, [r]) => s + r, 0) / disappeared.length;
+ const avgAppRow = appeared.reduce((s, [r]) => s + r, 0) / appeared.length;
+ if (avgAppRow > avgDisRow) {
+ gameplayDetected = true;
+ break;
}
}
}
- return false;
+ return { changed, gameplayDetected };
}
/**
@@ -389,125 +357,361 @@ function clusterPoints(points: [number, number][]): [number, number][][] {
return clusters;
}
+/** Result of the 5-phase start detection. */
+interface StartDetectionResult {
+ mechanism: StartMechanism;
+ startButton?: CalibrationResult["startButton"];
+}
+
/**
- * Try multiple mechanisms to start the game.
- * After each trigger, runs the falling piece detector to confirm
- * the game actually started (not just a title screen animation).
+ * 5-phase start detection. Language-agnostic, visual-first.
*
- * The ONLY way to confirm start is detecting a falling piece.
+ * Phase 1: Auto-start (10 frames at 100ms intervals, no input)
+ * Phase 2: DOM button discovery (click all clickable elements by prominence)
+ * Phase 3: Canvas click grid (for canvas-rendered buttons)
+ * Phase 4: Keyboard triggers (Enter, Space, ArrowDown, Z, combos)
+ * Phase 5: Retry phases 2-4 (some games need two interactions)
+ *
+ * Total budget: 30 seconds (soft limit: only checked between attempts, so an in-flight attempt can overrun).
*/
-async function detectStartMechanism(page: Page): Promise<StartMechanism> {
- // Ordered list of triggers to try
- const triggers: Array<{ name: StartMechanism; action: () => Promise<void> }> = [
- // 1. Wait 3s (auto-start games)
- {
- name: "auto",
- action: async () => {
- await page.waitForTimeout(3000);
- },
- },
- // 2. Click the canvas
- {
- name: "click_canvas",
- action: async () => {
- const canvas = page.locator("canvas").first();
- if ((await canvas.count()) > 0) await canvas.click();
- },
- },
- // 3. Click any game-like container
- {
- name: "click_canvas",
- action: async () => {
- const container = page.locator(
- '[class*="game"], [class*="board"], [id*="game"], [id*="board"]'
- ).first();
- if ((await container.count()) > 0) await container.click();
- },
- },
- // 4. Press Enter
- {
- name: "enter",
- action: async () => { await page.keyboard.press("Enter"); },
- },
- // 5. Press Space
- {
- name: "space",
- action: async () => { await page.keyboard.press("Space"); },
- },
- // 6. Click body
- {
- name: "click_canvas",
- action: async () => {
- await page.locator("body").click({ position: { x: 100, y: 100 } });
- },
- },
- // 7. Click a start/play button
- {
- name: "button",
- action: async () => {
- const button = page.locator("button, a, [role='button']").filter({
- hasText: /start|play|begin|new game|restart|reset|new/i,
- }).first();
- if ((await button.count()) > 0) await button.click();
- },
- },
- // 8. Click text that looks like a start prompt
- {
- name: "button",
- action: async () => {
- const textMatch = page.locator(
- ':text-matches("start|play|begin|new.game|restart|reset", "i")'
- ).first();
- if ((await textMatch.count()) > 0) await textMatch.click();
- },
- },
- // 9. Click any button regardless of text
- {
- name: "button",
- action: async () => {
- const anyButton = page.locator("button").first();
- if ((await anyButton.count()) > 0) await anyButton.click();
- },
- },
- // 10. Press various keys (catches games that start on any keydown)
- {
- name: "anykey",
- action: async () => {
- for (const key of ["a", "p", "s", "n", "Escape"]) {
- await page.keyboard.press(key);
- await page.waitForTimeout(100);
+async function detectStartMechanism(page: Page): Promise<StartDetectionResult> {
+ const deadline = Date.now() + 30000;
+
+ const budgetExceeded = () => Date.now() >= deadline;
+
+ // ---- Phase 1: Auto-start (no input, 1 second) ----
+ {
+ const result = await detectVisualChange(page, { frames: 10, intervalMs: 100 });
+ if (result.changed) {
+ return { mechanism: "auto" };
+ }
+ }
+
+ // ---- Phase 2: DOM button discovery (language-agnostic) ----
+ {
+ const phase2Result = await tryDomButtons(page, budgetExceeded);
+ if (phase2Result) return phase2Result;
+ }
+
+ // ---- Phase 3: Canvas click grid ----
+ if (!budgetExceeded()) {
+ const phase3Result = await tryCanvasClicks(page, budgetExceeded);
+ if (phase3Result) return phase3Result;
+ }
+
+ // ---- Phase 4: Keyboard triggers ----
+ if (!budgetExceeded()) {
+ const phase4Result = await tryKeyboardTriggers(page, budgetExceeded);
+ if (phase4Result) return phase4Result;
+ }
+
+ // ---- Phase 5: Retry phases 2-4 (some games need two interactions) ----
+ if (!budgetExceeded()) {
+ const phase2Retry = await tryDomButtons(page, budgetExceeded);
+ if (phase2Retry) return phase2Retry;
+ }
+ if (!budgetExceeded()) {
+ const phase3Retry = await tryCanvasClicks(page, budgetExceeded);
+ if (phase3Retry) return phase3Retry;
+ }
+ if (!budgetExceeded()) {
+ const phase4Retry = await tryKeyboardTriggers(page, budgetExceeded);
+ if (phase4Retry) return phase4Retry;
+ }
+
+ return { mechanism: "unknown" };
+}
+
+/**
+ * Phase 2: Find all clickable DOM elements (language-agnostic, no text matching).
+ * Sort by visual prominence (size, centrality). Click each and observe.
+ */
+async function tryDomButtons(
+ page: Page,
+ budgetExceeded: () => boolean
+): Promise<StartDetectionResult | null> {
+ try {
+ // Collect all clickable elements
+ const clickableSelector =
+ 'button, a, [role="button"], [onclick], input[type="button"], input[type="submit"]';
+ const visualSelector =
+ '[class*="btn"], [class*="button"], [class*="start"], [class*="play"], ' +
+ '[id*="start"], [id*="play"], [id*="btn"]';
+
+ // Gather element info (position, size, text) for sorting
+ const elementInfos = await page.evaluate(
+ ({ clickableSelector, visualSelector }) => {
+ const seen = new Set<Element>();
+ const results: Array<{
+ index: number;
+ text: string;
+ x: number;
+ y: number;
+ width: number;
+ height: number;
+ area: number;
+ centerDist: number;
+ selector: string;
+ }> = [];
+
+ const allElements: Element[] = [];
+ for (const el of document.querySelectorAll(clickableSelector)) {
+ if (!seen.has(el)) {
+ seen.add(el);
+ allElements.push(el);
+ }
+ }
+ for (const el of document.querySelectorAll(visualSelector)) {
+ if (!seen.has(el)) {
+ seen.add(el);
+ allElements.push(el);
+ }
+ }
+
+ const pageW = window.innerWidth;
+ const pageH = window.innerHeight;
+ const pageCenterX = pageW / 2;
+ const pageCenterY = pageH / 2;
+
+ for (let i = 0; i < allElements.length; i++) {
+ const el = allElements[i];
+ const rect = el.getBoundingClientRect();
+ if (rect.width < 5 || rect.height < 5) continue;
+ if (rect.top > pageH || rect.left > pageW) continue;
+
+ const cx = rect.left + rect.width / 2;
+ const cy = rect.top + rect.height / 2;
+ const centerDist = Math.sqrt((cx - pageCenterX) ** 2 + (cy - pageCenterY) ** 2);
+
+ let selector = "";
+ if (el.id) {
+ selector = `#${el.id}`;
+ } else if ((el as HTMLElement).className) {
+ const cls = (el as HTMLElement).className.toString().split(" ")[0];
+ if (cls) selector = `${el.tagName.toLowerCase()}.${cls}`;
+ }
+ if (!selector) selector = `${el.tagName.toLowerCase()}:nth-of-type(${i + 1})`;
+
+ results.push({
+ index: i,
+ text: (el.textContent || "").trim().slice(0, 50),
+ x: Math.round(cx),
+ y: Math.round(cy),
+ width: rect.width,
+ height: rect.height,
+ area: rect.width * rect.height,
+ centerDist,
+ selector,
+ });
}
+
+ // Sort by visual prominence: larger elements first, then closer to center
+ results.sort((a, b) => b.area - a.area || a.centerDist - b.centerDist);
+
+ return results;
},
- },
- // 11. Press ArrowDown (some games start on directional input)
- {
- name: "anykey",
- action: async () => { await page.keyboard.press("ArrowDown"); },
- },
+ { clickableSelector, visualSelector }
+ );
+
+ // Click each element and observe for visual change
+ for (const info of elementInfos) {
+ if (budgetExceeded()) break;
+
+ try {
+        // Skip the click if nothing is hit-testable at the recorded point (does not verify it is the same element)
+ const wasVisible = await page.evaluate(
+ ({ x, y }) => {
+ const el = document.elementFromPoint(x, y);
+ return el !== null;
+ },
+ { x: info.x, y: info.y }
+ );
+ if (!wasVisible) continue;
+
+ await page.mouse.click(info.x, info.y);
+ await page.waitForTimeout(200);
+
+ const result = await detectVisualChange(page, { frames: 10, intervalMs: 100 });
+ if (result.changed) {
+ // Check if the element disappeared after clicking
+ const disappeared = await page.evaluate(
+ ({ selector }) => {
+ if (!selector) return false;
+ try {
+ const el = document.querySelector(selector);
+ if (!el) return true;
+ const rect = el.getBoundingClientRect();
+ return rect.width === 0 || rect.height === 0;
+ } catch {
+ return false;
+ }
+ },
+ { selector: info.selector }
+ );
+
+ return {
+ mechanism: "button",
+ startButton: {
+ selector: info.selector,
+ text: info.text,
+ disappeared,
+ position: { x: info.x, y: info.y },
+ },
+ };
+ }
+
+ // No change -- try pressing Escape to undo any menu we opened
+ try {
+ await page.keyboard.press("Escape");
+ await page.waitForTimeout(100);
+ } catch { /* ignore */ }
+ } catch { /* continue to next element */ }
+ }
+ } catch { /* phase 2 failed entirely */ }
+
+ return null;
+}
+
+/**
+ * Phase 3: Click the canvas at strategic positions.
+ * Center first, then upper-center, lower-center, then a 3x3 grid.
+ */
+async function tryCanvasClicks(
+ page: Page,
+ budgetExceeded: () => boolean
+): Promise<StartDetectionResult | null> {
+  // Use the first <canvas> as the click target; fall back to the whole viewport below
+ let targetBox: { x: number; y: number; width: number; height: number } | null = null;
+
+ try {
+ const canvas = page.locator("canvas").first();
+ if ((await canvas.count()) > 0) {
+ targetBox = await canvas.boundingBox();
+ }
+ } catch { /* no canvas */ }
+
+ if (!targetBox) {
+ // Try the viewport itself
+ const viewport = page.viewportSize();
+ if (viewport) {
+ targetBox = { x: 0, y: 0, width: viewport.width, height: viewport.height };
+ }
+ }
+
+ if (!targetBox) return null;
+
+ const cx = targetBox.x + targetBox.width / 2;
+ const cy = targetBox.y + targetBox.height / 2;
+
+ // Click positions: center, upper-center, lower-center, then 3x3 grid
+ const positions: Array<{ x: number; y: number; label: string }> = [
+ { x: cx, y: cy, label: "center" },
+ { x: cx, y: targetBox.y + targetBox.height * 0.25, label: "upper-center" },
+ { x: cx, y: targetBox.y + targetBox.height * 0.75, label: "lower-center" },
];
- for (const trigger of triggers) {
+ // Add 3x3 grid positions (skip center since we already have it)
+ for (let row = 0; row < 3; row++) {
+ for (let col = 0; col < 3; col++) {
+ if (row === 1 && col === 1) continue; // skip center duplicate
+ positions.push({
+ x: targetBox.x + targetBox.width * (col + 0.5) / 3,
+ y: targetBox.y + targetBox.height * (row + 0.5) / 3,
+ label: `grid_${row}_${col}`,
+ });
+ }
+ }
+
+ for (const pos of positions) {
+ if (budgetExceeded()) break;
+
+ try {
+ await page.mouse.click(pos.x, pos.y);
+ await page.waitForTimeout(200);
+
+ const result = await detectVisualChange(page, { frames: 10, intervalMs: 100 });
+ if (result.changed) {
+ return {
+ mechanism: "click_canvas",
+ startButton: {
+ selector: "canvas",
+ text: `canvas click at ${pos.label}`,
+ disappeared: false,
+ position: { x: Math.round(pos.x), y: Math.round(pos.y) },
+ },
+ };
+ }
+ } catch { /* continue */ }
+ }
+
+ return null;
+}
+
+/**
+ * Phase 4: Keyboard triggers.
+ * Try Enter, Space, ArrowDown, Z individually,
+ * then click-then-Enter and click-then-Space combos.
+ */
+async function tryKeyboardTriggers(
+ page: Page,
+ budgetExceeded: () => boolean
+): Promise<StartDetectionResult | null> {
+ const mechanismMap: Record<string, StartMechanism> = {
+ Enter: "enter",
+ Space: "space",
+ ArrowDown: "anykey",
+ z: "anykey",
+ };
+
+ // Single key presses
+ for (const key of ["Enter", "Space", "ArrowDown", "z"]) {
+ if (budgetExceeded()) break;
+
try {
- await trigger.action();
- // Wait for overlay to dismiss and first piece to start falling
- await page.waitForTimeout(1500);
+ await page.keyboard.press(key);
+ await page.waitForTimeout(200);
+
+ const result = await detectVisualChange(page, { frames: 10, intervalMs: 100 });
+ if (result.changed) {
+ return { mechanism: mechanismMap[key] };
+ }
+ } catch { /* continue */ }
+ }
+
+ // Combo: click canvas center, then Enter / Space
+ for (const key of ["Enter", "Space"]) {
+ if (budgetExceeded()) break;
+
+ try {
+ const canvas = page.locator("canvas").first();
+ if ((await canvas.count()) > 0) {
+ await canvas.click();
+ } else {
+ const viewport = page.viewportSize();
+ if (viewport) {
+ await page.mouse.click(viewport.width / 2, viewport.height / 2);
+ }
+ }
+ await page.waitForTimeout(100);
+ await page.keyboard.press(key);
+ await page.waitForTimeout(200);
- if (await detectFallingPiece(page)) {
- return trigger.name;
+ const result = await detectVisualChange(page, { frames: 10, intervalMs: 100 });
+ if (result.changed) {
+ return { mechanism: mechanismMap[key] };
}
- } catch { /* continue to next trigger */ }
+ } catch { /* continue */ }
}
- return "unknown";
+ return null;
}
/**
- * Re-calibration fallback: try ALL start mechanisms again with longer waits,
+ * Re-calibration fallback: try start mechanisms again with longer waits,
* re-scanning for the grid after each attempt. Used when the first pass
* failed to detect the start mechanism or the grid.
*
- * Uses the falling piece detector (not screenshot comparison) to confirm
- * the game actually started.
+ * Uses detectVisualChange() to confirm the game responded.
*/
async function recalibrateWithRetry(
page: Page,
@@ -522,7 +726,6 @@ async function recalibrateWithRetry(
cellHeight: 0,
};
- // Ordered list of start attempts with longer waits between each
const attempts: Array<{ name: StartMechanism; action: () => Promise<void> }> = [
{
name: "click_canvas",
@@ -553,19 +756,6 @@ async function recalibrateWithRetry(
},
},
{
- name: "button",
- action: async () => {
- const btn = page.locator("button, a, [role='button']").filter({
- hasText: /start|play|begin|restart|reset|new/i,
- }).first();
- if ((await btn.count()) > 0) await btn.click();
- },
- },
- {
- name: "anykey",
- action: async () => { await page.keyboard.press("a"); },
- },
- {
name: "anykey",
action: async () => { await page.keyboard.press("ArrowDown"); },
},
@@ -574,11 +764,11 @@ async function recalibrateWithRetry(
for (const attempt of attempts) {
try {
await attempt.action();
- await page.waitForTimeout(1500);
+ await page.waitForTimeout(200);
- // Use falling piece detector instead of screenshot comparison
if (startMechanism === "unknown") {
- if (await detectFallingPiece(page)) {
+ const result = await detectVisualChange(page, { frames: 10, intervalMs: 100 });
+ if (result.changed) {
startMechanism = attempt.name;
}
}
diff --git a/tasks/tetris/eval/gameplay-bot/tests.ts b/tasks/tetris/eval/gameplay-bot/tests.ts
@@ -637,13 +637,23 @@ function deriveTestResults(
});
// 2. game_starts
- results.push({
- name: "game_starts",
- pass: session.started,
- detail: session.started
- ? `started via ${session.startMechanism}`
- : "could not start game with any mechanism",
- });
+ {
+ let startDetail: string;
+ if (session.started) {
+ startDetail = `started via ${session.startMechanism}`;
+ if (cal.startButton) {
+ const btn = cal.startButton;
+ startDetail += ` (${btn.selector}, "${btn.text}"${btn.disappeared ? ", disappeared after click" : ""})`;
+ }
+ } else {
+ startDetail = "could not start game with any mechanism";
+ }
+ results.push({
+ name: "game_starts",
+ pass: session.started,
+ detail: startDetail,
+ });
+ }
// Helper: produce a skip result for tests whose prerequisite phase was skipped
const skipResult = (name: string, reason: string): TestResult => ({
diff --git a/tasks/tetris/eval/gameplay-bot/types.ts b/tasks/tetris/eval/gameplay-bot/types.ts
@@ -47,6 +47,13 @@ export interface CalibrationResult {
consoleErrors: string[];
/** Fraction of grid reads that returned non-null during calibration polling. */
gridConfidence: number;
+ /** Details about the button that started the game, if any. */
+ startButton?: {
+ selector: string;
+ text: string;
+ disappeared: boolean;
+ position: { x: number; y: number };
+ };
}
/** Result of an individual test. */