commit d3de069d27a61685d29370f5ede9d3d99486d6b3
parent 17a4bada036386de83204177a3af6db3546666c3
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Tue, 7 Apr 2026 07:37:20 +0200
Rewrite bot start detection: falling piece detector, conditional phases
Start detection: detects a falling piece (downward-moving cluster of
colored pixels) instead of screenshot comparison. Eliminates false
positives from title screens, hover effects, canvas-rendered buttons.
Conditional phases: mechanics requires start, gameplay requires mechanics,
game over and endurance require gameplay. Tests in skipped phases fail with
"skipped: <reason>" (e.g. "skipped: game did not start") instead of
producing false positives.
Game over: stacks pieces via hard drops + grid reader verification of
filled top rows. Removes screenshot comparison approach.
piece_locks: requires grid reader reliability, no longer passes on
static screens with gridDetected=false.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
2 files changed, 584 insertions(+), 250 deletions(-)
diff --git a/tasks/tetris/eval/gameplay-bot/calibrate.ts b/tasks/tetris/eval/gameplay-bot/calibrate.ts
@@ -158,136 +158,344 @@ async function measureGridConfidence(
}
/**
- * Try multiple mechanisms to start the game.
- * Takes a screenshot before and after each attempt, comparing
- * to see if the game state changed.
+ * Detect a falling piece by taking 3 screenshots ~800ms apart and looking
+ * for a cluster of colored pixels that moved downward between frames.
+ *
+ * This works for canvas, DOM, SVG, WebGL -- any rendering approach.
+ * It does NOT require the grid reader or calibrated grid bounds.
+ *
+ * Implementation: divide the visible page into a grid of sample points
+ * (~20 columns x ~40 rows). Read pixel colors at each point via screenshot
+ * buffer. Between consecutive frames, look for a group of colored
+ * (non-background) points that disappeared from one position and appeared
+ * lower -- a "falling cluster" of roughly 4 cells (2x2 to 4x1 bounding box).
*/
-async function detectStartMechanism(page: Page): Promise<StartMechanism> {
- // Take initial screenshot
- let prevShot = await page.screenshot();
-
- // 1. Wait 3 seconds (auto-start)
- await page.waitForTimeout(3000);
- let newShot = await page.screenshot();
- if (!Buffer.from(prevShot).equals(Buffer.from(newShot))) {
- return "auto";
+async function detectFallingPiece(page: Page): Promise<boolean> {
+ const SAMPLE_COLS = 20;
+ const SAMPLE_ROWS = 40;
+ const SCREENSHOTS = 3;
+ const INTERVAL_MS = 800;
+ // Minimum downward shift in sample-grid rows to count as "falling"
+ const MIN_DOWN_SHIFT = 1;
+ // Cluster size bounds in sample points (a tetromino is roughly 3-6; accept 3-12)
+ const MIN_CLUSTER = 3;
+ const MAX_CLUSTER = 12;
+ // Color distance threshold to distinguish filled from background
+ const COLOR_THRESHOLD = 40;
+
+ // Take screenshots
+ const shots: Buffer[] = [];
+ for (let i = 0; i < SCREENSHOTS; i++) {
+ shots.push(await page.screenshot());
+ if (i < SCREENSHOTS - 1) await page.waitForTimeout(INTERVAL_MS);
}
- prevShot = newShot;
- // 2. Click the canvas or game container
- try {
- const canvas = page.locator("canvas").first();
- if ((await canvas.count()) > 0) {
- await canvas.click();
- await page.waitForTimeout(500);
- newShot = await page.screenshot();
- if (!Buffer.from(prevShot).equals(Buffer.from(newShot))) {
- return "click_canvas";
- }
- prevShot = newShot;
- }
- } catch { /* continue */ }
+ // Parse each screenshot into a grid of "colored" (true) / "background" (false)
+ // by sampling pixel colors at evenly spaced points.
+ // We use page.evaluate to decode the PNG in the browser via canvas.
+ const grids: boolean[][][] = [];
+
+ for (const shot of shots) {
+ const base64 = shot.toString("base64");
+ const grid = await page.evaluate(
+ async ({ base64, sampleCols, sampleRows, colorThreshold }) => {
+ // Decode the screenshot PNG into pixel data
+ const img = new Image();
+ const loaded = new Promise<void>((resolve, reject) => {
+ img.onload = () => resolve();
+ img.onerror = () => reject(new Error("image decode failed"));
+ });
+ img.src = `data:image/png;base64,${base64}`;
+ await loaded;
+
+ const canvas = document.createElement("canvas");
+ canvas.width = img.width;
+ canvas.height = img.height;
+ const ctx = canvas.getContext("2d")!;
+ ctx.drawImage(img, 0, 0);
+
+ const stepX = img.width / sampleCols;
+ const stepY = img.height / sampleRows;
+
+ // First pass: sample all pixel colors
+ const colors: number[][] = [];
+ for (let r = 0; r < sampleRows; r++) {
+ const row: number[] = [];
+ for (let c = 0; c < sampleCols; c++) {
+ const px = Math.floor(c * stepX + stepX / 2);
+ const py = Math.floor(r * stepY + stepY / 2);
+ const pixel = ctx.getImageData(px, py, 1, 1).data;
+ // Pack R, G, B into one integer (R*1e6 + G*1e3 + B) so colors can be counted
+ // as Map keys for background detection and unpacked again for distance checks
+ row.push(pixel[0] * 1000000 + pixel[1] * 1000 + pixel[2]);
+ }
+ colors.push(row);
+ }
- // Try clicking any game-like container
- try {
- const container = page.locator(
- '[class*="game"], [class*="board"], [id*="game"], [id*="board"]'
- ).first();
- if ((await container.count()) > 0) {
- await container.click();
- await page.waitForTimeout(500);
- newShot = await page.screenshot();
- if (!Buffer.from(prevShot).equals(Buffer.from(newShot))) {
- return "click_canvas";
+ // Determine background color: the most common color in the sample grid
+ const colorCounts = new Map<number, number>();
+ for (const row of colors) {
+ for (const c of row) {
+ colorCounts.set(c, (colorCounts.get(c) || 0) + 1);
+ }
+ }
+ let bgColor = 0;
+ let bgCount = 0;
+ for (const [color, count] of colorCounts) {
+ if (count > bgCount) {
+ bgCount = count;
+ bgColor = color;
+ }
+ }
+ const bgR = Math.floor(bgColor / 1000000);
+ const bgG = Math.floor((bgColor % 1000000) / 1000);
+ const bgB = bgColor % 1000;
+
+ // Second pass: mark cells as "colored" if they differ from background
+ const result: boolean[][] = [];
+ for (let r = 0; r < sampleRows; r++) {
+ const row: boolean[] = [];
+ for (let c = 0; c < sampleCols; c++) {
+ const v = colors[r][c];
+ const pR = Math.floor(v / 1000000);
+ const pG = Math.floor((v % 1000000) / 1000);
+ const pB = v % 1000;
+ const dist = Math.sqrt(
+ (pR - bgR) ** 2 + (pG - bgG) ** 2 + (pB - bgB) ** 2
+ );
+ row.push(dist > colorThreshold);
+ }
+ result.push(row);
+ }
+ return result;
+ },
+ { base64, sampleCols: SAMPLE_COLS, sampleRows: SAMPLE_ROWS, colorThreshold: COLOR_THRESHOLD }
+ );
+ grids.push(grid);
+ }
+
+ // Compare consecutive frame pairs to find downward-moving clusters
+ for (let f = 0; f < grids.length - 1; f++) {
+ const prev = grids[f];
+ const curr = grids[f + 1];
+
+ // Find cells that were colored in prev but not in curr ("disappeared")
+ const disappeared: [number, number][] = [];
+ // Find cells that are colored in curr but not in prev ("appeared")
+ const appeared: [number, number][] = [];
+
+ for (let r = 0; r < SAMPLE_ROWS; r++) {
+ for (let c = 0; c < SAMPLE_COLS; c++) {
+ if (prev[r][c] && !curr[r][c]) disappeared.push([r, c]);
+ if (!prev[r][c] && curr[r][c]) appeared.push([r, c]);
}
- prevShot = newShot;
}
- } catch { /* continue */ }
- // 3. Press Enter
- await page.keyboard.press("Enter");
- await page.waitForTimeout(500);
- newShot = await page.screenshot();
- if (!Buffer.from(prevShot).equals(Buffer.from(newShot))) {
- return "enter";
- }
- prevShot = newShot;
-
- // 4. Press Space
- await page.keyboard.press("Space");
- await page.waitForTimeout(500);
- newShot = await page.screenshot();
- if (!Buffer.from(prevShot).equals(Buffer.from(newShot))) {
- return "space";
- }
- prevShot = newShot;
+ // If no cells disappeared or none appeared, there is no falling piece in this frame pair
+ if (disappeared.length === 0 || appeared.length === 0) continue;
- // 5. Click the body/document (some games start on any click)
- try {
- await page.locator("body").click({ position: { x: 100, y: 100 } });
- await page.waitForTimeout(500);
- newShot = await page.screenshot();
- if (!Buffer.from(prevShot).equals(Buffer.from(newShot))) {
- return "click_canvas";
- }
- prevShot = newShot;
- } catch { /* continue */ }
+ // Cluster the disappeared and appeared points using simple flood fill
+ const disappearedClusters = clusterPoints(disappeared);
+ const appearedClusters = clusterPoints(appeared);
- // 6. Look for a start/play/restart button
- try {
- const button = page.locator("button, a, [role='button']").filter({
- hasText: /start|play|begin|new game|restart|reset|new/i,
- }).first();
- if ((await button.count()) > 0) {
- await button.click();
- await page.waitForTimeout(500);
- newShot = await page.screenshot();
- if (!Buffer.from(prevShot).equals(Buffer.from(newShot))) {
- return "button";
+ // For each disappeared cluster, look for a matching appeared cluster
+ // that is shifted downward (same rough column range, higher row numbers)
+ for (const dCluster of disappearedClusters) {
+ if (dCluster.length < MIN_CLUSTER || dCluster.length > MAX_CLUSTER) continue;
+
+ const dMinCol = Math.min(...dCluster.map(([, c]) => c));
+ const dMaxCol = Math.max(...dCluster.map(([, c]) => c));
+ const dMinRow = Math.min(...dCluster.map(([r]) => r));
+ const dCenterCol = (dMinCol + dMaxCol) / 2;
+
+ for (const aCluster of appearedClusters) {
+ if (aCluster.length < MIN_CLUSTER || aCluster.length > MAX_CLUSTER) continue;
+
+ const aMinCol = Math.min(...aCluster.map(([, c]) => c));
+ const aMaxCol = Math.max(...aCluster.map(([, c]) => c));
+ const aMinRow = Math.min(...aCluster.map(([r]) => r));
+ const aCenterCol = (aMinCol + aMaxCol) / 2;
+
+ // Check: appeared cluster is below disappeared cluster
+ // and in roughly the same column range
+ const colOverlap = Math.abs(dCenterCol - aCenterCol) <= 3;
+ const movedDown = aMinRow > dMinRow && (aMinRow - dMinRow) >= MIN_DOWN_SHIFT;
+
+ if (colOverlap && movedDown) {
+ return true;
+ }
}
- prevShot = newShot;
}
- } catch { /* continue */ }
- // Also try elements that aren't buttons but have matching text
- try {
- const textMatch = page.locator(
- ':text-matches("start|play|begin|new.game|restart|reset", "i")'
- ).first();
- if ((await textMatch.count()) > 0) {
- await textMatch.click();
- await page.waitForTimeout(500);
- newShot = await page.screenshot();
- if (!Buffer.from(prevShot).equals(Buffer.from(newShot))) {
- return "button";
+ // Also check if the overall set of colored points shifted down
+ // (handles cases where clusters partially overlap between frames)
+ if (disappeared.length >= MIN_CLUSTER && appeared.length >= MIN_CLUSTER) {
+ const dAvgRow = disappeared.reduce((s, [r]) => s + r, 0) / disappeared.length;
+ const aAvgRow = appeared.reduce((s, [r]) => s + r, 0) / appeared.length;
+ const dAvgCol = disappeared.reduce((s, [, c]) => s + c, 0) / disappeared.length;
+ const aAvgCol = appeared.reduce((s, [, c]) => s + c, 0) / appeared.length;
+
+ if (
+ aAvgRow > dAvgRow + MIN_DOWN_SHIFT &&
+ Math.abs(aAvgCol - dAvgCol) <= 3 &&
+ Math.abs(disappeared.length - appeared.length) <= 4
+ ) {
+ return true;
}
- prevShot = newShot;
}
- } catch { /* continue */ }
+ }
- // 7. Try clicking any <button> element regardless of text
- try {
- const anyButton = page.locator("button").first();
- if ((await anyButton.count()) > 0) {
- await anyButton.click();
- await page.waitForTimeout(500);
- newShot = await page.screenshot();
- if (!Buffer.from(prevShot).equals(Buffer.from(newShot))) {
- return "button";
+ return false;
+}
+
+/**
+ * Cluster adjacent points using flood fill.
+ * Two points are adjacent if they differ by at most 1 in both row and column.
+ */
+function clusterPoints(points: [number, number][]): [number, number][][] {
+ const clusters: [number, number][][] = [];
+ const visited = new Set<string>();
+
+ for (const [r, c] of points) {
+ const key = `${r},${c}`;
+ if (visited.has(key)) continue;
+
+ const cluster: [number, number][] = [];
+ const stack: [number, number][] = [[r, c]];
+ visited.add(key);
+
+ while (stack.length > 0) {
+ const [cr, cc] = stack.pop()!;
+ cluster.push([cr, cc]);
+
+ // Check all 8 neighbors
+ for (let dr = -1; dr <= 1; dr++) {
+ for (let dc = -1; dc <= 1; dc++) {
+ if (dr === 0 && dc === 0) continue;
+ const nr = cr + dr;
+ const nc = cc + dc;
+ const nk = `${nr},${nc}`;
+ if (!visited.has(nk) && points.some(([pr, pc]) => pr === nr && pc === nc)) {
+ visited.add(nk);
+ stack.push([nr, nc]);
+ }
+ }
}
- prevShot = newShot;
}
- } catch { /* continue */ }
- // 8. Press any key (try a few -- catches games that start on any keydown)
- for (const key of ["a", "p", "s", "n", "Escape"]) {
- await page.keyboard.press(key);
- await page.waitForTimeout(300);
- newShot = await page.screenshot();
- if (!Buffer.from(prevShot).equals(Buffer.from(newShot))) {
- return "anykey";
- }
- prevShot = newShot;
+ clusters.push(cluster);
+ }
+
+ return clusters;
+}
+
+/**
+ * Try multiple mechanisms to start the game.
+ * After each trigger, runs the falling piece detector to confirm
+ * the game actually started (not just a title screen animation).
+ *
+ * The ONLY way to confirm start is detecting a falling piece.
+ */
+async function detectStartMechanism(page: Page): Promise<StartMechanism> {
+ // Ordered list of triggers to try
+ const triggers: Array<{ name: StartMechanism; action: () => Promise<void> }> = [
+ // 1. Wait 3s (auto-start games)
+ {
+ name: "auto",
+ action: async () => {
+ await page.waitForTimeout(3000);
+ },
+ },
+ // 2. Click the canvas
+ {
+ name: "click_canvas",
+ action: async () => {
+ const canvas = page.locator("canvas").first();
+ if ((await canvas.count()) > 0) await canvas.click();
+ },
+ },
+ // 3. Click any game-like container
+ {
+ name: "click_canvas",
+ action: async () => {
+ const container = page.locator(
+ '[class*="game"], [class*="board"], [id*="game"], [id*="board"]'
+ ).first();
+ if ((await container.count()) > 0) await container.click();
+ },
+ },
+ // 4. Press Enter
+ {
+ name: "enter",
+ action: async () => { await page.keyboard.press("Enter"); },
+ },
+ // 5. Press Space
+ {
+ name: "space",
+ action: async () => { await page.keyboard.press("Space"); },
+ },
+ // 6. Click body
+ {
+ name: "click_canvas",
+ action: async () => {
+ await page.locator("body").click({ position: { x: 100, y: 100 } });
+ },
+ },
+ // 7. Click a start/play button
+ {
+ name: "button",
+ action: async () => {
+ const button = page.locator("button, a, [role='button']").filter({
+ hasText: /start|play|begin|new game|restart|reset|new/i,
+ }).first();
+ if ((await button.count()) > 0) await button.click();
+ },
+ },
+ // 8. Click text that looks like a start prompt
+ {
+ name: "button",
+ action: async () => {
+ const textMatch = page.locator(
+ ':text-matches("start|play|begin|new.game|restart|reset", "i")'
+ ).first();
+ if ((await textMatch.count()) > 0) await textMatch.click();
+ },
+ },
+ // 9. Click any button regardless of text
+ {
+ name: "button",
+ action: async () => {
+ const anyButton = page.locator("button").first();
+ if ((await anyButton.count()) > 0) await anyButton.click();
+ },
+ },
+ // 10. Press various keys (catches games that start on any keydown)
+ {
+ name: "anykey",
+ action: async () => {
+ for (const key of ["a", "p", "s", "n", "Escape"]) {
+ await page.keyboard.press(key);
+ await page.waitForTimeout(100);
+ }
+ },
+ },
+ // 11. Press ArrowDown (some games start on directional input)
+ {
+ name: "anykey",
+ action: async () => { await page.keyboard.press("ArrowDown"); },
+ },
+ ];
+
+ for (const trigger of triggers) {
+ try {
+ await trigger.action();
+ // Give the game a moment to react before checking for a falling piece
+ await page.waitForTimeout(300);
+
+ if (await detectFallingPiece(page)) {
+ return trigger.name;
+ }
+ } catch { /* continue to next trigger */ }
}
return "unknown";
@@ -297,6 +505,9 @@ async function detectStartMechanism(page: Page): Promise<StartMechanism> {
* Re-calibration fallback: try ALL start mechanisms again with longer waits,
* re-scanning for the grid after each attempt. Used when the first pass
* failed to detect the start mechanism or the grid.
+ *
+ * Uses the falling piece detector (not screenshot comparison) to confirm
+ * the game actually started.
*/
async function recalibrateWithRetry(
page: Page,
@@ -360,18 +571,16 @@ async function recalibrateWithRetry(
},
];
- let prevShot = await page.screenshot();
-
for (const attempt of attempts) {
try {
await attempt.action();
- await page.waitForTimeout(1500);
-
- const newShot = await page.screenshot();
- const changed = !Buffer.from(prevShot).equals(Buffer.from(newShot));
+ await page.waitForTimeout(500);
- if (changed && startMechanism === "unknown") {
- startMechanism = attempt.name;
+ // Use falling piece detector instead of screenshot comparison
+ if (startMechanism === "unknown") {
+ if (await detectFallingPiece(page)) {
+ startMechanism = attempt.name;
+ }
}
// Re-scan for grid after each attempt
@@ -386,8 +595,6 @@ async function recalibrateWithRetry(
if (startMechanism !== "unknown" && gridResult.gridBounds) {
break;
}
-
- prevShot = newShot;
} catch { /* continue */ }
}
diff --git a/tasks/tetris/eval/gameplay-bot/tests.ts b/tasks/tetris/eval/gameplay-bot/tests.ts
@@ -13,7 +13,7 @@ import {
identifyPieceType,
countCompleteRows,
} from "./grid-reader";
-import { hardDrop, playGame, tryFillRow, stackToGameOver } from "./player";
+import { hardDrop, playGame, tryFillRow } from "./player";
import { calibrate } from "./calibrate";
/**
@@ -85,7 +85,7 @@ export async function runAllTests(
};
}
- // ---- Phase 2: Calibrate ----
+ // ---- Phase 2: Calibrate + detect start (always runs) ----
let cal: CalibrationResult;
try {
cal = await calibrate(page);
@@ -101,42 +101,56 @@ export async function runAllTests(
if (!session.consoleErrors.includes(e)) session.consoleErrors.push(e);
}
- // ---- Phase 3: Observation session -- basic mechanics ----
- // Test auto-drop, movement, rotation, hard drop via grid reader
- if (cal.gridDetected) {
+ // ---- Phase 3: Basic mechanics -- ONLY if game started (falling piece detected) and the grid was detected ----
+ let mechanicsSucceeded = false;
+ if (session.started && cal.gridDetected) {
await runBasicMechanicsPhase(page, cal, session);
+ // Mechanics succeeded if we observed at least one movement, rotation, hard drop, or piece_moved event
+ mechanicsSucceeded =
+ session.movementsObserved > 0 ||
+ session.rotationsObserved > 0 ||
+ session.hardDropsObserved > 0 ||
+ session.events.some((e) => e.type === "piece_moved");
}
- // ---- Phase 4: Reload + calibrate for gameplay ----
- try {
- await loadGamePage(page, serverUrl);
- cal = await calibrate(page);
- session.started = session.started || cal.startMechanism !== "unknown";
- } catch { /* continue with existing state */ }
+ // ---- Phase 4: Gameplay (play to win) -- ONLY if mechanics had at least 1 success ----
+ let gameplayPlacedPieces = false;
+ if (mechanicsSucceeded) {
+ try {
+ await loadGamePage(page, serverUrl);
+ cal = await calibrate(page);
+ session.started = session.started || cal.startMechanism !== "unknown";
+ } catch { /* continue with existing state */ }
- // ---- Phase 5: Extended gameplay with integrated score tracking ----
- await runGameplayPhase(page, cal, session, gameplay);
+ await runGameplayPhase(page, cal, session, gameplay);
+ gameplayPlacedPieces = gameplay.pieces_placed > 0;
+ }
- // ---- Phase 6: Game over test ----
- try {
- await loadGamePage(page, serverUrl);
- cal = await calibrate(page);
- } catch { /* continue */ }
+ // ---- Phase 5: Game over -- ONLY if gameplay placed pieces ----
+ if (gameplayPlacedPieces) {
+ try {
+ await loadGamePage(page, serverUrl);
+ cal = await calibrate(page);
+ } catch { /* continue */ }
- await runGameOverPhase(page, cal, session);
+ await runGameOverPhase(page, cal, session);
+ }
- // ---- Phase 7: 30-second endurance play ----
- try {
- await loadGamePage(page, serverUrl);
- cal = await calibrate(page);
- } catch { /* continue */ }
+ // ---- Phase 6: Endurance -- ONLY if gameplay worked ----
+ if (gameplayPlacedPieces) {
+ try {
+ await loadGamePage(page, serverUrl);
+ cal = await calibrate(page);
+ } catch { /* continue */ }
- await runEndurancePhase(page, cal, session, gameplay, consoleErrors);
+ await runEndurancePhase(page, cal, session, gameplay, consoleErrors);
+ }
session.durationSeconds = gameplay.play_duration_seconds;
// ---- Derive test results from session data ----
- const testResults = deriveTestResults(session, cal, loadResult, consoleErrors, gameplay);
+ const phaseState = { mechanicsSucceeded, gameplayPlacedPieces };
+ const testResults = deriveTestResults(session, cal, loadResult, consoleErrors, gameplay, phaseState);
return { testResults, calibration: cal, gameplay, session };
}
@@ -470,18 +484,79 @@ async function runGameplayPhase(
}
/**
- * Stack pieces to trigger game over.
+ * Stack pieces to trigger game over using grid reader verification.
+ *
+ * Instead of screenshot comparison (which false-positives on static screens),
+ * we:
+ * 1. Hard drop up to 40 pieces back-to-back (no horizontal moves) to build a tower
+ * 2. After each batch of 5 drops, check grid for filled cells in the top 4 rows
+ * 3. If top rows are filled AND new drops don't change the grid, game is over
+ * 4. Also check for "game over" text in DOM as a secondary signal
*/
async function runGameOverPhase(
page: Page,
cal: CalibrationResult,
session: GameSession
): Promise<void> {
- const isOver = await stackToGameOver(page, cal, 40);
- if (isOver) {
- session.gameOverDetected = true;
- session.events.push({ type: "game_over", frame: session.frames });
+ const MAX_DROPS = 40;
+ const BATCH_SIZE = 5;
+
+ for (let i = 0; i < MAX_DROPS; i++) {
+ await page.keyboard.press(cal.controls.drop);
+ await page.waitForTimeout(150);
+
+ // Check after each batch of drops
+ if ((i + 1) % BATCH_SIZE === 0) {
+ const grid = await readGrid(page, cal);
+ if (grid) {
+ session.gridReadSuccess++;
+ session.frames++;
+
+ if (hasFilledInTopRows(grid, 4)) {
+ // Top rows are filled -- check if new drops actually change the grid
+ await page.keyboard.press(cal.controls.drop);
+ await page.waitForTimeout(300);
+ const gridAfter = await readGrid(page, cal);
+ if (gridAfter) {
+ session.gridReadSuccess++;
+ session.frames++;
+ if (!gridsAreDifferent(grid, gridAfter)) {
+ // Grid didn't change after a drop -- game is over
+ session.gameOverDetected = true;
+ session.events.push({ type: "game_over", frame: session.frames });
+ return;
+ }
+ }
+ }
+ } else {
+ session.gridReadFail++;
+ session.frames++;
+ }
+ }
}
+
+ // Final check: look for game over text in DOM
+ try {
+ const hasGameOverText = await page.evaluate(() => {
+ const text = document.body.innerText.toLowerCase();
+ return (
+ text.includes("game over") ||
+ text.includes("gameover") ||
+ text.includes("you lose") ||
+ text.includes("try again") ||
+ text.includes("play again")
+ );
+ });
+ if (hasGameOverText) {
+ // Only trust DOM text if the grid also shows more than 10 filled cells (prevents
+ // false positives from static pages that happen to have "play again" text)
+ const finalGrid = await readGrid(page, cal);
+ if (finalGrid && countFilled(finalGrid) > 10) {
+ session.gameOverDetected = true;
+ session.events.push({ type: "game_over", frame: session.frames });
+ }
+ }
+ } catch { /* ignore */ }
}
/**
@@ -537,12 +612,18 @@ const ALL_TEST_NAMES = [
"playable_30s",
];
+interface PhaseState {
+ mechanicsSucceeded: boolean;
+ gameplayPlacedPieces: boolean;
+}
+
function deriveTestResults(
session: GameSession,
cal: CalibrationResult,
loadResult: LoadResult,
consoleErrors: string[],
- gameplay: GameplayStats
+ gameplay: GameplayStats,
+ phaseState: PhaseState
): TestResult[] {
const results: TestResult[] = [];
const gridReliable = session.gridReadSuccess > 0 &&
@@ -564,34 +645,49 @@ function deriveTestResults(
: "could not start game with any mechanism",
});
+ // Helper: produce a skip result for tests whose prerequisite phase was skipped
+ const skipResult = (name: string, reason: string): TestResult => ({
+ name,
+ pass: false,
+ detail: `skipped: ${reason}`,
+ });
+
// 3. auto_drop -- MUST be verified via grid reader
- const autoDropEvents = session.events.filter(
- (e) => e.type === "piece_moved" && e.direction === "down" &&
- // Only count the first few frames (before we sent any input)
- e.frame <= 2
- );
- if (autoDropEvents.length > 0) {
- results.push({
- name: "auto_drop",
- pass: true,
- detail: "grid state changed after 5s with no input (grid-verified)",
- });
- } else if (!gridReliable) {
- results.push({
- name: "auto_drop",
- pass: false,
- detail: "grid reader unreliable, cannot verify auto-drop",
- });
+ if (!session.started) {
+ results.push(skipResult("auto_drop", "game did not start"));
} else {
- results.push({
- name: "auto_drop",
- pass: false,
- detail: "piece did not move down in 5 seconds (grid-verified)",
- });
+ const autoDropEvents = session.events.filter(
+ (e) => e.type === "piece_moved" && e.direction === "down" &&
+ // Only count the first few frames (before we sent any input)
+ e.frame <= 2
+ );
+ if (autoDropEvents.length > 0) {
+ results.push({
+ name: "auto_drop",
+ pass: true,
+ detail: "grid state changed after 5s with no input (grid-verified)",
+ });
+ } else if (!gridReliable) {
+ results.push({
+ name: "auto_drop",
+ pass: false,
+ detail: "grid reader unreliable, cannot verify auto-drop",
+ });
+ } else {
+ results.push({
+ name: "auto_drop",
+ pass: false,
+ detail: "piece did not move down in 5 seconds (grid-verified)",
+ });
+ }
}
// 4-6. movement tests
for (const dir of ["left", "right", "down"] as const) {
+ if (!session.started) {
+ results.push(skipResult(`move_${dir}`, "game did not start"));
+ continue;
+ }
const moveEvents = session.events.filter(
(e) => e.type === "piece_moved" && e.direction === dir
);
@@ -617,7 +713,9 @@ function deriveTestResults(
}
// 7. rotate
- if (session.rotationsObserved > 0) {
+ if (!session.started) {
+ results.push(skipResult("rotate", "game did not start"));
+ } else if (session.rotationsObserved > 0) {
results.push({
name: "rotate",
pass: true,
@@ -638,30 +736,35 @@ function deriveTestResults(
}
// 7b. all_pieces_rotate -- derived from piece types seen
- // We can only confidently test this if we saw multiple piece types
- const nonOPieceTypes = [...session.pieceTypes].filter((t) => t !== "O" && t !== "unknown");
- if (session.rotationsObserved > 0 && nonOPieceTypes.length > 0) {
- results.push({
- name: "all_pieces_rotate",
- pass: true,
- detail: `rotation observed, piece types seen: [${[...session.pieceTypes].join(", ")}]`,
- });
- } else if (session.rotationsObserved > 0) {
- results.push({
- name: "all_pieces_rotate",
- pass: true,
- detail: "rotation confirmed but could not identify individual piece types",
- });
+ if (!session.started) {
+ results.push(skipResult("all_pieces_rotate", "game did not start"));
} else {
- results.push({
- name: "all_pieces_rotate",
- pass: false,
- detail: "could not detect any piece rotations via grid reader",
- });
+ const nonOPieceTypes = [...session.pieceTypes].filter((t) => t !== "O" && t !== "unknown");
+ if (session.rotationsObserved > 0 && nonOPieceTypes.length > 0) {
+ results.push({
+ name: "all_pieces_rotate",
+ pass: true,
+ detail: `rotation observed, piece types seen: [${[...session.pieceTypes].join(", ")}]`,
+ });
+ } else if (session.rotationsObserved > 0) {
+ results.push({
+ name: "all_pieces_rotate",
+ pass: true,
+ detail: "rotation confirmed but could not identify individual piece types",
+ });
+ } else {
+ results.push({
+ name: "all_pieces_rotate",
+ pass: false,
+ detail: "could not detect any piece rotations via grid reader",
+ });
+ }
}
// 8. hard_drop
- if (session.hardDropsObserved > 0) {
+ if (!session.started) {
+ results.push(skipResult("hard_drop", "game did not start"));
+ } else if (session.hardDropsObserved > 0) {
results.push({
name: "hard_drop",
pass: true,
@@ -681,38 +784,48 @@ function deriveTestResults(
});
}
- // 9. piece_locks
- const lockEvents = session.events.filter((e) => e.type === "piece_locked");
- if (lockEvents.length > 0) {
- results.push({
- name: "piece_locks",
- pass: true,
- detail: `filled cells persist at bottom (grid-verified, ${lockEvents.length} lock event(s))`,
- });
- } else if (session.piecesLocked > 0 && session.piecesSpawned > 0) {
- // Only trust locked count if we also detected spawns (prevents false positives
- // from static UI being misread as game state)
- results.push({
- name: "piece_locks",
- pass: true,
- detail: `${session.piecesLocked} piece(s) locked during play`,
- });
- } else if (session.piecesLocked > 0 && session.piecesSpawned === 0) {
+ // 9. piece_locks -- only trust if grid is reliable
+ if (!session.started) {
+ results.push(skipResult("piece_locks", "game did not start"));
+ } else if (!gridReliable) {
results.push({
name: "piece_locks",
pass: false,
- detail: `${session.piecesLocked} lock event(s) but 0 spawns detected - likely false positive from UI misread`,
+ detail: "grid reader unreliable, cannot verify piece locking",
});
} else {
- results.push({
- name: "piece_locks",
- pass: false,
- detail: "could not verify piece locking via grid reader",
- });
+ const lockEvents = session.events.filter((e) => e.type === "piece_locked");
+ if (lockEvents.length > 0) {
+ results.push({
+ name: "piece_locks",
+ pass: true,
+ detail: `filled cells persist at bottom (grid-verified, ${lockEvents.length} lock event(s))`,
+ });
+ } else if (session.piecesLocked > 0 && session.piecesSpawned > 0) {
+ results.push({
+ name: "piece_locks",
+ pass: true,
+ detail: `${session.piecesLocked} piece(s) locked during play`,
+ });
+ } else if (session.piecesLocked > 0 && session.piecesSpawned === 0) {
+ results.push({
+ name: "piece_locks",
+ pass: false,
+ detail: `${session.piecesLocked} lock event(s) but 0 spawns detected - likely false positive from UI misread`,
+ });
+ } else {
+ results.push({
+ name: "piece_locks",
+ pass: false,
+ detail: "could not verify piece locking via grid reader",
+ });
+ }
}
// 10. new_piece_spawns
- if (session.piecesSpawned > 0) {
+ if (!session.started) {
+ results.push(skipResult("new_piece_spawns", "game did not start"));
+ } else if (session.piecesSpawned > 0) {
results.push({
name: "new_piece_spawns",
pass: true,
@@ -727,7 +840,9 @@ function deriveTestResults(
}
// 11. multiple_pieces
- if (session.piecesLocked >= 3 && session.piecesSpawned > 0) {
+ if (!phaseState.mechanicsSucceeded) {
+ results.push(skipResult("multiple_pieces", "mechanics phase not met"));
+ } else if (session.piecesLocked >= 3 && session.piecesSpawned > 0) {
results.push({
name: "multiple_pieces",
pass: true,
@@ -742,7 +857,9 @@ function deriveTestResults(
}
// 12. line_clear
- if (session.linesCleared > 0) {
+ if (!phaseState.mechanicsSucceeded) {
+ results.push(skipResult("line_clear", "mechanics phase not met"));
+ } else if (session.linesCleared > 0) {
results.push({
name: "line_clear",
pass: true,
@@ -757,7 +874,9 @@ function deriveTestResults(
}
// 13. score_changes
- if (session.scoreValues.length >= 2) {
+ if (!phaseState.mechanicsSucceeded) {
+ results.push(skipResult("score_changes", "mechanics phase not met"));
+ } else if (session.scoreValues.length >= 2) {
const min = Math.min(...session.scoreValues);
const max = Math.max(...session.scoreValues);
if (max > min) {
@@ -787,37 +906,45 @@ function deriveTestResults(
});
}
- // 14. game_over
- results.push({
- name: "game_over",
- pass: session.gameOverDetected,
- detail: session.gameOverDetected
- ? "game stopped after stacking to top"
- : "could not trigger or detect game over",
- });
-
- // 15. playable_30s
- const crashed = session.consoleErrors.length > 0 || gameplay.errors_during_play > 3;
- if (!crashed && gameplay.play_duration_seconds >= 10) {
- results.push({
- name: "playable_30s",
- pass: true,
- detail: `played for ${gameplay.play_duration_seconds}s, placed ${gameplay.pieces_placed} pieces, no crashes`,
- });
- } else if (crashed) {
- results.push({
- name: "playable_30s",
- pass: false,
- detail: `${session.consoleErrors.length} console error(s), ${gameplay.errors_during_play} play errors`,
- });
+ // 14. game_over -- requires gameplay to have placed pieces
+ if (!phaseState.gameplayPlacedPieces) {
+ results.push(skipResult("game_over", "gameplay phase not met"));
} else {
results.push({
- name: "playable_30s",
- pass: false,
- detail: `only played for ${gameplay.play_duration_seconds}s`,
+ name: "game_over",
+ pass: session.gameOverDetected,
+ detail: session.gameOverDetected
+ ? "game stopped after stacking to top (grid-verified)"
+ : "could not trigger or detect game over via grid reader",
});
}
+ // 15. playable_30s -- requires gameplay to have worked
+ if (!phaseState.gameplayPlacedPieces) {
+ results.push(skipResult("playable_30s", "gameplay phase not met"));
+ } else {
+ const crashed = session.consoleErrors.length > 0 || gameplay.errors_during_play > 3;
+ if (!crashed && gameplay.play_duration_seconds >= 10) {
+ results.push({
+ name: "playable_30s",
+ pass: true,
+ detail: `played for ${gameplay.play_duration_seconds}s, placed ${gameplay.pieces_placed} pieces, no crashes`,
+ });
+ } else if (crashed) {
+ results.push({
+ name: "playable_30s",
+ pass: false,
+ detail: `${session.consoleErrors.length} console error(s), ${gameplay.errors_during_play} play errors`,
+ });
+ } else {
+ results.push({
+ name: "playable_30s",
+ pass: false,
+ detail: `only played for ${gameplay.play_duration_seconds}s`,
+ });
+ }
+ }
+
return results;
}