commit 3bde26d36a17e8b79525bbe582d3ab13b8d8387b
parent d162c5ba603ac08e3db2a7fe0919dd0494c4f14d
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Fri, 10 Apr 2026 14:36:48 +0200
V2 bot: caching, bot/driver bridge, fixed CCW rotation test
Three improvements merged:
1. Calibration caching (driver.ts): caches start mechanism, controls,
grid bounds across reloads. Detects drift, flags conflicts. Eliminates
timeouts from repeated full calibration.
2. Bot/driver bridge (bot.ts, driver.ts, types.ts): bot verifies game
actually started before driver commits to a mechanism. Checks grid
populated, movement responsive, no game-over text. discoverStartCandidates,
tryStartMechanism, confirmStartMechanism, rejectStartMechanism methods.
3. CCW rotation test (bot.ts): fixed broken sequential test that was
tautologically true. Now reloads the page between the rotate-key and raw Z
tests and compares the resulting piece shapes from the same fresh baseline.
Results vs human calibration (9 games):
- V1: 56/97 = 58% agreement
- V2: 80/98 = 82% agreement
Major wins: e2e04e75 (Spanish 18% -> 85%, perfect agreement),
4949d521 (trail bug 18% -> 67%), cbbff570 (18% -> 67%),
9805c24a (80% -> 95%), 7a348b81 (correctly finds working start button).
Known regression: 8fe72fce went 44% -> 0% because bridge's strict
verification rejects start mechanisms when benign startup console
errors occur. Needs follow-up: distinguish pre-start errors from
fatal errors.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
4 files changed, 1219 insertions(+), 39 deletions(-)
diff --git a/tasks/tetris/eval/gameplay-bot-v2/bot.ts b/tasks/tetris/eval/gameplay-bot-v2/bot.ts
@@ -6,7 +6,9 @@ import type {
PieceType,
TetrisDriver,
DriverCalibration,
+ CalibrationDrift,
GridSnapshot,
+ StartCandidate,
TestResult,
GameplayStats,
GameSession,
@@ -341,6 +343,7 @@ export async function runAllTests(
session: GameSession;
survey: SurveyData;
competitivePlay: CompetitivePlayResult | null;
+ calibrationDrift: CalibrationDrift;
}> {
const gameplay: GameplayStats = {
pieces_placed: 0,
@@ -391,15 +394,32 @@ export async function runAllTests(
testResults: failedTests,
calibration: emptyCalibration(driver.getConsoleErrors()),
gameplay, session, survey, competitivePlay,
+ calibrationDrift: driver.getCalibrationDrift(),
};
}
// ---- Pre-test survey ----
survey = await driver.surveyPage();
- // ---- Phase 2: Calibrate + detect start ----
+ // ---- Phase 2: Discover + verify start, then calibrate ----
+ // Bridge flow: try each candidate, ask verifyGameStarted() to confirm,
+ // commit the first verified candidate. On false positive, reload and try
+ // the next. If nothing verifies, tell the driver to reject and skip the
+ // legacy fuzzy detector (which historically clicked Pause buttons etc.).
let cal: DriverCalibration;
+ let verified: { candidate: StartCandidate } | null = null;
try {
+ verified = await detectStartWithVerification(driver, serverUrl);
+ } catch (err) {
+ console.log(`[bot] bridge detection threw: ${err instanceof Error ? err.message : String(err)}`);
+ }
+
+ try {
+ if (verified) {
+ driver.confirmStartMechanism(verified.candidate);
+ } else {
+ driver.rejectStartMechanism();
+ }
cal = await driver.calibrate();
session.started = cal.startMechanism !== "unknown";
session.startMechanism = cal.startMechanism;
@@ -538,7 +558,7 @@ export async function runAllTests(
}
} catch { /* continue */ }
- competitivePlay = await runCompetitivePlayPhase(driver, session, gameplay);
+ competitivePlay = await runCompetitivePlayPhase(driver, session, gameplay, serverUrl);
} else if (!session.skippedPhases.some((p) => p.startsWith("competitive:"))) {
session.skippedPhases.push("competitive: gameplay failed");
}
@@ -549,7 +569,8 @@ export async function runAllTests(
const phaseState = { gameStarted, mechanicsWork, piecesWork, gameplayWorks };
const testResults = deriveTestResults(session, cal, loadResult, driver.getConsoleErrors(), gameplay, phaseState, competitivePlay);
- return { testResults, calibration: cal, gameplay, session, survey, competitivePlay };
+ const calibrationDrift = driver.getCalibrationDrift();
+ return { testResults, calibration: cal, gameplay, session, survey, competitivePlay, calibrationDrift };
}
// ---------------------------------------------------------------------------
@@ -858,11 +879,209 @@ async function runEndurancePhase(
}
}
+/**
+ * Test whether the game supports counter-clockwise rotation.
+ *
+ * The naive "press Z then press rotate, compare" approach is broken because
+ * rotation state is ordinal: the second press always moves the piece again
+ * (0->3->0 if the keys are opposite, 0->1->2 if they are the same), so the
+ * two post-press snapshots always differ. That test is a tautology.
+ *
+ * Instead we run each key against a FRESH baseline game by reloading the
+ * page between presses. Both presses are then measured from rotation
+ * state 0. If the two resulting grids match, both keys rotate in the same
+ * direction; if they differ, they rotate opposite.
+ *
+ * Returns { done: false, ccw: null } when a reliable signal is unavailable
+ * (e.g. no active piece, O-piece baseline, rotate key doesn't rotate at all).
+ */
+async function testRotationDirection(
+ driver: TetrisDriver,
+ serverUrl: string
+): Promise<{ done: boolean; ccw: boolean | null }> {
+ // Helper: reload, start game, wait for an active piece to be visible.
+ // On a fresh game nothing has settled yet, so we read against an explicit
+ // all-empty settled grid (built below): every filled cell is then treated
+ // as part of the active piece.
+ const freshBaseline = async (): Promise<{
+ piece: PieceType | null;
+ grid: Grid | null;
+ settled: Grid | null;
+ }> => {
+ try {
+ const load = await driver.loadPage(serverUrl);
+ if (!load.loaded) return { piece: null, grid: null, settled: null };
+ } catch {
+ return { piece: null, grid: null, settled: null };
+ }
+ try {
+ await driver.calibrate();
+ } catch {
+ return { piece: null, grid: null, settled: null };
+ }
+
+ // Use an empty grid as the "settled" reference so the active piece is
+ // detected as the full delta (any filled cell in current == active).
+ // This is robust even when the game hasn't yet spawned a piece at the
+ // moment of the first read.
+ const emptySettled: Grid = Array.from({ length: GRID_ROWS }, () =>
+ Array.from({ length: GRID_COLS }, () => false)
+ );
+
+ // Poll up to ~3s for an active piece to appear.
+ let snap = await driver.readGrid(emptySettled);
+ let attempts = 0;
+ while (
+ (!snap.activePieceCells || snap.activePieceCells.length !== 4) &&
+ attempts < 30
+ ) {
+ await driver.wait(100);
+ snap = await driver.readGrid(emptySettled);
+ attempts++;
+ }
+
+ if (!snap.activePieceCells || snap.activePieceCells.length !== 4) {
+ return { piece: null, grid: snap.grid, settled: emptySettled };
+ }
+ return {
+ piece: snap.activePieceType,
+ grid: snap.grid,
+ settled: emptySettled,
+ };
+ };
+
+ // Extract the active piece cells from a grid (assumes the grid contains
+ // only the active piece, which is the case on a fresh game where the
+ // settled grid is empty). Returns a position-normalized shape string so
+ // that comparisons ignore where on the board the piece sits.
+ const shapeKey = (grid: Grid): string | null => {
+ const cells: [number, number][] = [];
+ for (let r = 0; r < grid.length; r++) {
+ for (let c = 0; c < grid[r].length; c++) {
+ if (grid[r][c]) cells.push([r, c]);
+ }
+ }
+ if (cells.length !== 4) return null;
+ const minR = Math.min(...cells.map(([r]) => r));
+ const minC = Math.min(...cells.map(([, c]) => c));
+ return cells
+ .map(([r, c]) => `${r - minR},${c - minC}`)
+ .sort()
+ .join("|");
+ };
+
+ // Helper: press a key, wait briefly, and return the post-press shape key
+ // if the piece's SHAPE differs from baseline (null if unchanged/unreadable).
+ // We compare shape (not full grid) so that piece falling during the wait
+ // does not confound the rotation measurement.
+ const measureKeyShape = async (
+ pressFn: () => Promise<void>,
+ baseShape: string
+ ): Promise<string | null> => {
+ await pressFn();
+ await driver.wait(80);
+ const snap = await driver.readGrid();
+ if (!snap.grid) return null;
+ const shape = shapeKey(snap.grid);
+ if (!shape) return null;
+ if (shape === baseShape) return null;
+ return shape;
+ };
+
+ // Only J, L, T pieces have 4 visually-distinct rotation states. I, S, Z
+ // have only 2 (rotating CW vs CCW from state 0 produces an identical
+ // visual). O is rotationally symmetric. So we can only distinguish
+ // rotation directions using J, L, or T pieces.
+ const DISTINGUISHABLE: Set<PieceType> = new Set<PieceType>(["J", "L", "T"]);
+ const log = (msg: string) => console.log(`[ccw] ${msg}`);
+
+ // Per-piece-type sample: the shape after pressing the calibrated rotate
+ // key from a fresh-spawn baseline. Keyed by piece type so Trial 2 can
+ // match whatever piece type its fresh reload happens to produce.
+ const trial1Shapes = new Map<PieceType, string>();
+
+ // ----- Trial 1: collect rotate-key shape samples for several piece types -----
+ for (let attempt = 0; attempt < 10; attempt++) {
+ const b = await freshBaseline();
+ if (!b.grid || !b.piece) continue;
+ if (!DISTINGUISHABLE.has(b.piece)) continue;
+ if (trial1Shapes.has(b.piece)) continue;
+ const baseShape = shapeKey(b.grid);
+ if (!baseShape) continue;
+ const afterShape = await measureKeyShape(
+ () => driver.pressKey("rotate"),
+ baseShape
+ );
+ if (afterShape) {
+ trial1Shapes.set(b.piece, afterShape);
+ log(`trial1: rotate changed ${b.piece} (samples: ${trial1Shapes.size})`);
+ if (trial1Shapes.size >= DISTINGUISHABLE.size) break;
+ } else {
+ log(`trial1: rotate did NOT change ${b.piece}`);
+ }
+ }
+ if (trial1Shapes.size === 0) {
+ log("could not establish any Trial 1 reference direction");
+ return { done: false, ccw: null };
+ }
+
+ // ----- Trial 2: press the raw "z" key from a fresh baseline whose piece
+ // type matches one of our Trial 1 samples, and compare the resulting
+ // shape to the corresponding Trial 1 shape. -----
+ for (let attempt = 0; attempt < 10; attempt++) {
+ const b = await freshBaseline();
+ if (!b.grid || !b.piece) continue;
+ const rotateShape = trial1Shapes.get(b.piece);
+ if (!rotateShape) continue;
+ const baseShape = shapeKey(b.grid);
+ if (!baseShape) continue;
+ const afterShape = await measureKeyShape(
+ () => driver.pressRawKey("z"),
+ baseShape
+ );
+ if (!afterShape) {
+ log(`trial2: z caused no shape change on ${b.piece} -> CCW not supported`);
+ return { done: true, ccw: false };
+ }
+ const opposite = afterShape !== rotateShape;
+ log(
+ `trial2: ${b.piece} rotate=${rotateShape} z=${afterShape} opposite=${opposite}`
+ );
+ return { done: true, ccw: opposite };
+ }
+
+ log("could not find Trial 2 baseline with matching piece");
+ return { done: false, ccw: null };
+}
+
async function runCompetitivePlayPhase(
driver: TetrisDriver,
session: GameSession,
- gameplay: GameplayStats
+ gameplay: GameplayStats,
+ serverUrl: string
): Promise<CompetitivePlayResult> {
+ // Dedicated rotation-direction test (run BEFORE the main play loop so that
+ // each key press is measured from a fresh baseline game state). See
+ // testRotationDirection() for details.
+ let ccwTestDone = false;
+ let ccwResult: boolean | null = null;
+ try {
+ const rotResult = await testRotationDirection(driver, serverUrl);
+ ccwTestDone = rotResult.done;
+ ccwResult = rotResult.ccw;
+ } catch {
+ ccwTestDone = false;
+ ccwResult = null;
+ }
+
+ // Reload once more so competitive play starts from a clean game state.
+ try {
+ await driver.loadPage(serverUrl);
+ await driver.calibrate();
+ } catch {
+ /* continue: play loop will still attempt to run */
+ }
+
const start = Date.now();
const maxDuration = 60000;
@@ -914,8 +1133,6 @@ async function runCompetitivePlayPhase(
let pollCount = 0;
let consecutiveClears = 0;
let maxCombo = 0;
- let ccwTestDone = false;
- let ccwResult: boolean | null = null;
let softDropTestDone = false;
let softDropDistinct: boolean | null = null;
@@ -979,29 +1196,6 @@ async function runCompetitivePlayPhase(
const pieceType = snap.activePieceType || "unknown";
session.pieceTypes.add(pieceType);
- // CCW rotation test
- if (!ccwTestDone && result.pieces_placed > 5 && result.pieces_placed % 7 === 0) {
- const gridBeforeZ = await driver.readGrid(settledGrid);
- await driver.pressRawKey("z");
- await driver.wait(60);
- const gridAfterZ = await driver.readGrid(settledGrid);
-
- if (gridBeforeZ.grid && gridAfterZ.grid && driver.gridsAreDifferent(gridBeforeZ.grid, gridAfterZ.grid)) {
- const gridBeforeUp = await driver.readGrid(settledGrid);
- await driver.pressKey("rotate");
- await driver.wait(60);
- const gridAfterUp = await driver.readGrid(settledGrid);
-
- if (gridBeforeUp.grid && gridAfterUp.grid) {
- ccwResult = driver.gridsAreDifferent(gridAfterZ.grid, gridAfterUp.grid);
- ccwTestDone = true;
- }
- } else {
- ccwResult = false;
- ccwTestDone = true;
- }
- }
-
// Soft drop test
if (!softDropTestDone && result.pieces_placed > 3 && result.pieces_placed % 5 === 0) {
const snapBeforeDown = await driver.readGrid(settledGrid);
@@ -1688,3 +1882,198 @@ function deriveTestResults(
return results;
}
+
+// ---------------------------------------------------------------------------
+// Start-mechanism verification bridge
+// ---------------------------------------------------------------------------
+
+/**
+ * The bot's gameplay-grounded check for "did the game really start?"
+ *
+ * This is the feedback channel the driver leans on. The driver can only see
+ * pixel and DOM deltas, so it can be fooled by Pause buttons, overlays, or
+ * spurious animations. The bot reads the grid, presses real gameplay keys,
+ * and watches for tetris-like behavior.
+ *
+ * Returns { ok: true } only when the evidence clearly points to a started game:
+ * - no game-over text, either before or after the probes, AND
+ * - grid detected and at most 60% filled (pieces, not chrome), AND
+ * - at least one of: the "left" control changes the grid, or the grid
+ * changes on its own within ~1.1s (auto-drop).
+ * Every other outcome returns { ok: false } with a human-readable reason.
+ */
+async function verifyGameStarted(driver: TetrisDriver): Promise<{
+ ok: boolean;
+ reason: string;
+}> {
+ // 1. Instant rejection: game over text visible means we started into a
+ // dead state, or clicked a Restart that then immediately ended again.
+ try {
+ const gameOverText = await driver.detectGameOverText();
+ if (gameOverText) {
+ return { ok: false, reason: `immediate game over: "${gameOverText}"` };
+ }
+ } catch { /* continue */ }
+
+ // 2. tryStartMechanism() populated a minimal calibration for us. If it
+ // couldn't find a grid, the candidate is not a real start.
+ let cal;
+ try {
+ cal = driver.getCalibration();
+ } catch {
+ cal = null;
+ }
+ if (!cal || !cal.gridDetected) {
+ return { ok: false, reason: "no grid detected after start attempt" };
+ }
+
+ // 3. Read the grid. Need a sane fill level (pieces, not chrome).
+ const snap = await driver.readGrid();
+ if (!snap.grid) {
+ return { ok: false, reason: "grid read failed" };
+ }
+ const totalCells = snap.grid.length * (snap.grid[0]?.length || 0);
+ if (totalCells === 0) {
+ return { ok: false, reason: "grid has zero cells" };
+ }
+ const fillRatio = snap.filledCount / totalCells;
+ // A running game may legitimately start empty, so 0 cells is allowed.
+ // But >60% filled likely means we're reading chrome as cells.
+ if (fillRatio > 0.6) {
+ return {
+ ok: false,
+ reason: `grid ${Math.round(fillRatio * 100)}% filled (likely reading chrome)`,
+ };
+ }
+
+ // 4. Evidence: press ArrowLeft and see if the grid changes (movement works).
+ let movementSeen = false;
+ try {
+ const before = await driver.readGrid();
+ await driver.pressKey("left");
+ await driver.wait(250);
+ const after = await driver.readGrid();
+ if (before.grid && after.grid && driver.gridsAreDifferent(before.grid, after.grid)) {
+ movementSeen = true;
+ }
+ } catch { /* fall through to auto-drop check */ }
+
+ // 5. Evidence: wait 1.1s and see if the grid changes on its own (auto-drop).
+ let autoDropSeen = false;
+ try {
+ const before = await driver.readGrid();
+ await driver.wait(1100);
+ const after = await driver.readGrid();
+ if (before.grid && after.grid && driver.gridsAreDifferent(before.grid, after.grid)) {
+ autoDropSeen = true;
+ }
+ } catch { /* fall through */ }
+
+ // 6. Second chance at game-over after interaction.
+ try {
+ const gameOverText = await driver.detectGameOverText();
+ if (gameOverText) {
+ return { ok: false, reason: `game over after interaction: "${gameOverText}"` };
+ }
+ } catch { /* continue */ }
+
+ if (movementSeen && autoDropSeen) {
+ return { ok: true, reason: "movement and auto-drop both observed" };
+ }
+ if (movementSeen) {
+ return { ok: true, reason: "movement key changes the grid" };
+ }
+ if (autoDropSeen) {
+ return { ok: true, reason: "grid changes on its own (auto-drop)" };
+ }
+
+ // 7. No movement or auto-drop was observed. A plausibly populated grid on
+ // its own is NOT accepted as proof of a started game; it only yields a
+ // more specific rejection reason than the generic one below.
+ if (snap.filledCount > 0 && snap.filledCount < totalCells * 0.5) {
+ return {
+ ok: false,
+ reason: `grid populated (${snap.filledCount} cells) but no movement or auto-drop observed`,
+ };
+ }
+
+ return { ok: false, reason: "no gameplay evidence detected" };
+}
+
+/**
+ * Full discovery loop: ask the driver for candidates, try each, verify with
+ * verifyGameStarted(), and return the first candidate the bot trusts. Reloads
+ * the page after every attempt, except attempts with no visual/DOM change.
+ */
+async function detectStartWithVerification(
+ driver: TetrisDriver,
+ serverUrl: string
+): Promise<{ candidate: StartCandidate } | null> {
+ const log = (msg: string) => console.log(`[bot:start] ${msg}`);
+
+ const candidates = await driver.discoverStartCandidates();
+ log(`discovered ${candidates.length} candidate(s)`);
+
+ for (let i = 0; i < candidates.length; i++) {
+ const candidate = candidates[i];
+ log(`(${i + 1}/${candidates.length}) trying: ${candidate.label}`);
+
+ // Apply without committing.
+ let tryResult;
+ try {
+ tryResult = await driver.tryStartMechanism(candidate);
+ } catch (err) {
+ log(` tryStartMechanism threw: ${err instanceof Error ? err.message : String(err)}`);
+ await reloadAndClear(driver, serverUrl);
+ continue;
+ }
+
+ // Skip candidates with no observable effect at all.
+ if (!tryResult.visualChanged && !tryResult.domChanged && candidate.mechanism !== "auto") {
+ log(` no visual/DOM change, skipping`);
+ continue;
+ }
+ if (tryResult.errorOccurred) {
+ log(` JS error fired during attempt, skipping`);
+ await reloadAndClear(driver, serverUrl);
+ continue;
+ }
+
+ // Ask the bot's own verification.
+ let verification;
+ try {
+ verification = await verifyGameStarted(driver);
+ } catch (err) {
+ log(` verifyGameStarted threw: ${err instanceof Error ? err.message : String(err)}`);
+ await reloadAndClear(driver, serverUrl);
+ continue;
+ }
+
+ if (verification.ok) {
+ log(` VERIFIED: ${verification.reason}`);
+ // Important: verification has already disturbed the page (key presses,
+ // elapsed time), so reload it and clear the driver's in-flight candidate.
+ // The follow-up calibrate() call then re-applies the candidate from
+ // scratch -- that way the phase separation (load -> apply -> verify)
+ // stays consistent across downstream phases that reload the page.
+ await reloadAndClear(driver, serverUrl);
+ return { candidate };
+ }
+
+ log(` REJECTED: ${verification.reason}`);
+ await reloadAndClear(driver, serverUrl);
+ }
+
+ log("no candidate verified");
+ return null;
+}
+
+/** Reload the page and clear any in-flight confirmed candidate. */
+async function reloadAndClear(driver: TetrisDriver, serverUrl: string): Promise<void> {
+ try {
+ driver.clearConfirmedStartMechanism();
+ } catch { /* ignore */ }
+ try {
+ await driver.loadPage(serverUrl);
+ } catch { /* ignore */ }
+}
diff --git a/tasks/tetris/eval/gameplay-bot-v2/driver.ts b/tasks/tetris/eval/gameplay-bot-v2/driver.ts
@@ -8,9 +8,12 @@ import type {
RendererType,
Controls,
StartMechanism,
+ StartCandidate,
+ TryStartResult,
SurveyData,
PieceType,
DriverCalibration,
+ CalibrationDrift,
GridSnapshot,
TetrisDriver,
} from "./types";
@@ -162,12 +165,111 @@ function identifyPieceType(cells: [number, number][]): PieceType {
}
// ---------------------------------------------------------------------------
+// Calibration cache helpers
+// ---------------------------------------------------------------------------
+
+function cloneCalibration(cal: DriverCalibration): DriverCalibration {
+ const copy: DriverCalibration = {
+ renderer: cal.renderer,
+ gridDetected: cal.gridDetected,
+ gridBounds: cal.gridBounds ? { ...cal.gridBounds } : null,
+ cellWidth: cal.cellWidth,
+ cellHeight: cal.cellHeight,
+ controls: { ...cal.controls },
+ startMechanism: cal.startMechanism,
+ scoreElementSelector: cal.scoreElementSelector,
+ levelElementSelector: cal.levelElementSelector,
+ backgroundColor: cal.backgroundColor ? [...cal.backgroundColor] as [number, number, number] : null,
+ consoleErrors: [...cal.consoleErrors],
+ gridConfidence: cal.gridConfidence,
+ gridDetectedAt: cal.gridDetectedAt,
+ };
+ if (cal.startButton) {
+ copy.startButton = {
+ selector: cal.startButton.selector,
+ text: cal.startButton.text,
+ disappeared: cal.startButton.disappeared,
+ position: { ...cal.startButton.position },
+ };
+ }
+ return copy;
+}
+
+function gridBoundsSimilar(a: GridBounds, b: GridBounds): boolean {
+ // Tolerate rendering jitter: each of x/y/width/height may differ by up to max(20px, 15% of the smaller width).
+ const tol = Math.max(20, Math.min(a.width, b.width) * 0.15);
+ return (
+ Math.abs(a.x - b.x) < tol &&
+ Math.abs(a.y - b.y) < tol &&
+ Math.abs(a.width - b.width) < tol &&
+ Math.abs(a.height - b.height) < tol
+ );
+}
+
+/**
+ * Returns a list of field names that differ between the baseline calibration
+ * and a fresh one. Empty list means no drift detected.
+ */
+function diffCalibrations(baseline: DriverCalibration, fresh: DriverCalibration): string[] {
+ const changes: string[] = [];
+
+ if (baseline.startMechanism !== fresh.startMechanism) {
+ changes.push("start_mechanism");
+ }
+ const baseSel = baseline.startButton?.selector ?? null;
+ const freshSel = fresh.startButton?.selector ?? null;
+ if (baseSel !== freshSel) changes.push("start_button_selector");
+
+ if (baseline.renderer !== fresh.renderer) changes.push("renderer");
+
+ if (!!baseline.gridBounds !== !!fresh.gridBounds) {
+ changes.push("grid_bounds");
+ } else if (baseline.gridBounds && fresh.gridBounds) {
+ if (!gridBoundsSimilar(baseline.gridBounds, fresh.gridBounds)) {
+ changes.push("grid_bounds");
+ }
+ }
+
+ const bc = baseline.controls;
+ const fc = fresh.controls;
+ if (bc.left !== fc.left || bc.right !== fc.right || bc.down !== fc.down ||
+ bc.rotate !== fc.rotate || bc.drop !== fc.drop) {
+ changes.push("controls");
+ }
+
+ if (baseline.scoreElementSelector !== fresh.scoreElementSelector) {
+ changes.push("score_element");
+ }
+ if (baseline.levelElementSelector !== fresh.levelElementSelector) {
+ changes.push("level_element");
+ }
+
+ return changes;
+}
+
+// ---------------------------------------------------------------------------
// PlaywrightDriver
// ---------------------------------------------------------------------------
export class PlaywrightDriver implements TetrisDriver {
private page: Page;
private cal: DriverCalibration | null = null;
+ // Frozen copy of the first full calibration result, used as the cache baseline across reloads.
+ private firstCal: DriverCalibration | null = null;
+ // Candidate confirmed by the bot's verification bridge. When set, calibrate()
+ // replays this candidate instead of rediscovering the start mechanism.
+ private confirmedCandidate: StartCandidate | null = null;
+ // Set by the bot when bridge verification definitively failed -- the legacy
+ // detectStartMechanism() fallback must NOT run and override the bot's verdict.
+ private startRejected: boolean = false;
+ // Cumulative drift info across the session.
+ private drift: CalibrationDrift = {
+ drifted: false,
+ changes: [],
+ recalibrations: 0,
+ cacheHits: 0,
+ cacheMisses: 0,
+ };
private consoleErrors: string[] = [];
private log = (msg: string) => console.log(`[driver] ${msg}`);
@@ -314,11 +416,83 @@ export class PlaywrightDriver implements TetrisDriver {
}
async calibrate(): Promise<DriverCalibration> {
+ // Fast path: try replaying the baseline calibration cached by an earlier calibrate() call.
+ if (this.firstCal) {
+ this.drift.recalibrations++;
+ const cached = await this.applyCachedCalibration();
+ if (cached) {
+ this.drift.cacheHits++;
+ this.cal = cached;
+ this.log(
+ `[cache] hit: replayed start="${cached.startMechanism}" renderer=${cached.renderer} ` +
+ `(hits=${this.drift.cacheHits}, misses=${this.drift.cacheMisses})`
+ );
+ return cached;
+ }
+ this.drift.cacheMisses++;
+ this.log(
+ `[cache] miss: cached calibration no longer works, doing full recalibration ` +
+ `(hits=${this.drift.cacheHits}, misses=${this.drift.cacheMisses})`
+ );
+ }
+
+ const fresh = await this.fullCalibrate();
+ this.cal = fresh;
+
+ if (!this.firstCal) {
+ // First time -- freeze a copy as the baseline for drift detection.
+ this.firstCal = cloneCalibration(fresh);
+ } else {
+ // Not the first time: compute drift vs baseline.
+ const changes = diffCalibrations(this.firstCal, fresh);
+ if (changes.length > 0) {
+ this.drift.drifted = true;
+ for (const c of changes) {
+ if (!this.drift.changes.includes(c)) this.drift.changes.push(c);
+ }
+ this.log(`CONFLICT: calibration drifted: [${changes.join(", ")}]`);
+ }
+ }
+
+ return fresh;
+ }
+
+ // Runs the full (expensive) calibration flow. Does not touch firstCal/drift.
+ private async fullCalibrate(): Promise<DriverCalibration> {
await this.page.waitForTimeout(2000);
- let startResult = await this.detectStartMechanism();
- let startMechanism: StartMechanism = startResult.mechanism;
- let startButton = startResult.startButton;
+ let startMechanism: StartMechanism;
+ let startButton: DriverCalibration["startButton"] | undefined;
+
+ if (this.confirmedCandidate) {
+ // Bot already verified the start. Replay it instead of rediscovering.
+ this.log(
+ `[bridge] replaying confirmed candidate: ${this.confirmedCandidate.label}`
+ );
+ const applied = await this.applyCandidate(this.confirmedCandidate);
+ startMechanism = applied.ok ? this.confirmedCandidate.mechanism : "unknown";
+ if (applied.ok && (this.confirmedCandidate.mechanism === "button" || this.confirmedCandidate.mechanism === "click_canvas")) {
+ startButton = {
+ selector: this.confirmedCandidate.selector ?? "canvas",
+ text: this.confirmedCandidate.text ?? this.confirmedCandidate.label,
+ disappeared: false,
+ position: this.confirmedCandidate.position ?? { x: 0, y: 0 },
+ };
+ }
+ await this.page.waitForTimeout(this.confirmedCandidate.waitMs ?? 400);
+ } else if (this.startRejected) {
+ // Bot's bridge verification rejected every candidate. Do NOT run the
+ // legacy fallback; it has historically produced false positives
+ // (e.g. clicking Pause) that the bridge was designed to prevent.
+ this.log(`[bridge] start rejected by bot; skipping legacy detection`);
+ startMechanism = "unknown";
+ startButton = undefined;
+ } else {
+ const startResult = await this.detectStartMechanism();
+ startMechanism = startResult.mechanism;
+ startButton = startResult.startButton;
+ }
+
let gridDetection = await this.detectGrid();
let { renderer, gridBounds, cellWidth, cellHeight } = gridDetection;
let backgroundColor =
@@ -326,8 +500,8 @@ export class PlaywrightDriver implements TetrisDriver {
? await this.sampleBackgroundColor(gridBounds, cellWidth, cellHeight)
: null;
- // Re-calibration fallback
- if (startMechanism === "unknown" || gridBounds === null) {
+ // Re-calibration fallback (skipped when bot already confirmed or rejected the start).
+ if (!this.confirmedCandidate && !this.startRejected && (startMechanism === "unknown" || gridBounds === null)) {
const retry = await this.recalibrateWithRetry(startMechanism, gridBounds);
if (retry.startMechanism !== "unknown") startMechanism = retry.startMechanism;
if (retry.startButton) startButton = retry.startButton;
@@ -354,7 +528,7 @@ export class PlaywrightDriver implements TetrisDriver {
gridDetectedAt: "initial",
});
- this.cal = {
+ const cal: DriverCalibration = {
renderer,
gridDetected: gridBounds !== null,
gridBounds,
@@ -370,11 +544,157 @@ export class PlaywrightDriver implements TetrisDriver {
gridDetectedAt: "initial",
};
- if (startButton) {
- this.cal.startButton = startButton;
+ if (startButton) cal.startButton = startButton;
+ return cal;
+ }
+
+ /**
+ * Attempt to replay the cached calibration on the current page.
+ * Returns a completed DriverCalibration on success, null on failure.
+ * On success, the game should be started and the grid detected.
+ */
+ private async applyCachedCalibration(): Promise<DriverCalibration | null> {
+ const base = this.firstCal;
+ if (!base) return null;
+
+ try {
+ // Small settle delay -- a freshly-loaded page may still be booting.
+ await this.page.waitForTimeout(800);
+
+ // Step 1: re-apply the cached start mechanism.
+ const started = await this.replayStartMechanism(base);
+ if (!started) {
+ this.log(
+ `CONFLICT: cached start mechanism '${base.startMechanism}` +
+ (base.startButton ? ` ${base.startButton.selector}` : "") +
+ `' no longer works`
+ );
+ return null;
+ }
+
+ // Step 2: verify the grid is back (same renderer, similar bounds).
+ await this.page.waitForTimeout(300);
+ const grid = await this.detectGrid();
+ if (!grid.gridBounds) {
+ this.log("CONFLICT: cached start worked but no grid detected");
+ return null;
+ }
+ if (base.gridBounds && !gridBoundsSimilar(base.gridBounds, grid.gridBounds)) {
+ this.log(
+ `CONFLICT: grid bounds changed significantly ` +
+ `(was ${JSON.stringify(base.gridBounds)}, now ${JSON.stringify(grid.gridBounds)})`
+ );
+ return null;
+ }
+ if (base.renderer !== "unknown" && grid.renderer !== base.renderer) {
+ this.log(`CONFLICT: renderer changed from ${base.renderer} to ${grid.renderer}`);
+ return null;
+ }
+
+ const backgroundColor =
+ grid.renderer === "canvas" && grid.gridBounds
+ ? await this.sampleBackgroundColor(grid.gridBounds, grid.cellWidth, grid.cellHeight)
+ : base.backgroundColor;
+
+ const cal: DriverCalibration = {
+ renderer: grid.renderer,
+ gridDetected: true,
+ gridBounds: grid.gridBounds,
+ cellWidth: grid.cellWidth,
+ cellHeight: grid.cellHeight,
+ controls: { ...base.controls },
+ startMechanism: base.startMechanism,
+ scoreElementSelector: base.scoreElementSelector,
+ levelElementSelector: base.levelElementSelector,
+ backgroundColor,
+ consoleErrors: [...this.consoleErrors],
+ gridConfidence: base.gridConfidence,
+ gridDetectedAt: "initial",
+ fromCache: true,
+ };
+ if (base.startButton) cal.startButton = { ...base.startButton };
+ return cal;
+ } catch (err) {
+ this.log(
+ `[cache] replay threw: ${err instanceof Error ? err.message : String(err)}`
+ );
+ return null;
}
+ }
- return this.cal;
+ /**
+ * Perform the cached start action. Returns true if a visual change occurred.
+ */
+ private async replayStartMechanism(base: DriverCalibration): Promise<boolean> {
+ try {
+ const before = await this.page.screenshot();
+
+ switch (base.startMechanism) {
+ case "auto":
+ // Nothing to replay -- game should already be running.
+ await this.page.waitForTimeout(400);
+ break;
+ case "enter":
+ await this.page.keyboard.press("Enter");
+ break;
+ case "space":
+ await this.page.keyboard.press("Space");
+ break;
+ case "anykey":
+ await this.page.keyboard.press("ArrowDown");
+ break;
+ case "click_canvas": {
+ const pos = base.startButton?.position;
+ if (pos) {
+ await this.page.mouse.click(pos.x, pos.y);
+ } else {
+ const canvas = this.page.locator("canvas").first();
+ if ((await canvas.count()) > 0) await canvas.click();
+ else return false;
+ }
+ break;
+ }
+ case "button": {
+ // Prefer the cached selector; fall back to coordinate click.
+ let clicked = false;
+ const sel = base.startButton?.selector;
+ if (sel) {
+ try {
+ const locator = this.page.locator(sel).first();
+ const count = await locator.count();
+ if (count > 0) {
+ await locator.click({ timeout: 2000 });
+ clicked = true;
+ }
+ } catch { /* fall through to coordinate click */ }
+ }
+ if (!clicked && base.startButton?.position) {
+ const pos = base.startButton.position;
+ await this.page.mouse.click(pos.x, pos.y);
+ clicked = true;
+ }
+ if (!clicked) return false;
+ break;
+ }
+ default:
+ return false;
+ }
+
+ await this.page.waitForTimeout(500);
+
+ // For auto-start, we already have no input -- just verify something changed
+ // relative to the blank/initial page state.
+ if (base.startMechanism === "auto") {
+ const after = await this.page.screenshot();
+ return !before.equals(after);
+ }
+
+ // For the other mechanisms, a visual change after the action is the signal.
+ const result = await this.detectVisualChange({ frames: 3, intervalMs: 100, before });
+ return result.changed;
+ } catch {
+ return false;
+ }
}
async recalibrate(): Promise<DriverCalibration> {
@@ -410,6 +730,405 @@ export class PlaywrightDriver implements TetrisDriver {
return this.cal;
}
+ /** Return a snapshot of the drift counters; `changes` is copied so callers cannot mutate driver state. */
+ getCalibrationDrift(): CalibrationDrift {
+   const { drifted, changes, recalibrations, cacheHits, cacheMisses } = this.drift;
+   return { drifted, changes: changes.slice(), recalibrations, cacheHits, cacheMisses };
+ }
+
+ // -- Start-mechanism verification bridge --
+ //
+ // The bot drives start detection explicitly via this trio:
+ // 1. discoverStartCandidates() -- returns ordered list
+ // 2. tryStartMechanism(candidate) -- applies one, reports deltas
+ // 3. confirmStartMechanism(candidate) -- commits after bot verification
+ //
+ // Unlike detectStartMechanism(), tryStartMechanism() does NOT judge the
+ // outcome. It only reports observable deltas so the bot can run its own
+ // gameplay-based checks before committing.
+
+ /**
+  * List the start mechanisms worth trying, in the order they should be tried:
+  * auto-start, enabled DOM buttons, keyboard triggers, then canvas clicks.
+  * Nothing is clicked or pressed here.
+  */
+ async discoverStartCandidates(): Promise<StartCandidate[]> {
+   // Auto-start always comes first: no action, just a 1.2s wait.
+   const found: StartCandidate[] = [
+     { mechanism: "auto", label: "auto-start (wait 1.2s)", waitMs: 1200 },
+   ];
+
+   // DOM buttons in the order collectButtonCandidates() ranks them.
+   // Disabled buttons are dropped because they cannot start a game.
+   try {
+     const enabled = (await this.collectButtonCandidates()).filter((b) => !b.disabled);
+     found.push(
+       ...enabled.map((b): StartCandidate => ({
+         mechanism: "button",
+         label: `button "${b.text || b.selector}"`,
+         selector: b.selector,
+         text: b.text,
+         position: { x: b.x, y: b.y },
+       }))
+     );
+   } catch { /* no buttons */ }
+
+   // Keyboard triggers.
+   found.push(
+     { mechanism: "enter", label: "key Enter", key: "Enter" },
+     { mechanism: "space", label: "key Space", key: "Space" },
+     { mechanism: "anykey", label: "key ArrowDown", key: "ArrowDown" }
+   );
+
+   // Canvas clicks at three heights along the canvas's vertical centre line.
+   try {
+     const firstCanvas = this.page.locator("canvas").first();
+     const bounds = (await firstCanvas.count()) > 0 ? await firstCanvas.boundingBox() : null;
+     if (bounds) {
+       const x = Math.round(bounds.x + bounds.width / 2);
+       const spots: Array<[string, number]> = [
+         ["canvas click center", 0.5],
+         ["canvas click upper", 0.25],
+         ["canvas click lower", 0.75],
+       ];
+       for (const [label, fraction] of spots) {
+         found.push({
+           mechanism: "click_canvas",
+           label,
+           position: { x, y: Math.round(bounds.y + bounds.height * fraction) },
+         });
+       }
+     }
+   } catch { /* no canvas */ }
+
+   return found;
+ }
+
+ /**
+  * Apply one start candidate and report what observably changed, without
+  * deciding whether the game actually started.
+  *
+  * @param candidate The mechanism to apply; `waitMs` overrides the default
+  *   300ms settle time.
+  * @returns Screenshot, DOM and console-error deltas. A delta whose baseline
+  *   or follow-up snapshot could not be captured is reported as "no change"
+  *   rather than guessed.
+  */
+ async tryStartMechanism(candidate: StartCandidate): Promise<TryStartResult> {
+   const errorsBefore = this.consoleErrors.length;
+
+   let before: Buffer | null = null;
+   try {
+     before = await this.page.screenshot();
+   } catch { /* screenshot can fail on teardown */ }
+
+   // The DOM baseline is captured separately from the screenshot. If the two
+   // shared a try-block, a failed screenshot would leave the baseline empty
+   // and every clickable element would later be reported as newly appeared.
+   let domBefore = "";
+   let clickableBefore = 0;
+   try {
+     const snap = await this.snapshotDomState();
+     domBefore = snap.domKey;
+     clickableBefore = snap.clickableCount;
+   } catch { /* no DOM baseline available */ }
+
+   let applied = { ok: false };
+   try {
+     applied = await this.applyCandidate(candidate);
+   } catch { /* treat as not applied */ }
+
+   if (!applied.ok) {
+     return {
+       visualChanged: false,
+       domChanged: false,
+       errorOccurred: this.consoleErrors.length > errorsBefore,
+       newClickableElements: 0,
+       removedElements: 0,
+     };
+   }
+
+   // Give the game a moment to react.
+   await this.page.waitForTimeout(candidate.waitMs ?? 300);
+
+   let visualChanged = false;
+   let domChanged = false;
+   let newClickableElements = 0;
+   let removedElements = 0;
+
+   try {
+     if (before) {
+       const after = await this.page.screenshot();
+       visualChanged = !before.equals(after);
+     }
+   } catch { /* report what we have */ }
+
+   try {
+     const snap = await this.snapshotDomState();
+     // snapshotDomState() yields an empty key only when it failed (a real key
+     // always carries the canvas/body segments), so compare only when both
+     // sides are genuine snapshots.
+     if (domBefore !== "" && snap.domKey !== "") {
+       domChanged = snap.domKey !== domBefore;
+       const delta = snap.clickableCount - clickableBefore;
+       if (delta > 0) newClickableElements = delta;
+       else if (delta < 0) removedElements = -delta;
+     }
+   } catch { /* report what we have */ }
+
+   // Populate a minimal calibration so verifyGameStarted can call readGrid().
+   // The bot may reject this candidate, in which case clearConfirmedStartMechanism()
+   // will wipe this.cal along with the rest of the bridge state.
+   try {
+     const gridDetection = await this.detectGrid();
+     if (gridDetection.gridBounds) {
+       // Background colour is only sampled for canvas renderers.
+       const backgroundColor =
+         gridDetection.renderer === "canvas"
+           ? await this.sampleBackgroundColor(
+               gridDetection.gridBounds,
+               gridDetection.cellWidth,
+               gridDetection.cellHeight
+             )
+           : null;
+       this.cal = {
+         renderer: gridDetection.renderer,
+         gridDetected: true,
+         gridBounds: gridDetection.gridBounds,
+         cellWidth: gridDetection.cellWidth,
+         cellHeight: gridDetection.cellHeight,
+         controls: { ...DEFAULT_CONTROLS },
+         startMechanism: candidate.mechanism,
+         scoreElementSelector: null,
+         levelElementSelector: null,
+         backgroundColor,
+         consoleErrors: [...this.consoleErrors],
+         gridConfidence: 0,
+         gridDetectedAt: "after_start",
+       };
+     }
+   } catch { /* no grid detected yet */ }
+
+   return {
+     visualChanged,
+     domChanged,
+     errorOccurred: this.consoleErrors.length > errorsBefore,
+     newClickableElements,
+     removedElements,
+   };
+ }
+
+ /**
+ * Record `candidate` as the confirmed start mechanism and clear the
+ * rejected flag, then log the candidate's label.
+ */
+ confirmStartMechanism(candidate: StartCandidate): void {
+ this.confirmedCandidate = candidate;
+ this.startRejected = false;
+ this.log(`[bridge] confirmed start candidate: ${candidate.label}`);
+ }
+
+ /**
+ * Forget the confirmed candidate (logging only if one was set) and discard
+ * both the first and the current calibration. The rejected flag is left
+ * untouched.
+ */
+ clearConfirmedStartMechanism(): void {
+ if (this.confirmedCandidate) {
+ this.log(`[bridge] cleared confirmed start candidate`);
+ }
+ this.confirmedCandidate = null;
+ // Drop cached calibrations so a reload starts fresh.
+ this.firstCal = null;
+ this.cal = null;
+ }
+
+ /**
+ * Mark start detection as rejected by the bot: sets the rejected flag,
+ * forgets any confirmed candidate, and discards both cached calibrations.
+ */
+ rejectStartMechanism(): void {
+ this.startRejected = true;
+ this.confirmedCandidate = null;
+ // Drop cached calibrations; subsequent calibrate() calls must run fresh
+ // but MUST NOT attempt any start detection.
+ this.firstCal = null;
+ this.cal = null;
+ this.log(`[bridge] start mechanism rejected by bot`);
+ }
+
+ /**
+  * Perform the click or key press a candidate describes, without judging the
+  * outcome. Resolves to `{ ok: true }` when the input was delivered and
+  * `{ ok: false }` when there was nothing to act on or the action threw.
+  */
+ private async applyCandidate(candidate: StartCandidate): Promise<{ ok: boolean }> {
+   try {
+     const { mechanism, position } = candidate;
+
+     // Auto-start needs no input; tryStartMechanism() does the waiting.
+     if (mechanism === "auto") return { ok: true };
+
+     if (mechanism === "enter" || mechanism === "space" || mechanism === "anykey") {
+       // An explicit key on the candidate wins over the mechanism's default.
+       let defaultKey = "ArrowDown";
+       if (mechanism === "enter") defaultKey = "Enter";
+       else if (mechanism === "space") defaultKey = "Space";
+       await this.page.keyboard.press(candidate.key ?? defaultKey);
+       return { ok: true };
+     }
+
+     if (mechanism === "button") {
+       // Selector click first; coordinates are the fallback.
+       if (candidate.selector) {
+         try {
+           const target = this.page.locator(candidate.selector).first();
+           if ((await target.count()) > 0) {
+             await target.click({ timeout: 2000 });
+             return { ok: true };
+           }
+         } catch { /* fall through */ }
+       }
+       if (!position) return { ok: false };
+       await this.page.mouse.click(position.x, position.y);
+       return { ok: true };
+     }
+
+     if (mechanism === "click_canvas") {
+       if (position) {
+         await this.page.mouse.click(position.x, position.y);
+         return { ok: true };
+       }
+       // No coordinates supplied: click the first canvas, if any.
+       const firstCanvas = this.page.locator("canvas").first();
+       if ((await firstCanvas.count()) === 0) return { ok: false };
+       await firstCanvas.click();
+       return { ok: true };
+     }
+
+     return { ok: false };
+   } catch {
+     return { ok: false };
+   }
+ }
+
+ /**
+  * Summarise the page's interactive structure as a string key plus a count
+  * of clickable elements, so tryStartMechanism() can compare two snapshots.
+  * Resolves to an empty key and a zero count if the page cannot be evaluated.
+  */
+ private async snapshotDomState(): Promise<{ domKey: string; clickableCount: number }> {
+   try {
+     return await this.page.evaluate(() => {
+       const interactive = Array.from(
+         document.querySelectorAll(
+           'button, a, [role="button"], [onclick], input[type="button"], input[type="submit"]'
+         )
+       );
+
+       // One segment per clickable element (tag, leading text, rounded size),
+       // limited to the first 41 elements.
+       const segments = interactive.slice(0, 41).map((node) => {
+         const box = (node as HTMLElement).getBoundingClientRect();
+         const tag = node.tagName.toLowerCase();
+         const label = (node.textContent || "").trim().slice(0, 20);
+         return `${tag}:${label}:${Math.round(box.width)}x${Math.round(box.height)}`;
+       });
+
+       segments.push(`canvas=${document.querySelectorAll("canvas").length}`);
+
+       // A slice of the visible body text, so status changes such as
+       // "Paused" becoming "Game Over" alter the key.
+       const visibleText = (document.body?.innerText || "")
+         .replace(/\s+/g, " ")
+         .trim()
+         .slice(0, 300);
+       segments.push(`body=${visibleText}`);
+
+       return { domKey: segments.join("|"), clickableCount: interactive.length };
+     });
+   } catch {
+     return { domKey: "", clickableCount: 0 };
+   }
+ }
+
+ /**
+  * Return clickable elements sorted by prominence. Used by
+  * discoverStartCandidates(). Boosts "start"-like labels and demotes
+  * "restart"- and "pause"-like labels.
+  *
+  * An element is considered clickable if it matches a button/link selector
+  * or its computed cursor is "pointer". Tiny, far off-screen and
+  * near-full-page elements are dropped.
+  *
+  * @returns For each element: its trimmed text (first 50 characters), a CSS
+  *   selector, the rounded centre of its bounding box, and whether it is
+  *   disabled.
+  */
+ private async collectButtonCandidates(): Promise<Array<{
+   text: string; selector: string; x: number; y: number; disabled: boolean;
+ }>> {
+   return await this.page.evaluate(() => {
+     const seen = new Set<Element>();
+     const results: Array<{
+       text: string; x: number; y: number;
+       width: number; height: number; area: number; centerDist: number;
+       selector: string; hasBackground: boolean; disabled: boolean;
+     }> = [];
+
+     const clickableSelector =
+       'button, a, [role="button"], [onclick], input[type="button"], input[type="submit"]';
+     for (const el of document.querySelectorAll(clickableSelector)) {
+       seen.add(el);
+     }
+
+     // Anything styled with a pointer cursor also counts as clickable.
+     for (const el of document.querySelectorAll("*")) {
+       if (seen.has(el)) continue;
+       try {
+         if (window.getComputedStyle(el).cursor === "pointer") seen.add(el);
+       } catch { /* skip */ }
+     }
+
+     // Selector for elements with no id or class: the element's real position
+     // among same-type siblings at every level up to <html>. A bare
+     // nth-of-type() built from a running counter would describe the n-th
+     // candidate found, not the element's place in the DOM, and could match
+     // an unrelated element.
+     const structuralSelector = (target: Element): string => {
+       const steps: string[] = [];
+       for (
+         let node: Element | null = target;
+         node && node !== document.documentElement;
+         node = node.parentElement
+       ) {
+         let nth = 1;
+         for (let sib = node.previousElementSibling; sib; sib = sib.previousElementSibling) {
+           if (sib.localName === node.localName) nth++;
+         }
+         steps.unshift(`${node.localName}:nth-of-type(${nth})`);
+       }
+       return ["html", ...steps].join(" > ");
+     };
+
+     const pageW = window.innerWidth;
+     const pageH = window.innerHeight;
+     const pageCenterX = pageW / 2;
+     const pageCenterY = pageH / 2;
+
+     for (const el of seen) {
+       const rect = el.getBoundingClientRect();
+       if (rect.width < 5 || rect.height < 5) continue;
+       // Skip elements that are far outside the document (negative coords or
+       // > 3x the viewport) but allow buttons that are below the fold -- we
+       // use locator.click() which scrolls them into view.
+       if (rect.top < -200 || rect.left < -200) continue;
+       if (rect.top > pageH * 3 || rect.left > pageW * 3) continue;
+       // Near-full-page elements are containers, not buttons.
+       if (rect.width > pageW * 0.8 && rect.height > pageH * 0.8) continue;
+
+       const cx = rect.left + rect.width / 2;
+       const cy = rect.top + rect.height / 2;
+       const centerDist = Math.sqrt((cx - pageCenterX) ** 2 + (cy - pageCenterY) ** 2);
+
+       let hasBackground = false;
+       try {
+         const style = window.getComputedStyle(el as HTMLElement);
+         const bg = style.backgroundColor;
+         if (bg && bg !== "transparent" && bg !== "rgba(0, 0, 0, 0)") hasBackground = true;
+       } catch { /* skip */ }
+
+       // Disabled elements are reported with a flag rather than dropped; the
+       // caller decides what to do with them.
+       const disabled = (el as HTMLInputElement).disabled === true
+         || el.getAttribute("aria-disabled") === "true";
+
+       // Prefer an id or first-class selector; otherwise use the structural path.
+       let selector = "";
+       if (el.id) selector = `#${el.id}`;
+       else if ((el as HTMLElement).className) {
+         const cls = (el as HTMLElement).className.toString().split(" ")[0];
+         if (cls) selector = `${el.tagName.toLowerCase()}.${cls}`;
+       }
+       if (!selector) selector = structuralSelector(el);
+
+       results.push({
+         text: (el.textContent || "").trim().slice(0, 50),
+         x: Math.round(cx), y: Math.round(cy),
+         width: rect.width, height: rect.height,
+         area: rect.width * rect.height, centerDist, selector, hasBackground,
+         disabled,
+       });
+     }
+
+     // Rank labels: 0 = start-like, 1 = neutral, 2 = restart-like, 3 = pause/quit-like.
+     const isStartish = (text: string): number => {
+       const t = text.toLowerCase();
+       // "play again" has to be tested before the start-like pattern, whose
+       // \bplay\b alternative would otherwise claim it as a start button.
+       if (/\bplay\s+again\b/.test(t)) return 2;
+       if (/\bstart\b|\bplay\b|\bbegin\b|\bgo\b|\binicio\b|\bjugar\b|\bempezar\b|\bcomenzar\b|\bnueva\b|new game/.test(t)) return 0;
+       if (/\brestart\b|\breset\b|\bplay again\b/.test(t)) return 2;
+       if (/\bpause\b|\bstop\b|\bquit\b|\bexit\b|\bpausa\b|\bsalir\b/.test(t)) return 3;
+       return 1;
+     };
+
+     // Sort by label rank, then enabled before disabled, then elements with a
+     // background, then larger area, then closeness to the viewport centre.
+     results.sort((a, b) => {
+       const ai = isStartish(a.text);
+       const bi = isStartish(b.text);
+       if (ai !== bi) return ai - bi;
+       if (a.disabled !== b.disabled) return a.disabled ? 1 : -1;
+       if (a.hasBackground !== b.hasBackground) return a.hasBackground ? -1 : 1;
+       if (Math.abs(b.area - a.area) > 100) return b.area - a.area;
+       return a.centerDist - b.centerDist;
+     });
+
+     return results.map(r => ({
+       text: r.text, selector: r.selector, x: r.x, y: r.y, disabled: r.disabled,
+     }));
+   });
+ }
+
// -- Grid Reading --
async readGrid(settledGrid?: Grid | null): Promise<GridSnapshot> {
diff --git a/tasks/tetris/eval/gameplay-bot-v2/index.ts b/tasks/tetris/eval/gameplay-bot-v2/index.ts
@@ -106,7 +106,7 @@ test.describe("Tetris Gameplay Bot v2", () => {
const driver = new PlaywrightDriver(page);
// Create the Bot (which gets the Driver) and run everything
- const { testResults, calibration, gameplay, session, survey, competitivePlay } =
+ const { testResults, calibration, gameplay, session, survey, competitivePlay, calibrationDrift } =
await runAllTests(driver, serverUrl);
// Accessibility check
@@ -190,6 +190,7 @@ test.describe("Tetris Gameplay Bot v2", () => {
issue_count: a11yIssues.length,
pass: a11yIssues.length === 0,
},
+ calibration_drift: calibrationDrift,
};
// Write report to file
@@ -231,6 +232,11 @@ test.describe("Tetris Gameplay Bot v2", () => {
console.log(` Bugs: [${competitivePlay.bugs_detected.join(", ")}]`);
}
}
+ console.log(
+ `\nCalibration cache: ${calibrationDrift.cacheHits} hits / ${calibrationDrift.cacheMisses} misses ` +
+ `(${calibrationDrift.recalibrations} recalibrations)` +
+ (calibrationDrift.drifted ? ` -- DRIFTED: [${calibrationDrift.changes.join(", ")}]` : "")
+ );
console.log(`\nSurvey: canvas=${survey.has_canvas}, dom_grid=${survey.has_dom_grid}, overlay=${survey.has_overlay}, clickable=${survey.clickable_elements}`);
console.log(`Report written to: ${reportPath}`);
console.log("==============================\n");
diff --git a/tasks/tetris/eval/gameplay-bot-v2/types.ts b/tasks/tetris/eval/gameplay-bot-v2/types.ts
@@ -31,6 +31,42 @@ export type StartMechanism =
| "anykey"
| "unknown";
+/**
+ * A candidate start mechanism discovered by the driver and verified by the bot.
+ * The bot iterates candidates, asks the driver to try each, then decides
+ * whether the result actually represents a started Tetris game.
+ *
+ * Only `mechanism` and `label` are required; the remaining fields apply to
+ * particular mechanism kinds.
+ */
+export interface StartCandidate {
+ /** Which mechanism type this candidate represents. */
+ mechanism: StartMechanism;
+ /** Human-readable label for logs. */
+ label: string;
+ /** CSS selector for buttons; when present the driver tries it before `position`. */
+ selector?: string;
+ /** Visible text for buttons. */
+ text?: string;
+ /** Key to press for keyboard triggers; when omitted the driver picks one from `mechanism`. */
+ key?: string;
+ /** Pixel position for clicks (button fallback and canvas clicks). */
+ position?: { x: number; y: number };
+ /** Milliseconds to wait after applying before measuring; the driver uses 300 when omitted. */
+ waitMs?: number;
+}
+
+/**
+ * What happened when a start mechanism was applied, without committing.
+ * These are raw observations; deciding whether the game started is left to
+ * the bot.
+ */
+export interface TryStartResult {
+ /** Did the screenshot pixels change? */
+ visualChanged: boolean;
+ /** Did the DOM snapshot change? */
+ domChanged: boolean;
+ /** Was at least one new console error recorded during the attempt? */
+ errorOccurred: boolean;
+ /** Net increase in the number of clickable elements after applying (0 if none). */
+ newClickableElements: number;
+ /** Net decrease in the number of clickable elements after applying (0 if none). */
+ removedElements: number;
+}
+
/** Standard Tetris piece types. */
export type PieceType = "I" | "O" | "T" | "S" | "Z" | "J" | "L" | "unknown";
@@ -64,6 +100,16 @@ export interface DriverCalibration {
disappeared: boolean;
position: { x: number; y: number };
};
+ fromCache?: boolean;
+}
+
+/**
+ * Summary of how much the latest calibration differs from the first one,
+ * plus calibration-cache counters. Included in the bot report and printed
+ * in the run summary.
+ */
+export interface CalibrationDrift {
+ /** True when drift was flagged; the summary prints `changes` only in that case. */
+ drifted: boolean;
+ /** Descriptions of what changed -- NOTE(review): exact wording is set by the driver; confirm there. */
+ changes: string[];
+ /** Recalibration count -- presumably full calibration re-runs; confirm against the driver. */
+ recalibrations: number;
+ /** Calibration cache hits -- presumably calibrations served from the cache; confirm against the driver. */
+ cacheHits: number;
+ /** Calibration cache misses -- presumably calibrations that had to run fresh; confirm against the driver. */
+ cacheMisses: number;
}
/** Grid snapshot: the grid state plus derived information the bot needs. */
@@ -93,6 +139,25 @@ export interface TetrisDriver {
recalibrate(): Promise<DriverCalibration>;
getCalibration(): DriverCalibration;
+ // -- Start mechanism discovery/verification bridge --
+ /** Return candidate start mechanisms in priority order. Does not apply them. */
+ discoverStartCandidates(): Promise<StartCandidate[]>;
+ /** Apply a candidate and report observable deltas. Does NOT commit. */
+ tryStartMechanism(candidate: StartCandidate): Promise<TryStartResult>;
+ /** Commit a verified start mechanism so subsequent calibrations reuse it. */
+ confirmStartMechanism(
+ candidate: StartCandidate
+ ): void;
+ /** Forget the confirmed mechanism (e.g. after reloading to try a different candidate). */
+ clearConfirmedStartMechanism(): void;
+ /**
+ * Tell the driver the bot's bridge verification rejected every candidate.
+ * This prevents calibrate() from running the legacy fallback detector,
+ * which historically produced false positives like clicking Pause.
+ */
+ rejectStartMechanism(): void;
+ getCalibrationDrift(): CalibrationDrift;
+
// -- Grid Reading --
readGrid(settledGrid?: Grid | null): Promise<GridSnapshot>;
gridsAreDifferent(a: Grid | null, b: Grid | null): boolean;
@@ -230,4 +295,5 @@ export interface BotReport {
issue_count: number;
pass: boolean;
};
+ calibration_drift?: CalibrationDrift;
}