loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

bot.ts (94206B)


      1 // Bot: "The Brain" -- phase orchestration, AI decisions, test derivation
      2 // NEVER imports from Playwright directly. Uses only TetrisDriver interface.
      3 
      4 import type {
      5   Grid,
      6   PieceType,
      7   TetrisDriver,
      8   DriverCalibration,
      9   CalibrationDrift,
     10   GridSnapshot,
     11   StartCandidate,
     12   TestResult,
     13   GameplayStats,
     14   GameSession,
     15   CompetitivePlayResult,
     16   SurveyData,
     17 } from "./types";
     18 
     19 // ---------------------------------------------------------------------------
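// One-piece heuristic Tetris AI in the tradition of Pierre Dellacherie's
// evaluator (2003, documented by Colin Fahey). The four features used here
// (aggregate height, complete lines, holes, bumpiness) and their GA-optimized
// weights are those of the reference implementation: LeeYiyuan/tetrisai (MIT)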
     22 // ---------------------------------------------------------------------------
     23 
// Weights of the linear board evaluator used by findBestPlacement() and
// findBestPlacementGeneric(): score = sum(weight * feature) over the board
// that results from a simulated drop. Negative weights penalise a feature.
const W_HEIGHT = -0.510066; // multiplies aggregateHeight(board)
const W_LINES = 0.760666; // multiplies the number of rows cleared by the drop
const W_HOLES = -0.35663; // multiplies countHoles(board)
const W_BUMPINESS = -0.184483; // multiplies bumpiness(board)

// Board dimensions assumed by the simulation and heuristic helpers.
const GRID_ROWS = 20;
const GRID_COLS = 10;
     31 
// Cell layouts for the seven tetrominoes, keyed by piece letter. Each piece
// has exactly four entries; the index of an entry is what findBestPlacement()
// reports as Placement.rotations. Pieces with fewer distinct orientations
// (I, O, S, Z) repeat their layouts to fill all four slots.
//
// Every cell is a [rowOffset, colOffset] pair relative to the placement
// origin: simulateDropPiece() adds the candidate row/column to these offsets.
// All offsets are >= 0, so the origin is the top-left of the bounding box.
// NOTE(review): presumably index N corresponds to N presses of the rotate
// key from the spawn orientation -- confirm against the code that applies a
// Placement.
const PIECES: Record<string, [number, number][][]> = {
  I: [
    [[0, 0], [0, 1], [0, 2], [0, 3]],
    [[0, 0], [1, 0], [2, 0], [3, 0]],
    [[0, 0], [0, 1], [0, 2], [0, 3]],
    [[0, 0], [1, 0], [2, 0], [3, 0]],
  ],
  O: [
    [[0, 0], [0, 1], [1, 0], [1, 1]],
    [[0, 0], [0, 1], [1, 0], [1, 1]],
    [[0, 0], [0, 1], [1, 0], [1, 1]],
    [[0, 0], [0, 1], [1, 0], [1, 1]],
  ],
  T: [
    [[0, 1], [1, 0], [1, 1], [1, 2]],
    [[0, 0], [1, 0], [1, 1], [2, 0]],
    [[0, 0], [0, 1], [0, 2], [1, 1]],
    [[0, 1], [1, 0], [1, 1], [2, 1]],
  ],
  S: [
    [[0, 1], [0, 2], [1, 0], [1, 1]],
    [[0, 0], [1, 0], [1, 1], [2, 1]],
    [[0, 1], [0, 2], [1, 0], [1, 1]],
    [[0, 0], [1, 0], [1, 1], [2, 1]],
  ],
  Z: [
    [[0, 0], [0, 1], [1, 1], [1, 2]],
    [[0, 1], [1, 0], [1, 1], [2, 0]],
    [[0, 0], [0, 1], [1, 1], [1, 2]],
    [[0, 1], [1, 0], [1, 1], [2, 0]],
  ],
  J: [
    [[0, 0], [1, 0], [1, 1], [1, 2]],
    [[0, 0], [0, 1], [1, 0], [2, 0]],
    [[0, 0], [0, 1], [0, 2], [1, 2]],
    [[0, 1], [1, 1], [2, 1], [2, 0]],
  ],
  L: [
    [[0, 2], [1, 0], [1, 1], [1, 2]],
    [[0, 0], [1, 0], [2, 0], [2, 1]],
    [[0, 0], [0, 1], [0, 2], [1, 0]],
    [[0, 0], [0, 1], [1, 1], [2, 1]],
  ],
};
     76 
/** A candidate resting position for a piece, as scored by the placement search. */
interface Placement {
  /** Index into the piece's PIECES orientation list (0 for the generic fallback). */
  rotations: number;
  /** Column passed to the drop simulation as the placement origin. */
  column: number;
  /** Weighted heuristic score of the board after the simulated drop. */
  score: number;
  /** Number of rows the simulated drop cleared. */
  linesCleared: number;
  /** Piece letter, or "unknown" when produced by findBestPlacementGeneric(). */
  pieceType: string;
}
     84 
     85 function findBestPlacement(board: Grid, pieceType: PieceType): Placement | null {
     86   const rotations = PIECES[pieceType];
     87   if (!rotations) return findBestPlacementGeneric(board);
     88 
     89   let bestScore = -Infinity;
     90   let bestPlacement: Placement | null = null;
     91 
     92   for (let rot = 0; rot < rotations.length; rot++) {
     93     const shape = rotations[rot];
     94     const minCol = Math.min(...shape.map(([, c]) => c));
     95     const maxCol = Math.max(...shape.map(([, c]) => c));
     96     const pieceWidth = maxCol - minCol + 1;
     97 
     98     for (let col = -minCol; col <= GRID_COLS - pieceWidth + (-minCol); col++) {
     99       const simResult = simulateDropPiece(board, shape, col);
    100       if (!simResult) continue;
    101 
    102       const { cleared, resultBoard } = simResult;
    103       const score =
    104         W_HEIGHT * aggregateHeight(resultBoard) +
    105         W_LINES * cleared +
    106         W_HOLES * countHoles(resultBoard) +
    107         W_BUMPINESS * bumpiness(resultBoard);
    108 
    109       if (score > bestScore) {
    110         bestScore = score;
    111         bestPlacement = { rotations: rot, column: col, score, linesCleared: cleared, pieceType };
    112       }
    113     }
    114   }
    115 
    116   return bestPlacement;
    117 }
    118 
    119 function findBestPlacementGeneric(board: Grid): Placement | null {
    120   let bestScore = -Infinity;
    121   let bestPlacement: Placement | null = null;
    122 
    123   for (let col = 0; col < GRID_COLS; col++) {
    124     const simGrid = simulateDropSingleCell(board, col);
    125     if (!simGrid) continue;
    126     const { cleared, resultBoard } = clearLines(simGrid);
    127     const score =
    128       W_HEIGHT * aggregateHeight(resultBoard) +
    129       W_LINES * cleared +
    130       W_HOLES * countHoles(resultBoard) +
    131       W_BUMPINESS * bumpiness(resultBoard);
    132     if (score > bestScore) {
    133       bestScore = score;
    134       bestPlacement = { rotations: 0, column: col, score, linesCleared: cleared, pieceType: "unknown" };
    135     }
    136   }
    137   return bestPlacement;
    138 }
    139 
    140 function simulateDropPiece(
    141   board: Grid, shape: [number, number][], col: number
    142 ): { cleared: number; resultBoard: Grid } | null {
    143   let landRow = -1;
    144 
    145   for (let row = 0; row <= GRID_ROWS; row++) {
    146     let valid = true;
    147     for (const [dr, dc] of shape) {
    148       const r = row + dr;
    149       const c = col + dc;
    150       if (r >= GRID_ROWS || c < 0 || c >= GRID_COLS) { valid = false; break; }
    151       if (r >= 0 && board[r][c]) { valid = false; break; }
    152     }
    153     if (!valid) { landRow = row - 1; break; }
    154   }
    155 
    156   if (landRow < 0) {
    157     let valid = true;
    158     for (const [dr, dc] of shape) {
    159       const r = dr;
    160       const c = col + dc;
    161       if (r >= GRID_ROWS || c < 0 || c >= GRID_COLS || (r >= 0 && board[r][c])) { valid = false; break; }
    162     }
    163     if (valid) landRow = 0;
    164     else return null;
    165   }
    166 
    167   const newBoard: Grid = board.map((row) => [...row]);
    168   for (const [dr, dc] of shape) {
    169     const r = landRow + dr;
    170     const c = col + dc;
    171     if (r >= 0 && r < GRID_ROWS && c >= 0 && c < GRID_COLS) newBoard[r][c] = true;
    172   }
    173 
    174   return clearLines(newBoard);
    175 }
    176 
    177 function simulateDropSingleCell(board: Grid, col: number): Grid | null {
    178   if (col < 0 || col >= GRID_COLS) return null;
    179   let landRow = -1;
    180   for (let r = GRID_ROWS - 1; r >= 0; r--) {
    181     if (!board[r][col]) { landRow = r; break; }
    182   }
    183   if (landRow < 0) return null;
    184   const newGrid: Grid = board.map((row) => [...row]);
    185   newGrid[landRow][col] = true;
    186   return newGrid;
    187 }
    188 
    189 function stripActivePiece(grid: Grid, activeCells: [number, number][]): Grid {
    190   const result: Grid = grid.map((row) => [...row]);
    191   for (const [r, c] of activeCells) {
    192     if (r >= 0 && r < result.length && c >= 0 && c < result[r].length) result[r][c] = false;
    193   }
    194   return result;
    195 }
    196 
    197 function clearLines(grid: Grid): { cleared: number; resultBoard: Grid } {
    198   const remaining: boolean[][] = [];
    199   let cleared = 0;
    200   for (const row of grid) {
    201     if (row.every(Boolean)) cleared++;
    202     else remaining.push([...row]);
    203   }
    204   while (remaining.length < GRID_ROWS) remaining.unshift(new Array(GRID_COLS).fill(false));
    205   return { cleared, resultBoard: remaining };
    206 }
    207 
    208 function aggregateHeight(grid: Grid): number {
    209   let total = 0;
    210   for (let col = 0; col < GRID_COLS; col++) {
    211     for (let row = 0; row < GRID_ROWS; row++) {
    212       if (grid[row]?.[col]) { total += GRID_ROWS - row; break; }
    213     }
    214   }
    215   return total;
    216 }
    217 
    218 function countHoles(grid: Grid): number {
    219   let holes = 0;
    220   for (let col = 0; col < GRID_COLS; col++) {
    221     let blockFound = false;
    222     for (let row = 0; row < GRID_ROWS; row++) {
    223       if (grid[row]?.[col]) blockFound = true;
    224       else if (blockFound) holes++;
    225     }
    226   }
    227   return holes;
    228 }
    229 
    230 function bumpiness(grid: Grid): number {
    231   const heights: number[] = [];
    232   for (let col = 0; col < GRID_COLS; col++) {
    233     let h = 0;
    234     for (let row = 0; row < GRID_ROWS; row++) {
    235       if (grid[row]?.[col]) { h = GRID_ROWS - row; break; }
    236     }
    237     heights.push(h);
    238   }
    239   let bump = 0;
    240   for (let i = 0; i < heights.length - 1; i++) bump += Math.abs(heights[i] - heights[i + 1]);
    241   return bump;
    242 }
    243 
    244 function countFilled(grid: Grid): number {
    245   let count = 0;
    246   for (const row of grid) for (const cell of row) if (cell) count++;
    247   return count;
    248 }
    249 
    250 // ---------------------------------------------------------------------------
    251 // Helper
    252 // ---------------------------------------------------------------------------
    253 
    254 function boundingBox(cells: [number, number][]): { w: number; h: number } {
    255   const minRow = Math.min(...cells.map(([r]) => r));
    256   const maxRow = Math.max(...cells.map(([r]) => r));
    257   const minCol = Math.min(...cells.map(([, c]) => c));
    258   const maxCol = Math.max(...cells.map(([, c]) => c));
    259   return { w: maxCol - minCol + 1, h: maxRow - minRow + 1 };
    260 }
    261 
    262 /**
    263  * Compute a position-invariant key for an active piece shape. Subtracts the
    264  * minimum row/col so the shape is translated to (0,0), then sorts and joins.
    265  * Two active-piece cell sets that differ only by position (e.g. because the
    266  * piece auto-dropped during the test) will produce the same key.
    267  */
    268 function normalizedShapeKey(cells: [number, number][]): string {
    269   if (cells.length === 0) return "";
    270   const minRow = Math.min(...cells.map(([r]) => r));
    271   const minCol = Math.min(...cells.map(([, c]) => c));
    272   return cells
    273     .map(([r, c]) => `${r - minRow},${c - minCol}`)
    274     .sort()
    275     .join("|");
    276 }
    277 
    278 function countFilledInTopRows(grid: Grid, rows: number): number {
    279   let count = 0;
    280   for (let r = 0; r < Math.min(rows, grid.length); r++) {
    281     for (let c = 0; c < grid[r].length; c++) if (grid[r][c]) count++;
    282   }
    283   return count;
    284 }
    285 
    286 // ---------------------------------------------------------------------------
    287 // Phase orchestration
    288 // ---------------------------------------------------------------------------
    289 
/**
 * Outcome of loading the game page.
 * NOTE(review): presumably the shape returned by driver.loadPage(); that
 * declaration is not visible here -- confirm.
 */
interface LoadResult {
  /** False aborts the run; every test is then reported as failed. */
  loaded: boolean;
  /** Human-readable explanation; copied into each failed test's detail. */
  detail: string;
  /** NOTE(review): presumably the console-error count at load time -- confirm. */
  errorsOnLoad: number;
}
    295 
/**
 * Which phases of the run produced evidence that the game works. The field
 * names match the object runAllTests() builds and passes to
 * deriveTestResults().
 */
interface PhaseState {
  /** Calibration reported a known start mechanism. */
  gameStarted: boolean;
  /** The basic-mechanics phase observed at least one movement, rotation or hard drop. */
  mechanicsWork: boolean;
  /** At least one piece lock or hard drop was observed. */
  piecesWork: boolean;
  /** The AI-driven gameplay phase placed at least one piece. */
  gameplayWorks: boolean;
}
    302 
// Name of every test in the report. When the page fails to load,
// runAllTests() emits one failed result per name listed here.
const ALL_TEST_NAMES = [
  "game_loads",
  "game_starts",
  "auto_drop",
  "move_left",
  "move_right",
  "move_down",
  "rotate",
  "hard_drop",
  "all_pieces_rotate",
  "piece_locks",
  "new_piece_spawns",
  "multiple_pieces",
  "line_clear",
  "score_increases_on_clear",
  "score_element_visible",
  "game_over",
  "playable_30s",
  "multi_line_clear",
  "score_scaling",
  "level_progression",
  "speed_progression",
  "next_piece_preview",
  "game_over_display",
  "counter_clockwise_rotation",
  "soft_drop_distinct",
  "rendering_clean",
];
    331 
    332 function emptyCalibration(consoleErrors: string[]): DriverCalibration {
    333   return {
    334     renderer: "unknown",
    335     gridDetected: false,
    336     gridBounds: null,
    337     cellWidth: 0,
    338     cellHeight: 0,
    339     controls: { left: "ArrowLeft", right: "ArrowRight", down: "ArrowDown", rotate: "ArrowUp", drop: "Space" },
    340     controlMap: null,
    341     startMechanism: "unknown",
    342     scoreElementSelector: null,
    343     levelElementSelector: null,
    344     backgroundColor: null,
    345     consoleErrors,
    346     gridConfidence: 0,
    347     gridDetectedAt: "initial",
    348   };
    349 }
    350 
    351 // ---------------------------------------------------------------------------
    352 // Main entry point
    353 // ---------------------------------------------------------------------------
    354 
    355 export async function runAllTests(
    356   driver: TetrisDriver,
    357   serverUrl: string
    358 ): Promise<{
    359   testResults: TestResult[];
    360   calibration: DriverCalibration;
    361   gameplay: GameplayStats;
    362   session: GameSession;
    363   survey: SurveyData;
    364   competitivePlay: CompetitivePlayResult | null;
    365   calibrationDrift: CalibrationDrift;
    366 }> {
    367   const gameplay: GameplayStats = {
    368     pieces_placed: 0,
    369     lines_cleared: 0,
    370     max_score_observed: 0,
    371     play_duration_seconds: 0,
    372     errors_during_play: 0,
    373   };
    374 
    375   const session: GameSession = {
    376     started: false,
    377     startMechanism: "unknown",
    378     piecesSpawned: 0,
    379     piecesLocked: 0,
    380     linesCleared: 0,
    381     rotationsObserved: 0,
    382     movementsObserved: 0,
    383     hardDropsObserved: 0,
    384     gameOverDetected: false,
    385     gameOverText: null,
    386     gameOverRestartAvailable: false,
    387     consoleErrors: [],
    388     durationSeconds: 0,
    389     pieceTypes: new Set<string>(),
    390     scoreValues: [],
    391     gridReadSuccess: 0,
    392     gridReadFail: 0,
    393     frames: 0,
    394     events: [],
    395     skippedPhases: [],
    396     distinctRotationShapes: 0,
    397     rotationShapesByPiece: new Map<string, Set<string>>(),
    398   };
    399 
    400   let survey: SurveyData = {
    401     has_overlay: false,
    402     has_canvas: false,
    403     has_dom_grid: false,
    404     visible_text: [],
    405     clickable_elements: 0,
    406   };
    407 
    408   let competitivePlay: CompetitivePlayResult | null = null;
    409 
    410   // ---- Phase 1: Load the page ----
    411   const loadResult = await driver.loadPage(serverUrl);
    412   if (!loadResult.loaded) {
    413     const failedTests = ALL_TEST_NAMES.map((name) => ({
    414       name, pass: false, detail: loadResult.detail,
    415     }));
    416     return {
    417       testResults: failedTests,
    418       calibration: emptyCalibration(driver.getConsoleErrors()),
    419       gameplay, session, survey, competitivePlay,
    420       calibrationDrift: driver.getCalibrationDrift(),
    421     };
    422   }
    423 
    424   // ---- Pre-test survey ----
    425   survey = await driver.surveyPage();
    426 
    427   // ---- Detect game-shaped landmarks for the game_loads test ----
    428   try {
    429     session.gameLoadLandmarks = await driver.detectGameLandmarks();
    430   } catch {
    431     session.gameLoadLandmarks = undefined;
    432   }
    433 
    434   // ---- Phase 2: Discover + verify start, then calibrate ----
    435   // Bridge flow: try each candidate, ask verifyGameStarted() to confirm,
    436   // commit the first verified candidate. On false positive, reload and try
    437   // the next. If nothing verifies, tell the driver to reject and skip the
    438   // legacy fuzzy detector (which historically clicked Pause buttons etc.).
    439   let cal: DriverCalibration;
    440   let verified: { candidate: StartCandidate } | null = null;
    441   try {
    442     verified = await detectStartWithVerification(driver, serverUrl);
    443   } catch (err) {
    444     console.log(`[bot] bridge detection threw: ${err instanceof Error ? err.message : String(err)}`);
    445   }
    446 
    447   try {
    448     if (verified) {
    449       driver.confirmStartMechanism(verified.candidate);
    450     } else {
    451       driver.rejectStartMechanism();
    452     }
    453     cal = await driver.calibrate();
    454     session.started = cal.startMechanism !== "unknown";
    455     session.startMechanism = cal.startMechanism;
    456   } catch (err) {
    457     cal = emptyCalibration(driver.getConsoleErrors());
    458   }
    459 
    460   // Merge console errors from calibration
    461   for (const e of cal.consoleErrors) {
    462     if (!session.consoleErrors.includes(e)) session.consoleErrors.push(e);
    463   }
    464 
    465   let gameStarted = session.started;
    466   if (!gameStarted) {
    467     session.skippedPhases.push(
    468       "mechanics: game did not start",
    469       "pieces: game did not start",
    470       "gameplay: game did not start",
    471       "gameover: game did not start",
    472       "endurance: game did not start",
    473       "competitive: game did not start"
    474     );
    475   } else {
    476     driver.armInactivityWatchdog();
    477   }
    478 
    479   // Re-calibrate after start: DOM games may create grid cells dynamically
    480   if (gameStarted && !cal.gridDetected) {
    481     try {
    482       await driver.wait(500);
    483       const recal = await driver.recalibrate();
    484       if (recal.gridDetected) {
    485         cal = recal;
    486       }
    487     } catch { /* keep original */ }
    488   }
    489 
    490   // Control discovery: probe what each key actually does. This is the
    491   // difference between "ArrowDown is soft drop" (assumption) and "on this
    492   // particular game, ArrowDown is hard drop" (measurement). Runs only once
    493   // per session -- the result is cached on the driver and flows through
    494   // subsequent pressKey() calls automatically.
    495   if (gameStarted && cal.gridDetected) {
    496     try {
    497       const controlMap = await driver.discoverControls(serverUrl);
    498       console.log(
    499         `[bot] control discovery complete: ` +
    500         `move_left=${controlMap.move_left.key ?? "?"}, ` +
    501         `move_right=${controlMap.move_right.key ?? "?"}, ` +
    502         `soft_drop=${controlMap.soft_drop.key ?? "NONE"}, ` +
    503         `hard_drop=${controlMap.hard_drop.key ?? "?"}, ` +
    504         `rotate_cw=${controlMap.rotate_cw.key ?? "?"}`
    505       );
    506       // Refresh the working calibration from the driver -- discoverControls()
    507       // updates cal.controlMap and cal.controls on the cached calibration
    508       // object in-place.
    509       cal = driver.getCalibration();
    510     } catch (err) {
    511       console.log(`[bot] control discovery threw: ${err instanceof Error ? err.message : String(err)}`);
    512     }
    513   }
    514 
    515   // ---- Phases 3-8 wrapped in an inactivity guard: if the driver's
    516   // watchdog fires, we bail out of gameplay phases early but still write
    517   // a partial report with whatever did run. ----
    518   let mechanicsWork = false;
    519   let piecesWork = false;
    520   let gameplayWorks = false;
    521 
    522   try {
    523     // ---- Phase 3: Basic mechanics ----
    524     if (gameStarted && cal.gridDetected) {
    525       await runBasicMechanicsPhase(driver, session);
    526       mechanicsWork =
    527         session.movementsObserved > 0 ||
    528         session.rotationsObserved > 0 ||
    529         session.hardDropsObserved > 0 ||
    530         session.events.some((e) => e.type === "piece_moved");
    531     }
    532 
    533     if (gameStarted && !mechanicsWork) {
    534       session.skippedPhases.push(
    535         "pieces: mechanics failed",
    536         "gameplay: mechanics failed",
    537         "gameover: mechanics failed",
    538         "endurance: mechanics failed",
    539         "competitive: mechanics failed"
    540       );
    541     }
    542 
    543     // ---- Phase 4: Piece lifecycle ----
    544     if (mechanicsWork) {
    545       piecesWork = session.piecesLocked > 0 || session.hardDropsObserved > 0;
    546     }
    547 
    548     if (mechanicsWork && !piecesWork) {
    549       session.skippedPhases.push(
    550         "gameplay: piece lifecycle failed",
    551         "gameover: piece lifecycle failed",
    552         "endurance: piece lifecycle failed",
    553         "competitive: piece lifecycle failed"
    554       );
    555     }
    556 
    557     // ---- Phase 5: Gameplay ----
    558     if (piecesWork) {
    559       try {
    560         await driver.loadPage(serverUrl);
    561         cal = await driver.calibrate();
    562         if (gameStarted && !cal.gridDetected) {
    563           await driver.wait(500);
    564           const recal = await driver.recalibrate();
    565           if (recal.gridDetected) cal = recal;
    566         }
    567         session.started = session.started || cal.startMechanism !== "unknown";
    568       } catch (err) {
    569         if (err instanceof Error && err.name === "InactivityAbortError") throw err;
    570       }
    571 
    572       await runGameplayPhase(driver, session, gameplay);
    573       gameplayWorks = gameplay.pieces_placed > 0;
    574     }
    575 
    576     if (piecesWork && !gameplayWorks) {
    577       session.skippedPhases.push(
    578         "endurance: gameplay failed",
    579         "competitive: gameplay failed"
    580       );
    581     }
    582 
    583     // ---- Phase 6: Game over ----
    584     if (piecesWork) {
    585       try {
    586         await driver.loadPage(serverUrl);
    587         cal = await driver.calibrate();
    588         if (!cal.gridDetected) {
    589           await driver.wait(500);
    590           const recal = await driver.recalibrate();
    591           if (recal.gridDetected) cal = recal;
    592         }
    593       } catch (err) {
    594         if (err instanceof Error && err.name === "InactivityAbortError") throw err;
    595       }
    596 
    597       await runGameOverPhase(driver, session);
    598     }
    599 
    600     // ---- Phase 7: Endurance ----
    601     if (gameplayWorks) {
    602       try {
    603         await driver.loadPage(serverUrl);
    604         cal = await driver.calibrate();
    605         if (!cal.gridDetected) {
    606           await driver.wait(500);
    607           const recal = await driver.recalibrate();
    608           if (recal.gridDetected) cal = recal;
    609         }
    610       } catch (err) {
    611         if (err instanceof Error && err.name === "InactivityAbortError") throw err;
    612       }
    613 
    614       await runEndurancePhase(driver, session, gameplay);
    615     }
    616 
    617     // ---- Phase 8: Competitive play ----
    618     if (gameplayWorks) {
    619       try {
    620         await driver.loadPage(serverUrl);
    621         cal = await driver.calibrate();
    622         if (!cal.gridDetected) {
    623           await driver.wait(500);
    624           const recal = await driver.recalibrate();
    625           if (recal.gridDetected) cal = recal;
    626         }
    627       } catch (err) {
    628         if (err instanceof Error && err.name === "InactivityAbortError") throw err;
    629       }
    630 
    631       competitivePlay = await runCompetitivePlayPhase(driver, session, gameplay, serverUrl);
    632     } else if (!session.skippedPhases.some((p) => p.startsWith("competitive:"))) {
    633       session.skippedPhases.push("competitive: gameplay failed");
    634     }
    635   } catch (err) {
    636     if (err instanceof Error && err.name === "InactivityAbortError") {
    637       console.log(`[bot] inactivity watchdog fired: ${err.message}`);
    638       session.skippedPhases.push(`inactivity_abort: ${err.message}`);
    639     } else {
    640       throw err;
    641     }
    642   }
    643 
    644   session.durationSeconds = gameplay.play_duration_seconds;
    645 
    646   // ---- Derive test results ----
    647   const phaseState = { gameStarted, mechanicsWork, piecesWork, gameplayWorks };
    648   const testResults = deriveTestResults(session, cal, loadResult, driver.getConsoleErrors(), gameplay, phaseState, competitivePlay);
    649 
    650   const calibrationDrift = driver.getCalibrationDrift();
    651   return { testResults, calibration: cal, gameplay, session, survey, competitivePlay, calibrationDrift };
    652 }
    653 
    654 // ---------------------------------------------------------------------------
    655 // Phase implementations (use driver, never Playwright directly)
    656 // ---------------------------------------------------------------------------
    657 
    658 async function runBasicMechanicsPhase(
    659   driver: TetrisDriver,
    660   session: GameSession
    661 ): Promise<void> {
    662   // Auto-drop test: read grid twice with 5s gap, no input
    663   const snapT0 = await driver.readGrid();
    664   if (snapT0.grid) session.gridReadSuccess++;
    665   else session.gridReadFail++;
    666   session.frames++;
    667 
    668   await driver.wait(5000);
    669 
    670   const snapT1 = await driver.readGrid();
    671   if (snapT1.grid) session.gridReadSuccess++;
    672   else session.gridReadFail++;
    673   session.frames++;
    674 
    675   if (snapT0.grid && snapT1.grid && driver.gridsAreDifferent(snapT0.grid, snapT1.grid)) {
    676     const topBefore = countFilledInTopRows(snapT0.grid, 10);
    677     const topAfter = countFilledInTopRows(snapT1.grid, 10);
    678     const bottomBefore = snapT0.filledInBottom(10);
    679     const bottomAfter = snapT1.filledInBottom(10);
    680     if (bottomAfter > bottomBefore || topAfter < topBefore || driver.gridsAreDifferent(snapT0.grid, snapT1.grid)) {
    681       session.events.push({ type: "piece_moved", direction: "down", frame: session.frames });
    682     }
    683   }
    684 
    685   // Movement tests
    686   for (const dir of ["left", "right", "down"] as const) {
    687     const snapBefore = await driver.readGrid();
    688     if (snapBefore.grid) session.gridReadSuccess++;
    689     else session.gridReadFail++;
    690     session.frames++;
    691 
    692     await driver.pressKey(dir);
    693     await driver.wait(300);
    694 
    695     const snapAfter = await driver.readGrid();
    696     if (snapAfter.grid) session.gridReadSuccess++;
    697     else session.gridReadFail++;
    698     session.frames++;
    699 
    700     if (snapBefore.grid && snapAfter.grid && driver.gridsAreDifferent(snapBefore.grid, snapAfter.grid)) {
    701       session.movementsObserved++;
    702       session.events.push({ type: "piece_moved", direction: dir, frame: session.frames });
    703     }
    704   }
    705 
    706   // Rotation test: press rotate 4 times and check how many DISTINCT shapes
    707   // the piece cycles through. A correctly working Tetris game lets you
    708   // rotate a non-O piece to at least 2 different states (I/S/Z have 2 total
    709   // states, J/L/T have 4). A broken game where rotation only changes the
    710   // piece once (then gets stuck) will only show 1 non-baseline shape, which
    711   // is not enough to pass this test.
    712   //
    713   // Shape comparison is position-invariant (see normalizedShapeKey) so that
    714   // auto-drop between presses doesn't confuse the detector.
    715   const snapBeforeRot = await driver.readGrid();
    716   if (snapBeforeRot.grid) session.gridReadSuccess++;
    717   else session.gridReadFail++;
    718   session.frames++;
    719 
    720   const observedShapes = new Set<string>();
    721   if (snapBeforeRot.activePieceCells && snapBeforeRot.activePieceCells.length > 0) {
    722     observedShapes.add(normalizedShapeKey(snapBeforeRot.activePieceCells));
    723   }
    724 
    725   let lastRotationSnap = snapBeforeRot;
    726   for (let i = 0; i < 4; i++) {
    727     await driver.pressKey("rotate");
    728     await driver.wait(100);
    729 
    730     const snap = await driver.readGrid();
    731     if (snap.grid) session.gridReadSuccess++;
    732     else session.gridReadFail++;
    733     session.frames++;
    734 
    735     if (snap.activePieceCells && snap.activePieceCells.length > 0) {
    736       observedShapes.add(normalizedShapeKey(snap.activePieceCells));
    737     }
    738     lastRotationSnap = snap;
    739 
    740     // Any observed change in the grid between successive rotations counts as
    741     // a "rotation observed" event for legacy metrics. This keeps the gameplay
    742     // phase's counters consistent with the old behavior for downstream tests
    743     // that still use session.rotationsObserved.
    744     if (
    745       i === 0 &&
    746       snapBeforeRot.grid &&
    747       snap.grid &&
    748       driver.gridsAreDifferent(snapBeforeRot.grid, snap.grid)
    749     ) {
    750       session.rotationsObserved++;
    751       session.events.push({ type: "piece_rotated", frame: session.frames });
    752     }
    753   }
    754 
    755   // Record the max number of distinct shapes ever seen in a single rotation
    756   // test. Later gameplay-phase probes may overwrite this if they see more.
    757   if (observedShapes.size > session.distinctRotationShapes) {
    758     session.distinctRotationShapes = observedShapes.size;
    759   }
    760 
    761   // Silence unused-variable warning: lastRotationSnap is kept for potential
    762   // debugging/extension.
    763   void lastRotationSnap;
    764 
    765   // Hard drop test
    766   const snapBeforeDrop = await driver.readGrid();
    767   if (snapBeforeDrop.grid) session.gridReadSuccess++;
    768   else session.gridReadFail++;
    769   session.frames++;
    770 
    771   await driver.pressKey("drop");
    772   await driver.wait(500);
    773 
    774   const snapAfterDrop = await driver.readGrid();
    775   if (snapAfterDrop.grid) session.gridReadSuccess++;
    776   else session.gridReadFail++;
    777   session.frames++;
    778 
    779   if (snapBeforeDrop.grid && snapAfterDrop.grid && driver.gridsAreDifferent(snapBeforeDrop.grid, snapAfterDrop.grid)) {
    780     const bottomFilled = snapAfterDrop.filledInBottom(5);
    781     if (bottomFilled > 0) {
    782       session.hardDropsObserved++;
    783       session.piecesLocked++;
    784       session.events.push({ type: "hard_drop", frame: session.frames });
    785       session.events.push({ type: "piece_locked", frame: session.frames, filledDelta: bottomFilled });
    786     }
    787   }
    788 
    789   // New piece spawns
    790   await driver.wait(500);
    791   const snapAfterSpawn = await driver.readGrid(snapAfterDrop.grid);
    792   if (snapAfterSpawn.grid) {
    793     session.gridReadSuccess++;
    794     session.frames++;
    795     if (snapAfterSpawn.hasFilledInTop(4)) {
    796       session.piecesSpawned++;
    797       if (snapAfterSpawn.activePieceCells) {
    798         const pt = snapAfterSpawn.activePieceType || "unknown";
    799         session.pieceTypes.add(pt);
    800         session.events.push({ type: "piece_spawned", pieceType: pt as PieceType, frame: session.frames });
    801       }
    802     }
    803   } else {
    804     session.gridReadFail++;
    805     session.frames++;
    806   }
    807 
    808   // Piece locks persistence test
    809   const snapPersist1 = await driver.readGrid();
    810   await driver.wait(2000);
    811   const snapPersist2 = await driver.readGrid();
    812   if (snapPersist1.grid && snapPersist2.grid) {
    813     session.gridReadSuccess += 2;
    814     session.frames += 2;
    815     const bottom1 = snapPersist1.filledInBottom(4);
    816     const bottom2 = snapPersist2.filledInBottom(4);
    817     if (bottom1 > 0 && bottom2 >= bottom1) {
    818       if (session.piecesLocked === 0) session.piecesLocked++;
    819     }
    820   }
    821 }
    822 
    823 async function runGameplayPhase(
    824   driver: TetrisDriver,
    825   session: GameSession,
    826   gameplay: GameplayStats
    827 ): Promise<void> {
    828   const snapBefore = await driver.readGrid();
    829   const filledBefore = snapBefore.filledCount;
    830   if (snapBefore.grid) session.gridReadSuccess++;
    831   else session.gridReadFail++;
    832   session.frames++;
    833 
    834   // Read initial score
    835   const initialScore = await driver.readScore();
    836   if (initialScore !== null) session.scoreValues.push(initialScore);
    837 
    838   // Play using AI
    839   const result = await playGame(driver, {
    840     maxPieces: 60,
    841     maxDurationMs: 45000,
    842     rotationTrack: session.rotationShapesByPiece,
    843   });
    844   gameplay.pieces_placed += result.piecesPlaced;
    845   gameplay.errors_during_play += result.errors;
    846   session.gridReadSuccess += result.gridReads;
    847   session.gridReadFail += result.gridReadFails;
    848   session.frames += result.gridReads + result.gridReadFails;
    849   session.piecesLocked += result.piecesPlaced;
    850 
    851   for (const sv of result.scoreValues) {
    852     session.scoreValues.push(sv);
    853     if (sv > gameplay.max_score_observed) gameplay.max_score_observed = sv;
    854   }
    855 
    856   if (result.linesCleared > 0) {
    857     session.linesCleared += result.linesCleared;
    858     gameplay.lines_cleared += result.linesCleared;
    859     for (let i = 0; i < result.linesCleared; i++) {
    860       session.events.push({ type: "line_cleared", count: 1, frame: session.frames });
    861     }
    862   }
    863 
    864   // Propagate score-before/after-clear from AI play
    865   if (result.scoreBeforeClear !== undefined && session.scoreBeforeClear === undefined) {
    866     session.scoreBeforeClear = result.scoreBeforeClear;
    867   }
    868   if (result.scoreAfterClear !== undefined && session.scoreAfterClear === undefined) {
    869     session.scoreAfterClear = result.scoreAfterClear;
    870   }
    871 
    872   // Read final score
    873   const finalScore = await driver.readScore();
    874   if (finalScore !== null) {
    875     session.scoreValues.push(finalScore);
    876     if (finalScore > gameplay.max_score_observed) gameplay.max_score_observed = finalScore;
    877   }
    878 
    879   // If no score element found, try to detect changing numbers
    880   if (session.scoreValues.length === 0) {
    881     // We cannot scan page text without Playwright directly, so skip this fallback
    882     // The driver's readScore handles the detection
    883   }
    884 
    885   if (result.piecesPlaced > 0) {
    886     session.events.push({
    887       type: "piece_locked",
    888       frame: session.frames,
    889       filledDelta: result.piecesPlaced * 4,
    890     });
    891   }
    892 
    893   // If no lines cleared by AI, try brute-force
    894   if (session.linesCleared === 0) {
    895     // Capture score before brute-force line clear attempt
    896     if (session.scoreBeforeClear === undefined) {
    897       const preScore = await driver.readScore();
    898       if (preScore !== null) session.scoreBeforeClear = preScore;
    899     }
    900 
    901     const cleared = await tryFillRow(driver, 10);
    902     gameplay.pieces_placed += 10;
    903     if (cleared) {
    904       session.linesCleared++;
    905       gameplay.lines_cleared++;
    906       session.events.push({ type: "line_cleared", count: 1, frame: session.frames });
    907 
    908       // Capture score after brute-force line clear
    909       if (session.scoreAfterClear === undefined) {
    910         await driver.wait(200);
    911         const postScore = await driver.readScore();
    912         if (postScore !== null) session.scoreAfterClear = postScore;
    913       }
    914     }
    915   }
    916 
    917   // Check if total filled decreased
    918   if (session.linesCleared === 0) {
    919     const snapAfter = await driver.readGrid();
    920     const filledAfter = snapAfter.filledCount;
    921     if (filledAfter < filledBefore && filledBefore > 0) {
    922       // Capture score around grid-verified line clear
    923       if (session.scoreBeforeClear === undefined) {
    924         // Use the last known score reading as proxy
    925         const lastKnown = session.scoreValues.length > 0
    926           ? session.scoreValues[session.scoreValues.length - 1]
    927           : null;
    928         if (lastKnown !== null) session.scoreBeforeClear = lastKnown;
    929       }
    930 
    931       session.linesCleared++;
    932       gameplay.lines_cleared++;
    933       session.events.push({ type: "line_cleared", count: 1, frame: session.frames });
    934 
    935       if (session.scoreAfterClear === undefined) {
    936         const postScore = await driver.readScore();
    937         if (postScore !== null) session.scoreAfterClear = postScore;
    938       }
    939     }
    940   }
    941 }
    942 
    943 async function runGameOverPhase(
    944   driver: TetrisDriver,
    945   session: GameSession
    946 ): Promise<void> {
    947   const MAX_DROPS = 40;
    948   const BATCH_SIZE = 5;
    949 
    950   // Capture game over text + restart option immediately after triggering game
    951   // over, before anything (e.g. Phase 8 page reload) can clear it.
    952   const captureGameOverDisplay = async (): Promise<void> => {
    953     try {
    954       session.gameOverText = await driver.detectGameOverText();
    955     } catch {
    956       session.gameOverText = null;
    957     }
    958     try {
    959       session.gameOverRestartAvailable = await driver.detectRestartOption();
    960     } catch {
    961       session.gameOverRestartAvailable = false;
    962     }
    963   };
    964 
    965   for (let i = 0; i < MAX_DROPS; i++) {
    966     await driver.pressKey("drop");
    967     await driver.wait(150);
    968 
    969     if ((i + 1) % BATCH_SIZE === 0) {
    970       const snap = await driver.readGrid();
    971       if (snap.grid) {
    972         session.gridReadSuccess++;
    973         session.frames++;
    974 
    975         if (snap.hasFilledInTop(4)) {
    976           await driver.pressKey("drop");
    977           await driver.wait(300);
    978           const snapAfter = await driver.readGrid();
    979           if (snapAfter.grid) {
    980             session.gridReadSuccess++;
    981             session.frames++;
    982             if (!driver.gridsAreDifferent(snap.grid, snapAfter.grid)) {
    983               session.gameOverDetected = true;
    984               session.events.push({ type: "game_over", frame: session.frames });
    985               await captureGameOverDisplay();
    986               return;
    987             }
    988           }
    989         }
    990       } else {
    991         session.gridReadFail++;
    992         session.frames++;
    993       }
    994     }
    995   }
    996 
    997   // Check for game over text in DOM
    998   const gameOverText = await driver.detectGameOverText();
    999   if (gameOverText) {
   1000     const finalSnap = await driver.readGrid();
   1001     if (finalSnap.grid && finalSnap.filledCount > 10) {
   1002       session.gameOverDetected = true;
   1003       session.events.push({ type: "game_over", frame: session.frames });
   1004       // We already have the text; capture restart too.
   1005       session.gameOverText = gameOverText;
   1006       try {
   1007         session.gameOverRestartAvailable = await driver.detectRestartOption();
   1008       } catch {
   1009         session.gameOverRestartAvailable = false;
   1010       }
   1011     }
   1012   }
   1013 }
   1014 
   1015 async function runEndurancePhase(
   1016   driver: TetrisDriver,
   1017   session: GameSession,
   1018   gameplay: GameplayStats
   1019 ): Promise<void> {
   1020   const errorsBefore = driver.getConsoleErrors().length;
   1021   const start = Date.now();
   1022 
   1023   const result = await playGame(driver, {
   1024     maxDurationMs: 30000,
   1025     rotationTrack: session.rotationShapesByPiece,
   1026   });
   1027 
   1028   const elapsed = Math.round((Date.now() - start) / 1000);
   1029   gameplay.pieces_placed += result.piecesPlaced;
   1030   gameplay.lines_cleared += result.linesCleared;
   1031   session.linesCleared += result.linesCleared;
   1032   gameplay.play_duration_seconds += elapsed;
   1033   gameplay.errors_during_play += result.errors;
   1034   session.gridReadSuccess += result.gridReads;
   1035   session.gridReadFail += result.gridReadFails;
   1036   session.frames += result.gridReads + result.gridReadFails;
   1037 
   1038   const newErrors = driver.getConsoleErrors().slice(errorsBefore);
   1039   for (const e of newErrors) {
   1040     if (!session.consoleErrors.includes(e)) session.consoleErrors.push(e);
   1041   }
   1042 }
   1043 
   1044 /**
   1045  * Test whether the game supports counter-clockwise rotation.
   1046  *
   1047  * The naive "press Z then press rotate, compare" approach is broken because
   1048  * rotation state is ordinal: after Z (state 0->3) then rotate (3->0), the
   1049  * piece returns to the original state regardless of direction, so the two
   1050  * intermediate snapshots always differ. That test is a tautology.
   1051  *
   1052  * Instead we run each key against a FRESH baseline game by reloading the
   1053  * page between presses. Both presses are then measured from rotation
   1054  * state 0. If the two resulting grids match, both keys rotate in the same
   1055  * direction; if they differ, they rotate opposite.
   1056  *
   1057  * Returns { done: false, ccw: null } when a reliable signal is unavailable
   1058  * (e.g. no active piece, O-piece baseline, rotate key doesn't rotate at all).
   1059  */
   1060 async function testRotationDirection(
   1061   driver: TetrisDriver,
   1062   serverUrl: string
   1063 ): Promise<{ done: boolean; ccw: boolean | null }> {
   1064   // Helper: reload, start game, wait for an active piece to be visible.
   1065   // On a fresh game the settled grid is empty, so we read WITHOUT passing a
   1066   // settled grid and let readGrid use its top-6-rows fallback to detect the
   1067   // active piece.
   1068   const freshBaseline = async (): Promise<{
   1069     piece: PieceType | null;
   1070     grid: Grid | null;
   1071     settled: Grid | null;
   1072   }> => {
   1073     try {
   1074       const load = await driver.loadPage(serverUrl);
   1075       if (!load.loaded) return { piece: null, grid: null, settled: null };
   1076     } catch {
   1077       return { piece: null, grid: null, settled: null };
   1078     }
   1079     try {
   1080       await driver.calibrate();
   1081     } catch {
   1082       return { piece: null, grid: null, settled: null };
   1083     }
   1084 
   1085     // Use an empty grid as the "settled" reference so the active piece is
   1086     // detected as the full delta (any filled cell in current == active).
   1087     // This is robust even when the game hasn't yet spawned a piece at the
   1088     // moment of the first read.
   1089     const emptySettled: Grid = Array.from({ length: GRID_ROWS }, () =>
   1090       Array.from({ length: GRID_COLS }, () => false)
   1091     );
   1092 
   1093     // Poll up to ~3s for an active piece to appear.
   1094     let snap = await driver.readGrid(emptySettled);
   1095     let attempts = 0;
   1096     while (
   1097       (!snap.activePieceCells || snap.activePieceCells.length !== 4) &&
   1098       attempts < 30
   1099     ) {
   1100       await driver.wait(100);
   1101       snap = await driver.readGrid(emptySettled);
   1102       attempts++;
   1103     }
   1104 
   1105     if (!snap.activePieceCells || snap.activePieceCells.length !== 4) {
   1106       return { piece: null, grid: snap.grid, settled: emptySettled };
   1107     }
   1108     return {
   1109       piece: snap.activePieceType,
   1110       grid: snap.grid,
   1111       settled: emptySettled,
   1112     };
   1113   };
   1114 
   1115   // Extract the active piece cells from a grid (assumes the grid contains
   1116   // only the active piece, which is the case on a fresh game where the
   1117   // settled grid is empty). Returns a position-normalized shape string so
   1118   // that comparisons ignore where on the board the piece sits.
   1119   const shapeKey = (grid: Grid): string | null => {
   1120     const cells: [number, number][] = [];
   1121     for (let r = 0; r < grid.length; r++) {
   1122       for (let c = 0; c < grid[r].length; c++) {
   1123         if (grid[r][c]) cells.push([r, c]);
   1124       }
   1125     }
   1126     if (cells.length !== 4) return null;
   1127     const minR = Math.min(...cells.map(([r]) => r));
   1128     const minC = Math.min(...cells.map(([, c]) => c));
   1129     return cells
   1130       .map(([r, c]) => `${r - minR},${c - minC}`)
   1131       .sort()
   1132       .join("|");
   1133   };
   1134 
   1135   // Helper: press a key, wait briefly for the game to process it, and
   1136   // return the post-press grid if the piece's SHAPE differs from baseline.
   1137   // We compare shape (not full grid) so that piece falling during the wait
   1138   // does not confound the rotation measurement.
   1139   const measureKeyShape = async (
   1140     pressFn: () => Promise<void>,
   1141     baseShape: string
   1142   ): Promise<string | null> => {
   1143     await pressFn();
   1144     await driver.wait(80);
   1145     const snap = await driver.readGrid();
   1146     if (!snap.grid) return null;
   1147     const shape = shapeKey(snap.grid);
   1148     if (!shape) return null;
   1149     if (shape === baseShape) return null;
   1150     return shape;
   1151   };
   1152 
   1153   // Only J, L, T pieces have 4 visually-distinct rotation states. I, S, Z
   1154   // have only 2 (rotating CW vs CCW from state 0 produces an identical
   1155   // visual). O is rotationally symmetric. So we can only distinguish
   1156   // rotation directions using J, L, or T pieces.
   1157   const DISTINGUISHABLE: Set<PieceType> = new Set<PieceType>(["J", "L", "T"]);
   1158   const log = (msg: string) => console.log(`[ccw] ${msg}`);
   1159 
   1160   // Per-piece-type sample: the shape after pressing the calibrated rotate
   1161   // key from a fresh-spawn baseline. Keyed by piece type so Trial 2 can
   1162   // match whatever piece type its fresh reload happens to produce.
   1163   const trial1Shapes = new Map<PieceType, string>();
   1164 
   1165   // ----- Trial 1: collect rotate-key shape samples for several piece types -----
   1166   for (let attempt = 0; attempt < 10; attempt++) {
   1167     const b = await freshBaseline();
   1168     if (!b.grid || !b.piece) continue;
   1169     if (!DISTINGUISHABLE.has(b.piece)) continue;
   1170     if (trial1Shapes.has(b.piece)) continue;
   1171     const baseShape = shapeKey(b.grid);
   1172     if (!baseShape) continue;
   1173     const afterShape = await measureKeyShape(
   1174       () => driver.pressKey("rotate"),
   1175       baseShape
   1176     );
   1177     if (afterShape) {
   1178       trial1Shapes.set(b.piece, afterShape);
   1179       log(`trial1: rotate changed ${b.piece} (samples: ${trial1Shapes.size})`);
   1180       if (trial1Shapes.size >= DISTINGUISHABLE.size) break;
   1181     } else {
   1182       log(`trial1: rotate did NOT change ${b.piece}`);
   1183     }
   1184   }
   1185   if (trial1Shapes.size === 0) {
   1186     log("could not establish any Trial 1 reference direction");
   1187     return { done: false, ccw: null };
   1188   }
   1189 
   1190   // ----- Trial 2: press the raw "z" key from a fresh baseline whose piece
   1191   // type matches one of our Trial 1 samples, and compare the resulting
   1192   // shape to the corresponding Trial 1 shape. -----
   1193   for (let attempt = 0; attempt < 10; attempt++) {
   1194     const b = await freshBaseline();
   1195     if (!b.grid || !b.piece) continue;
   1196     const rotateShape = trial1Shapes.get(b.piece);
   1197     if (!rotateShape) continue;
   1198     const baseShape = shapeKey(b.grid);
   1199     if (!baseShape) continue;
   1200     const afterShape = await measureKeyShape(
   1201       () => driver.pressRawKey("z"),
   1202       baseShape
   1203     );
   1204     if (!afterShape) {
   1205       log(`trial2: z caused no shape change on ${b.piece} -> CCW not supported`);
   1206       return { done: true, ccw: false };
   1207     }
   1208     const opposite = afterShape !== rotateShape;
   1209     log(
   1210       `trial2: ${b.piece} rotate=${rotateShape} z=${afterShape} opposite=${opposite}`
   1211     );
   1212     return { done: true, ccw: opposite };
   1213   }
   1214 
   1215   log("could not find Trial 2 baseline with matching piece");
   1216   return { done: false, ccw: null };
   1217 }
   1218 
/**
 * Competitive-play phase: run the rotation-direction test, reload the game,
 * then play with the AI for up to 60 seconds while sampling score, level,
 * rotation shapes, soft-drop behaviour and filled-cell counts. Afterwards
 * the final score/level/drop speed are read and a few heuristic bug checks
 * are applied to the collected data.
 *
 * @param driver    Game driver used for every page load, key press and read.
 * @param session   Session record; `pieceTypes` and `rotationShapesByPiece`
 *                  are updated in place.
 * @param gameplay  Not referenced anywhere in this function body.
 * @param serverUrl URL used for the rotation test reloads and for the clean
 *                  reload before play.
 * @returns The populated result. The returned object also carries the
 *   underscore-prefixed fields `_ccwResult`, `_ccwTestDone`,
 *   `_softDropDistinct` and `_softDropTestDone`, which are not part of the
 *   declared `CompetitivePlayResult` return type.
 */
async function runCompetitivePlayPhase(
  driver: TetrisDriver,
  session: GameSession,
  gameplay: GameplayStats,
  serverUrl: string
): Promise<CompetitivePlayResult> {
  // Dedicated rotation-direction test (run BEFORE the main play loop so that
  // each key press is measured from a fresh baseline game state). See
  // testRotationDirection() for details. A throw is treated the same as an
  // inconclusive test.
  let ccwTestDone = false;
  let ccwResult: boolean | null = null;
  try {
    const rotResult = await testRotationDirection(driver, serverUrl);
    ccwTestDone = rotResult.done;
    ccwResult = rotResult.ccw;
  } catch {
    ccwTestDone = false;
    ccwResult = null;
  }

  // Reload once more so competitive play starts from a clean game state.
  try {
    await driver.loadPage(serverUrl);
    await driver.calibrate();
  } catch {
    /* continue: play loop will still attempt to run */
  }

  const start = Date.now();
  const maxDuration = 60000; // play-loop time budget in milliseconds

  // Result object, widened with private diagnostic fields that are filled in
  // just before returning.
  const result: CompetitivePlayResult & {
    _ccwResult?: boolean | null;
    _ccwTestDone?: boolean;
    _softDropDistinct?: boolean | null;
    _softDropTestDone?: boolean;
  } = {
    duration_seconds: 0,
    pieces_placed: 0,
    total_lines_cleared: 0,
    single_clears: 0,
    double_clears: 0,
    triple_clears: 0,
    tetris_clears: 0,
    max_combo: 0,
    score_readings: [],
    score_final: 0,
    score_increases: [],
    level_readings: [],
    level_final: 0,
    game_over_reached: false,
    game_over_text_found: null,
    restart_available: false,
    next_piece_visible: false,
    speed_increased: false,
    bugs_detected: [],
  };

  // Read initial score
  let lastScore = 0;
  const initialScore = await driver.readScore();
  if (initialScore !== null) {
    lastScore = initialScore;
    result.score_readings.push(lastScore);
  }

  // Read initial level
  const initialLevel = await driver.readLevel();
  if (initialLevel !== null) result.level_readings.push(initialLevel);

  // Measure initial drop speed
  const initialDropInterval = await driver.measureDropInterval();

  // Play loop
  // settledGrid = the locked board WITHOUT any active piece. Recomputed
  // after each placement by reading the fresh grid and stripping the newly-
  // spawned piece.
  let settledGrid: Grid | null = null;
  let pollCount = 0;
  let consecutiveClears = 0;
  let maxCombo = 0;
  let softDropTestDone = false;
  let softDropDistinct: boolean | null = null;

  // Filled-cell counts sampled every 10 placed pieces, used by the rendering
  // trail heuristic after the loop. trailCheckPieceMark records the piece
  // count of the last sample so the same count is not sampled twice.
  let filledCellSamples: number[] = [];
  let trailCheckPieceMark = 0;

  while (Date.now() - start < maxDuration) {
    try {
      const snap = await driver.readGrid(settledGrid);
      pollCount++;

      // Unreadable grid: back off briefly and poll again.
      if (!snap.grid) {
        await driver.wait(60);
        continue;
      }

      // Score tracking every 5th poll. Null and non-positive readings are
      // not recorded; only strict increases are logged as deltas.
      if (pollCount % 5 === 0) {
        const score = await driver.readScore();
        if (score !== null && score > 0) {
          result.score_readings.push(score);
          if (score > lastScore) {
            result.score_increases.push(score - lastScore);
            lastScore = score;
          }
        }
      }

      // Level tracking every 10th poll
      if (pollCount % 10 === 0) {
        const level = await driver.readLevel();
        if (level !== null) result.level_readings.push(level);
      }

      // Active piece detection + AI placement
      if (snap.activePieceCells && snap.activePieceCells.length === 4) {
        const pieceType = snap.activePieceType || "unknown";
        session.pieceTypes.add(pieceType);

        // Rotation probe for all_pieces_rotate: for the first time we see
        // this piece type in competitive play, press rotate 4 times and
        // record distinct normalized shapes. A correctly working game
        // returns to baseline after 4 presses so the placement logic below
        // continues from the same rotation state. Skip O and unknown.
        if (
          pieceType !== "unknown" &&
          pieceType !== "O" &&
          !session.rotationShapesByPiece.has(pieceType)
        ) {
          const shapes = new Set<string>();
          shapes.add(normalizedShapeKey(snap.activePieceCells));
          for (let r = 0; r < 4; r++) {
            await driver.pressKey("rotate");
            await driver.wait(80);
            const rotSnap = await driver.readGrid(settledGrid);
            if (rotSnap.activePieceCells && rotSnap.activePieceCells.length > 0) {
              shapes.add(normalizedShapeKey(rotSnap.activePieceCells));
            }
          }
          session.rotationShapesByPiece.set(pieceType, shapes);
        }

        // Soft drop test: attempted while the placed-piece count is a
        // multiple of 5 above 3, until one attempt yields active-piece cells
        // both before and after the key press. The result is "distinct" when
        // the piece's average row moved down by 0.5 to 3 rows.
        if (!softDropTestDone && result.pieces_placed > 3 && result.pieces_placed % 5 === 0) {
          const snapBeforeDown = await driver.readGrid(settledGrid);
          await driver.pressKey("down");
          await driver.wait(60);
          const snapAfterDown = await driver.readGrid(settledGrid);

          if (snapBeforeDown.activePieceCells && snapAfterDown.activePieceCells) {
            const avgRowBefore = snapBeforeDown.activePieceCells.reduce((s, [r]) => s + r, 0) / snapBeforeDown.activePieceCells.length;
            const avgRowAfter = snapAfterDown.activePieceCells.reduce((s, [r]) => s + r, 0) / snapAfterDown.activePieceCells.length;
            const rowDelta = avgRowAfter - avgRowBefore;
            softDropDistinct = rowDelta >= 0.5 && rowDelta <= 3;
            softDropTestDone = true;
          }
        }

        // Rendering trail sampling: one filled-count sample per multiple of
        // 10 placed pieces.
        if (result.pieces_placed > 0 && result.pieces_placed % 10 === 0 && result.pieces_placed !== trailCheckPieceMark) {
          trailCheckPieceMark = result.pieces_placed;
          const sampleSnap = await driver.readGrid();
          if (sampleSnap.grid) filledCellSamples.push(sampleSnap.filledCount);
        }

        // Compute clean locked board (without active piece) for AI eval
        // and as the diff base for the next iteration.
        const boardBeforePlacement = stripActivePiece(snap.grid, snap.activePieceCells);
        const placement = findBestPlacement(boardBeforePlacement, pieceType as PieceType);

        // No placement found: just hard-drop the piece where it is.
        if (placement) {
          await executePlacement(driver, placement, snap.activePieceCells);
        } else {
          await driver.pressKey("drop");
        }

        result.pieces_placed++;

        // Wait for lock + new piece spawn, then re-read and strip the new
        // active piece so settledGrid reflects only the locked board.
        await driver.wait(350);

        const afterSnap = await driver.readGrid(boardBeforePlacement);
        if (afterSnap.grid) {
          // Line-clear detection: compare filled count of locked boards
          // before and after placement (before + 4 = expected, minus clears).
          const filledBefore = countFilled(boardBeforePlacement) + 4;
          const filledNow = countFilled(afterSnap.grid) -
            (afterSnap.activePieceCells ? afterSnap.activePieceCells.length : 0);

          // A drop of more than 5 cells on a board with more than 10
          // expected cells counts as a clear; the number of lines is the
          // deficit divided by the grid width, accepted only for 1..4.
          if (filledNow < filledBefore - 5 && filledBefore > 10) {
            const clearedCount = Math.round((filledBefore - filledNow) / GRID_COLS);
            if (clearedCount > 0 && clearedCount <= 4) {
              result.total_lines_cleared += clearedCount;
              consecutiveClears++;
              if (consecutiveClears > maxCombo) maxCombo = consecutiveClears;

              switch (clearedCount) {
                case 1: result.single_clears++; break;
                case 2: result.double_clears++; break;
                case 3: result.triple_clears++; break;
                case 4: result.tetris_clears++; break;
              }
            }
          } else if (filledNow >= filledBefore - 1) {
            // Board grew as expected (within one cell): the combo ends.
            consecutiveClears = 0;
          }

          // Strip the new piece only when its cell count is plausible (3..5);
          // otherwise keep the raw grid as the settled reference.
          if (afterSnap.activePieceCells && afterSnap.activePieceCells.length >= 3 && afterSnap.activePieceCells.length <= 5) {
            settledGrid = stripActivePiece(afterSnap.grid, afterSnap.activePieceCells);
          } else {
            settledGrid = afterSnap.grid;
          }
        }
      } else {
        // No active piece visible -- wait briefly for spawn.
        await driver.wait(60);
      }
    } catch {
      // Any error inside one iteration is swallowed; pause and keep polling
      // until the time budget runs out.
      await driver.wait(60);
    }
  }

  result.duration_seconds = Math.round((Date.now() - start) / 1000);
  result.max_combo = maxCombo;

  // Read final score
  const finalScore = await driver.readScore();
  if (finalScore !== null) {
    result.score_final = finalScore;
    result.score_readings.push(finalScore);
  }

  // Read final level
  const finalLevel = await driver.readLevel();
  if (finalLevel !== null) {
    result.level_final = finalLevel;
    result.level_readings.push(finalLevel);
  }

  // Measure final drop speed; "increased" means the final interval is below
  // 80% of the initial one, with both measurements positive.
  const finalDropInterval = await driver.measureDropInterval();
  if (initialDropInterval > 0 && finalDropInterval > 0 && finalDropInterval < initialDropInterval * 0.8) {
    result.speed_increased = true;
  }

  // Game over check
  const gameOverText = await driver.detectGameOverText();
  if (gameOverText) {
    result.game_over_reached = true;
    result.game_over_text_found = gameOverText;
  }

  result.restart_available = await driver.detectRestartOption();
  result.next_piece_visible = await driver.detectNextPiecePreview();

  // Bug detection: with more than 3 recorded score increases, flag scoring
  // when every increase is at most 200 even though at least one multi-line
  // clear was counted.
  if (result.score_increases.length > 3) {
    const singleDeltas = result.score_increases.filter((d) => d > 0 && d <= 200);
    const multiDeltas = result.score_increases.filter((d) => d > 200);
    if (singleDeltas.length > 0 && multiDeltas.length === 0 &&
        (result.double_clears + result.triple_clears + result.tetris_clears) > 0) {
      result.bugs_detected.push("score_does_not_scale_with_simultaneous_clears");
    }
  }

  // Level never changed although at least 10 lines were cleared.
  if (result.level_readings.length > 1) {
    const uniqueLevels = [...new Set(result.level_readings)];
    if (uniqueLevels.length === 1 && result.total_lines_cleared >= 10) {
      result.bugs_detected.push("level_does_not_increase");
    }
  }

  // Level changed but the measured drop interval did not shrink.
  if (result.level_readings.length > 1) {
    const uniqueLevels = [...new Set(result.level_readings)];
    if (uniqueLevels.length > 1 && !result.speed_increased) {
      result.bugs_detected.push("speed_does_not_increase");
    }
  }

  // Rendering trail detection. Only evaluated with at least 10 placed pieces
  // and 2 samples; otherwise rendering_trail_detected is left unset here.
  // Flagged when the largest sample exceeds 8 cells per placed piece, or
  // when 3+ samples never decrease and the largest exceeds 6 per piece.
  if (result.pieces_placed >= 10 && filledCellSamples.length >= 2) {
    const maxFilled = Math.max(...filledCellSamples);
    if (maxFilled > result.pieces_placed * 8) {
      result.rendering_trail_detected = true;
      result.bugs_detected.push("rendering_trail");
    } else {
      const onlyIncreasing = filledCellSamples.every((v, i) =>
        i === 0 || v >= filledCellSamples[i - 1]
      );
      if (onlyIncreasing && filledCellSamples.length >= 3 && maxFilled > result.pieces_placed * 6) {
        result.rendering_trail_detected = true;
        result.bugs_detected.push("rendering_trail");
      } else {
        result.rendering_trail_detected = false;
      }
    }
  }

  // Attach the private diagnostics gathered above.
  result._ccwResult = ccwResult;
  result._ccwTestDone = ccwTestDone;
  result._softDropDistinct = softDropDistinct;
  result._softDropTestDone = softDropTestDone;

  return result;
}
   1526 
   1527 // ---------------------------------------------------------------------------
   1528 // Play helpers (use driver, never Playwright directly)
   1529 // ---------------------------------------------------------------------------
   1530 
   1531 async function playGame(
   1532   driver: TetrisDriver,
   1533   options: {
   1534     maxPieces?: number;
   1535     maxDurationMs?: number;
   1536     rotationTrack?: Map<string, Set<string>>;
   1537   }
   1538 ): Promise<{ piecesPlaced: number; linesCleared: number; errors: number; gridReads: number; gridReadFails: number; scoreValues: number[]; scoreBeforeClear?: number; scoreAfterClear?: number }> {
   1539   const maxPieces = options.maxPieces ?? 100;
   1540   const maxDuration = options.maxDurationMs ?? 30000;
   1541   const rotationTrack = options.rotationTrack;
   1542   const start = Date.now();
   1543   let piecesPlaced = 0;
   1544   let linesCleared = 0;
   1545   let errors = 0;
   1546   let gridReads = 0;
   1547   let gridReadFails = 0;
   1548   let consecutiveReadFails = 0;
   1549   const scoreValues: number[] = [];
   1550   let scorePollCounter = 0;
   1551   let scoreBeforeClear: number | undefined;
   1552   let scoreAfterClear: number | undefined;
   1553   /** The most recent score reading, kept up to date so we can snapshot it
   1554    *  immediately before a line clear is detected. */
   1555   let lastScoreReading: number | null = null;
   1556 
   1557   // settledGrid = the locked board WITHOUT any active piece. We recompute
   1558   // it after each placement by reading the fresh grid and stripping out the
   1559   // new active piece (the one that just spawned after our drop).
   1560   let settledGrid: Grid | null = null;
   1561 
   1562   while (piecesPlaced < maxPieces && Date.now() - start < maxDuration) {
   1563     try {
   1564       const snap = await driver.readGrid(settledGrid);
   1565 
   1566       if (!snap.grid) {
   1567         gridReadFails++;
   1568         consecutiveReadFails++;
   1569         if (consecutiveReadFails > 10) {
   1570           await playRandomForDuration(driver, Math.min(5000, maxDuration - (Date.now() - start)));
   1571           piecesPlaced += 3;
   1572           break;
   1573         }
   1574         await driver.wait(60);
   1575         continue;
   1576       }
   1577 
   1578       gridReads++;
   1579       consecutiveReadFails = 0;
   1580 
   1581       // Score tracking
   1582       scorePollCounter++;
   1583       if (scorePollCounter % 5 === 0) {
   1584         const score = await driver.readScore();
   1585         if (score !== null) {
   1586           scoreValues.push(score);
   1587           lastScoreReading = score;
   1588         }
   1589       }
   1590 
   1591       if (snap.activePieceCells && snap.activePieceCells.length === 4) {
   1592         const pieceType = snap.activePieceType || "unknown";
   1593 
   1594         // Rotation probe: for the first time we see this piece type, press
   1595         // rotate 4 times and record each resulting shape. A correctly
   1596         // working game cycles through the piece's rotation states and
   1597         // returns to baseline after 4 presses (so executePlacement below
   1598         // can proceed normally from the same rotation state). A broken
   1599         // game where rotation only fires once (or stalls) will record
   1600         // fewer distinct shapes -- which is exactly what the
   1601         // all_pieces_rotate test is looking for.
   1602         if (
   1603           rotationTrack &&
   1604           pieceType !== "unknown" &&
   1605           pieceType !== "O" &&
   1606           !rotationTrack.has(pieceType)
   1607         ) {
   1608           const shapes = new Set<string>();
   1609           shapes.add(normalizedShapeKey(snap.activePieceCells));
   1610           for (let r = 0; r < 4; r++) {
   1611             await driver.pressKey("rotate");
   1612             await driver.wait(80);
   1613             const rotSnap = await driver.readGrid(settledGrid);
   1614             if (rotSnap.activePieceCells && rotSnap.activePieceCells.length > 0) {
   1615               shapes.add(normalizedShapeKey(rotSnap.activePieceCells));
   1616             }
   1617           }
   1618           rotationTrack.set(pieceType, shapes);
   1619         }
   1620 
   1621         // Re-read snap after rotation probe so activePieceCells reflects
   1622         // the current position (auto-drop may have shifted the piece).
   1623         let workingSnap = snap;
   1624         if (rotationTrack && pieceType !== "unknown" && pieceType !== "O") {
   1625           const freshSnap = await driver.readGrid(settledGrid);
   1626           if (freshSnap.grid && freshSnap.activePieceCells && freshSnap.activePieceCells.length === 4) {
   1627             workingSnap = freshSnap;
   1628           }
   1629         }
   1630 
   1631         // Save the locked board as-of right now (no active piece). This is
   1632         // what findBestPlacement evaluates against, and what we use as the
   1633         // diff base for the NEXT iteration's active-piece detection.
   1634         const boardBeforePlacement = stripActivePiece(workingSnap.grid!, workingSnap.activePieceCells!);
   1635         const placement = findBestPlacement(boardBeforePlacement, pieceType as PieceType);
   1636 
   1637         // Track whether this placement will produce a line clear so we can
   1638         // snapshot the score before and after.
   1639         const expectsClear = placement ? placement.linesCleared > 0 : false;
   1640         // Snapshot score just before executing the drop (for score_increases_on_clear)
   1641         if (expectsClear && scoreBeforeClear === undefined) {
   1642           const preClearScore = await driver.readScore();
   1643           if (preClearScore !== null) {
   1644             scoreBeforeClear = preClearScore;
   1645             lastScoreReading = preClearScore;
   1646           } else if (lastScoreReading !== null) {
   1647             scoreBeforeClear = lastScoreReading;
   1648           }
   1649         }
   1650 
   1651         if (placement) {
   1652           await executePlacement(driver, placement, workingSnap.activePieceCells!);
   1653           linesCleared += placement.linesCleared;
   1654           piecesPlaced++;
   1655         } else {
   1656           await driver.pressKey("drop");
   1657           piecesPlaced++;
   1658         }
   1659 
   1660         // Wait long enough for lock + line clears + new piece spawn. Then
   1661         // read the fresh grid. The "active piece" in this afterSnap is the
   1662         // NEW piece that just spawned at the top -- we must detect it using
   1663         // boardBeforePlacement (NOT the current snap which has our just-
   1664         // dropped piece baked in) and strip it to get a clean settledGrid
   1665         // for the next iteration.
   1666         await driver.wait(350);
   1667 
   1668         const afterSnap = await driver.readGrid(boardBeforePlacement);
   1669         if (afterSnap.grid) {
   1670           // Line-clear detection by filled-count delta.
   1671           const filledBefore = countFilled(boardBeforePlacement) + 4;
   1672           const filledAfter = countFilled(afterSnap.grid);
   1673           let clearsThisPlacement = 0;
   1674           if (filledAfter < filledBefore) {
   1675             const possibleClears = Math.round((filledBefore - filledAfter) / GRID_COLS);
   1676             if (possibleClears > 0 && possibleClears <= 4) {
   1677               linesCleared += possibleClears;
   1678               clearsThisPlacement = possibleClears;
   1679             }
   1680           }
   1681 
   1682           // If a line clear happened (AI-predicted or grid-verified) and we
   1683           // haven't captured the post-clear score yet, read it now.
   1684           if ((expectsClear || clearsThisPlacement > 0) && scoreAfterClear === undefined) {
   1685             // If we didn't snapshot the before-clear score yet (grid-detected
   1686             // clear that the AI didn't predict), capture it retroactively from
   1687             // the last known reading.
   1688             if (scoreBeforeClear === undefined && lastScoreReading !== null) {
   1689               scoreBeforeClear = lastScoreReading;
   1690             }
   1691             const postClearScore = await driver.readScore();
   1692             if (postClearScore !== null) {
   1693               scoreAfterClear = postClearScore;
   1694               scoreValues.push(postClearScore);
   1695               lastScoreReading = postClearScore;
   1696             }
   1697           }
   1698 
   1699           if (afterSnap.activePieceCells && afterSnap.activePieceCells.length >= 3 && afterSnap.activePieceCells.length <= 5) {
   1700             settledGrid = stripActivePiece(afterSnap.grid, afterSnap.activePieceCells);
   1701           } else {
   1702             // No new piece detected yet (maybe still locking/clearing).
   1703             // Fall back to using the raw grid; next iteration will re-diff.
   1704             settledGrid = afterSnap.grid;
   1705           }
   1706         }
   1707       } else {
   1708         // No active piece visible yet -- wait briefly for spawn.
   1709         await driver.wait(60);
   1710       }
   1711     } catch {
   1712       errors++;
   1713       await playRandomMove(driver);
   1714       piecesPlaced++;
   1715       await driver.wait(60);
   1716     }
   1717   }
   1718 
   1719   return { piecesPlaced, linesCleared, errors, gridReads, gridReadFails, scoreValues, scoreBeforeClear, scoreAfterClear };
   1720 }
   1721 
   1722 
   1723 async function executePlacement(
   1724   driver: TetrisDriver,
   1725   placement: Placement,
   1726   _activeCells: [number, number][]
   1727 ): Promise<void> {
   1728   // Slam-left strategy (LeeYiyuan approach): don't track live position.
   1729   // 1. Rotate N times. 2. Left ~10 to slam against wall. 3. Right to target.
   1730   // 4. Hard drop. This is position-independent so rotation-column shifts
   1731   // don't matter.
   1732   for (let i = 0; i < placement.rotations; i++) {
   1733     await driver.pressKey("rotate");
   1734     await driver.wait(30);
   1735   }
   1736 
   1737   for (let i = 0; i < GRID_COLS; i++) {
   1738     await driver.pressKey("left");
   1739     await driver.wait(15);
   1740   }
   1741 
   1742   for (let i = 0; i < placement.column; i++) {
   1743     await driver.pressKey("right");
   1744     await driver.wait(15);
   1745   }
   1746 
   1747   await driver.pressKey("drop");
   1748   await driver.wait(60);
   1749 }
   1750 
   1751 async function playRandomMove(driver: TetrisDriver): Promise<void> {
   1752   const actions = ["left", "right", "rotate", "down"] as const;
   1753   const randomMoves = Math.floor(Math.random() * 4) + 1;
   1754   for (let i = 0; i < randomMoves; i++) {
   1755     const action = actions[Math.floor(Math.random() * actions.length)];
   1756     await driver.pressKey(action);
   1757     await driver.wait(50);
   1758   }
   1759   await driver.pressKey("drop");
   1760   await driver.wait(100);
   1761 }
   1762 
   1763 async function playRandomForDuration(driver: TetrisDriver, durationMs: number): Promise<void> {
   1764   const start = Date.now();
   1765   const actions = ["left", "right", "rotate", "down", "drop"] as const;
   1766   while (Date.now() - start < durationMs) {
   1767     const action = actions[Math.floor(Math.random() * actions.length)];
   1768     await driver.pressKey(action);
   1769     await driver.wait(100);
   1770   }
   1771 }
   1772 
   1773 async function tryFillRow(driver: TetrisDriver, maxAttempts: number): Promise<boolean> {
   1774   const columns = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
   1775   let attempts = 0;
   1776 
   1777   for (const targetCol of columns) {
   1778     if (attempts >= maxAttempts) break;
   1779 
   1780     for (let i = 0; i < 6; i++) {
   1781       await driver.pressKey("left");
   1782       await driver.wait(30);
   1783     }
   1784 
   1785     for (let i = 0; i < targetCol; i++) {
   1786       await driver.pressKey("right");
   1787       await driver.wait(30);
   1788     }
   1789 
   1790     await driver.pressKey("drop");
   1791     await driver.wait(200);
   1792     attempts++;
   1793   }
   1794 
   1795   const snap = await driver.readGrid();
   1796   if (!snap.grid) return false;
   1797 
   1798   const bottomFilled = snap.filledInBottom(1);
   1799   return bottomFilled < 8;
   1800 }
   1801 
   1802 // ---------------------------------------------------------------------------
   1803 // Test derivation (pure data, no driver needed)
   1804 // ---------------------------------------------------------------------------
   1805 
   1806 function deriveTestResults(
   1807   session: GameSession,
   1808   cal: DriverCalibration,
   1809   loadResult: LoadResult,
   1810   consoleErrors: string[],
   1811   gameplay: GameplayStats,
   1812   phaseState: PhaseState,
   1813   competitivePlay: CompetitivePlayResult | null
   1814 ): TestResult[] {
   1815   const results: TestResult[] = [];
   1816   const gridReliable = session.gridReadSuccess > 0 &&
   1817     session.gridReadSuccess / (session.gridReadSuccess + session.gridReadFail) > 0.5;
   1818 
   1819   const skipResult = (name: string, reason: string): TestResult => ({
   1820     name, pass: false, detail: `skipped: ${reason}`,
   1821   });
   1822 
   1823   // 1. game_loads: page loaded with non-trivial content and at least one
   1824   // game-shaped landmark. Console errors are captured in the report for
   1825   // informational purposes but do NOT gate this test.
   1826   {
   1827     const loaded = loadResult.loaded;
   1828     const landmarks = session.gameLoadLandmarks;
   1829     const hasLandmark = landmarks && (
   1830       landmarks.hasCanvas ||
   1831       landmarks.hasDomGrid ||
   1832       landmarks.hasTetrisRatioElement ||
   1833       landmarks.hasManyCellsContainer
   1834     );
   1835     const bodyHasContent = landmarks?.bodyHasContent ?? false;
   1836     const pass = loaded && bodyHasContent && !!hasLandmark;
   1837     let detail: string;
   1838     if (!loaded) detail = loadResult.detail || "page failed to load";
   1839     else if (!bodyHasContent) detail = "blank page (body has no content)";
   1840     else if (!hasLandmark) detail = "no game landmarks found (no canvas, grid, or game-shaped element)";
   1841     else detail = `loaded with landmarks: ${landmarks!.landmarksFound.join(", ")}`;
   1842     results.push({ name: "game_loads", pass, detail });
   1843   }
   1844 
   1845   // 2. game_starts
   1846   {
   1847     let startDetail: string;
   1848     if (session.started) {
   1849       startDetail = `started via ${session.startMechanism}`;
   1850       if (cal.startButton) {
   1851         const btn = cal.startButton;
   1852         startDetail += ` (${btn.selector}, "${btn.text}"${btn.disappeared ? ", disappeared after click" : ""})`;
   1853       }
   1854     } else {
   1855       startDetail = "could not start game with any mechanism";
   1856     }
   1857     results.push({ name: "game_starts", pass: session.started, detail: startDetail });
   1858   }
   1859 
   1860   // 3. auto_drop
   1861   if (!phaseState.gameStarted) {
   1862     results.push(skipResult("auto_drop", "game did not start"));
   1863   } else {
   1864     const autoDropEvents = session.events.filter(
   1865       (e) => e.type === "piece_moved" && e.direction === "down" && e.frame <= 2
   1866     );
   1867     if (autoDropEvents.length > 0) {
   1868       results.push({ name: "auto_drop", pass: true, detail: "grid state changed after 5s with no input (grid-verified)" });
   1869     } else if (!gridReliable) {
   1870       results.push({ name: "auto_drop", pass: false, detail: "grid reader unreliable, cannot verify auto-drop" });
   1871     } else {
   1872       results.push({ name: "auto_drop", pass: false, detail: "piece did not move down in 5 seconds (grid-verified)" });
   1873     }
   1874   }
   1875 
   1876   // 4-6. movement tests
   1877   for (const dir of ["left", "right", "down"] as const) {
   1878     if (!phaseState.gameStarted) {
   1879       results.push(skipResult(`move_${dir}`, "game did not start"));
   1880       continue;
   1881     }
   1882     // Not applicable: if control discovery determined that this game has no
   1883     // soft_drop (no key produced a single-row downward move), then move_down
   1884     // is a feature the game genuinely lacks rather than something that's
   1885     // broken. Report as skipped with a clear reason so it does not drag the
   1886     // score down.
   1887     if (dir === "down" && cal.controlMap && cal.controlMap.soft_drop.confidence === "not_found") {
   1888       results.push(skipResult("move_down", "no soft_drop key (game has only hard_drop)"));
   1889       continue;
   1890     }
   1891     const moveEvents = session.events.filter((e) => e.type === "piece_moved" && e.direction === dir);
   1892     if (moveEvents.length > 0) {
   1893       results.push({ name: `move_${dir}`, pass: true, detail: "grid state changed after key press (grid-verified)" });
   1894     } else if (!gridReliable) {
   1895       results.push({ name: `move_${dir}`, pass: false, detail: "grid reader unreliable, cannot verify movement" });
   1896     } else {
   1897       results.push({ name: `move_${dir}`, pass: false, detail: "no grid change detected after key press" });
   1898     }
   1899   }
   1900 
   1901   // 7. rotate
   1902   //
   1903   // A correctly working game should cycle a non-O piece through at least 2
   1904   // rotation states (I/S/Z have 2 states; J/L/T have 4). We press rotate 4
   1905   // times and count distinct normalized shapes; a broken game where
   1906   // rotation only fires once will only produce 2 shapes total (initial +
   1907   // first rotation), which is NOT enough. We require 3 distinct shapes
   1908   // overall (initial + at least 2 other states).
   1909   //
   1910   // Fallback: if the Phase 3 probe landed on an O piece (only 1 shape) or
   1911   // the grid reader couldn't identify the active piece cells, we also
   1912   // consult the gameplay-phase per-piece tracking -- any piece type with
   1913   // 3+ distinct shapes observed there proves the game rotates correctly.
   1914   const gameplayRotationShapesMax = [...session.rotationShapesByPiece.values()]
   1915     .reduce((max, set) => Math.max(max, set.size), 0);
   1916   const maxShapesSeen = Math.max(session.distinctRotationShapes, gameplayRotationShapesMax);
   1917   if (!phaseState.gameStarted) {
   1918     results.push(skipResult("rotate", "game did not start"));
   1919   } else if (maxShapesSeen >= 3) {
   1920     results.push({
   1921       name: "rotate",
   1922       pass: true,
   1923       detail: `piece cycled through ${maxShapesSeen} distinct shapes after 4 rotate presses (grid-verified)`,
   1924     });
   1925   } else if (!gridReliable) {
   1926     results.push({ name: "rotate", pass: false, detail: "grid reader unreliable, cannot verify rotation" });
   1927   } else if (maxShapesSeen === 2) {
   1928     results.push({
   1929       name: "rotate",
   1930       pass: false,
   1931       detail: "piece only reached 1 rotation state then stalled (expected at least 2 distinct non-baseline shapes)",
   1932     });
   1933   } else {
   1934     results.push({
   1935       name: "rotate",
   1936       pass: false,
   1937       detail: `no shape change detected after rotate key (${maxShapesSeen} distinct shape(s))`,
   1938     });
   1939   }
   1940 
   1941   // 8. hard_drop
   1942   if (!phaseState.gameStarted) {
   1943     results.push(skipResult("hard_drop", "game did not start"));
   1944   } else if (session.hardDropsObserved > 0) {
   1945     results.push({ name: "hard_drop", pass: true, detail: "piece immediately dropped to bottom (grid-verified)" });
   1946   } else if (!gridReliable) {
   1947     results.push({ name: "hard_drop", pass: false, detail: "grid reader unreliable, cannot verify hard drop" });
   1948   } else {
   1949     results.push({ name: "hard_drop", pass: false, detail: "no grid change with bottom cells detected after hard drop key" });
   1950   }
   1951 
   1952   // 9. all_pieces_rotate
   1953   //
   1954   // During gameplay we probe each new piece type by pressing rotate 4
   1955   // times and recording distinct normalized shapes. A correctly working
   1956   // game cycles J/L/T through 4 rotation states (we expect to observe 3+
   1957   // distinct shapes even accounting for timing jitter).
   1958   //
   1959   // Pass rule: at least 2 multi-state piece types (J/L/T) reached 3+
   1960   // distinct shapes (baseline + at least 2 other states). This excludes
   1961   // broken games that rotate exactly once before getting stuck (2 shapes
   1962   // total). We require multi-state pieces because I/S/Z in classic-style
   1963   // games only have 2 rotation states, so we can't distinguish "broken
   1964   // rotation stuck at state 1" from "working 2-state S/Z".
   1965   //
   1966   // Skip if fewer than 2 J/L/T types were ever seen in the gameplay
   1967   // phase -- not enough data to make the claim.
   1968   if (!phaseState.gameStarted) {
   1969     results.push(skipResult("all_pieces_rotate", "game did not start"));
   1970   } else {
   1971     // Union of piece types visible via session.pieceTypes AND the tracking
   1972     // map keys, since the tracking map is populated in gameplay phases that
   1973     // don't add to pieceTypes.
   1974     const allSeenTypes = new Set<string>([
   1975       ...session.pieceTypes,
   1976       ...session.rotationShapesByPiece.keys(),
   1977     ]);
   1978     const multiStateTypesSeen = [...allSeenTypes].filter((t) =>
   1979       ["J", "L", "T"].includes(t)
   1980     );
   1981     const trackedTypes = [...session.rotationShapesByPiece.entries()];
   1982     const multiStateRotated = trackedTypes.filter(
   1983       ([t, shapes]) => ["J", "L", "T"].includes(t) && shapes.size >= 3
   1984     );
   1985     if (multiStateTypesSeen.length < 2) {
   1986       results.push(
   1987         skipResult(
   1988           "all_pieces_rotate",
   1989           `not enough piece types to verify (saw ${multiStateTypesSeen.length} of J/L/T, need 2)`
   1990         )
   1991       );
   1992     } else if (multiStateRotated.length >= 2) {
   1993       const detail = multiStateRotated
   1994         .map(([t, s]) => `${t}:${s.size}`)
   1995         .join(" ");
   1996       results.push({
   1997         name: "all_pieces_rotate",
   1998         pass: true,
   1999         detail: `${multiStateRotated.length} J/L/T piece type(s) rotated to 3+ distinct shapes [${detail}]`,
   2000       });
   2001     } else if (!gridReliable) {
   2002       results.push({
   2003         name: "all_pieces_rotate",
   2004         pass: false,
   2005         detail: "grid reader unreliable, cannot verify per-piece rotation",
   2006       });
   2007     } else {
   2008       const detail = trackedTypes
   2009         .filter(([t]) => ["J", "L", "T"].includes(t))
   2010         .map(([t, s]) => `${t}:${s.size}`)
   2011         .join(" ");
   2012       results.push({
   2013         name: "all_pieces_rotate",
   2014         pass: false,
   2015         detail: `only ${multiStateRotated.length} of ${multiStateTypesSeen.length} J/L/T rotated to 3+ distinct shapes (need 2) [${detail}]`,
   2016       });
   2017     }
   2018   }
   2019 
   2020   // 10. piece_locks
   2021   if (!phaseState.gameStarted) {
   2022     results.push(skipResult("piece_locks", "game did not start"));
   2023   } else if (!gridReliable) {
   2024     results.push({ name: "piece_locks", pass: false, detail: "grid reader unreliable, cannot verify piece locking" });
   2025   } else {
   2026     const lockEvents = session.events.filter((e) => e.type === "piece_locked");
   2027     if (lockEvents.length > 0) {
   2028       results.push({ name: "piece_locks", pass: true, detail: `filled cells persist at bottom (grid-verified, ${lockEvents.length} lock event(s))` });
   2029     } else if (session.piecesLocked > 0 && session.piecesSpawned > 0) {
   2030       results.push({ name: "piece_locks", pass: true, detail: `${session.piecesLocked} piece(s) locked during play` });
   2031     } else if (session.piecesLocked > 0 && session.piecesSpawned === 0) {
   2032       results.push({ name: "piece_locks", pass: false, detail: `${session.piecesLocked} lock event(s) but 0 spawns detected - likely false positive from UI misread` });
   2033     } else {
   2034       results.push({ name: "piece_locks", pass: false, detail: "could not verify piece locking via grid reader" });
   2035     }
   2036   }
   2037 
   2038   // 11. new_piece_spawns
   2039   if (!phaseState.gameStarted) {
   2040     results.push(skipResult("new_piece_spawns", "game did not start"));
   2041   } else if (session.piecesSpawned > 0) {
   2042     results.push({ name: "new_piece_spawns", pass: true, detail: `${session.piecesSpawned} new piece(s) detected at top of grid` });
   2043   } else {
   2044     results.push({ name: "new_piece_spawns", pass: false, detail: "could not detect new piece spawning at top via grid reader" });
   2045   }
   2046 
   2047   // 12. multiple_pieces
   2048   if (!phaseState.mechanicsWork) {
   2049     results.push(skipResult("multiple_pieces", "mechanics phase failed"));
   2050   } else if (session.piecesLocked >= 3 && session.piecesSpawned > 0) {
   2051     results.push({ name: "multiple_pieces", pass: true, detail: `${session.piecesLocked} pieces placed during play session` });
   2052   } else {
   2053     results.push({ name: "multiple_pieces", pass: false, detail: `only ${session.piecesLocked} piece(s) detected, need at least 3` });
   2054   }
   2055 
   2056   // 13. line_clear
   2057   if (!phaseState.mechanicsWork) {
   2058     results.push(skipResult("line_clear", "mechanics phase failed"));
   2059   } else if (session.linesCleared > 0) {
   2060     results.push({ name: "line_clear", pass: true, detail: `${session.linesCleared} line(s) cleared (grid-verified)` });
   2061   } else {
   2062     results.push({ name: "line_clear", pass: false, detail: "could not trigger or detect a line clear via grid reader" });
   2063   }
   2064 
   2065   // 14a. score_increases_on_clear
   2066   if (!phaseState.mechanicsWork) {
   2067     results.push(skipResult("score_increases_on_clear", "mechanics phase failed"));
   2068   } else if (session.linesCleared > 0) {
   2069     if (session.scoreBeforeClear !== undefined && session.scoreAfterClear !== undefined) {
   2070       const scoreIncreased = session.scoreAfterClear > session.scoreBeforeClear;
   2071       results.push({
   2072         name: "score_increases_on_clear",
   2073         pass: scoreIncreased,
   2074         detail: scoreIncreased
   2075           ? `score went from ${session.scoreBeforeClear} to ${session.scoreAfterClear} after line clear`
   2076           : `score stayed at ${session.scoreBeforeClear} after clearing ${session.linesCleared} line(s)`,
   2077       });
   2078     } else if (!cal.scoreElementSelector) {
   2079       results.push(skipResult("score_increases_on_clear", "no score element found, cannot verify scoring on clear"));
   2080     } else {
   2081       results.push(skipResult("score_increases_on_clear", "lines cleared but could not read score before/after"));
   2082     }
   2083   } else {
   2084     results.push(skipResult("score_increases_on_clear", "no lines cleared, cannot verify scoring"));
   2085   }
   2086 
   2087   // 14b. score_element_visible
   2088   {
   2089     const hasScoreElement = !!cal.scoreElementSelector;
   2090     results.push({
   2091       name: "score_element_visible",
   2092       pass: hasScoreElement,
   2093       detail: hasScoreElement
   2094         ? `score display found (${cal.scoreElementSelector})`
   2095         : "no score display detected",
   2096     });
   2097   }
   2098 
   2099   // 15. game_over
   2100   if (!phaseState.piecesWork) {
   2101     results.push(skipResult("game_over", "piece lifecycle failed"));
   2102   } else {
   2103     results.push({
   2104       name: "game_over",
   2105       pass: session.gameOverDetected,
   2106       detail: session.gameOverDetected
   2107         ? "game stopped after stacking to top (grid-verified)"
   2108         : "could not trigger or detect game over via grid reader",
   2109     });
   2110   }
   2111 
   2112   // 16. playable_30s
   2113   if (!phaseState.gameplayWorks) {
   2114     results.push(skipResult("playable_30s", "gameplay phase failed"));
   2115   } else {
   2116     // Only count errors during play, not pre-start errors
   2117     const playErrors = gameplay.errors_during_play;
   2118     const crashed = playErrors > 3;
   2119     if (!crashed && gameplay.play_duration_seconds >= 10) {
   2120       results.push({ name: "playable_30s", pass: true, detail: `played for ${gameplay.play_duration_seconds}s, placed ${gameplay.pieces_placed} pieces, no crashes` });
   2121     } else if (crashed) {
   2122       results.push({ name: "playable_30s", pass: false, detail: `${playErrors} play errors` });
   2123     } else {
   2124       results.push({ name: "playable_30s", pass: false, detail: `only played for ${gameplay.play_duration_seconds}s` });
   2125     }
   2126   }
   2127 
   2128   // 17-25: Competitive play tests
   2129 
   2130   // 17. multi_line_clear
   2131   if (!phaseState.gameplayWorks || !competitivePlay) {
   2132     results.push(skipResult("multi_line_clear", "competitive play phase did not run"));
   2133   } else if (competitivePlay.double_clears + competitivePlay.triple_clears + competitivePlay.tetris_clears > 0) {
   2134     const hasMultiLineBug = competitivePlay.bugs_detected.includes("multi_line_clear_only_removes_one_row");
   2135     results.push({
   2136       name: "multi_line_clear",
   2137       pass: !hasMultiLineBug,
   2138       detail: hasMultiLineBug
   2139         ? "multi-line clear detected but only 1 row was removed"
   2140         : `multi-line clears work: ${competitivePlay.double_clears}x double, ${competitivePlay.triple_clears}x triple, ${competitivePlay.tetris_clears}x tetris`,
   2141     });
   2142   } else {
   2143     results.push(skipResult("multi_line_clear", "no multi-line clear opportunity occurred during play"));
   2144   }
   2145 
   2146   // 18. score_scaling
   2147   if (!phaseState.gameplayWorks || !competitivePlay) {
   2148     results.push(skipResult("score_scaling", "competitive play phase did not run"));
   2149   } else if (competitivePlay.double_clears + competitivePlay.triple_clears + competitivePlay.tetris_clears > 0) {
   2150     const hasBug = competitivePlay.bugs_detected.includes("score_does_not_scale_with_simultaneous_clears");
   2151     results.push({
   2152       name: "score_scaling",
   2153       pass: !hasBug,
   2154       detail: hasBug
   2155         ? "multi-line clears give same points as single clears"
   2156         : `score scales with clear type (${competitivePlay.score_increases.length} score changes observed)`,
   2157     });
   2158   } else {
   2159     results.push(skipResult("score_scaling", "no multi-line clear occurred to test scaling"));
   2160   }
   2161 
   2162   // 19. level_progression
   2163   if (!phaseState.gameplayWorks || !competitivePlay) {
   2164     results.push(skipResult("level_progression", "competitive play phase did not run"));
   2165   } else if (competitivePlay.total_lines_cleared < 10) {
   2166     results.push(skipResult("level_progression", `only ${competitivePlay.total_lines_cleared} lines cleared (need 10+)`));
   2167   } else {
   2168     const hasBug = competitivePlay.bugs_detected.includes("level_does_not_increase");
   2169     if (competitivePlay.level_readings.length < 2) {
   2170       results.push(skipResult("level_progression", "could not read level display"));
   2171     } else {
   2172       results.push({
   2173         name: "level_progression",
   2174         pass: !hasBug,
   2175         detail: hasBug
   2176           ? `level stayed at ${competitivePlay.level_readings[0]} despite ${competitivePlay.total_lines_cleared} lines cleared`
   2177           : `level progressed from ${competitivePlay.level_readings[0]} to ${competitivePlay.level_final}`,
   2178       });
   2179     }
   2180   }
   2181 
   2182   // 20. speed_progression
   2183   if (!phaseState.gameplayWorks || !competitivePlay) {
   2184     results.push(skipResult("speed_progression", "competitive play phase did not run"));
   2185   } else if (competitivePlay.level_readings.length < 2 || new Set(competitivePlay.level_readings).size <= 1) {
   2186     results.push(skipResult("speed_progression", "level did not increase, cannot test speed change"));
   2187   } else {
   2188     const hasBug = competitivePlay.bugs_detected.includes("speed_does_not_increase");
   2189     results.push({
   2190       name: "speed_progression",
   2191       pass: !hasBug && competitivePlay.speed_increased,
   2192       detail: competitivePlay.speed_increased
   2193         ? "drop speed increased with level"
   2194         : "drop speed did not change after level increased",
   2195     });
   2196   }
   2197 
   2198   // 21. next_piece_preview
   2199   if (!phaseState.gameplayWorks || !competitivePlay) {
   2200     results.push(skipResult("next_piece_preview", "competitive play phase did not run"));
   2201   } else {
   2202     results.push({
   2203       name: "next_piece_preview",
   2204       pass: competitivePlay.next_piece_visible,
   2205       detail: competitivePlay.next_piece_visible ? "next piece preview display found" : "no next piece preview found",
   2206     });
   2207   }
   2208 
   2209   // 22. game_over_display
   2210   // Verifies the game SHOWS a game-over UI after the internal game over
   2211   // state is reached. Two structural signals (no language matching):
   2212   //   1. A modal or overlay element appeared (position fixed/absolute,
   2213   //      covering >15% viewport, visible). Captured in Phase 6 after
   2214   //      game over was triggered via grid stacking.
   2215   //   2. A clickable element (restart) is visible inside that overlay.
   2216   // The test passes if EITHER a modal is present OR a restart option is
   2217   // visible, because different games show different UI styles.
   2218   if (!phaseState.gameplayWorks || !competitivePlay) {
   2219     results.push(skipResult("game_over_display", "competitive play phase did not run"));
   2220   } else if (!competitivePlay.game_over_reached && !session.gameOverDetected) {
   2221     results.push(skipResult("game_over_display", "game over not reached during play"));
   2222   } else {
   2223     const usePhase6 = session.gameOverDetected;
   2224     const modalFound = usePhase6
   2225       ? (session.gameOverText !== null && session.gameOverText !== undefined)
   2226       : (competitivePlay.game_over_text_found !== null);
   2227     const hasRestart = usePhase6
   2228       ? session.gameOverRestartAvailable === true
   2229       : competitivePlay.restart_available;
   2230     const source = usePhase6 ? "phase6" : "phase8";
   2231     const pass = modalFound || hasRestart;
   2232     const details: string[] = [];
   2233     if (modalFound) details.push("overlay detected");
   2234     if (hasRestart) details.push("restart clickable present");
   2235     if (!pass) details.push("no overlay or restart UI found");
   2236     results.push({
   2237       name: "game_over_display",
   2238       pass,
   2239       detail: `${details.join(", ")} (${source})`,
   2240     });
   2241   }
   2242 
   2243   // 23. counter_clockwise_rotation
   2244   if (!phaseState.gameplayWorks || !competitivePlay) {
   2245     results.push(skipResult("counter_clockwise_rotation", "competitive play phase did not run"));
   2246   } else {
   2247     const ccwTestDone = (competitivePlay as any)._ccwTestDone === true;
   2248     const ccwResult = (competitivePlay as any)._ccwResult;
   2249     if (!ccwTestDone) {
   2250       results.push(skipResult("counter_clockwise_rotation", "could not test rotation direction"));
   2251     } else {
   2252       results.push({
   2253         name: "counter_clockwise_rotation",
   2254         pass: ccwResult === true,
   2255         detail: ccwResult === true
   2256           ? "Z key rotates opposite direction from Up arrow"
   2257           : ccwResult === false
   2258           ? "Z key does same as Up arrow or does not rotate"
   2259           : "could not determine rotation direction",
   2260       });
   2261     }
   2262   }
   2263 
   2264   // 24. soft_drop_distinct
   2265   if (!phaseState.gameplayWorks || !competitivePlay) {
   2266     results.push(skipResult("soft_drop_distinct", "competitive play phase did not run"));
   2267   } else if (cal.controlMap && cal.controlMap.soft_drop.confidence === "not_found") {
   2268     // Game has no soft drop at all -- not applicable.
   2269     results.push(skipResult("soft_drop_distinct", "no soft_drop key (game has only hard_drop)"));
   2270   } else {
   2271     const softDropTestDone = (competitivePlay as any)._softDropTestDone === true;
   2272     const softDropDistinct = (competitivePlay as any)._softDropDistinct;
   2273     if (!softDropTestDone) {
   2274       results.push(skipResult("soft_drop_distinct", "could not test soft drop behavior"));
   2275     } else {
   2276       results.push({
   2277         name: "soft_drop_distinct",
   2278         pass: softDropDistinct === true,
   2279         detail: softDropDistinct === true
   2280           ? "Down arrow moves piece 1 row (distinct from hard drop)"
   2281           : "Down arrow acts like hard drop (drops to bottom)",
   2282       });
   2283     }
   2284   }
   2285 
   2286   // 25. rendering_clean
   2287   if (!phaseState.gameplayWorks || !competitivePlay) {
   2288     results.push(skipResult("rendering_clean", "competitive play phase did not run"));
   2289   } else if (competitivePlay.rendering_trail_detected === undefined) {
   2290     results.push(skipResult("rendering_clean", "not enough data to assess rendering trails"));
   2291   } else {
   2292     results.push({
   2293       name: "rendering_clean",
   2294       pass: !competitivePlay.rendering_trail_detected,
   2295       detail: competitivePlay.rendering_trail_detected
   2296         ? "rendering trail bug: falling piece leaves old cells colored after moving"
   2297         : "piece movement clears old cells correctly",
   2298     });
   2299   }
   2300 
   2301   return results;
   2302 }
   2303 
   2304 // ---------------------------------------------------------------------------
   2305 // Start-mechanism verification bridge
   2306 // ---------------------------------------------------------------------------
   2307 
   2308 /**
   2309  * The bot's gameplay-grounded check for "did the game really start?"
   2310  *
   2311  * This is the feedback channel the driver leans on. The driver can only see
   2312  * pixel and DOM deltas, so it can be fooled by Pause buttons, overlays, or
   2313  * spurious animations. The bot reads the grid, presses real gameplay keys,
   2314  * and watches for tetris-like behavior.
   2315  *
   2316  * Returns true only when the evidence clearly points to a started game:
   2317  *   - grid detected and populated in a sane range (pieces, not chrome)
   2318  *   - ArrowLeft causes a piece-like change, OR
   2319  *   - waiting ~1s causes the grid to change (auto-drop), OR
   2320  *   - the grid transitions in any measurable way that's not just chrome
   2321  *   - no immediate game over text
   2322  */
   2323 async function verifyGameStarted(driver: TetrisDriver): Promise<{
   2324   ok: boolean;
   2325   reason: string;
   2326 }> {
   2327   // 1. Instant rejection: game over text visible means we started into a
   2328   //    dead state, or clicked a Restart that then immediately ended again.
   2329   try {
   2330     const gameOverText = await driver.detectGameOverText();
   2331     if (gameOverText) {
   2332       return { ok: false, reason: `immediate game over: "${gameOverText}"` };
   2333     }
   2334   } catch { /* continue */ }
   2335 
   2336   // 2. tryStartMechanism() populated a minimal calibration for us, but some
   2337   //    games create their grid cells dynamically inside an animation frame
   2338   //    after the start button is clicked -- the initial detectGrid() can run
   2339   //    before the grid is fully populated. Refresh grid detection now so we
   2340   //    pick up any cells that appeared in the meantime.
   2341   try {
   2342     await driver.refreshGridDetection();
   2343   } catch { /* leave whatever tryStartMechanism populated */ }
   2344 
   2345   let cal;
   2346   try {
   2347     cal = driver.getCalibration();
   2348   } catch {
   2349     cal = null;
   2350   }
   2351   if (!cal || !cal.gridDetected) {
   2352     return { ok: false, reason: "no grid detected after start attempt" };
   2353   }
   2354 
   2355   // 3. Read the grid. Need a sane fill level (pieces, not chrome).
   2356   const snap = await driver.readGrid();
   2357   if (!snap.grid) {
   2358     return { ok: false, reason: "grid read failed" };
   2359   }
   2360   const totalCells = snap.grid.length * (snap.grid[0]?.length || 0);
   2361   if (totalCells === 0) {
   2362     return { ok: false, reason: "grid has zero cells" };
   2363   }
   2364   const fillRatio = snap.filledCount / totalCells;
   2365   // A running game may legitimately start empty, so 0 cells is allowed.
   2366   // But >60% filled likely means we're reading chrome as cells.
   2367   if (fillRatio > 0.6) {
   2368     return {
   2369       ok: false,
   2370       reason: `grid ${Math.round(fillRatio * 100)}% filled (likely reading chrome)`,
   2371     };
   2372   }
   2373 
   2374   // 4. Evidence: press ArrowLeft and see if the grid changes (movement works).
   2375   //    Capture pixel + DOM fingerprint snapshots at each step so we can fall
   2376   //    back to either signal for games that render the active piece outside
   2377   //    the cell layout (e.g. absolute-positioned divs floating over the grid).
   2378   //    Pixel diff is clipped to the grid area; DOM fingerprint catches changes
   2379   //    even when the piece is currently off-screen.
   2380   let movementSeen = false;
   2381   let movementPixelsChanged = false;
   2382   let movementDomChanged = false;
   2383   try {
   2384     const pxBefore = await driver.screenshotGridArea();
   2385     const fpBefore = await driver.captureGridDomFingerprint();
   2386     const before = await driver.readGrid();
   2387     await driver.pressKey("left");
   2388     await driver.wait(250);
   2389     const after = await driver.readGrid();
   2390     const pxAfter = await driver.screenshotGridArea();
   2391     const fpAfter = await driver.captureGridDomFingerprint();
   2392     if (before.grid && after.grid && driver.gridsAreDifferent(before.grid, after.grid)) {
   2393       movementSeen = true;
   2394     }
   2395     if (pxBefore && pxAfter && !pxBefore.equals(pxAfter)) {
   2396       movementPixelsChanged = true;
   2397     }
   2398     if (fpBefore && fpAfter && fpBefore !== fpAfter) {
   2399       movementDomChanged = true;
   2400     }
   2401   } catch { /* fall through to auto-drop check */ }
   2402 
   2403   // 5. Evidence: wait 1.1s and see if the grid changes on its own (auto-drop).
   2404   let autoDropSeen = false;
   2405   let autoDropPixelsChanged = false;
   2406   let autoDropDomChanged = false;
   2407   try {
   2408     const pxBefore = await driver.screenshotGridArea();
   2409     const fpBefore = await driver.captureGridDomFingerprint();
   2410     const before = await driver.readGrid();
   2411     await driver.wait(1100);
   2412     const after = await driver.readGrid();
   2413     const pxAfter = await driver.screenshotGridArea();
   2414     const fpAfter = await driver.captureGridDomFingerprint();
   2415     if (before.grid && after.grid && driver.gridsAreDifferent(before.grid, after.grid)) {
   2416       autoDropSeen = true;
   2417     }
   2418     if (pxBefore && pxAfter && !pxBefore.equals(pxAfter)) {
   2419       autoDropPixelsChanged = true;
   2420     }
   2421     if (fpBefore && fpAfter && fpBefore !== fpAfter) {
   2422       autoDropDomChanged = true;
   2423     }
   2424   } catch { /* fall through */ }
   2425 
   2426   // 6. Second chance at game-over after interaction.
   2427   try {
   2428     const gameOverText = await driver.detectGameOverText();
   2429     if (gameOverText) {
   2430       return { ok: false, reason: `game over after interaction: "${gameOverText}"` };
   2431     }
   2432   } catch { /* continue */ }
   2433 
   2434   if (movementSeen && autoDropSeen) {
   2435     return { ok: true, reason: "movement and auto-drop both observed" };
   2436   }
   2437   if (movementSeen) {
   2438     return { ok: true, reason: "movement key changes the grid" };
   2439   }
   2440   if (autoDropSeen) {
   2441     return { ok: true, reason: "grid changes on its own (auto-drop)" };
   2442   }
   2443 
   2444   // 6b. Pixel-based fallback: if the grid reader can't see movement but the
   2445   //     grid-area pixels changed both on key press AND during auto-drop, we're
   2446   //     almost certainly looking at a running Tetris game that renders its
   2447   //     active piece outside the cell layout (absolute divs, canvas overlay,
   2448   //     etc). Require BOTH signals to avoid accepting spurious animations
   2449   //     (cursor blink, score tick) as gameplay.
   2450   if (movementPixelsChanged && autoDropPixelsChanged) {
   2451     return {
   2452       ok: true,
   2453       reason: "grid-area pixels change on key press and on auto-drop (piece rendered outside cells)",
   2454     };
   2455   }
   2456 
   2457   // 6c. DOM-fingerprint fallback: when the active piece is an absolute-
   2458   //     positioned overlay that happens to be off-screen in the current
   2459   //     viewport (tall sidebars that push the grid out of frame), pixel diff
   2460   //     can come back clean while the DOM still reflects the moving piece.
   2461   //     Require BOTH a key-press-driven change AND an auto-drop-driven change
   2462   //     so static pages with idle timers don't slip through.
   2463   if (movementDomChanged && autoDropDomChanged) {
   2464     return {
   2465       ok: true,
   2466       reason: "grid container DOM changes on key press and on auto-drop (piece rendered outside cells)",
   2467     };
   2468   }
   2469 
   2470   // 7. Weaker fallback: if the grid is populated in a plausible range
   2471   //    (some pieces visible somewhere) and there's no game over, accept it
   2472   //    provisionally. The downstream phases will weed out dead starts.
   2473   if (snap.filledCount > 0 && snap.filledCount < totalCells * 0.5) {
   2474     return {
   2475       ok: false,
   2476       reason: `grid populated (${snap.filledCount} cells) but no movement or auto-drop observed`,
   2477     };
   2478   }
   2479 
   2480   return { ok: false, reason: "no gameplay evidence detected" };
   2481 }
   2482 
   2483 /**
   2484  * Full discovery loop: ask the driver for candidates, try each, verify with
   2485  * verifyGameStarted(), and return the first candidate the bot trusts. Reloads
   2486  * the page between candidates so each attempt starts from a clean state.
   2487  */
   2488 async function detectStartWithVerification(
   2489   driver: TetrisDriver,
   2490   serverUrl: string
   2491 ): Promise<{ candidate: StartCandidate } | null> {
   2492   const log = (msg: string) => console.log(`[bot:start] ${msg}`);
   2493 
   2494   const candidates = await driver.discoverStartCandidates();
   2495   log(`discovered ${candidates.length} candidate(s)`);
   2496 
   2497   for (let i = 0; i < candidates.length; i++) {
   2498     const candidate = candidates[i];
   2499     log(`(${i + 1}/${candidates.length}) trying: ${candidate.label}`);
   2500 
   2501     // Apply without committing.
   2502     let tryResult;
   2503     try {
   2504       tryResult = await driver.tryStartMechanism(candidate);
   2505     } catch (err) {
   2506       log(`  tryStartMechanism threw: ${err instanceof Error ? err.message : String(err)}`);
   2507       await reloadAndClear(driver, serverUrl);
   2508       continue;
   2509     }
   2510 
   2511     // Skip candidates with no observable effect at all.
   2512     if (!tryResult.visualChanged && !tryResult.domChanged && candidate.mechanism !== "auto") {
   2513       log(`  no visual/DOM change, skipping`);
   2514       continue;
   2515     }
   2516     if (tryResult.errorOccurred) {
   2517       log(`  JS error fired during attempt, skipping`);
   2518       await reloadAndClear(driver, serverUrl);
   2519       continue;
   2520     }
   2521 
   2522     // Ask the bot's own verification.
   2523     let verification;
   2524     try {
   2525       verification = await verifyGameStarted(driver);
   2526     } catch (err) {
   2527       log(`  verifyGameStarted threw: ${err instanceof Error ? err.message : String(err)}`);
   2528       await reloadAndClear(driver, serverUrl);
   2529       continue;
   2530     }
   2531 
   2532     if (verification.ok) {
   2533       log(`  VERIFIED: ${verification.reason}`);
   2534       // Important: the page is already in a started state. We clear the
   2535       // driver's cached calibration (without reloading) so the follow-up
   2536       // calibrate() call will re-apply the candidate from scratch -- that
   2537       // way the phase separation (load -> apply -> verify) stays consistent
   2538       // across downstream phases that reload the page.
   2539       await reloadAndClear(driver, serverUrl);
   2540       return { candidate };
   2541     }
   2542 
   2543     log(`  REJECTED: ${verification.reason}`);
   2544     await reloadAndClear(driver, serverUrl);
   2545   }
   2546 
   2547   log("no candidate verified");
   2548   return null;
   2549 }
   2550 
   2551 /** Reload the page and clear any in-flight confirmed candidate. */
   2552 async function reloadAndClear(driver: TetrisDriver, serverUrl: string): Promise<void> {
   2553   try {
   2554     driver.clearConfirmedStartMechanism();
   2555   } catch { /* ignore */ }
   2556   try {
   2557     await driver.loadPage(serverUrl);
   2558   } catch { /* ignore */ }
   2559 }

Impressum · Datenschutz