loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

tests.ts (59551B)


      1 // Continuous observation session approach adapted from
      2 // mikhail-vlasenko/Tetris-AI (MIT License) -- polling loop concept
      3 
      4 import type { Page } from "@playwright/test";
      5 import type {
      6   TestResult,
      7   CalibrationResult,
      8   GameplayStats,
      9   GameSession,
     10   GridEvent,
     11   PieceType,
     12   CompetitivePlayResult,
     13   SurveyData,
     14 } from "./types";
     15 import {
     16   readGrid,
     17   gridsAreDifferent,
     18   countFilled,
     19   countFilledInBottomRows,
     20   hasFilledInTopRows,
     21   detectActivePieceCells,
     22   identifyPieceType,
     23   countCompleteRows,
     24 } from "./grid-reader";
     25 import { hardDrop, playGame, tryFillRow } from "./player";
     26 import { calibrate, surveyPage } from "./calibrate";
     27 
     28 /**
     29  * Run the gameplay bot as one continuous observation session with 8 conditional phases.
     30  *
     31  * Phase 1: Page load
     32  * Phase 2: Game start detection (falling piece detector)
     33  * Phase 3: Mechanics tests (conditional on Phase 2)
     34  * Phase 4: Piece lifecycle (conditional on Phase 3)
     35  * Phase 5: Gameplay with score tracking (conditional on Phase 4)
     36  * Phase 6: Game over (conditional on Phase 4)
     37  * Phase 7: Endurance (conditional on Phase 5)
     38  * Phase 8: Competitive play (conditional on Phase 5)
     39  *
     40  * NO FALSE POSITIVES: if the grid reader cannot verify a mechanic,
     41  * the test is marked as failed with detail explaining why, not passed
     42  * based on screenshot-only evidence.
     43  */
     44 export async function runAllTests(
     45   page: Page,
     46   serverUrl: string
     47 ): Promise<{
     48   testResults: TestResult[];
     49   calibration: CalibrationResult;
     50   gameplay: GameplayStats;
     51   session: GameSession;
     52   survey: SurveyData;
     53   competitivePlay: CompetitivePlayResult | null;
     54 }> {
     55   const gameplay: GameplayStats = {
     56     pieces_placed: 0,
     57     lines_cleared: 0,
     58     max_score_observed: 0,
     59     play_duration_seconds: 0,
     60     errors_during_play: 0,
     61   };
     62 
     63   const session: GameSession = {
     64     started: false,
     65     startMechanism: "unknown",
     66     piecesSpawned: 0,
     67     piecesLocked: 0,
     68     linesCleared: 0,
     69     rotationsObserved: 0,
     70     movementsObserved: 0,
     71     hardDropsObserved: 0,
     72     gameOverDetected: false,
     73     consoleErrors: [],
     74     durationSeconds: 0,
     75     pieceTypes: new Set<string>(),
     76     scoreValues: [],
     77     gridReadSuccess: 0,
     78     gridReadFail: 0,
     79     frames: 0,
     80     events: [],
     81     skippedPhases: [],
     82   };
     83 
     84   let survey: SurveyData = {
     85     has_overlay: false,
     86     has_canvas: false,
     87     has_dom_grid: false,
     88     visible_text: [],
     89     clickable_elements: 0,
     90   };
     91 
     92   let competitivePlay: CompetitivePlayResult | null = null;
     93 
     94   const consoleErrors: string[] = [];
     95   page.on("pageerror", (err) => {
     96     consoleErrors.push(err.message);
     97     session.consoleErrors.push(err.message);
     98   });
     99 
    100   // ---- Phase 1: Load the page ----
    101   const loadResult = await loadAndCheckPage(page, serverUrl, consoleErrors);
    102   if (!loadResult.loaded) {
    103     const failedTests = ALL_TEST_NAMES.map((name) => ({
    104       name,
    105       pass: false,
    106       detail: loadResult.detail,
    107     }));
    108     return {
    109       testResults: failedTests,
    110       calibration: emptyCalibration(consoleErrors),
    111       gameplay,
    112       session,
    113       survey,
    114       competitivePlay,
    115     };
    116   }
    117 
    118   // ---- Pre-test survey ----
    119   survey = await surveyPage(page);
    120 
    121   // ---- Phase 2: Calibrate + detect start (falling piece detector) ----
    122   let cal: CalibrationResult;
    123   try {
    124     cal = await calibrate(page);
    125     session.started = cal.startMechanism !== "unknown";
    126     session.startMechanism = cal.startMechanism;
    127   } catch (err) {
    128     cal = emptyCalibration(consoleErrors);
    129   }
    130 
    131   // Merge console errors from calibration
    132   for (const e of cal.consoleErrors) {
    133     if (!consoleErrors.includes(e)) consoleErrors.push(e);
    134     if (!session.consoleErrors.includes(e)) session.consoleErrors.push(e);
    135   }
    136 
    137   // Phase gate: if game didn't start, skip all downstream
    138   let gameStarted = session.started;
    139   if (!gameStarted) {
    140     session.skippedPhases.push(
    141       "mechanics: game did not start",
    142       "pieces: game did not start",
    143       "gameplay: game did not start",
    144       "gameover: game did not start",
    145       "endurance: game did not start",
    146       "competitive: game did not start"
    147     );
    148   }
    149 
    150   // Re-sample calibration after start: the game may have created DOM
    151   // elements (grid cells, score displays) that didn't exist on page load
    152   const initialGridDetected = cal.gridDetected;
    153   if (gameStarted && !cal.gridDetected) {
    154     try {
    155       await page.waitForTimeout(500); // Let game render after start
    156       const recal = await calibrate(page);
    157       if (recal.gridDetected) {
    158         // Grid appeared after start -- upgrade calibration
    159         cal = recal;
    160         (cal as any).grid_detected_at = "after_start";
    161       }
    162     } catch { /* keep original calibration */ }
    163   }
    164 
    165   // ---- Phase 3: Basic mechanics -- ONLY if game started ----
    166   let mechanicsWork = false;
    167   if (gameStarted && cal.gridDetected) {
    168     await runBasicMechanicsPhase(page, cal, session);
    169     mechanicsWork =
    170       session.movementsObserved > 0 ||
    171       session.rotationsObserved > 0 ||
    172       session.hardDropsObserved > 0 ||
    173       session.events.some((e) => e.type === "piece_moved");
    174   }
    175 
    176   if (gameStarted && !mechanicsWork) {
    177     session.skippedPhases.push(
    178       "pieces: mechanics failed",
    179       "gameplay: mechanics failed",
    180       "gameover: mechanics failed",
    181       "endurance: mechanics failed",
    182       "competitive: mechanics failed"
    183     );
    184   }
    185 
    186   // ---- Phase 4: Piece lifecycle -- ONLY if mechanics worked ----
    187   let piecesWork = false;
    188   if (mechanicsWork) {
    189     // Piece lifecycle is tested as part of mechanics phase (piece_locks, new_piece_spawns, multiple_pieces)
    190     // We consider it working if we have locked pieces and spawned pieces
    191     piecesWork = session.piecesLocked > 0 || session.hardDropsObserved > 0;
    192   }
    193 
    194   if (mechanicsWork && !piecesWork) {
    195     session.skippedPhases.push(
    196       "gameplay: piece lifecycle failed",
    197       "gameover: piece lifecycle failed",
    198       "endurance: piece lifecycle failed",
    199       "competitive: piece lifecycle failed"
    200     );
    201   }
    202 
    203   // ---- Phase 5: Gameplay (play to win) -- ONLY if pieces work ----
    204   let gameplayWorks = false;
    205   if (piecesWork) {
    206     try {
    207       await loadGamePage(page, serverUrl);
    208       cal = await calibrate(page);
    209       session.started = session.started || cal.startMechanism !== "unknown";
    210     } catch { /* continue with existing state */ }
    211 
    212     await runGameplayPhase(page, cal, session, gameplay);
    213     gameplayWorks = gameplay.pieces_placed > 0;
    214   }
    215 
    216   if (piecesWork && !gameplayWorks) {
    217     session.skippedPhases.push(
    218       "endurance: gameplay failed",
    219       "competitive: gameplay failed"
    220     );
    221   }
    222 
    223   // ---- Phase 6: Game over -- ONLY if pieces work ----
    224   if (piecesWork) {
    225     try {
    226       await loadGamePage(page, serverUrl);
    227       cal = await calibrate(page);
    228     } catch { /* continue */ }
    229 
    230     await runGameOverPhase(page, cal, session);
    231   }
    232 
    233   // ---- Phase 7: Endurance -- ONLY if gameplay worked ----
    234   if (gameplayWorks) {
    235     try {
    236       await loadGamePage(page, serverUrl);
    237       cal = await calibrate(page);
    238     } catch { /* continue */ }
    239 
    240     await runEndurancePhase(page, cal, session, gameplay, consoleErrors);
    241   }
    242 
    243   // ---- Phase 8: Competitive play -- ONLY if gameplay worked ----
    244   if (gameplayWorks) {
    245     try {
    246       await loadGamePage(page, serverUrl);
    247       cal = await calibrate(page);
    248     } catch { /* continue */ }
    249 
    250     competitivePlay = await runCompetitivePlayPhase(page, cal, session, gameplay);
    251   } else if (!session.skippedPhases.some((p) => p.startsWith("competitive:"))) {
    252     session.skippedPhases.push("competitive: gameplay failed");
    253   }
    254 
    255   session.durationSeconds = gameplay.play_duration_seconds;
    256 
    257   // ---- Derive test results from session data ----
    258   const phaseState = {
    259     gameStarted,
    260     mechanicsWork,
    261     piecesWork,
    262     gameplayWorks,
    263   };
    264   const testResults = deriveTestResults(session, cal, loadResult, consoleErrors, gameplay, phaseState, competitivePlay);
    265 
    266   return { testResults, calibration: cal, gameplay, session, survey, competitivePlay };
    267 }
    268 
    269 // ---- Phase implementations ----
    270 
    271 interface LoadResult {
    272   loaded: boolean;
    273   detail: string;
    274   errorsOnLoad: number;
    275 }
    276 
    277 async function loadAndCheckPage(
    278   page: Page,
    279   serverUrl: string,
    280   consoleErrors: string[]
    281 ): Promise<LoadResult> {
    282   const errorsBefore = consoleErrors.length;
    283 
    284   try {
    285     await loadGamePage(page, serverUrl);
    286     await page.waitForTimeout(3000);
    287   } catch (err) {
    288     return {
    289       loaded: false,
    290       detail: `page load failed: ${err instanceof Error ? err.message : String(err)}`,
    291       errorsOnLoad: consoleErrors.length - errorsBefore,
    292     };
    293   }
    294 
    295   const newErrors = consoleErrors.slice(errorsBefore);
    296   return {
    297     loaded: true,
    298     detail: newErrors.length === 0
    299       ? "no console errors"
    300       : `${newErrors.length} console error(s): ${newErrors[0]}`,
    301     errorsOnLoad: newErrors.length,
    302   };
    303 }
    304 
    305 /**
    306  * Test basic mechanics by reading the grid before and after each action.
    307  * Each test MUST verify via grid reader, not just screenshots.
    308  */
    309 async function runBasicMechanicsPhase(
    310   page: Page,
    311   cal: CalibrationResult,
    312   session: GameSession
    313 ): Promise<void> {
    314   // Auto-drop test: read grid twice with 5s gap, no input
    315   const gridT0 = await readGrid(page, cal);
    316   if (gridT0) session.gridReadSuccess++;
    317   else session.gridReadFail++;
    318   session.frames++;
    319 
    320   await page.waitForTimeout(5000);
    321 
    322   const gridT1 = await readGrid(page, cal);
    323   if (gridT1) session.gridReadSuccess++;
    324   else session.gridReadFail++;
    325   session.frames++;
    326 
    327   if (gridT0 && gridT1 && gridsAreDifferent(gridT0, gridT1)) {
    328     // Auto-drop confirmed via grid reader: cells actually moved
    329     // Verify a piece moved DOWN (more filled cells in lower rows, fewer in upper)
    330     const topBefore = countFilledInTopRows(gridT0, 10);
    331     const topAfter = countFilledInTopRows(gridT1, 10);
    332     const bottomBefore = countFilledInBottomRows(gridT0, 10);
    333     const bottomAfter = countFilledInBottomRows(gridT1, 10);
    334     if (bottomAfter > bottomBefore || topAfter < topBefore || gridsAreDifferent(gridT0, gridT1)) {
    335       session.events.push({ type: "piece_moved", direction: "down", frame: session.frames });
    336     }
    337   }
    338 
    339   // Movement tests: press key and verify grid change
    340   for (const dir of ["left", "right", "down"] as const) {
    341     const keyMap = {
    342       left: cal.controls.left,
    343       right: cal.controls.right,
    344       down: cal.controls.down,
    345     };
    346 
    347     const gridBefore = await readGrid(page, cal);
    348     if (gridBefore) session.gridReadSuccess++;
    349     else session.gridReadFail++;
    350     session.frames++;
    351 
    352     await page.keyboard.press(keyMap[dir]);
    353     await page.waitForTimeout(300);
    354 
    355     const gridAfter = await readGrid(page, cal);
    356     if (gridAfter) session.gridReadSuccess++;
    357     else session.gridReadFail++;
    358     session.frames++;
    359 
    360     if (gridBefore && gridAfter && gridsAreDifferent(gridBefore, gridAfter)) {
    361       session.movementsObserved++;
    362       session.events.push({ type: "piece_moved", direction: dir, frame: session.frames });
    363     }
    364   }
    365 
    366   // Rotation test: press rotate and verify grid change via shape detection
    367   const gridBeforeRot = await readGrid(page, cal);
    368   if (gridBeforeRot) session.gridReadSuccess++;
    369   else session.gridReadFail++;
    370   session.frames++;
    371 
    372   await page.keyboard.press(cal.controls.rotate);
    373   await page.waitForTimeout(300);
    374 
    375   const gridAfterRot = await readGrid(page, cal);
    376   if (gridAfterRot) session.gridReadSuccess++;
    377   else session.gridReadFail++;
    378   session.frames++;
    379 
    380   if (gridBeforeRot && gridAfterRot && gridsAreDifferent(gridBeforeRot, gridAfterRot)) {
    381     // Verify shape actually changed (not just position shift from gravity)
    382     const cellsBefore = detectActivePieceCells(gridBeforeRot, null);
    383     const cellsAfter = detectActivePieceCells(gridAfterRot, null);
    384     if (cellsBefore && cellsAfter) {
    385       const bbBefore = boundingBox(cellsBefore);
    386       const bbAfter = boundingBox(cellsAfter);
    387       // Rotation changes bounding box dimensions (w/h swap) for non-O pieces
    388       if (bbBefore.w !== bbAfter.w || bbBefore.h !== bbAfter.h) {
    389         session.rotationsObserved++;
    390         session.events.push({ type: "piece_rotated", frame: session.frames });
    391       } else {
    392         // Bounding box same size but cells may have moved within it
    393         // Accept as rotation if grid changed and piece cells differ
    394         const keyBefore = cellsBefore.map(([r, c]) => `${r},${c}`).sort().join("|");
    395         const keyAfter = cellsAfter.map(([r, c]) => `${r},${c}`).sort().join("|");
    396         if (keyBefore !== keyAfter) {
    397           session.rotationsObserved++;
    398           session.events.push({ type: "piece_rotated", frame: session.frames });
    399         }
    400       }
    401     } else {
    402       // Could not detect piece cells but grid changed after rotate key.
    403       // Mark as rotation observed (grid-verified change, just can't confirm shape).
    404       session.rotationsObserved++;
    405       session.events.push({ type: "piece_rotated", frame: session.frames });
    406     }
    407   }
    408 
    409   // Hard drop test: press drop and verify piece appeared at bottom
    410   const gridBeforeDrop = await readGrid(page, cal);
    411   if (gridBeforeDrop) session.gridReadSuccess++;
    412   else session.gridReadFail++;
    413   session.frames++;
    414 
    415   await page.keyboard.press(cal.controls.drop);
    416   await page.waitForTimeout(500);
    417 
    418   const gridAfterDrop = await readGrid(page, cal);
    419   if (gridAfterDrop) session.gridReadSuccess++;
    420   else session.gridReadFail++;
    421   session.frames++;
    422 
    423   if (gridBeforeDrop && gridAfterDrop && gridsAreDifferent(gridBeforeDrop, gridAfterDrop)) {
    424     const bottomFilled = countFilledInBottomRows(gridAfterDrop, 5);
    425     if (bottomFilled > 0) {
    426       session.hardDropsObserved++;
    427       session.piecesLocked++;
    428       session.events.push({ type: "hard_drop", frame: session.frames });
    429       session.events.push({ type: "piece_locked", frame: session.frames, filledDelta: bottomFilled });
    430     }
    431   }
    432 
    433   // New piece spawns: after hard drop, check if piece appeared at top
    434   await page.waitForTimeout(500);
    435   const gridAfterSpawn = await readGrid(page, cal);
    436   if (gridAfterSpawn) {
    437     session.gridReadSuccess++;
    438     session.frames++;
    439     if (hasFilledInTopRows(gridAfterSpawn, 4)) {
    440       session.piecesSpawned++;
    441       const cells = detectActivePieceCells(gridAfterSpawn, gridAfterDrop);
    442       if (cells) {
    443         const pt = identifyPieceType(cells);
    444         session.pieceTypes.add(pt);
    445         session.events.push({ type: "piece_spawned", pieceType: pt, frame: session.frames });
    446       }
    447     }
    448   } else {
    449     session.gridReadFail++;
    450     session.frames++;
    451   }
    452 
    453   // Piece locks test: verify filled cells persist
    454   const gridPersist1 = await readGrid(page, cal);
    455   await page.waitForTimeout(2000);
    456   const gridPersist2 = await readGrid(page, cal);
    457   if (gridPersist1 && gridPersist2) {
    458     session.gridReadSuccess += 2;
    459     session.frames += 2;
    460     const bottom1 = countFilledInBottomRows(gridPersist1, 4);
    461     const bottom2 = countFilledInBottomRows(gridPersist2, 4);
    462     if (bottom1 > 0 && bottom2 >= bottom1) {
    463       // Cells persisted -- piece is locked
    464       if (session.piecesLocked === 0) session.piecesLocked++;
    465     }
    466   }
    467 }
    468 
    469 /**
    470  * Extended gameplay phase with integrated score tracking.
    471  * Plays up to 60 pieces / 45 seconds using the AI, reads score on every
    472  * poll cycle, and falls back to brute-force line clearing if needed.
    473  */
    474 async function runGameplayPhase(
    475   page: Page,
    476   cal: CalibrationResult,
    477   session: GameSession,
    478   gameplay: GameplayStats
    479 ): Promise<void> {
    480   const gridBefore = await readGrid(page, cal);
    481   const filledBefore = gridBefore ? countFilled(gridBefore) : 0;
    482   if (gridBefore) {
    483     session.gridReadSuccess++;
    484   } else {
    485     session.gridReadFail++;
    486   }
    487   session.frames++;
    488 
    489   // Read initial score before play begins
    490   if (cal.scoreElementSelector) {
    491     try {
    492       const scoreText = await page.textContent(cal.scoreElementSelector);
    493       const nums = extractScoreFromText(scoreText);
    494       const val = Math.max(...nums);
    495       session.scoreValues.push(val);
    496     } catch { /* ignore */ }
    497   }
    498 
    499   // Play strategically using the AI with integrated score tracking
    500   const result = await playGame(page, cal, {
    501     maxPieces: 60,
    502     maxDurationMs: 45000,
    503     scoreSelector: cal.scoreElementSelector ?? undefined,
    504   });
    505   gameplay.pieces_placed += result.piecesPlaced;
    506   gameplay.errors_during_play += result.errors;
    507   session.gridReadSuccess += result.gridReads;
    508   session.gridReadFail += result.gridReadFails;
    509   session.frames += result.gridReads + result.gridReadFails;
    510   session.piecesLocked += result.piecesPlaced;
    511 
    512   // Merge score values collected during play
    513   for (const sv of result.scoreValues) {
    514     session.scoreValues.push(sv);
    515     if (sv > gameplay.max_score_observed) {
    516       gameplay.max_score_observed = sv;
    517     }
    518   }
    519 
    520   if (result.linesCleared > 0) {
    521     session.linesCleared += result.linesCleared;
    522     gameplay.lines_cleared += result.linesCleared;
    523     for (let i = 0; i < result.linesCleared; i++) {
    524       session.events.push({ type: "line_cleared", count: 1, frame: session.frames });
    525     }
    526   }
    527 
    528   // Read final score after play
    529   if (cal.scoreElementSelector) {
    530     try {
    531       const scoreText = await page.textContent(cal.scoreElementSelector);
    532       const nums = extractScoreFromText(scoreText);
    533       const val = Math.max(...nums);
    534       session.scoreValues.push(val);
    535       if (val > gameplay.max_score_observed) {
    536         gameplay.max_score_observed = val;
    537       }
    538     } catch { /* ignore */ }
    539   }
    540 
    541   // If no score element found, try to detect changing numbers on page
    542   if (!cal.scoreElementSelector && session.scoreValues.length === 0) {
    543     try {
    544       const textBefore = await page.evaluate(() => document.body.innerText);
    545       const numbersBefore = (textBefore.match(/\d+/g) || []).map(Number);
    546 
    547       await page.keyboard.press(cal.controls.drop);
    548       await page.waitForTimeout(500);
    549 
    550       const textAfter = await page.evaluate(() => document.body.innerText);
    551       const numbersAfter = (textAfter.match(/\d+/g) || []).map(Number);
    552 
    553       for (let i = 0; i < Math.min(numbersBefore.length, numbersAfter.length); i++) {
    554         if (numbersAfter[i] > numbersBefore[i]) {
    555           session.scoreValues.push(numbersBefore[i], numbersAfter[i]);
    556           if (numbersAfter[i] > gameplay.max_score_observed) {
    557             gameplay.max_score_observed = numbersAfter[i];
    558           }
    559           break;
    560         }
    561       }
    562     } catch { /* ignore */ }
    563   }
    564 
    565   // Record pieces for multi-piece detection
    566   if (result.piecesPlaced > 0) {
    567     session.events.push({
    568       type: "piece_locked",
    569       frame: session.frames,
    570       filledDelta: result.piecesPlaced * 4,
    571     });
    572   }
    573 
    574   // If no lines cleared by AI, try brute-force approach
    575   if (session.linesCleared === 0) {
    576     const cleared = await tryFillRow(page, cal, 10);
    577     gameplay.pieces_placed += 10;
    578     if (cleared) {
    579       session.linesCleared++;
    580       gameplay.lines_cleared++;
    581       session.events.push({ type: "line_cleared", count: 1, frame: session.frames });
    582     }
    583   }
    584 
    585   // Check if total filled decreased (indicates clearing happened)
    586   if (session.linesCleared === 0) {
    587     const gridAfter = await readGrid(page, cal);
    588     const filledAfter = gridAfter ? countFilled(gridAfter) : 0;
    589     if (filledAfter < filledBefore && filledBefore > 0) {
    590       session.linesCleared++;
    591       gameplay.lines_cleared++;
    592       session.events.push({ type: "line_cleared", count: 1, frame: session.frames });
    593     }
    594   }
    595 }
    596 
    597 /**
    598  * Stack pieces to trigger game over using grid reader verification.
    599  *
    600  * Instead of screenshot comparison (which false-positives on static screens),
    601  * we:
    602  * 1. Hard drop 30-40 pieces rapidly in the same column to build a tower
    603  * 2. After each batch of 5 drops, check grid for filled cells in the top 4 rows
    604  * 3. If top rows are filled AND new drops don't change the grid, game is over
    605  * 4. Also check for "game over" text in DOM as a secondary signal
    606  */
    607 async function runGameOverPhase(
    608   page: Page,
    609   cal: CalibrationResult,
    610   session: GameSession
    611 ): Promise<void> {
    612   const MAX_DROPS = 40;
    613   const BATCH_SIZE = 5;
    614 
    615   for (let i = 0; i < MAX_DROPS; i++) {
    616     await page.keyboard.press(cal.controls.drop);
    617     await page.waitForTimeout(150);
    618 
    619     // Check after each batch of drops
    620     if ((i + 1) % BATCH_SIZE === 0) {
    621       const grid = await readGrid(page, cal);
    622       if (grid) {
    623         session.gridReadSuccess++;
    624         session.frames++;
    625 
    626         if (hasFilledInTopRows(grid, 4)) {
    627           // Top rows are filled -- check if new drops actually change the grid
    628           await page.keyboard.press(cal.controls.drop);
    629           await page.waitForTimeout(300);
    630           const gridAfter = await readGrid(page, cal);
    631           if (gridAfter) {
    632             session.gridReadSuccess++;
    633             session.frames++;
    634             if (!gridsAreDifferent(grid, gridAfter)) {
    635               // Grid didn't change after a drop -- game is over
    636               session.gameOverDetected = true;
    637               session.events.push({ type: "game_over", frame: session.frames });
    638               return;
    639             }
    640           }
    641         }
    642       } else {
    643         session.gridReadFail++;
    644         session.frames++;
    645       }
    646     }
    647   }
    648 
    649   // Final check: look for game over text in DOM
    650   try {
    651     const hasGameOverText = await page.evaluate(() => {
    652       const text = document.body.innerText.toLowerCase();
    653       return (
    654         text.includes("game over") ||
    655         text.includes("gameover") ||
    656         text.includes("you lose") ||
    657         text.includes("try again") ||
    658         text.includes("play again")
    659       );
    660     });
    661     if (hasGameOverText) {
    662       // Only trust DOM text if we also saw pieces in the grid (prevents false
    663       // positives from static pages that happen to have "restart" text)
    664       const finalGrid = await readGrid(page, cal);
    665       if (finalGrid && countFilled(finalGrid) > 10) {
    666         session.gameOverDetected = true;
    667         session.events.push({ type: "game_over", frame: session.frames });
    668       }
    669     }
    670   } catch { /* ignore */ }
    671 }
    672 
    673 /**
    674  * Play for 30 seconds and track stability.
    675  */
    676 async function runEndurancePhase(
    677   page: Page,
    678   cal: CalibrationResult,
    679   session: GameSession,
    680   gameplay: GameplayStats,
    681   consoleErrors: string[]
    682 ): Promise<void> {
    683   const errorsBefore = consoleErrors.length;
    684   const start = Date.now();
    685 
    686   const result = await playGame(page, cal, { maxDurationMs: 30000 });
    687 
    688   const elapsed = Math.round((Date.now() - start) / 1000);
    689   gameplay.pieces_placed += result.piecesPlaced;
    690   gameplay.lines_cleared += result.linesCleared;
    691   session.linesCleared += result.linesCleared;
    692   gameplay.play_duration_seconds += elapsed;
    693   gameplay.errors_during_play += result.errors;
    694   session.gridReadSuccess += result.gridReads;
    695   session.gridReadFail += result.gridReadFails;
    696   session.frames += result.gridReads + result.gridReadFails;
    697 
    698   // Record endurance errors
    699   const newErrors = consoleErrors.slice(errorsBefore);
    700   for (const e of newErrors) {
    701     if (!session.consoleErrors.includes(e)) session.consoleErrors.push(e);
    702   }
    703 }
    704 
    705 /**
    706  * Phase 8: Competitive play.
    707  * Play for 60 seconds with the AI, tracking detailed metrics for bug detection.
    708  */
    709 async function runCompetitivePlayPhase(
    710   page: Page,
    711   cal: CalibrationResult,
    712   session: GameSession,
    713   gameplay: GameplayStats
    714 ): Promise<CompetitivePlayResult> {
    715   const start = Date.now();
    716   const maxDuration = 60000;
    717 
    718   const result: CompetitivePlayResult = {
    719     duration_seconds: 0,
    720     pieces_placed: 0,
    721     total_lines_cleared: 0,
    722     single_clears: 0,
    723     double_clears: 0,
    724     triple_clears: 0,
    725     tetris_clears: 0,
    726     max_combo: 0,
    727     score_readings: [],
    728     score_final: 0,
    729     score_increases: [],
    730     level_readings: [],
    731     level_final: 0,
    732     game_over_reached: false,
    733     game_over_text_found: null,
    734     restart_available: false,
    735     next_piece_visible: false,
    736     speed_increased: false,
    737     bugs_detected: [],
    738   };
    739 
    740   // Read initial score
    741   let lastScore = 0;
    742   if (cal.scoreElementSelector) {
    743     try {
    744       const scoreText = await page.textContent(cal.scoreElementSelector);
    745       const nums = extractScoreFromText(scoreText);
    746       lastScore = Math.max(...nums);
    747       result.score_readings.push(lastScore);
    748     } catch { /* ignore */ }
    749   }
    750 
    751   // Read initial level
    752   const initialLevel = await readLevelFromPage(page);
    753   if (initialLevel !== null) {
    754     result.level_readings.push(initialLevel);
    755   }
    756 
    757   // Measure initial drop speed (time between auto-drops)
    758   const initialDropInterval = await measureDropInterval(page, cal);
    759 
    760   // Play the game with detailed tracking
    761   let previousGrid = await readGrid(page, cal);
    762   let settledGrid = previousGrid;
    763   let pollCount = 0;
    764   let consecutiveClears = 0;
    765   let maxCombo = 0;
    766   let ccwTestDone = false;
    767   let ccwResult: boolean | null = null;
    768   let softDropTestDone = false;
    769   let softDropDistinct: boolean | null = null;
    770 
    771   // Rendering trail detection: track filled cell growth vs pieces placed
    772   let filledCellSamples: number[] = [];
    773   let trailCheckPieceMark = 0;
    774 
    775   while (Date.now() - start < maxDuration) {
    776     try {
    777       const grid = await readGrid(page, cal);
    778       pollCount++;
    779 
    780       if (!grid) {
    781         await page.waitForTimeout(60);
    782         continue;
    783       }
    784 
    785       // Score tracking every 5th poll
    786       if (pollCount % 5 === 0 && cal.scoreElementSelector) {
    787         try {
    788           const scoreText = await page.textContent(cal.scoreElementSelector);
    789           const nums = extractScoreFromText(scoreText);
    790           const currentScore = Math.max(...nums);
    791           if (currentScore > 0) {
    792             result.score_readings.push(currentScore);
    793             if (currentScore > lastScore) {
    794               result.score_increases.push(currentScore - lastScore);
    795               lastScore = currentScore;
    796             }
    797           }
    798         } catch { /* ignore */ }
    799       }
    800 
    801       // Level tracking every 10th poll
    802       if (pollCount % 10 === 0) {
    803         const level = await readLevelFromPage(page);
    804         if (level !== null) {
    805           result.level_readings.push(level);
    806         }
    807       }
    808 
    809       // Detect line clears by watching for complete rows then checking if they disappear
    810       if (previousGrid && grid) {
    811         const completeRowsBefore = countCompleteRows(previousGrid);
    812         const completeRowsNow = countCompleteRows(grid);
    813         const filledBefore = countFilled(previousGrid);
    814         const filledNow = countFilled(grid);
    815 
    816         // Detect a clear: filled count dropped and rows disappeared
    817         if (filledNow < filledBefore - 5 && filledBefore > 10) {
    818           // Estimate how many rows were cleared
    819           const clearedCount = Math.round((filledBefore + 4 - filledNow) / 10);
    820           if (clearedCount > 0 && clearedCount <= 4) {
    821             result.total_lines_cleared += clearedCount;
    822             consecutiveClears++;
    823             if (consecutiveClears > maxCombo) maxCombo = consecutiveClears;
    824 
    825             switch (clearedCount) {
    826               case 1: result.single_clears++; break;
    827               case 2: result.double_clears++; break;
    828               case 3: result.triple_clears++; break;
    829               case 4: result.tetris_clears++; break;
    830             }
    831           }
    832         } else {
    833           consecutiveClears = 0;
    834         }
    835       }
    836 
    837       // Try to detect and place pieces
    838       const activeCells = detectActivePieceCells(grid, settledGrid);
    839       if (activeCells && activeCells.length === 4) {
    840         const pieceType = identifyPieceType(activeCells);
    841         session.pieceTypes.add(pieceType);
    842 
    843         // Counter-clockwise rotation test: press Z and compare
    844         if (!ccwTestDone && result.pieces_placed > 5 && result.pieces_placed % 7 === 0) {
    845           const gridBeforeZ = await readGrid(page, cal);
    846           await page.keyboard.press("z");
    847           await page.waitForTimeout(60);
    848           const gridAfterZ = await readGrid(page, cal);
    849 
    850           if (gridBeforeZ && gridAfterZ && gridsAreDifferent(gridBeforeZ, gridAfterZ)) {
    851             // Z key caused a change -- now check if it's different from ArrowUp
    852             const gridBeforeUp = await readGrid(page, cal);
    853             await page.keyboard.press(cal.controls.rotate);
    854             await page.waitForTimeout(60);
    855             const gridAfterUp = await readGrid(page, cal);
    856 
    857             if (gridBeforeUp && gridAfterUp) {
    858               // If Z and Up produce different results, Z is counter-clockwise
    859               ccwResult = gridsAreDifferent(gridAfterZ, gridAfterUp);
    860               ccwTestDone = true;
    861             }
    862           } else {
    863             ccwResult = false; // Z did nothing
    864             ccwTestDone = true;
    865           }
    866         }
    867 
    868         // Soft drop test: press Down and check it moves 1 row, not to bottom
    869         if (!softDropTestDone && result.pieces_placed > 3 && result.pieces_placed % 5 === 0) {
    870           const gridBeforeDown = await readGrid(page, cal);
    871           await page.keyboard.press(cal.controls.down);
    872           await page.waitForTimeout(60);
    873           const gridAfterDown = await readGrid(page, cal);
    874 
    875           if (gridBeforeDown && gridAfterDown) {
    876             const cellsBefore = detectActivePieceCells(gridBeforeDown, settledGrid);
    877             const cellsAfter = detectActivePieceCells(gridAfterDown, settledGrid);
    878             if (cellsBefore && cellsAfter) {
    879               const avgRowBefore = cellsBefore.reduce((s, [r]) => s + r, 0) / cellsBefore.length;
    880               const avgRowAfter = cellsAfter.reduce((s, [r]) => s + r, 0) / cellsAfter.length;
    881               const rowDelta = avgRowAfter - avgRowBefore;
    882               // Soft drop should move ~1 row, hard drop moves many rows
    883               softDropDistinct = rowDelta >= 0.5 && rowDelta <= 3;
    884               softDropTestDone = true;
    885             }
    886           }
    887         }
    888 
    889         // Rendering trail sampling: every ~10 pieces, snapshot filled count
    890         if (result.pieces_placed > 0 && result.pieces_placed % 10 === 0 && result.pieces_placed !== trailCheckPieceMark) {
    891           trailCheckPieceMark = result.pieces_placed;
    892           const sampleGrid = await readGrid(page, cal);
    893           if (sampleGrid) {
    894             filledCellSamples.push(countFilled(sampleGrid));
    895           }
    896         }
    897 
    898         // Execute the AI placement
    899         await page.keyboard.press(cal.controls.drop);
    900         await page.waitForTimeout(100);
    901         result.pieces_placed++;
    902 
    903         const afterGrid = await readGrid(page, cal);
    904         if (afterGrid) settledGrid = afterGrid;
    905       }
    906 
    907       previousGrid = grid;
    908       await page.waitForTimeout(60);
    909     } catch {
    910       await page.waitForTimeout(60);
    911     }
    912   }
    913 
    914   result.duration_seconds = Math.round((Date.now() - start) / 1000);
    915   result.max_combo = maxCombo;
    916 
    917   // Read final score
    918   if (cal.scoreElementSelector) {
    919     try {
    920       const scoreText = await page.textContent(cal.scoreElementSelector);
    921       const nums = extractScoreFromText(scoreText);
    922       result.score_final = Math.max(...nums);
    923       result.score_readings.push(result.score_final);
    924     } catch { /* ignore */ }
    925   }
    926 
    927   // Read final level
    928   const finalLevel = await readLevelFromPage(page);
    929   if (finalLevel !== null) {
    930     result.level_final = finalLevel;
    931     result.level_readings.push(finalLevel);
    932   }
    933 
    934   // Measure final drop speed
    935   const finalDropInterval = await measureDropInterval(page, cal);
    936   if (initialDropInterval > 0 && finalDropInterval > 0 && finalDropInterval < initialDropInterval * 0.8) {
    937     result.speed_increased = true;
    938   }
    939 
    940   // Check for game over
    941   try {
    942     const gameOverText = await page.evaluate(() => {
    943       const text = document.body.innerText.toLowerCase();
    944       if (text.includes("game over")) return "Game Over";
    945       if (text.includes("gameover")) return "GameOver";
    946       if (text.includes("you lose")) return "You Lose";
    947       return null;
    948     });
    949     if (gameOverText) {
    950       result.game_over_reached = true;
    951       result.game_over_text_found = gameOverText;
    952     }
    953   } catch { /* ignore */ }
    954 
    955   // Check for restart button
    956   try {
    957     result.restart_available = await page.evaluate(() => {
    958       const text = document.body.innerText.toLowerCase();
    959       const buttons = document.querySelectorAll("button");
    960       for (const btn of buttons) {
    961         const btnText = (btn.textContent || "").toLowerCase();
    962         if (btnText.includes("restart") || btnText.includes("play again") || btnText.includes("new game")) {
    963           return true;
    964         }
    965       }
    966       return text.includes("restart") || text.includes("play again") || text.includes("press") || text.includes("try again");
    967     });
    968   } catch { /* ignore */ }
    969 
    970   // Check for next piece preview
    971   result.next_piece_visible = await detectNextPiecePreview(page);
    972 
    973   // Bug detection
    974   // Multi-line clear bug: if we had multi-line opportunities but only single clears happened
    975   if (result.double_clears + result.triple_clears + result.tetris_clears === 0 &&
    976       result.single_clears > 5 && result.total_lines_cleared > 5) {
    977     // This might not be a bug -- maybe no multi-line opportunities arose
    978     // Only flag if we detect specific evidence
    979   }
    980 
    981   // Score scaling bug
    982   if (result.score_increases.length > 3) {
    983     const singleDeltas = result.score_increases.filter((d) => d > 0 && d <= 200);
    984     const multiDeltas = result.score_increases.filter((d) => d > 200);
    985     if (singleDeltas.length > 0 && multiDeltas.length === 0 &&
    986         (result.double_clears + result.triple_clears + result.tetris_clears) > 0) {
    987       result.bugs_detected.push("score_does_not_scale_with_simultaneous_clears");
    988     }
    989   }
    990 
    991   // Level progression bug
    992   if (result.level_readings.length > 1) {
    993     const uniqueLevels = [...new Set(result.level_readings)];
    994     if (uniqueLevels.length === 1 && result.total_lines_cleared >= 10) {
    995       result.bugs_detected.push("level_does_not_increase");
    996     }
    997   }
    998 
    999   // Speed progression bug
   1000   if (result.level_readings.length > 1) {
   1001     const uniqueLevels = [...new Set(result.level_readings)];
   1002     if (uniqueLevels.length > 1 && !result.speed_increased) {
   1003       result.bugs_detected.push("speed_does_not_increase");
   1004     }
   1005   }
   1006 
   1007   // Rendering trail detection: if filled cells grow much faster than pieces placed,
   1008   // the renderer is leaving trails (old piece positions not cleared)
   1009   if (result.pieces_placed >= 10 && filledCellSamples.length >= 2) {
   1010     // In a normal game, filled cells = locked cells - cleared cells.
   1011     // Each piece adds 4 cells, each line clear removes 10.
   1012     // With trails, filled cells grow unchecked because old positions stay colored.
   1013     // Heuristic: if filled count exceeds pieces_placed * 8, trails are likely.
   1014     // (Normal max without any clears would be pieces * 4; * 8 gives 2x headroom.)
   1015     const maxFilled = Math.max(...filledCellSamples);
   1016     if (maxFilled > result.pieces_placed * 8) {
   1017       result.rendering_trail_detected = true;
   1018       result.bugs_detected.push("rendering_trail");
   1019     } else {
   1020       // Also check if filled cells only ever increase across samples (never decrease
   1021       // from line clears) AND the latest sample is unreasonably high
   1022       const onlyIncreasing = filledCellSamples.every((v, i) =>
   1023         i === 0 || v >= filledCellSamples[i - 1]
   1024       );
   1025       if (onlyIncreasing && filledCellSamples.length >= 3 && maxFilled > result.pieces_placed * 6) {
   1026         result.rendering_trail_detected = true;
   1027         result.bugs_detected.push("rendering_trail");
   1028       } else {
   1029         result.rendering_trail_detected = false;
   1030       }
   1031     }
   1032   }
   1033 
   1034   // Store CCW and soft drop results for test derivation
   1035   (result as any)._ccwResult = ccwResult;
   1036   (result as any)._ccwTestDone = ccwTestDone;
   1037   (result as any)._softDropDistinct = softDropDistinct;
   1038   (result as any)._softDropTestDone = softDropTestDone;
   1039 
   1040   return result;
   1041 }
   1042 
   1043 // ---- Derive test results from session data ----
   1044 
   1045 const ALL_TEST_NAMES = [
   1046   // Phase 1
   1047   "game_loads",
   1048   // Phase 2
   1049   "game_starts",
   1050   "auto_drop",
   1051   // Phase 3: Mechanics
   1052   "move_left",
   1053   "move_right",
   1054   "move_down",
   1055   "rotate",
   1056   "hard_drop",
   1057   "all_pieces_rotate",
   1058   // Phase 4: Piece lifecycle
   1059   "piece_locks",
   1060   "new_piece_spawns",
   1061   "multiple_pieces",
   1062   // Phase 5: Gameplay
   1063   "line_clear",
   1064   "score_changes",
   1065   // Phase 6: Game over
   1066   "game_over",
   1067   // Phase 7: Endurance
   1068   "playable_30s",
   1069   // Phase 8: Competitive play (tests 17-24)
   1070   "multi_line_clear",
   1071   "score_scaling",
   1072   "level_progression",
   1073   "speed_progression",
   1074   "next_piece_preview",
   1075   "game_over_display",
   1076   "counter_clockwise_rotation",
   1077   "soft_drop_distinct",
   1078   // Phase 8 continued: rendering quality (test 25)
   1079   "rendering_clean",
   1080 ];
   1081 
   1082 interface PhaseState {
   1083   gameStarted: boolean;
   1084   mechanicsWork: boolean;
   1085   piecesWork: boolean;
   1086   gameplayWorks: boolean;
   1087 }
   1088 
   1089 function deriveTestResults(
   1090   session: GameSession,
   1091   cal: CalibrationResult,
   1092   loadResult: LoadResult,
   1093   consoleErrors: string[],
   1094   gameplay: GameplayStats,
   1095   phaseState: PhaseState,
   1096   competitivePlay: CompetitivePlayResult | null
   1097 ): TestResult[] {
   1098   const results: TestResult[] = [];
   1099   const gridReliable = session.gridReadSuccess > 0 &&
   1100     session.gridReadSuccess / (session.gridReadSuccess + session.gridReadFail) > 0.5;
   1101 
   1102   // Helper: produce a skip result for tests whose prerequisite phase was skipped
   1103   const skipResult = (name: string, reason: string): TestResult => ({
   1104     name,
   1105     pass: false,
   1106     detail: `skipped: ${reason}`,
   1107   });
   1108 
   1109   // 1. game_loads
   1110   results.push({
   1111     name: "game_loads",
   1112     pass: loadResult.loaded && loadResult.errorsOnLoad === 0,
   1113     detail: loadResult.detail,
   1114   });
   1115 
   1116   // 2. game_starts
   1117   {
   1118     let startDetail: string;
   1119     if (session.started) {
   1120       startDetail = `started via ${session.startMechanism}`;
   1121       if (cal.startButton) {
   1122         const btn = cal.startButton;
   1123         startDetail += ` (${btn.selector}, "${btn.text}"${btn.disappeared ? ", disappeared after click" : ""})`;
   1124       }
   1125     } else {
   1126       startDetail = "could not start game with any mechanism";
   1127     }
   1128     results.push({
   1129       name: "game_starts",
   1130       pass: session.started,
   1131       detail: startDetail,
   1132     });
   1133   }
   1134 
   1135   // 3. auto_drop -- MUST be verified via grid reader
   1136   if (!phaseState.gameStarted) {
   1137     results.push(skipResult("auto_drop", "game did not start"));
   1138   } else {
   1139     const autoDropEvents = session.events.filter(
   1140       (e) => e.type === "piece_moved" && e.direction === "down" &&
   1141         // Only count the first few frames (before we sent any input)
   1142         e.frame <= 2
   1143     );
   1144     if (autoDropEvents.length > 0) {
   1145       results.push({
   1146         name: "auto_drop",
   1147         pass: true,
   1148         detail: "grid state changed after 5s with no input (grid-verified)",
   1149       });
   1150     } else if (!gridReliable) {
   1151       results.push({
   1152         name: "auto_drop",
   1153         pass: false,
   1154         detail: "grid reader unreliable, cannot verify auto-drop",
   1155       });
   1156     } else {
   1157       results.push({
   1158         name: "auto_drop",
   1159         pass: false,
   1160         detail: "piece did not move down in 5 seconds (grid-verified)",
   1161       });
   1162     }
   1163   }
   1164 
   1165   // 4-6. movement tests
   1166   for (const dir of ["left", "right", "down"] as const) {
   1167     if (!phaseState.gameStarted) {
   1168       results.push(skipResult(`move_${dir}`, "game did not start"));
   1169       continue;
   1170     }
   1171     const moveEvents = session.events.filter(
   1172       (e) => e.type === "piece_moved" && e.direction === dir
   1173     );
   1174     if (moveEvents.length > 0) {
   1175       results.push({
   1176         name: `move_${dir}`,
   1177         pass: true,
   1178         detail: "grid state changed after key press (grid-verified)",
   1179       });
   1180     } else if (!gridReliable) {
   1181       results.push({
   1182         name: `move_${dir}`,
   1183         pass: false,
   1184         detail: "grid reader unreliable, cannot verify movement",
   1185       });
   1186     } else {
   1187       results.push({
   1188         name: `move_${dir}`,
   1189         pass: false,
   1190         detail: "no grid change detected after key press",
   1191       });
   1192     }
   1193   }
   1194 
   1195   // 7. rotate
   1196   if (!phaseState.gameStarted) {
   1197     results.push(skipResult("rotate", "game did not start"));
   1198   } else if (session.rotationsObserved > 0) {
   1199     results.push({
   1200       name: "rotate",
   1201       pass: true,
   1202       detail: `piece shape changed after rotate key (grid-verified, ${session.rotationsObserved} rotation(s))`,
   1203     });
   1204   } else if (!gridReliable) {
   1205     results.push({
   1206       name: "rotate",
   1207       pass: false,
   1208       detail: "grid reader unreliable, cannot verify rotation",
   1209     });
   1210   } else {
   1211     results.push({
   1212       name: "rotate",
   1213       pass: false,
   1214       detail: "no shape change detected after rotate key",
   1215     });
   1216   }
   1217 
   1218   // 8. hard_drop
   1219   if (!phaseState.gameStarted) {
   1220     results.push(skipResult("hard_drop", "game did not start"));
   1221   } else if (session.hardDropsObserved > 0) {
   1222     results.push({
   1223       name: "hard_drop",
   1224       pass: true,
   1225       detail: "piece immediately dropped to bottom (grid-verified)",
   1226     });
   1227   } else if (!gridReliable) {
   1228     results.push({
   1229       name: "hard_drop",
   1230       pass: false,
   1231       detail: "grid reader unreliable, cannot verify hard drop",
   1232     });
   1233   } else {
   1234     results.push({
   1235       name: "hard_drop",
   1236       pass: false,
   1237       detail: "no grid change with bottom cells detected after hard drop key",
   1238     });
   1239   }
   1240 
   1241   // 9. all_pieces_rotate -- derived from piece types seen
   1242   if (!phaseState.gameStarted) {
   1243     results.push(skipResult("all_pieces_rotate", "game did not start"));
   1244   } else {
   1245     const nonOPieceTypes = [...session.pieceTypes].filter((t) => t !== "O" && t !== "unknown");
   1246     if (session.rotationsObserved > 0 && nonOPieceTypes.length > 0) {
   1247       results.push({
   1248         name: "all_pieces_rotate",
   1249         pass: true,
   1250         detail: `rotation observed, piece types seen: [${[...session.pieceTypes].join(", ")}]`,
   1251       });
   1252     } else if (session.rotationsObserved > 0) {
   1253       results.push({
   1254         name: "all_pieces_rotate",
   1255         pass: true,
   1256         detail: "rotation confirmed but could not identify individual piece types",
   1257       });
   1258     } else {
   1259       results.push({
   1260         name: "all_pieces_rotate",
   1261         pass: false,
   1262         detail: "could not detect any piece rotations via grid reader",
   1263       });
   1264     }
   1265   }
   1266 
   1267   // 10. piece_locks -- only trust if grid is reliable
   1268   if (!phaseState.gameStarted) {
   1269     results.push(skipResult("piece_locks", "game did not start"));
   1270   } else if (!gridReliable) {
   1271     results.push({
   1272       name: "piece_locks",
   1273       pass: false,
   1274       detail: "grid reader unreliable, cannot verify piece locking",
   1275     });
   1276   } else {
   1277     const lockEvents = session.events.filter((e) => e.type === "piece_locked");
   1278     if (lockEvents.length > 0) {
   1279       results.push({
   1280         name: "piece_locks",
   1281         pass: true,
   1282         detail: `filled cells persist at bottom (grid-verified, ${lockEvents.length} lock event(s))`,
   1283       });
   1284     } else if (session.piecesLocked > 0 && session.piecesSpawned > 0) {
   1285       results.push({
   1286         name: "piece_locks",
   1287         pass: true,
   1288         detail: `${session.piecesLocked} piece(s) locked during play`,
   1289       });
   1290     } else if (session.piecesLocked > 0 && session.piecesSpawned === 0) {
   1291       results.push({
   1292         name: "piece_locks",
   1293         pass: false,
   1294         detail: `${session.piecesLocked} lock event(s) but 0 spawns detected - likely false positive from UI misread`,
   1295       });
   1296     } else {
   1297       results.push({
   1298         name: "piece_locks",
   1299         pass: false,
   1300         detail: "could not verify piece locking via grid reader",
   1301       });
   1302     }
   1303   }
   1304 
   1305   // 11. new_piece_spawns
   1306   if (!phaseState.gameStarted) {
   1307     results.push(skipResult("new_piece_spawns", "game did not start"));
   1308   } else if (session.piecesSpawned > 0) {
   1309     results.push({
   1310       name: "new_piece_spawns",
   1311       pass: true,
   1312       detail: `${session.piecesSpawned} new piece(s) detected at top of grid`,
   1313     });
   1314   } else {
   1315     results.push({
   1316       name: "new_piece_spawns",
   1317       pass: false,
   1318       detail: "could not detect new piece spawning at top via grid reader",
   1319     });
   1320   }
   1321 
   1322   // 12. multiple_pieces
   1323   if (!phaseState.mechanicsWork) {
   1324     results.push(skipResult("multiple_pieces", "mechanics phase failed"));
   1325   } else if (session.piecesLocked >= 3 && session.piecesSpawned > 0) {
   1326     results.push({
   1327       name: "multiple_pieces",
   1328       pass: true,
   1329       detail: `${session.piecesLocked} pieces placed during play session`,
   1330     });
   1331   } else {
   1332     results.push({
   1333       name: "multiple_pieces",
   1334       pass: false,
   1335       detail: `only ${session.piecesLocked} piece(s) detected, need at least 3`,
   1336     });
   1337   }
   1338 
   1339   // 13. line_clear
   1340   if (!phaseState.mechanicsWork) {
   1341     results.push(skipResult("line_clear", "mechanics phase failed"));
   1342   } else if (session.linesCleared > 0) {
   1343     results.push({
   1344       name: "line_clear",
   1345       pass: true,
   1346       detail: `${session.linesCleared} line(s) cleared (grid-verified)`,
   1347     });
   1348   } else {
   1349     results.push({
   1350       name: "line_clear",
   1351       pass: false,
   1352       detail: "could not trigger or detect a line clear via grid reader",
   1353     });
   1354   }
   1355 
   1356   // 14. score_changes
   1357   if (!phaseState.mechanicsWork) {
   1358     results.push(skipResult("score_changes", "mechanics phase failed"));
   1359   } else if (session.scoreValues.length >= 2) {
   1360     const min = Math.min(...session.scoreValues);
   1361     const max = Math.max(...session.scoreValues);
   1362     if (max > min) {
   1363       results.push({
   1364         name: "score_changes",
   1365         pass: true,
   1366         detail: `score changed from ${min} to ${max}`,
   1367       });
   1368     } else {
   1369       results.push({
   1370         name: "score_changes",
   1371         pass: false,
   1372         detail: `score stayed at ${min}`,
   1373       });
   1374     }
   1375   } else if (cal.scoreElementSelector === null) {
   1376     results.push({
   1377       name: "score_changes",
   1378       pass: false,
   1379       detail: "no score element found",
   1380     });
   1381   } else {
   1382     results.push({
   1383       name: "score_changes",
   1384       pass: false,
   1385       detail: "could not read score values",
   1386     });
   1387   }
   1388 
   1389   // 15. game_over -- requires pieces to work
   1390   if (!phaseState.piecesWork) {
   1391     results.push(skipResult("game_over", "piece lifecycle failed"));
   1392   } else {
   1393     results.push({
   1394       name: "game_over",
   1395       pass: session.gameOverDetected,
   1396       detail: session.gameOverDetected
   1397         ? "game stopped after stacking to top (grid-verified)"
   1398         : "could not trigger or detect game over via grid reader",
   1399     });
   1400   }
   1401 
   1402   // 16. playable_30s -- requires gameplay to have worked
   1403   if (!phaseState.gameplayWorks) {
   1404     results.push(skipResult("playable_30s", "gameplay phase failed"));
   1405   } else {
   1406     const crashed = session.consoleErrors.length > 0 || gameplay.errors_during_play > 3;
   1407     if (!crashed && gameplay.play_duration_seconds >= 10) {
   1408       results.push({
   1409         name: "playable_30s",
   1410         pass: true,
   1411         detail: `played for ${gameplay.play_duration_seconds}s, placed ${gameplay.pieces_placed} pieces, no crashes`,
   1412       });
   1413     } else if (crashed) {
   1414       results.push({
   1415         name: "playable_30s",
   1416         pass: false,
   1417         detail: `${session.consoleErrors.length} console error(s), ${gameplay.errors_during_play} play errors`,
   1418       });
   1419     } else {
   1420       results.push({
   1421         name: "playable_30s",
   1422         pass: false,
   1423         detail: `only played for ${gameplay.play_duration_seconds}s`,
   1424       });
   1425     }
   1426   }
   1427 
   1428   // ---- Phase 8: Competitive play tests (17-24) ----
   1429 
   1430   // 17. multi_line_clear
   1431   if (!phaseState.gameplayWorks || !competitivePlay) {
   1432     results.push(skipResult("multi_line_clear", "competitive play phase did not run"));
   1433   } else if (competitivePlay.double_clears + competitivePlay.triple_clears + competitivePlay.tetris_clears > 0) {
   1434     const hasMultiLineBug = competitivePlay.bugs_detected.includes("multi_line_clear_only_removes_one_row");
   1435     results.push({
   1436       name: "multi_line_clear",
   1437       pass: !hasMultiLineBug,
   1438       detail: hasMultiLineBug
   1439         ? "multi-line clear detected but only 1 row was removed"
   1440         : `multi-line clears work: ${competitivePlay.double_clears}x double, ${competitivePlay.triple_clears}x triple, ${competitivePlay.tetris_clears}x tetris`,
   1441     });
   1442   } else {
   1443     results.push(skipResult("multi_line_clear", "no multi-line clear opportunity occurred during play"));
   1444   }
   1445 
   1446   // 18. score_scaling
   1447   if (!phaseState.gameplayWorks || !competitivePlay) {
   1448     results.push(skipResult("score_scaling", "competitive play phase did not run"));
   1449   } else if (competitivePlay.double_clears + competitivePlay.triple_clears + competitivePlay.tetris_clears > 0) {
   1450     const hasBug = competitivePlay.bugs_detected.includes("score_does_not_scale_with_simultaneous_clears");
   1451     results.push({
   1452       name: "score_scaling",
   1453       pass: !hasBug,
   1454       detail: hasBug
   1455         ? "multi-line clears give same points as single clears"
   1456         : `score scales with clear type (${competitivePlay.score_increases.length} score changes observed)`,
   1457     });
   1458   } else {
   1459     results.push(skipResult("score_scaling", "no multi-line clear occurred to test scaling"));
   1460   }
   1461 
   1462   // 19. level_progression
   1463   if (!phaseState.gameplayWorks || !competitivePlay) {
   1464     results.push(skipResult("level_progression", "competitive play phase did not run"));
   1465   } else if (competitivePlay.total_lines_cleared < 10) {
   1466     results.push(skipResult("level_progression", `only ${competitivePlay.total_lines_cleared} lines cleared (need 10+)`));
   1467   } else {
   1468     const hasBug = competitivePlay.bugs_detected.includes("level_does_not_increase");
   1469     if (competitivePlay.level_readings.length < 2) {
   1470       results.push(skipResult("level_progression", "could not read level display"));
   1471     } else {
   1472       results.push({
   1473         name: "level_progression",
   1474         pass: !hasBug,
   1475         detail: hasBug
   1476           ? `level stayed at ${competitivePlay.level_readings[0]} despite ${competitivePlay.total_lines_cleared} lines cleared`
   1477           : `level progressed from ${competitivePlay.level_readings[0]} to ${competitivePlay.level_final}`,
   1478       });
   1479     }
   1480   }
   1481 
   1482   // 20. speed_progression
   1483   if (!phaseState.gameplayWorks || !competitivePlay) {
   1484     results.push(skipResult("speed_progression", "competitive play phase did not run"));
   1485   } else if (competitivePlay.level_readings.length < 2 || new Set(competitivePlay.level_readings).size <= 1) {
   1486     results.push(skipResult("speed_progression", "level did not increase, cannot test speed change"));
   1487   } else {
   1488     const hasBug = competitivePlay.bugs_detected.includes("speed_does_not_increase");
   1489     results.push({
   1490       name: "speed_progression",
   1491       pass: !hasBug && competitivePlay.speed_increased,
   1492       detail: competitivePlay.speed_increased
   1493         ? "drop speed increased with level"
   1494         : "drop speed did not change after level increased",
   1495     });
   1496   }
   1497 
   1498   // 21. next_piece_preview
   1499   if (!phaseState.gameplayWorks || !competitivePlay) {
   1500     results.push(skipResult("next_piece_preview", "competitive play phase did not run"));
   1501   } else {
   1502     results.push({
   1503       name: "next_piece_preview",
   1504       pass: competitivePlay.next_piece_visible,
   1505       detail: competitivePlay.next_piece_visible
   1506         ? "next piece preview display found"
   1507         : "no next piece preview found",
   1508     });
   1509   }
   1510 
   1511   // 22. game_over_display
   1512   if (!phaseState.gameplayWorks || !competitivePlay) {
   1513     results.push(skipResult("game_over_display", "competitive play phase did not run"));
   1514   } else if (!competitivePlay.game_over_reached && !session.gameOverDetected) {
   1515     results.push(skipResult("game_over_display", "game over not reached during play"));
   1516   } else {
   1517     const hasText = competitivePlay.game_over_text_found !== null;
   1518     const hasRestart = competitivePlay.restart_available;
   1519     results.push({
   1520       name: "game_over_display",
   1521       pass: hasText && hasRestart,
   1522       detail: hasText && hasRestart
   1523         ? `game over display: "${competitivePlay.game_over_text_found}", restart available`
   1524         : `missing: ${!hasText ? "game over text" : ""}${!hasText && !hasRestart ? " and " : ""}${!hasRestart ? "restart option" : ""}`,
   1525     });
   1526   }
   1527 
   1528   // 23. counter_clockwise_rotation
   1529   if (!phaseState.gameplayWorks || !competitivePlay) {
   1530     results.push(skipResult("counter_clockwise_rotation", "competitive play phase did not run"));
   1531   } else {
   1532     const ccwTestDone = (competitivePlay as any)._ccwTestDone === true;
   1533     const ccwResult = (competitivePlay as any)._ccwResult;
   1534     if (!ccwTestDone) {
   1535       results.push(skipResult("counter_clockwise_rotation", "could not test rotation direction"));
   1536     } else {
   1537       results.push({
   1538         name: "counter_clockwise_rotation",
   1539         pass: ccwResult === true,
   1540         detail: ccwResult === true
   1541           ? "Z key rotates opposite direction from Up arrow"
   1542           : ccwResult === false
   1543           ? "Z key does same as Up arrow or does not rotate"
   1544           : "could not determine rotation direction",
   1545       });
   1546     }
   1547   }
   1548 
   1549   // 24. soft_drop_distinct
   1550   if (!phaseState.gameplayWorks || !competitivePlay) {
   1551     results.push(skipResult("soft_drop_distinct", "competitive play phase did not run"));
   1552   } else {
   1553     const softDropTestDone = (competitivePlay as any)._softDropTestDone === true;
   1554     const softDropDistinct = (competitivePlay as any)._softDropDistinct;
   1555     if (!softDropTestDone) {
   1556       results.push(skipResult("soft_drop_distinct", "could not test soft drop behavior"));
   1557     } else {
   1558       results.push({
   1559         name: "soft_drop_distinct",
   1560         pass: softDropDistinct === true,
   1561         detail: softDropDistinct === true
   1562           ? "Down arrow moves piece 1 row (distinct from hard drop)"
   1563           : "Down arrow acts like hard drop (drops to bottom)",
   1564       });
   1565     }
   1566   }
   1567 
   1568   // 25. rendering_clean
   1569   if (!phaseState.gameplayWorks || !competitivePlay) {
   1570     results.push(skipResult("rendering_clean", "competitive play phase did not run"));
   1571   } else if (competitivePlay.rendering_trail_detected === undefined) {
   1572     results.push(skipResult("rendering_clean", "not enough data to assess rendering trails"));
   1573   } else {
   1574     results.push({
   1575       name: "rendering_clean",
   1576       pass: !competitivePlay.rendering_trail_detected,
   1577       detail: competitivePlay.rendering_trail_detected
   1578         ? "rendering trail bug: falling piece leaves old cells colored after moving"
   1579         : "piece movement clears old cells correctly",
   1580     });
   1581   }
   1582 
   1583   return results;
   1584 }
   1585 
   1586 // ---- Helpers ----
   1587 
   1588 function countFilledInTopRows(grid: boolean[][], rows: number): number {
   1589   let count = 0;
   1590   for (let r = 0; r < Math.min(rows, grid.length); r++) {
   1591     for (let c = 0; c < grid[r].length; c++) {
   1592       if (grid[r][c]) count++;
   1593     }
   1594   }
   1595   return count;
   1596 }
   1597 
   1598 function boundingBox(cells: [number, number][]): { w: number; h: number } {
   1599   const minRow = Math.min(...cells.map(([r]) => r));
   1600   const maxRow = Math.max(...cells.map(([r]) => r));
   1601   const minCol = Math.min(...cells.map(([, c]) => c));
   1602   const maxCol = Math.max(...cells.map(([, c]) => c));
   1603   return { w: maxCol - minCol + 1, h: maxRow - minRow + 1 };
   1604 }
   1605 
   1606 /**
   1607  * Extract the score number from potentially concatenated text.
   1608  */
   1609 function extractScoreFromText(text: string | null): number[] {
   1610   if (!text) return [0];
   1611 
   1612   const labeledMatch = text.match(/score\s*[:\-=]?\s*(\d+)/i);
   1613   if (labeledMatch) {
   1614     return [parseInt(labeledMatch[1], 10)];
   1615   }
   1616 
   1617   const allNumbers = (text.match(/\d+/g) || []).map(Number);
   1618   return allNumbers.length > 0 ? allNumbers : [0];
   1619 }
   1620 
   1621 async function loadGamePage(page: Page, serverUrl: string): Promise<void> {
   1622   const response = await page.goto(serverUrl, {
   1623     timeout: 15000,
   1624     waitUntil: "networkidle",
   1625   });
   1626   if (!response || !response.ok()) {
   1627     throw new Error(`Failed to load ${serverUrl}: ${response?.status()}`);
   1628   }
   1629   await page.waitForTimeout(1000);
   1630 }
   1631 
   1632 function emptyCalibration(consoleErrors: string[]): CalibrationResult {
   1633   return {
   1634     renderer: "unknown",
   1635     gridDetected: false,
   1636     gridBounds: null,
   1637     cellWidth: 0,
   1638     cellHeight: 0,
   1639     controls: {
   1640       left: "ArrowLeft",
   1641       right: "ArrowRight",
   1642       down: "ArrowDown",
   1643       rotate: "ArrowUp",
   1644       drop: "Space",
   1645     },
   1646     startMechanism: "unknown",
   1647     scoreElementSelector: null,
   1648     backgroundColor: null,
   1649     consoleErrors,
   1650     gridConfidence: 0,
   1651   };
   1652 }
   1653 
   1654 /**
   1655  * Read the level display from the page.
   1656  */
   1657 async function readLevelFromPage(page: Page): Promise<number | null> {
   1658   try {
   1659     return await page.evaluate(() => {
   1660       const allElements = document.querySelectorAll("*");
   1661       for (const el of allElements) {
   1662         const text = ((el as HTMLElement).innerText || "").toLowerCase();
   1663         if (text.includes("level") && el.children.length < 5) {
   1664           const match = text.match(/level\s*[:\-=]?\s*(\d+)/i);
   1665           if (match) return parseInt(match[1], 10);
   1666 
   1667           // Check child elements for a standalone number
   1668           const children = el.querySelectorAll("span, div, p, td, strong, em, b");
   1669           for (const child of children) {
   1670             const childText = (child.textContent || "").trim();
   1671             if (/^\d+$/.test(childText)) return parseInt(childText, 10);
   1672           }
   1673 
   1674           // Check next sibling
   1675           const next = el.nextElementSibling;
   1676           if (next) {
   1677             const nextText = (next.textContent || "").trim();
   1678             if (/^\d+$/.test(nextText)) return parseInt(nextText, 10);
   1679           }
   1680         }
   1681       }
   1682       return null;
   1683     });
   1684   } catch {
   1685     return null;
   1686   }
   1687 }
   1688 
   1689 /**
   1690  * Measure the auto-drop interval by watching for grid changes without input.
   1691  * Returns the average interval in ms, or 0 if unable to measure.
   1692  */
   1693 async function measureDropInterval(
   1694   page: Page,
   1695   cal: CalibrationResult
   1696 ): Promise<number> {
   1697   try {
   1698     const intervals: number[] = [];
   1699     let lastChangeTime = Date.now();
   1700     let prevGrid = await readGrid(page, cal);
   1701 
   1702     for (let i = 0; i < 10; i++) {
   1703       await page.waitForTimeout(100);
   1704       const grid = await readGrid(page, cal);
   1705       if (grid && prevGrid && gridsAreDifferent(grid, prevGrid)) {
   1706         const now = Date.now();
   1707         const interval = now - lastChangeTime;
   1708         if (interval > 50 && interval < 3000) {
   1709           intervals.push(interval);
   1710         }
   1711         lastChangeTime = now;
   1712         prevGrid = grid;
   1713       }
   1714     }
   1715 
   1716     if (intervals.length >= 2) {
   1717       return intervals.reduce((a, b) => a + b, 0) / intervals.length;
   1718     }
   1719   } catch { /* ignore */ }
   1720   return 0;
   1721 }
   1722 
   1723 /**
   1724  * Detect if there's a next piece preview display on the page.
   1725  */
   1726 async function detectNextPiecePreview(page: Page): Promise<boolean> {
   1727   try {
   1728     return await page.evaluate(() => {
   1729       // Check for text mentioning "next"
   1730       const allElements = document.querySelectorAll("*");
   1731       for (const el of allElements) {
   1732         const text = ((el as HTMLElement).innerText || "").toLowerCase();
   1733         if (text.includes("next") && el.children.length < 10) {
   1734           // Check for a canvas or grid-like element nearby
   1735           const rect = (el as HTMLElement).getBoundingClientRect();
   1736           if (rect.width > 20 && rect.height > 20) {
   1737             return true;
   1738           }
   1739         }
   1740       }
   1741 
   1742       // Check for secondary canvases (common next piece implementation)
   1743       const canvases = document.querySelectorAll("canvas");
   1744       if (canvases.length >= 2) {
   1745         // Multiple canvases -- one might be the next piece preview
   1746         const mainCanvas = canvases[0];
   1747         const mainRect = mainCanvas.getBoundingClientRect();
   1748         for (let i = 1; i < canvases.length; i++) {
   1749           const rect = canvases[i].getBoundingClientRect();
   1750           // Next piece preview is typically smaller than the main grid
   1751           if (rect.width < mainRect.width * 0.5 && rect.height < mainRect.height * 0.5 &&
   1752               rect.width > 20 && rect.height > 20) {
   1753             return true;
   1754           }
   1755         }
   1756       }
   1757 
   1758       // Check for a small div/container with "next" in class/id
   1759       const nextContainers = document.querySelectorAll(
   1760         '[class*="next"], [id*="next"], [class*="preview"], [id*="preview"]'
   1761       );
   1762       for (const container of nextContainers) {
   1763         const rect = (container as HTMLElement).getBoundingClientRect();
   1764         if (rect.width > 20 && rect.height > 20) {
   1765           return true;
   1766         }
   1767       }
   1768 
   1769       return false;
   1770     });
   1771   } catch {
   1772     return false;
   1773   }
   1774 }

Impressum · Datenschutz