V2: fix AI player so it actually plays Tetris - loop-benchmarking - Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.

commit f2f3ae07a56e601bfb819d07f034eedeabc6a9c8
parent 3d89b1b341dd38bd6e1d3574d07e083fb57b1d62
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Fri, 10 Apr 2026 21:20:12 +0200

V2: fix AI player so it actually plays Tetris

Four bugs in the LeeYiyuan port were preventing the bot from playing:

Bug 1 (primary): settledGrid pollution
After hard-drop, the bot read the grid and stored it as 'settled' but
the next piece had already spawned. detectActivePieceCells then returned
null because the new piece was 'baked into settled'. Bot froze after
move 1 (telemetry: pieces_spawned=1, pieces_locked=20).

Fix: compute boardBeforePlacement = stripActivePiece(snap.grid,
snap.activePieceCells) before placement, wait 350ms after the drop,
detect the new piece against that saved board, and strip it to get a
clean settledGrid for the next iteration.

Bug 2: pre-rotation column math wrong
currentCol was captured before rotation, but rotation shifts the
piece's leftmost column, so column moves were off by 1-2 cells.

Fix: Adopt LeeYiyuan's slam-left strategy. After computing placement:
rotate N times, press Left 10 times (slam to wall), press Right
placement.column times, hard drop. No live position tracking needed.

Bug 3: J[3] had negative column offset
PIECES.J[3] = [[0,0],[1,0],[2,0],[2,-1]] -- minCol=-1 broke the
column math for J piece in rotation state 3.

Fix: Normalized to [[0,1],[1,1],[2,1],[2,0]] with minCol=0. All 28
rotation states now have minRow=0 and minCol=0.

Bug 4: First-frame piece detection sloppy
detectActivePieceCells fallback scanned all cells in top 6 rows when
settledGrid was null, picking up UI chrome.

Fix: Flood-fill (4-connectivity) for connected components of 3-5 cells
in the top 4 rows, and pick the one whose average column is closest to
the spawn center, column 4.5.

Results: bot now actually plays Tetris.
- Test on 4c7db3b9: 94% score (was 89%)
- Gameplay phase cleared 10 lines (was 0)
- Competitive play cleared 2 lines, scored 200 (was 0)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M tasks/tetris/eval/gameplay-bot-v2/bot.ts  | 185 ++++++++++++++++++++++++++++++++++++++++---------------------------------------
M tasks/tetris/eval/gameplay-bot-v2/driver.ts  | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----------

2 files changed, 149 insertions(+), 101 deletions(-)
diff --git a/tasks/tetris/eval/gameplay-bot-v2/bot.ts b/tasks/tetris/eval/gameplay-bot-v2/bot.ts
@@ -64,7 +64,7 @@ const PIECES: Record<string, [number, number][][]> = {
     [[0, 0], [1, 0], [1, 1], [1, 2]],
     [[0, 0], [0, 1], [1, 0], [2, 0]],
     [[0, 0], [0, 1], [0, 2], [1, 2]],
-    [[0, 0], [1, 0], [2, 0], [2, -1]],
+    [[0, 1], [1, 1], [2, 1], [2, 0]],
   ],
   L: [
     [[0, 2], [1, 0], [1, 1], [1, 2]],
@@ -1155,8 +1155,10 @@ async function runCompetitivePlayPhase(
   const initialDropInterval = await driver.measureDropInterval();
 
   // Play loop
-  let previousSnap = await driver.readGrid();
-  let settledGrid = previousSnap.grid;
+  // settledGrid = the locked board WITHOUT any active piece. Recomputed
+  // after each placement by reading the fresh grid and stripping the newly-
+  // spawned piece.
+  let settledGrid: Grid | null = null;
   let pollCount = 0;
   let consecutiveClears = 0;
   let maxCombo = 0;
@@ -1194,30 +1196,6 @@ async function runCompetitivePlayPhase(
         if (level !== null) result.level_readings.push(level);
       }
 
-      // Line clear detection
-      if (previousSnap.grid && snap.grid) {
-        const filledBefore = previousSnap.filledCount;
-        const filledNow = snap.filledCount;
-
-        if (filledNow < filledBefore - 5 && filledBefore > 10) {
-          const clearedCount = Math.round((filledBefore + 4 - filledNow) / 10);
-          if (clearedCount > 0 && clearedCount <= 4) {
-            result.total_lines_cleared += clearedCount;
-            consecutiveClears++;
-            if (consecutiveClears > maxCombo) maxCombo = consecutiveClears;
-
-            switch (clearedCount) {
-              case 1: result.single_clears++; break;
-              case 2: result.double_clears++; break;
-              case 3: result.triple_clears++; break;
-              case 4: result.tetris_clears++; break;
-            }
-          }
-        } else {
-          consecutiveClears = 0;
-        }
-      }
-
       // Active piece detection + AI placement
       if (snap.activePieceCells && snap.activePieceCells.length === 4) {
         const pieceType = snap.activePieceType || "unknown";
@@ -1246,9 +1224,10 @@ async function runCompetitivePlayPhase(
           if (sampleSnap.grid) filledCellSamples.push(sampleSnap.filledCount);
         }
 
-        // Execute AI placement
-        const boardWithoutPiece = settledGrid ?? stripActivePiece(snap.grid!, snap.activePieceCells);
-        const placement = findBestPlacement(boardWithoutPiece, pieceType as PieceType);
+        // Compute clean locked board (without active piece) for AI eval
+        // and as the diff base for the next iteration.
+        const boardBeforePlacement = stripActivePiece(snap.grid, snap.activePieceCells);
+        const placement = findBestPlacement(boardBeforePlacement, pieceType as PieceType);
 
         if (placement) {
           await executePlacement(driver, placement, snap.activePieceCells);
@@ -1256,15 +1235,48 @@ async function runCompetitivePlayPhase(
           await driver.pressKey("drop");
         }
 
-        await driver.wait(100);
         result.pieces_placed++;
 
-        const afterSnap = await driver.readGrid();
-        if (afterSnap.grid) settledGrid = afterSnap.grid;
-      }
+        // Wait for lock + new piece spawn, then re-read and strip the new
+        // active piece so settledGrid reflects only the locked board.
+        await driver.wait(350);
 
-      previousSnap = snap;
-      await driver.wait(60);
+        const afterSnap = await driver.readGrid(boardBeforePlacement);
+        if (afterSnap.grid) {
+          // Line-clear detection: compare filled count of locked boards
+          // before and after placement (before + 4 = expected, minus clears).
+          const filledBefore = countFilled(boardBeforePlacement) + 4;
+          const filledNow = countFilled(afterSnap.grid) -
+            (afterSnap.activePieceCells ? afterSnap.activePieceCells.length : 0);
+
+          if (filledNow < filledBefore - 5 && filledBefore > 10) {
+            const clearedCount = Math.round((filledBefore - filledNow) / GRID_COLS);
+            if (clearedCount > 0 && clearedCount <= 4) {
+              result.total_lines_cleared += clearedCount;
+              consecutiveClears++;
+              if (consecutiveClears > maxCombo) maxCombo = consecutiveClears;
+
+              switch (clearedCount) {
+                case 1: result.single_clears++; break;
+                case 2: result.double_clears++; break;
+                case 3: result.triple_clears++; break;
+                case 4: result.tetris_clears++; break;
+              }
+            }
+          } else if (filledNow >= filledBefore - 1) {
+            consecutiveClears = 0;
+          }
+
+          if (afterSnap.activePieceCells && afterSnap.activePieceCells.length >= 3 && afterSnap.activePieceCells.length <= 5) {
+            settledGrid = stripActivePiece(afterSnap.grid, afterSnap.activePieceCells);
+          } else {
+            settledGrid = afterSnap.grid;
+          }
+        }
+      } else {
+        // No active piece visible -- wait briefly for spawn.
+        await driver.wait(60);
+      }
     } catch {
       await driver.wait(60);
     }
@@ -1374,10 +1386,10 @@ async function playGame(
   const scoreValues: number[] = [];
   let scorePollCounter = 0;
 
-  let previousGrid: Grid | null = null;
+  // settledGrid = the locked board WITHOUT any active piece. We recompute
+  // it after each placement by reading the fresh grid and stripping out the
+  // new active piece (the one that just spawned after our drop).
   let settledGrid: Grid | null = null;
-  let lastPlacementTime = Date.now();
-  let waitingForNewPiece = false;
 
   while (piecesPlaced < maxPieces && Date.now() - start < maxDuration) {
     try {
@@ -1405,62 +1417,54 @@ async function playGame(
         if (score !== null) scoreValues.push(score);
       }
 
-      // Detect if anything changed
-      if (previousGrid && !driver.gridsAreDifferent(snap.grid, previousGrid)) {
-        if (Date.now() - lastPlacementTime > 8000) {
-          await driver.pressKey("drop");
-          lastPlacementTime = Date.now();
-        }
-        await driver.wait(60);
-        continue;
-      }
-
-      if (waitingForNewPiece) {
-        settledGrid = snap.grid;
-        waitingForNewPiece = false;
-        lastPlacementTime = Date.now();
-        previousGrid = snap.grid;
-        await driver.wait(60);
-        continue;
-      }
-
       if (snap.activePieceCells && snap.activePieceCells.length === 4) {
         const pieceType = snap.activePieceType || "unknown";
 
-        const boardWithoutPiece = settledGrid ?? stripActivePiece(snap.grid, snap.activePieceCells);
-        const placement = findBestPlacement(boardWithoutPiece, pieceType as PieceType);
+        // Save the locked board as-of right now (no active piece). This is
+        // what findBestPlacement evaluates against, and what we use as the
+        // diff base for the NEXT iteration's active-piece detection.
+        const boardBeforePlacement = stripActivePiece(snap.grid, snap.activePieceCells);
+        const placement = findBestPlacement(boardBeforePlacement, pieceType as PieceType);
 
         if (placement) {
           await executePlacement(driver, placement, snap.activePieceCells);
           linesCleared += placement.linesCleared;
           piecesPlaced++;
-          waitingForNewPiece = true;
         } else {
           await driver.pressKey("drop");
           piecesPlaced++;
-          waitingForNewPiece = true;
         }
 
-        await driver.wait(100);
+        // Wait long enough for lock + line clears + new piece spawn. Then
+        // read the fresh grid. The "active piece" in this afterSnap is the
+        // NEW piece that just spawned at the top -- we must detect it using
+        // boardBeforePlacement (NOT the current snap which has our just-
+        // dropped piece baked in) and strip it to get a clean settledGrid
+        // for the next iteration.
+        await driver.wait(350);
 
-        const afterSnap = await driver.readGrid();
+        const afterSnap = await driver.readGrid(boardBeforePlacement);
         if (afterSnap.grid) {
-          if (settledGrid) {
-            const filledBefore = countFilled(settledGrid);
-            const filledAfter = countFilled(afterSnap.grid);
-            if (filledAfter < filledBefore) {
-              const possibleClears = Math.round((filledBefore + 4 - filledAfter) / GRID_COLS);
-              if (possibleClears > 0 && possibleClears <= 4) linesCleared += possibleClears;
-            }
+          // Line-clear detection by filled-count delta.
+          const filledBefore = countFilled(boardBeforePlacement) + 4;
+          const filledAfter = countFilled(afterSnap.grid);
+          if (filledAfter < filledBefore) {
+            const possibleClears = Math.round((filledBefore - filledAfter) / GRID_COLS);
+            if (possibleClears > 0 && possibleClears <= 4) linesCleared += possibleClears;
           }
-          settledGrid = afterSnap.grid;
-        }
 
-        lastPlacementTime = Date.now();
+          if (afterSnap.activePieceCells && afterSnap.activePieceCells.length >= 3 && afterSnap.activePieceCells.length <= 5) {
+            settledGrid = stripActivePiece(afterSnap.grid, afterSnap.activePieceCells);
+          } else {
+            // No new piece detected yet (maybe still locking/clearing).
+            // Fall back to using the raw grid; next iteration will re-diff.
+            settledGrid = afterSnap.grid;
+          }
+        }
+      } else {
+        // No active piece visible yet -- wait briefly for spawn.
+        await driver.wait(60);
       }
-
-      previousGrid = snap.grid;
-      await driver.wait(60);
     } catch {
       errors++;
       await playRandomMove(driver);
@@ -1475,26 +1479,25 @@ async function playGame(
 async function executePlacement(
   driver: TetrisDriver,
   placement: Placement,
-  activeCells: [number, number][]
+  _activeCells: [number, number][]
 ): Promise<void> {
+  // Slam-left strategy (LeeYiyuan approach): don't track live position.
+  // 1. Rotate N times. 2. Left ~10 to slam against wall. 3. Right to target.
+  // 4. Hard drop. This is position-independent so rotation-column shifts
+  // don't matter.
   for (let i = 0; i < placement.rotations; i++) {
     await driver.pressKey("rotate");
-    await driver.wait(50);
+    await driver.wait(30);
   }
 
-  const currentCol = Math.min(...activeCells.map(([, c]) => c));
-  const diff = placement.column - currentCol;
+  for (let i = 0; i < GRID_COLS; i++) {
+    await driver.pressKey("left");
+    await driver.wait(15);
+  }
 
-  if (diff < 0) {
-    for (let i = 0; i < Math.abs(diff); i++) {
-      await driver.pressKey("left");
-      await driver.wait(30);
-    }
-  } else if (diff > 0) {
-    for (let i = 0; i < diff; i++) {
-      await driver.pressKey("right");
-      await driver.wait(30);
-    }
+  for (let i = 0; i < placement.column; i++) {
+    await driver.pressKey("right");
+    await driver.wait(15);
   }
 
   await driver.pressKey("drop");
diff --git a/tasks/tetris/eval/gameplay-bot-v2/driver.ts b/tasks/tetris/eval/gameplay-bot-v2/driver.ts
@@ -135,9 +135,8 @@ function detectActivePieceCells(
 ): [number, number][] | null {
   if (!current) return null;
 
-  const cells: [number, number][] = [];
-
   if (settled && settled.length === current.length) {
+    const cells: [number, number][] = [];
     for (let row = 0; row < current.length; row++) {
       for (let col = 0; col < current[row].length; col++) {
         if (current[row][col] && !settled[row][col]) {
@@ -145,19 +144,65 @@ function detectActivePieceCells(
         }
       }
     }
-  } else {
-    // Fallback: scan top 6 rows for filled cells
-    for (let row = 0; row < Math.min(6, current.length); row++) {
-      for (let col = 0; col < current[row].length; col++) {
-        if (current[row][col]) {
-          cells.push([row, col]);
+    if (cells.length < 3 || cells.length > 5) return null;
+    return cells;
+  }
+
+  // Fallback: find a connected 3-5 cell cluster in the top 4 rows.
+  // (A fresh tetromino spawns at the top and forms a single connected
+  // component; filled cells elsewhere are usually UI chrome or false
+  // positives from canvas reading.)
+  const topRows = Math.min(4, current.length);
+  const filled: [number, number][] = [];
+  for (let row = 0; row < topRows; row++) {
+    for (let col = 0; col < current[row].length; col++) {
+      if (current[row][col]) filled.push([row, col]);
+    }
+  }
+  if (filled.length < 3) return null;
+
+  // BFS to find connected components (4-connectivity)
+  const key = (r: number, c: number) => `${r},${c}`;
+  const filledSet = new Set(filled.map(([r, c]) => key(r, c)));
+  const seen = new Set<string>();
+  const components: [number, number][][] = [];
+
+  for (const [r0, c0] of filled) {
+    if (seen.has(key(r0, c0))) continue;
+    const stack: [number, number][] = [[r0, c0]];
+    const comp: [number, number][] = [];
+    while (stack.length > 0) {
+      const [r, c] = stack.pop()!;
+      const k = key(r, c);
+      if (seen.has(k)) continue;
+      seen.add(k);
+      comp.push([r, c]);
+      for (const [dr, dc] of [[-1, 0], [1, 0], [0, -1], [0, 1]] as const) {
+        const nr = r + dr, nc = c + dc;
+        if (nr >= 0 && nr < topRows && nc >= 0 && current[nr] && nc < current[nr].length) {
+          if (filledSet.has(key(nr, nc)) && !seen.has(key(nr, nc))) {
+            stack.push([nr, nc]);
+          }
         }
       }
     }
+    components.push(comp);
   }
 
-  if (cells.length < 3 || cells.length > 5) return null;
-  return cells;
+  // Pick the first component sized 3-5 (a tetromino). If several match,
+  // prefer the one closest to the center column (spawn position).
+  const tetrominoCandidates = components.filter((c) => c.length >= 3 && c.length <= 5);
+  if (tetrominoCandidates.length === 0) return null;
+
+  tetrominoCandidates.sort((a, b) => {
+    const avgColA = a.reduce((s, [, c]) => s + c, 0) / a.length;
+    const avgColB = b.reduce((s, [, c]) => s + c, 0) / b.length;
+    const centerDistA = Math.abs(avgColA - 4.5);
+    const centerDistB = Math.abs(avgColB - 4.5);
+    return centerDistA - centerDistB;
+  });
+
+  return tetrominoCandidates[0];
 }
 
 function identifyPieceType(cells: [number, number][]): PieceType {

	loop-benchmarking Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
	git clone https://git.shiptheloop.com/loop-benchmarking.git
	Log | Files | Refs | README

M	tasks/tetris/eval/gameplay-bot-v2/bot.ts	\|	185	++++++++++++++++++++++++++++++++++++++++---------------------------------------
M	tasks/tetris/eval/gameplay-bot-v2/driver.ts	\|	65	+++++++++++++++++++++++++++++++++++++++++++++++++++++++----------