commit 42321c004e708af0b099db58e9cbedf54e03e145
parent 13710ed75e85f22717f6463b1b3bc0b822a0a66b
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Sun, 12 Apr 2026 08:31:49 +0200
V2: landmarks-based game_loads, updated calibration test names
game_loads now checks for structural landmarks (canvas, DOM grid,
tetris-ratio elements, cell containers) instead of failing on console
errors. Console errors are informational, not a pass/fail gate.
Calibration case 8fe72fce: game_loads now passes (it previously failed
because of a benign startup TypeError); its score is 100% (20/20 scorable).
Updated all calibration JSON files: score_changes is split into
score_increases_on_clear (keeps the previous expected value) and
score_element_visible (new; set to null in every file).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
13 files changed, 248 insertions(+), 32 deletions(-)
diff --git a/tasks/tetris/eval/gameplay-bot-v2/bot.ts b/tasks/tetris/eval/gameplay-bot-v2/bot.ts
@@ -314,7 +314,8 @@ const ALL_TEST_NAMES = [
"new_piece_spawns",
"multiple_pieces",
"line_clear",
- "score_changes",
+ "score_increases_on_clear",
+ "score_element_visible",
"game_over",
"playable_30s",
"multi_line_clear",
@@ -423,6 +424,13 @@ export async function runAllTests(
// ---- Pre-test survey ----
survey = await driver.surveyPage();
+ // ---- Detect game-shaped landmarks for the game_loads test ----
+ try {
+ session.gameLoadLandmarks = await driver.detectGameLandmarks();
+ } catch {
+ session.gameLoadLandmarks = undefined;
+ }
+
// ---- Phase 2: Discover + verify start, then calibrate ----
// Bridge flow: try each candidate, ask verifyGameStarted() to confirm,
// commit the first verified candidate. On false positive, reload and try
@@ -830,6 +838,14 @@ async function runGameplayPhase(
}
}
+ // Propagate score-before/after-clear from AI play
+ if (result.scoreBeforeClear !== undefined && session.scoreBeforeClear === undefined) {
+ session.scoreBeforeClear = result.scoreBeforeClear;
+ }
+ if (result.scoreAfterClear !== undefined && session.scoreAfterClear === undefined) {
+ session.scoreAfterClear = result.scoreAfterClear;
+ }
+
// Read final score
const finalScore = await driver.readScore();
if (finalScore !== null) {
@@ -853,12 +869,25 @@ async function runGameplayPhase(
// If no lines cleared by AI, try brute-force
if (session.linesCleared === 0) {
+ // Capture score before brute-force line clear attempt
+ if (session.scoreBeforeClear === undefined) {
+ const preScore = await driver.readScore();
+ if (preScore !== null) session.scoreBeforeClear = preScore;
+ }
+
const cleared = await tryFillRow(driver, 10);
gameplay.pieces_placed += 10;
if (cleared) {
session.linesCleared++;
gameplay.lines_cleared++;
session.events.push({ type: "line_cleared", count: 1, frame: session.frames });
+
+ // Capture score after brute-force line clear
+ if (session.scoreAfterClear === undefined) {
+ await driver.wait(200);
+ const postScore = await driver.readScore();
+ if (postScore !== null) session.scoreAfterClear = postScore;
+ }
}
}
@@ -867,9 +896,23 @@ async function runGameplayPhase(
const snapAfter = await driver.readGrid();
const filledAfter = snapAfter.filledCount;
if (filledAfter < filledBefore && filledBefore > 0) {
+ // Capture score around grid-verified line clear
+ if (session.scoreBeforeClear === undefined) {
+ // Use the last known score reading as proxy
+ const lastKnown = session.scoreValues.length > 0
+ ? session.scoreValues[session.scoreValues.length - 1]
+ : null;
+ if (lastKnown !== null) session.scoreBeforeClear = lastKnown;
+ }
+
session.linesCleared++;
gameplay.lines_cleared++;
session.events.push({ type: "line_cleared", count: 1, frame: session.frames });
+
+ if (session.scoreAfterClear === undefined) {
+ const postScore = await driver.readScore();
+ if (postScore !== null) session.scoreAfterClear = postScore;
+ }
}
}
}
@@ -1469,7 +1512,7 @@ async function playGame(
maxDurationMs?: number;
rotationTrack?: Map<string, Set<string>>;
}
-): Promise<{ piecesPlaced: number; linesCleared: number; errors: number; gridReads: number; gridReadFails: number; scoreValues: number[] }> {
+): Promise<{ piecesPlaced: number; linesCleared: number; errors: number; gridReads: number; gridReadFails: number; scoreValues: number[]; scoreBeforeClear?: number; scoreAfterClear?: number }> {
const maxPieces = options.maxPieces ?? 100;
const maxDuration = options.maxDurationMs ?? 30000;
const rotationTrack = options.rotationTrack;
@@ -1482,6 +1525,11 @@ async function playGame(
let consecutiveReadFails = 0;
const scoreValues: number[] = [];
let scorePollCounter = 0;
+ let scoreBeforeClear: number | undefined;
+ let scoreAfterClear: number | undefined;
+ /** The most recent score reading, kept up to date so we can snapshot it
+ * immediately before a line clear is detected. */
+ let lastScoreReading: number | null = null;
// settledGrid = the locked board WITHOUT any active piece. We recompute
// it after each placement by reading the fresh grid and stripping out the
@@ -1511,7 +1559,10 @@ async function playGame(
scorePollCounter++;
if (scorePollCounter % 5 === 0) {
const score = await driver.readScore();
- if (score !== null) scoreValues.push(score);
+ if (score !== null) {
+ scoreValues.push(score);
+ lastScoreReading = score;
+ }
}
if (snap.activePieceCells && snap.activePieceCells.length === 4) {
@@ -1560,6 +1611,20 @@ async function playGame(
const boardBeforePlacement = stripActivePiece(workingSnap.grid!, workingSnap.activePieceCells!);
const placement = findBestPlacement(boardBeforePlacement, pieceType as PieceType);
+ // Track whether this placement will produce a line clear so we can
+ // snapshot the score before and after.
+ const expectsClear = placement ? placement.linesCleared > 0 : false;
+ // Snapshot score just before executing the drop (for score_increases_on_clear)
+ if (expectsClear && scoreBeforeClear === undefined) {
+ const preClearScore = await driver.readScore();
+ if (preClearScore !== null) {
+ scoreBeforeClear = preClearScore;
+ lastScoreReading = preClearScore;
+ } else if (lastScoreReading !== null) {
+ scoreBeforeClear = lastScoreReading;
+ }
+ }
+
if (placement) {
await executePlacement(driver, placement, workingSnap.activePieceCells!);
linesCleared += placement.linesCleared;
@@ -1582,9 +1647,30 @@ async function playGame(
// Line-clear detection by filled-count delta.
const filledBefore = countFilled(boardBeforePlacement) + 4;
const filledAfter = countFilled(afterSnap.grid);
+ let clearsThisPlacement = 0;
if (filledAfter < filledBefore) {
const possibleClears = Math.round((filledBefore - filledAfter) / GRID_COLS);
- if (possibleClears > 0 && possibleClears <= 4) linesCleared += possibleClears;
+ if (possibleClears > 0 && possibleClears <= 4) {
+ linesCleared += possibleClears;
+ clearsThisPlacement = possibleClears;
+ }
+ }
+
+ // If a line clear happened (AI-predicted or grid-verified) and we
+ // haven't captured the post-clear score yet, read it now.
+ if ((expectsClear || clearsThisPlacement > 0) && scoreAfterClear === undefined) {
+ // If we didn't snapshot the before-clear score yet (grid-detected
+ // clear that the AI didn't predict), capture it retroactively from
+ // the last known reading.
+ if (scoreBeforeClear === undefined && lastScoreReading !== null) {
+ scoreBeforeClear = lastScoreReading;
+ }
+ const postClearScore = await driver.readScore();
+ if (postClearScore !== null) {
+ scoreAfterClear = postClearScore;
+ scoreValues.push(postClearScore);
+ lastScoreReading = postClearScore;
+ }
}
if (afterSnap.activePieceCells && afterSnap.activePieceCells.length >= 3 && afterSnap.activePieceCells.length <= 5) {
@@ -1607,9 +1693,10 @@ async function playGame(
}
}
- return { piecesPlaced, linesCleared, errors, gridReads, gridReadFails, scoreValues };
+ return { piecesPlaced, linesCleared, errors, gridReads, gridReadFails, scoreValues, scoreBeforeClear, scoreAfterClear };
}
+
async function executePlacement(
driver: TetrisDriver,
placement: Placement,
@@ -1710,12 +1797,27 @@ function deriveTestResults(
name, pass: false, detail: `skipped: ${reason}`,
});
- // 1. game_loads
- results.push({
- name: "game_loads",
- pass: loadResult.loaded && loadResult.errorsOnLoad === 0,
- detail: loadResult.detail,
- });
+ // 1. game_loads: page loaded with non-trivial content and at least one
+ // game-shaped landmark. Console errors are captured in the report for
+ // informational purposes but do NOT gate this test.
+ {
+ const loaded = loadResult.loaded;
+ const landmarks = session.gameLoadLandmarks;
+ const hasLandmark = landmarks && (
+ landmarks.hasCanvas ||
+ landmarks.hasDomGrid ||
+ landmarks.hasTetrisRatioElement ||
+ landmarks.hasManyCellsContainer
+ );
+ const bodyHasContent = landmarks?.bodyHasContent ?? false;
+ const pass = loaded && bodyHasContent && !!hasLandmark;
+ let detail: string;
+ if (!loaded) detail = loadResult.detail || "page failed to load";
+ else if (!bodyHasContent) detail = "blank page (body has no content)";
+ else if (!hasLandmark) detail = "no game landmarks found (no canvas, grid, or game-shaped element)";
+ else detail = `loaded with landmarks: ${landmarks!.landmarksFound.join(", ")}`;
+ results.push({ name: "game_loads", pass, detail });
+ }
// 2. game_starts
{
@@ -1937,21 +2039,38 @@ function deriveTestResults(
results.push({ name: "line_clear", pass: false, detail: "could not trigger or detect a line clear via grid reader" });
}
- // 14. score_changes
+ // 14a. score_increases_on_clear
if (!phaseState.mechanicsWork) {
- results.push(skipResult("score_changes", "mechanics phase failed"));
- } else if (session.scoreValues.length >= 2) {
- const min = Math.min(...session.scoreValues);
- const max = Math.max(...session.scoreValues);
- if (max > min) {
- results.push({ name: "score_changes", pass: true, detail: `score changed from ${min} to ${max}` });
+ results.push(skipResult("score_increases_on_clear", "mechanics phase failed"));
+ } else if (session.linesCleared > 0) {
+ if (session.scoreBeforeClear !== undefined && session.scoreAfterClear !== undefined) {
+ const scoreIncreased = session.scoreAfterClear > session.scoreBeforeClear;
+ results.push({
+ name: "score_increases_on_clear",
+ pass: scoreIncreased,
+ detail: scoreIncreased
+ ? `score went from ${session.scoreBeforeClear} to ${session.scoreAfterClear} after line clear`
+ : `score stayed at ${session.scoreBeforeClear} after clearing ${session.linesCleared} line(s)`,
+ });
+ } else if (!cal.scoreElementSelector) {
+ results.push(skipResult("score_increases_on_clear", "no score element found, cannot verify scoring on clear"));
} else {
- results.push({ name: "score_changes", pass: false, detail: `score stayed at ${min}` });
+ results.push(skipResult("score_increases_on_clear", "lines cleared but could not read score before/after"));
}
- } else if (!cal.scoreElementSelector) {
- results.push({ name: "score_changes", pass: false, detail: "no score element found" });
} else {
- results.push({ name: "score_changes", pass: false, detail: "could not read score values" });
+ results.push(skipResult("score_increases_on_clear", "no lines cleared, cannot verify scoring"));
+ }
+
+ // 14b. score_element_visible
+ {
+ const hasScoreElement = !!cal.scoreElementSelector;
+ results.push({
+ name: "score_element_visible",
+ pass: hasScoreElement,
+ detail: hasScoreElement
+ ? `score display found (${cal.scoreElementSelector})`
+ : "no score display detected",
+ });
}
// 15. game_over
diff --git a/tasks/tetris/eval/gameplay-bot-v2/driver.ts b/tasks/tetris/eval/gameplay-bot-v2/driver.ts
@@ -14,6 +14,7 @@ import type {
StartCandidate,
TryStartResult,
SurveyData,
+ GameLandmarks,
PieceType,
DriverCalibration,
CalibrationDrift,
@@ -520,6 +521,88 @@ export class PlaywrightDriver implements TetrisDriver {
}
}
+ async detectGameLandmarks(): Promise<GameLandmarks> { // Reports which game-shaped structural landmarks are present on the current page.
+ return await this.page.evaluate(() => { // the callback is evaluated in the page, so it relies on DOM APIs only
+ const body = document.body;
+ const bodyText = body?.innerText?.trim() || "";
+ const bodyHasContent = body !== null &&
+ (bodyText.length > 0 || body.children.length > 0); // non-blank = some innerText or at least one child element
+
+ // Landmark 1: any <canvas> whose bounding box is at least 50x50.
+ const canvases = document.querySelectorAll("canvas");
+ let hasCanvas = false;
+ for (const c of canvases) {
+ const rect = (c as HTMLCanvasElement).getBoundingClientRect();
+ if (rect.width >= 50 && rect.height >= 50) {
+ hasCanvas = true;
+ break;
+ }
+ }
+
+ // Landmark 2: an element whose class contains "board"/"grid"/"field", or whose id contains "board"/"grid", with a bounding box of at least 50x50.
+ const namedContainers = document.querySelectorAll(
+ '[class*="board"], [class*="grid"], [class*="field"], [id*="board"], [id*="grid"]'
+ );
+ let hasDomGrid = false;
+ for (const el of namedContainers) {
+ const rect = (el as HTMLElement).getBoundingClientRect();
+ if (rect.width >= 50 && rect.height >= 50) {
+ hasDomGrid = true;
+ break;
+ }
+ }
+
+ // Landmark 3: a div/section/main/article at least 100 wide and 200 tall whose height:width ratio is between 1.5 and 2.5 (tetris-grid aspect, roughly 2:1).
+ let hasTetrisRatioElement = false;
+ const allElements = document.querySelectorAll("div, section, main, article");
+ for (const el of allElements) {
+ const rect = (el as HTMLElement).getBoundingClientRect();
+ if (rect.width < 100 || rect.height < 200) continue;
+ const ratio = rect.height / rect.width;
+ if (ratio >= 1.5 && ratio <= 2.5) {
+ hasTetrisRatioElement = true;
+ break;
+ }
+ }
+
+ // Landmark 4: one of the same div/section/main/article elements with 50-400 direct children whose sampled sizes are (nearly) identical.
+ let hasManyCellsContainer = false;
+ for (const el of allElements) {
+ const children = el.children;
+ if (children.length < 50 || children.length > 400) continue;
+ const sizes = new Set<string>();
+ const sampleCount = Math.min(10, children.length); // measure at most 10 children rather than all of them
+ for (let i = 0; i < sampleCount; i++) {
+ const child = children[Math.floor(i * children.length / sampleCount)] as HTMLElement; // samples are spread evenly across the child list
+ const r = child.getBoundingClientRect();
+ if (r.width > 0 && r.height > 0) {
+ sizes.add(`${Math.round(r.width)}x${Math.round(r.height)}`);
+ }
+ }
+ if (sizes.size <= 3 && sizes.size > 0) { // 1-3 distinct rounded sizes passes; 0 means every sampled child had a zero-size box
+ hasManyCellsContainer = true;
+ break;
+ }
+ }
+
+ const landmarksFound: string[] = [];
+ if (bodyHasContent) landmarksFound.push("body_content"); // listed for reporting even though it is not one of the four has* flags
+ if (hasCanvas) landmarksFound.push("canvas");
+ if (hasDomGrid) landmarksFound.push("dom_grid");
+ if (hasTetrisRatioElement) landmarksFound.push("tetris_ratio");
+ if (hasManyCellsContainer) landmarksFound.push("cells_container");
+
+ return {
+ bodyHasContent,
+ hasCanvas,
+ hasDomGrid,
+ hasTetrisRatioElement,
+ hasManyCellsContainer,
+ landmarksFound,
+ };
+ });
+ }
+
async calibrate(): Promise<DriverCalibration> {
// Fast path: try applying the cached calibration from a prior run.
if (this.firstCal) {
diff --git a/tasks/tetris/eval/gameplay-bot-v2/types.ts b/tasks/tetris/eval/gameplay-bot-v2/types.ts
@@ -346,6 +346,10 @@ export interface GameSession {
durationSeconds: number;
pieceTypes: Set<string>;
scoreValues: number[];
+ /** Score reading taken just before the first detected line clear. */
+ scoreBeforeClear?: number;
+ /** Score reading taken just after the first detected line clear. */
+ scoreAfterClear?: number;
gridReadSuccess: number;
gridReadFail: number;
frames: number;
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/1d08ee76.json b/tasks/tetris/eval/gameplay-bot/calibration/1d08ee76.json
@@ -18,7 +18,8 @@
"new_piece_spawns": true,
"multiple_pieces": true,
"line_clear": true,
- "score_changes": true,
+ "score_increases_on_clear": true,
+ "score_element_visible": null,
"game_over": true,
"playable_30s": true,
"multi_line_clear": true,
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/4949d521.json b/tasks/tetris/eval/gameplay-bot/calibration/4949d521.json
@@ -18,7 +18,8 @@
"new_piece_spawns": true,
"multiple_pieces": true,
"line_clear": false,
- "score_changes": true,
+ "score_increases_on_clear": true,
+ "score_element_visible": null,
"game_over": true,
"playable_30s": true,
"multi_line_clear": null,
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/4c7db3b9.json b/tasks/tetris/eval/gameplay-bot/calibration/4c7db3b9.json
@@ -18,7 +18,8 @@
"new_piece_spawns": true,
"multiple_pieces": true,
"line_clear": true,
- "score_changes": null,
+ "score_increases_on_clear": null,
+ "score_element_visible": null,
"game_over": true,
"playable_30s": true,
"multi_line_clear": null,
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/7a348b81.json b/tasks/tetris/eval/gameplay-bot/calibration/7a348b81.json
@@ -18,7 +18,8 @@
"new_piece_spawns": null,
"multiple_pieces": null,
"line_clear": null,
- "score_changes": null,
+ "score_increases_on_clear": null,
+ "score_element_visible": null,
"game_over": null,
"playable_30s": null,
"multi_line_clear": null,
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/8fe72fce.json b/tasks/tetris/eval/gameplay-bot/calibration/8fe72fce.json
@@ -18,7 +18,8 @@
"new_piece_spawns": true,
"multiple_pieces": true,
"line_clear": true,
- "score_changes": true,
+ "score_increases_on_clear": true,
+ "score_element_visible": null,
"game_over": true,
"playable_30s": true,
"multi_line_clear": true,
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/93e8feea.json b/tasks/tetris/eval/gameplay-bot/calibration/93e8feea.json
@@ -18,7 +18,8 @@
"new_piece_spawns": null,
"multiple_pieces": null,
"line_clear": null,
- "score_changes": null,
+ "score_increases_on_clear": null,
+ "score_element_visible": null,
"game_over": null,
"playable_30s": null,
"multi_line_clear": null,
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/9805c24a.json b/tasks/tetris/eval/gameplay-bot/calibration/9805c24a.json
@@ -18,7 +18,8 @@
"new_piece_spawns": true,
"multiple_pieces": true,
"line_clear": null,
- "score_changes": null,
+ "score_increases_on_clear": null,
+ "score_element_visible": null,
"game_over": true,
"playable_30s": true,
"multi_line_clear": null,
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/bbb70053.json b/tasks/tetris/eval/gameplay-bot/calibration/bbb70053.json
@@ -18,7 +18,8 @@
"new_piece_spawns": null,
"multiple_pieces": null,
"line_clear": null,
- "score_changes": null,
+ "score_increases_on_clear": null,
+ "score_element_visible": null,
"game_over": null,
"playable_30s": null,
"multi_line_clear": null,
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/cbbff570.json b/tasks/tetris/eval/gameplay-bot/calibration/cbbff570.json
@@ -18,7 +18,8 @@
"new_piece_spawns": true,
"multiple_pieces": true,
"line_clear": true,
- "score_changes": true,
+ "score_increases_on_clear": true,
+ "score_element_visible": null,
"game_over": true,
"playable_30s": true,
"multi_line_clear": true,
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/e2e04e75.json b/tasks/tetris/eval/gameplay-bot/calibration/e2e04e75.json
@@ -18,7 +18,8 @@
"new_piece_spawns": true,
"multiple_pieces": true,
"line_clear": null,
- "score_changes": false,
+ "score_increases_on_clear": false,
+ "score_element_visible": null,
"game_over": true,
"playable_30s": true,
"multi_line_clear": null,