commit 2644610c24ac12d9ef707571aec1e31a934389a8
parent 8dc9ec566791cf32913b7ea8f3ba37a789ef0b86
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Fri, 10 Apr 2026 20:04:37 +0200
V2: control discovery system
Bot no longer assumes default Tetris controls. Driver now probes each
candidate key to discover what it actually does:
- ArrowDown might be hard drop instead of soft drop
- Space might be pause instead of hard drop
- Some games have no soft drop at all (skip move_down test as N/A)
Discovery is reload-safe (clears game state between probes), classifies
based on grid delta (movement direction, distance, shape change),
budget capped at 50s.
New types: GameAction, ControlMapping, ControlMap with confidence levels
New driver methods: discoverControls(), getControl()
Bot updated: move_down and soft_drop_distinct skip when soft_drop not found
Report includes control_discovery field showing what each key does
Results:
- 1d08ee76 (control swap): 67% -> 83-89%
- 4c7db3b9 (working game): 86% -> 100%
- 8fe72fce (held): 95%
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
5 files changed, 844 insertions(+), 2 deletions(-)
diff --git a/tasks/tetris/eval/gameplay-bot-v2/bot.ts b/tasks/tetris/eval/gameplay-bot-v2/bot.ts
@@ -319,6 +319,7 @@ function emptyCalibration(consoleErrors: string[]): DriverCalibration {
cellWidth: 0,
cellHeight: 0,
controls: { left: "ArrowLeft", right: "ArrowRight", down: "ArrowDown", rotate: "ArrowUp", drop: "Space" },
+ controlMap: null,
startMechanism: "unknown",
scoreElementSelector: null,
levelElementSelector: null,
@@ -455,6 +456,31 @@ export async function runAllTests(
} catch { /* keep original */ }
}
+ // Control discovery: probe what each key actually does. This is the
+ // difference between "ArrowDown is soft drop" (assumption) and "on this
+ // particular game, ArrowDown is hard drop" (measurement). Runs only once
+ // per session -- the result is cached on the driver and flows through
+ // subsequent pressKey() calls automatically.
+ if (gameStarted && cal.gridDetected) {
+ try {
+ const controlMap = await driver.discoverControls(serverUrl);
+ console.log(
+ `[bot] control discovery complete: ` +
+ `move_left=${controlMap.move_left.key ?? "?"}, ` +
+ `move_right=${controlMap.move_right.key ?? "?"}, ` +
+ `soft_drop=${controlMap.soft_drop.key ?? "NONE"}, ` +
+ `hard_drop=${controlMap.hard_drop.key ?? "?"}, ` +
+ `rotate_cw=${controlMap.rotate_cw.key ?? "?"}`
+ );
+ // Refresh the working calibration from the driver -- discoverControls()
+ // updates cal.controlMap and cal.controls on the cached calibration
+ // object in-place.
+ cal = driver.getCalibration();
+ } catch (err) {
+ console.log(`[bot] control discovery threw: ${err instanceof Error ? err.message : String(err)}`);
+ }
+ }
+
// ---- Phase 3: Basic mechanics ----
let mechanicsWork = false;
if (gameStarted && cal.gridDetected) {
@@ -1590,6 +1616,15 @@ function deriveTestResults(
results.push(skipResult(`move_${dir}`, "game did not start"));
continue;
}
+ // Not applicable: if control discovery determined that this game has no
+ // soft_drop (no key produced a single-row downward move), then move_down
+ // is a feature the game genuinely lacks rather than something that's
+ // broken. Report as skipped with a clear reason so it does not drag the
+ // score down.
+ if (dir === "down" && cal.controlMap && cal.controlMap.soft_drop.confidence === "not_found") {
+ results.push(skipResult("move_down", "no soft_drop key (game has only hard_drop)"));
+ continue;
+ }
const moveEvents = session.events.filter((e) => e.type === "piece_moved" && e.direction === dir);
if (moveEvents.length > 0) {
results.push({ name: `move_${dir}`, pass: true, detail: "grid state changed after key press (grid-verified)" });
@@ -1849,6 +1884,9 @@ function deriveTestResults(
// 24. soft_drop_distinct
if (!phaseState.gameplayWorks || !competitivePlay) {
results.push(skipResult("soft_drop_distinct", "competitive play phase did not run"));
+ } else if (cal.controlMap && cal.controlMap.soft_drop.confidence === "not_found") {
+ // Game has no soft drop at all -- not applicable.
+ results.push(skipResult("soft_drop_distinct", "no soft_drop key (game has only hard_drop)"));
} else {
const softDropTestDone = (competitivePlay as any)._softDropTestDone === true;
const softDropDistinct = (competitivePlay as any)._softDropDistinct;
diff --git a/tasks/tetris/eval/gameplay-bot-v2/driver.ts b/tasks/tetris/eval/gameplay-bot-v2/driver.ts
@@ -7,6 +7,9 @@ import type {
GridBounds,
RendererType,
Controls,
+ ControlMap,
+ ControlMapping,
+ GameAction,
StartMechanism,
StartCandidate,
TryStartResult,
@@ -29,6 +32,46 @@ const DEFAULT_CONTROLS: Controls = {
drop: "Space",
};
+/**
+ * Candidate keys to try for each abstract game action, in priority order.
+ * The discovery loop tries these from top to bottom until one matches the
+ * expected behaviour (e.g. moves piece 1 column left).
+ */
+const CONTROL_CANDIDATES: Record<GameAction, string[]> = {
+ move_left: ["ArrowLeft", "a", "h"],
+ move_right: ["ArrowRight", "d", "l"],
+ // ArrowDown LAST: some games treat it as hard_drop, so we try alternatives
+ // first and fall through. This is the key insight of the discovery system.
+ soft_drop: ["s", "ArrowDown"],
+ // ArrowDown also gets tried as hard_drop because some games bind the down
+ // arrow to hard drop instead of soft drop. Order: conventional hard-drop
+ // keys first, then the ambiguous ArrowDown last.
+ hard_drop: ["Space", "Enter", "ArrowUp", "ArrowDown"],
+ rotate_cw: ["ArrowUp", "x", "w"],
+ rotate_ccw: ["z", "Control"],
+ // pause/hold are not currently discovered; we keep these entries for type
+ // completeness only -- getControl() always returns null for them.
+ pause: ["p", "Escape"],
+ hold: ["c", "Shift"],
+};
+
+function emptyControlMap(): ControlMap {
+ const notFound = (): ControlMapping => ({
+ key: null,
+ confidence: "not_found",
+ observation: "",
+ });
+ return {
+ move_left: notFound(),
+ move_right: notFound(),
+ soft_drop: notFound(),
+ hard_drop: notFound(),
+ rotate_cw: notFound(),
+ rotate_ccw: notFound(),
+ key_observations: {},
+ };
+}
+
// ---------------------------------------------------------------------------
// GridSnapshot factory
// ---------------------------------------------------------------------------
@@ -176,6 +219,7 @@ function cloneCalibration(cal: DriverCalibration): DriverCalibration {
cellWidth: cal.cellWidth,
cellHeight: cal.cellHeight,
controls: { ...cal.controls },
+ controlMap: cal.controlMap ? cloneControlMap(cal.controlMap) : cal.controlMap ?? null,
startMechanism: cal.startMechanism,
scoreElementSelector: cal.scoreElementSelector,
levelElementSelector: cal.levelElementSelector,
@@ -195,6 +239,18 @@ function cloneCalibration(cal: DriverCalibration): DriverCalibration {
return copy;
}
+function cloneControlMap(map: ControlMap): ControlMap {
+ return {
+ move_left: { ...map.move_left },
+ move_right: { ...map.move_right },
+ soft_drop: { ...map.soft_drop },
+ hard_drop: { ...map.hard_drop },
+ rotate_cw: { ...map.rotate_cw },
+ rotate_ccw: { ...map.rotate_ccw },
+ key_observations: { ...map.key_observations },
+ };
+}
+
function gridBoundsSimilar(a: GridBounds, b: GridBounds): boolean {
// Tolerate rendering jitter but flag anything beyond ~10% size change.
const tol = Math.max(20, Math.min(a.width, b.width) * 0.15);
@@ -262,6 +318,10 @@ export class PlaywrightDriver implements TetrisDriver {
// Set by the bot when bridge verification definitively failed -- the legacy
// detectStartMechanism() fallback must NOT run and override the bot's verdict.
private startRejected: boolean = false;
+ // Result of discoverControls(). Persists across calibration cache hits so
+ // downstream phases reuse the discovered mapping without re-running the
+ // expensive discovery loop. null = not yet discovered.
+ private discoveredControls: ControlMap | null = null;
// Cumulative drift info across the session.
private drift: CalibrationDrift = {
drifted: false,
@@ -603,6 +663,7 @@ export class PlaywrightDriver implements TetrisDriver {
cellWidth: grid.cellWidth,
cellHeight: grid.cellHeight,
controls: { ...base.controls },
+ controlMap: base.controlMap ? cloneControlMap(base.controlMap) : null,
startMechanism: base.startMechanism,
scoreElementSelector: base.scoreElementSelector,
levelElementSelector: base.levelElementSelector,
@@ -1228,15 +1289,655 @@ export class PlaywrightDriver implements TetrisDriver {
// -- Input --
async pressKey(action: "left" | "right" | "down" | "rotate" | "drop"): Promise<void> {
+ // Prefer the discovered control map when available, falling back to the
+ // legacy controls field. If the discovered soft_drop is null (no soft
+ // drop on this game), pressing "down" becomes a no-op -- callers that
+ // care about distinguishing the two should check getControl("soft_drop")
+ // first.
+ const discovered = this.discoveredControls;
+ if (discovered) {
+ const mapped = this.mapLegacyActionToDiscovered(action, discovered);
+ if (mapped === null) {
+ // Explicit no-op: the game doesn't have this action. We still swallow
+ // the call silently rather than throwing so the bot's play loops that
+ // sprinkle in optional soft_drop presses don't crash.
+ return;
+ }
+ if (mapped !== undefined) {
+ await this.page.keyboard.press(mapped);
+ return;
+ }
+ }
const cal = this.cal;
const key = cal ? cal.controls[action] : DEFAULT_CONTROLS[action];
await this.page.keyboard.press(key);
}
+ /**
+ * Translate the legacy pressKey action names to the discovered control map.
+ * Returns:
+ * - a string key if discovered and valid
+ * - null if the action maps to soft_drop and soft_drop is explicitly
+ * not_found (no-op signal)
+ * - undefined if no discovery info for this action (caller falls back)
+ */
+ private mapLegacyActionToDiscovered(
+ action: "left" | "right" | "down" | "rotate" | "drop",
+ map: ControlMap
+ ): string | null | undefined {
+ switch (action) {
+ case "left":
+ return map.move_left.key ?? undefined;
+ case "right":
+ return map.move_right.key ?? undefined;
+ case "down":
+ // soft_drop "not_found" is treated as "this game has no soft drop" and
+ // returns null (no-op). NOTE: emptyControlMap() also starts every slot at
+ // "not_found", so a soft_drop probe that never ran looks the same here.
+ if (map.soft_drop.confidence === "not_found") return null;
+ return map.soft_drop.key ?? undefined;
+ case "rotate":
+ return map.rotate_cw.key ?? undefined;
+ case "drop":
+ return map.hard_drop.key ?? undefined;
+ }
+ }
+
async pressRawKey(key: string): Promise<void> {
await this.page.keyboard.press(key);
}
+ // -- Control discovery --
+
+ getControl(action: GameAction): string | null {
+ const map = this.discoveredControls;
+ if (map) {
+ switch (action) {
+ case "move_left": return map.move_left.key;
+ case "move_right": return map.move_right.key;
+ case "soft_drop": return map.soft_drop.key;
+ case "hard_drop": return map.hard_drop.key;
+ case "rotate_cw": return map.rotate_cw.key;
+ case "rotate_ccw": return map.rotate_ccw.key;
+ case "pause": return null;
+ case "hold": return null;
+ }
+ }
+ // Fallback to legacy controls field.
+ const cal = this.cal;
+ const controls = cal ? cal.controls : DEFAULT_CONTROLS;
+ switch (action) {
+ case "move_left": return controls.left;
+ case "move_right": return controls.right;
+ case "soft_drop": return controls.down;
+ case "hard_drop": return controls.drop;
+ case "rotate_cw": return controls.rotate;
+ case "rotate_ccw": return null;
+ case "pause": return null;
+ case "hold": return null;
+ }
+ }
+
+ /**
+ * Discover the control mapping by pressing candidate keys and watching
+ * grid deltas. This is expensive (can reload the page several times) so
+ * callers should only invoke it once per session.
+ *
+ * The result is cached on the driver and flows through getControl() and
+ * pressKey() from the moment it returns.
+ */
+ async discoverControls(serverUrl: string): Promise<ControlMap> {
+ const log = (msg: string) => console.log(`[discover] ${msg}`);
+ // 50s budget (checked between probes only; the final reload always runs).
+ // Discovery sits between bridge verification and the first test phase, so
+ // it must be quick. Each reload costs ~2s, so this is ~25 reloads max.
+ const deadline = Date.now() + 50_000;
+ const budgetExceeded = () => Date.now() >= deadline;
+
+ // Start from an empty map. If any key probe fails, we simply leave that
+ // slot as "not_found" with no observation.
+ const map = emptyControlMap();
+
+ // IMPORTANT: tests run after discovery expect the driver state to
+ // resemble "game freshly started". Discovery is destructive (it presses
+ // keys, stacks pieces, may even trigger game_over), so we always
+ // RELOAD between discovery phases and after discovery finishes.
+
+ // Helper: reload the page, re-apply the confirmed start, and wait for
+ // a piece to become observable.
+ const freshStart = async (): Promise<GridSnapshot | null> => {
+ try {
+ await this.loadPage(serverUrl);
+ } catch {
+ return null;
+ }
+ try {
+ // calibrate() will replay the confirmed candidate (if one is set)
+ // and populate this.cal. Discovery runs AFTER bridge verification,
+ // so confirmedCandidate is already committed at this point.
+ await this.calibrate();
+ } catch {
+ return null;
+ }
+ // Grid might still be spawning; give it a brief window.
+ await this.wait(300);
+ // Fall back to refresh in case the grid wasn't detected at the first
+ // calibrate() pass (DOM games that build cells post-click).
+ if (!this.cal?.gridDetected) {
+ try {
+ await this.refreshGridDetection();
+ } catch { /* ignore */ }
+ }
+ // Poll for an active piece so the delta classification works.
+ const emptyGrid: Grid = Array.from({ length: GRID_ROWS }, () =>
+ Array.from({ length: GRID_COLS }, () => false)
+ );
+ let snap = await this.readGrid(emptyGrid);
+ let tries = 0;
+ while ((!snap.grid || snap.filledCount === 0) && tries < 20) {
+ await this.wait(100);
+ snap = await this.readGrid(emptyGrid);
+ tries++;
+ }
+ if (!snap.grid) return null;
+ return snap;
+ };
+
+ // Helper: classify the delta between two grids.
+ // Returns a label indicating what probably happened, or "no_change".
+ const classifyDelta = (
+ before: Grid,
+ after: Grid
+ ):
+ | { kind: "no_change" }
+ | { kind: "move_left"; distance: number }
+ | { kind: "move_right"; distance: number }
+ | { kind: "move_down"; distance: number }
+ | { kind: "hard_drop"; distance: number }
+ | { kind: "rotate" }
+ | { kind: "other"; detail: string } => {
+ // Extract fill cells from each grid.
+ const cellsA: [number, number][] = [];
+ const cellsB: [number, number][] = [];
+ for (let r = 0; r < before.length; r++) {
+ for (let c = 0; c < before[r].length; c++) {
+ if (before[r][c]) cellsA.push([r, c]);
+ if (after[r][c]) cellsB.push([r, c]);
+ }
+ }
+ // Same cells? No change.
+ const keyA = cellsA.map(([r, c]) => `${r},${c}`).sort().join("|");
+ const keyB = cellsB.map(([r, c]) => `${r},${c}`).sort().join("|");
+ if (keyA === keyB) return { kind: "no_change" };
+
+ // Try to detect a rigid translation of the active piece by looking at
+ // the symmetric differences: cells that disappeared and cells that
+ // appeared. (No cell-count similarity check is applied at this point.)
+ const setA = new Set(keyA.split("|"));
+ const setB = new Set(keyB.split("|"));
+ const disappeared: [number, number][] = [];
+ const appeared: [number, number][] = [];
+ for (const [r, c] of cellsA) {
+ if (!setB.has(`${r},${c}`)) disappeared.push([r, c]);
+ }
+ for (const [r, c] of cellsB) {
+ if (!setA.has(`${r},${c}`)) appeared.push([r, c]);
+ }
+ if (disappeared.length === 0 && appeared.length === 0) {
+ return { kind: "no_change" };
+ }
+
+ // Compute centroids + shapes of the disappeared/appeared sets.
+ const avg = (arr: [number, number][]) => {
+ const sumR = arr.reduce((s, [r]) => s + r, 0);
+ const sumC = arr.reduce((s, [, c]) => s + c, 0);
+ return [sumR / arr.length, sumC / arr.length] as [number, number];
+ };
+ const normalize = (cells: [number, number][]): string => {
+ if (cells.length === 0) return "";
+ const minR = Math.min(...cells.map(([r]) => r));
+ const minC = Math.min(...cells.map(([, c]) => c));
+ return cells
+ .map(([r, c]) => `${r - minR},${c - minC}`)
+ .sort()
+ .join("|");
+ };
+
+ // Case 1: equal-size symmetric diff -> pure translation or rotation
+ // of the active piece. When a 4-cell piece moves 1 column, some cells
+ // overlap between old/new position (e.g. O-piece has 2 overlapping
+ // cells), so the symmetric diff can be as small as 2 cells. We allow
+ // 1-4 cells here.
+ if (
+ disappeared.length === appeared.length &&
+ disappeared.length >= 1 &&
+ disappeared.length <= 4
+ ) {
+ const [avgRA, avgCA] = avg(disappeared);
+ const [avgRB, avgCB] = avg(appeared);
+ const dRow = avgRB - avgRA;
+ const dCol = avgCB - avgCA;
+ const shapeA = normalize(disappeared);
+ const shapeB = normalize(appeared);
+ // Compare full-piece footprints: if a 4-cell piece rotates, the
+ // set of cells in the "disappeared" union with the settled grid
+ // can be very different from before. A cleaner rotation signal is
+ // the total cell count in before vs after. For a pure translation,
+ // the piece has the same bounding-box footprint (same shape) but
+ // shifted. For a rotation, the bounding box dimensions usually
+ // change (tall->wide or wide->tall).
+ const fullBB = (cells: [number, number][]) => {
+ if (cells.length === 0) return { w: 0, h: 0 };
+ return {
+ w: Math.max(...cells.map(([, c]) => c)) - Math.min(...cells.map(([, c]) => c)) + 1,
+ h: Math.max(...cells.map(([r]) => r)) - Math.min(...cells.map(([r]) => r)) + 1,
+ };
+ };
+ const bbA = fullBB(cellsA);
+ const bbB = fullBB(cellsB);
+ // Total-cells test: if the piece count didn't change and the
+ // bounding box aspect flipped (e.g. 4x1 -> 1x4), that's rotation.
+ // This catches I-piece rotation reliably.
+ if (
+ cellsA.length === cellsB.length &&
+ ((bbA.w !== bbB.w) || (bbA.h !== bbB.h)) &&
+ Math.abs(bbA.w - bbB.h) <= 1 && Math.abs(bbA.h - bbB.w) <= 1
+ ) {
+ return { kind: "rotate" };
+ }
+
+ // Rotation by shape change: if the disappeared/appeared shapes
+ // differ, and the centroid drift doesn't look like a clean 1-col
+ // horizontal translation, classify as rotation. Checked BEFORE the
+ // horizontal/vertical translation tests so rotations that coincide
+ // with auto-drop are still tagged as rotations, not translations.
+ if (shapeA !== shapeB) {
+ // A clean horizontal translation has dCol >= 1 and dRow small.
+ // A clean vertical translation has dRow >= 1 and dCol small.
+ // Anything else is rotation when the shape differs.
+ const looksLikeClearHorizontal =
+ Math.abs(dCol) >= 0.9 && Math.abs(dRow) < 0.6;
+ const looksLikeClearVertical =
+ Math.abs(dCol) < 0.6 && dRow >= 0.9;
+ if (!looksLikeClearHorizontal && !looksLikeClearVertical) {
+ return { kind: "rotate" };
+ }
+ }
+
+ // Horizontal translation: large dCol dominates, shape is stable.
+ if (shapeA === shapeB && Math.abs(dCol) >= 0.9 && Math.abs(dRow) <= 1.2) {
+ if (dCol <= -0.9) return { kind: "move_left", distance: Math.max(1, Math.round(-dCol)) };
+ if (dCol >= 0.9) return { kind: "move_right", distance: Math.max(1, Math.round(dCol)) };
+ }
+ // Vertical translation: dRow dominates, small dCol drift allowed.
+ if (shapeA === shapeB && Math.abs(dCol) < 0.7 && dRow >= 0.5) {
+ const distance = Math.max(1, Math.round(dRow));
+ if (distance >= 5) return { kind: "hard_drop", distance };
+ return { kind: "move_down", distance };
+ }
+ // Shape same but neither clearly horizontal nor vertical -> fall
+ // through to "other". This avoids misclassifying noise as motion.
+ }
+
+ // Case 2: 3+ cells appeared and 3+ disappeared, with the appeared cells
+ // averaging 4+ rows lower -> hard drop that teleported the piece.
+ if (appeared.length >= 3 && disappeared.length >= 3) {
+ const avgAppearedRow = avg(appeared)[0];
+ const avgDisappearedRow = avg(disappeared)[0];
+ const dRow = avgAppearedRow - avgDisappearedRow;
+ if (dRow >= 4) {
+ return { kind: "hard_drop", distance: Math.round(dRow) };
+ }
+ }
+
+ // Case 3: more appeared than disappeared (a new piece spawned while
+ // the old one dropped and locked). Treat as hard drop if the old
+ // piece ended up near the bottom.
+ if (appeared.length > disappeared.length && appeared.length >= 4) {
+ // Find the set of appeared cells in the bottom half.
+ const bottomAppeared = appeared.filter(([r]) => r >= GRID_ROWS / 2);
+ if (bottomAppeared.length >= 3 && disappeared.length <= 4) {
+ const avgDisappearedRow = disappeared.length > 0
+ ? avg(disappeared)[0]
+ : 0;
+ const avgBottomRow = avg(bottomAppeared)[0];
+ const dRow = avgBottomRow - avgDisappearedRow;
+ if (dRow >= 4) {
+ return { kind: "hard_drop", distance: Math.round(dRow) };
+ }
+ }
+ }
+
+ return { kind: "other", detail: `disappeared=${disappeared.length}, appeared=${appeared.length}` };
+ };
+
+ // Helper: try a single candidate key for an action, return whether it
+ // matched the expected classification.
+ const tryCandidateKey = async (
+ action: GameAction,
+ key: string,
+ expected: (
+ delta: ReturnType<typeof classifyDelta>
+ ) => { matched: boolean; observation: string }
+ ): Promise<boolean> => {
+ // Snapshot before.
+ const before = await this.readGrid();
+ if (!before.grid) {
+ map.key_observations[key] = "grid read failed before press";
+ return false;
+ }
+ // Ignore keys on a fully-empty grid -- the candidate might do nothing
+ // simply because there's no active piece yet.
+ if (before.filledCount === 0) {
+ map.key_observations[key] = "grid empty before press (no piece)";
+ return false;
+ }
+ try {
+ await this.pressRawKey(key);
+ } catch {
+ map.key_observations[key] = "keyboard press threw";
+ return false;
+ }
+ await this.wait(120);
+ const after = await this.readGrid();
+ if (!after.grid) {
+ map.key_observations[key] = "grid read failed after press";
+ return false;
+ }
+ const delta = classifyDelta(before.grid, after.grid);
+ const result = expected(delta);
+ // Only record this observation if we don't already have one for the key
+ // (first observation wins -- keeps the report meaningful).
+ if (!map.key_observations[key]) {
+ map.key_observations[key] = result.observation;
+ }
+ if (result.matched) {
+ const slot: ControlMapping = {
+ key,
+ confidence: "suspected",
+ observation: result.observation,
+ };
+ switch (action) {
+ case "move_left": map.move_left = slot; break;
+ case "move_right": map.move_right = slot; break;
+ case "soft_drop": map.soft_drop = slot; break;
+ case "hard_drop": map.hard_drop = slot; break;
+ case "rotate_cw": map.rotate_cw = slot; break;
+ case "rotate_ccw": map.rotate_ccw = slot; break;
+ default: break;
+ }
+ return true;
+ }
+ return false;
+ };
+
+ // ---- Movement discovery (order: least disruptive first) ----
+ // Try each action in priority order, reloading between actions to get
+ // a fresh piece that hasn't already moved from the previous probe.
+
+ // move_left
+ if (!budgetExceeded()) {
+ log("phase: move_left");
+ await freshStart();
+ for (const key of CONTROL_CANDIDATES.move_left) {
+ if (budgetExceeded()) break;
+ const matched = await tryCandidateKey("move_left", key, (delta) => {
+ if (delta.kind === "move_left") {
+ return { matched: true, observation: `moved ${delta.distance} col(s) left` };
+ }
+ if (delta.kind === "move_right") {
+ return { matched: false, observation: `moved ${delta.distance} col(s) right (wrong direction)` };
+ }
+ if (delta.kind === "hard_drop") {
+ return { matched: false, observation: `hard_drop (${delta.distance} rows)` };
+ }
+ if (delta.kind === "move_down") {
+ return { matched: false, observation: `moved ${delta.distance} row(s) down` };
+ }
+ if (delta.kind === "rotate") {
+ return { matched: false, observation: "rotation" };
+ }
+ if (delta.kind === "no_change") {
+ return { matched: false, observation: "no change" };
+ }
+ return { matched: false, observation: `other change (${delta.detail})` };
+ });
+ if (matched) { log(` move_left: ${key}`); break; }
+ }
+ }
+
+ // move_right -- reload first so the probe starts from a fresh piece that
+ // the move_left probes have not already shifted or let fall.
+ if (!budgetExceeded()) {
+ log("phase: move_right");
+ await freshStart();
+ for (const key of CONTROL_CANDIDATES.move_right) {
+ if (budgetExceeded()) break;
+ const matched = await tryCandidateKey("move_right", key, (delta) => {
+ if (delta.kind === "move_right") {
+ return { matched: true, observation: `moved ${delta.distance} col(s) right` };
+ }
+ if (delta.kind === "move_left") {
+ return { matched: false, observation: `moved ${delta.distance} col(s) left (wrong direction)` };
+ }
+ if (delta.kind === "hard_drop") {
+ return { matched: false, observation: `hard_drop (${delta.distance} rows)` };
+ }
+ if (delta.kind === "move_down") {
+ return { matched: false, observation: `moved ${delta.distance} row(s) down` };
+ }
+ if (delta.kind === "rotate") {
+ return { matched: false, observation: "rotation" };
+ }
+ if (delta.kind === "no_change") {
+ return { matched: false, observation: "no change" };
+ }
+ return { matched: false, observation: `other change (${delta.detail})` };
+ });
+ if (matched) { log(` move_right: ${key}`); break; }
+ }
+ }
+
+ // rotate_cw. Fast path: if an earlier phase observed this key producing
+ // a rotation, promote it without re-testing.
+ if (!budgetExceeded()) {
+ log("phase: rotate_cw");
+ // Check if a prior phase already saw one of the rotate candidates as
+ // a rotation.
+ let promotedEarly = false;
+ for (const key of CONTROL_CANDIDATES.rotate_cw) {
+ const obs = map.key_observations[key];
+ if (obs === "rotation" || obs === "shape changed (rotation)") {
+ map.rotate_cw = {
+ key,
+ confidence: "suspected",
+ observation: "rotation (promoted from earlier phase)",
+ };
+ log(` rotate_cw: ${key} (promoted from observation)`);
+ promotedEarly = true;
+ break;
+ }
+ }
+ if (!promotedEarly) {
+ await freshStart();
+ for (const key of CONTROL_CANDIDATES.rotate_cw) {
+ if (budgetExceeded()) break;
+ const matched = await tryCandidateKey("rotate_cw", key, (delta) => {
+ if (delta.kind === "rotate") {
+ return { matched: true, observation: "shape changed (rotation)" };
+ }
+ if (delta.kind === "hard_drop") {
+ return { matched: false, observation: `hard_drop (${delta.distance} rows)` };
+ }
+ if (delta.kind === "move_down") {
+ return { matched: false, observation: `moved ${delta.distance} row(s) down` };
+ }
+ if (delta.kind === "move_left") {
+ return { matched: false, observation: `moved ${delta.distance} col(s) left` };
+ }
+ if (delta.kind === "move_right") {
+ return { matched: false, observation: `moved ${delta.distance} col(s) right` };
+ }
+ if (delta.kind === "no_change") {
+ return { matched: false, observation: "no change" };
+ }
+ return { matched: false, observation: `other change (${delta.detail})` };
+ });
+ if (matched) { log(` rotate_cw: ${key}`); break; }
+ }
+ }
+ }
+
+ // hard_drop (do this BEFORE soft_drop so we know which key teleports).
+ // IMPORTANT: hard drop candidates include keys like Space that may be
+ // bound to other functions (pause, confirm, etc). Pressing Space on a
+ // game that uses it for pause will freeze all subsequent probes. So we
+ // RELOAD before every hard_drop attempt to guarantee a clean state.
+ if (!budgetExceeded()) {
+ log("phase: hard_drop");
+ for (const key of CONTROL_CANDIDATES.hard_drop) {
+ if (budgetExceeded()) break;
+ if (map.hard_drop.confidence !== "not_found") break;
+ // Prior observation fast-path: skip any key that already has a recorded
+ // observation (including "no change") unless it mentions a hard drop.
+ const priorObs = map.key_observations[key];
+ if (priorObs && !priorObs.includes("teleported") && !priorObs.includes("hard_drop")) {
+ continue;
+ }
+ // Always reload before a hard_drop probe.
+ await freshStart();
+ const matched = await tryCandidateKey("hard_drop", key, (delta) => {
+ if (delta.kind === "hard_drop") {
+ return { matched: true, observation: `teleported ${delta.distance} rows to bottom` };
+ }
+ if (delta.kind === "move_down") {
+ return {
+ matched: false,
+ observation: `moved ${delta.distance} row(s) down (soft drop, not hard drop)`,
+ };
+ }
+ if (delta.kind === "rotate") {
+ return { matched: false, observation: "rotation" };
+ }
+ if (delta.kind === "move_left") {
+ return { matched: false, observation: `moved ${delta.distance} col(s) left` };
+ }
+ if (delta.kind === "move_right") {
+ return { matched: false, observation: `moved ${delta.distance} col(s) right` };
+ }
+ if (delta.kind === "no_change") {
+ return { matched: false, observation: "no change" };
+ }
+ return { matched: false, observation: `other change (${delta.detail})` };
+ });
+ if (matched) { log(` hard_drop: ${key}`); break; }
+ }
+ }
+
+ // soft_drop -- the main control mismatch shows up here (ArrowDown may be
+ // hard drop). Try alternatives FIRST; keys already claimed are skipped.
+ if (!budgetExceeded()) {
+ log("phase: soft_drop");
+ await freshStart();
+ // Skip ArrowDown if we already discovered it maps to hard_drop on
+ // this game. Same for any key already claimed by another action.
+ const claimedKeys = new Set<string>();
+ for (const slot of [map.move_left, map.move_right, map.hard_drop, map.rotate_cw]) {
+ if (slot.key) claimedKeys.add(slot.key);
+ }
+ for (const key of CONTROL_CANDIDATES.soft_drop) {
+ if (budgetExceeded()) break;
+ if (claimedKeys.has(key)) {
+ // ArrowDown was already claimed as hard_drop etc.
+ map.key_observations[key] =
+ map.key_observations[key] || "already claimed by another action";
+ continue;
+ }
+ const matched = await tryCandidateKey("soft_drop", key, (delta) => {
+ if (delta.kind === "move_down") {
+ return {
+ matched: delta.distance >= 1 && delta.distance <= 3,
+ observation: `moved ${delta.distance} row(s) down`,
+ };
+ }
+ if (delta.kind === "hard_drop") {
+ return {
+ matched: false,
+ observation: `teleported ${delta.distance} rows (hard_drop, not soft_drop)`,
+ };
+ }
+ if (delta.kind === "rotate") {
+ return { matched: false, observation: "rotation" };
+ }
+ if (delta.kind === "move_left") {
+ return { matched: false, observation: `moved ${delta.distance} col(s) left` };
+ }
+ if (delta.kind === "move_right") {
+ return { matched: false, observation: `moved ${delta.distance} col(s) right` };
+ }
+ if (delta.kind === "no_change") {
+ return { matched: false, observation: "no change" };
+ }
+ return { matched: false, observation: `other change (${delta.detail})` };
+ });
+ if (matched) { log(` soft_drop: ${key}`); break; }
+ }
+ if (map.soft_drop.confidence === "not_found") {
+ log(" soft_drop: NOT FOUND");
+ }
+ }
+
+ // rotate_ccw (best effort; cheap skip if budget exceeded)
+ if (!budgetExceeded()) {
+ log("phase: rotate_ccw");
+ await freshStart();
+ for (const key of CONTROL_CANDIDATES.rotate_ccw) {
+ if (budgetExceeded()) break;
+ const matched = await tryCandidateKey("rotate_ccw", key, (delta) => {
+ if (delta.kind === "rotate") {
+ return { matched: true, observation: "shape changed (rotation)" };
+ }
+ if (delta.kind === "no_change") {
+ return { matched: false, observation: "no change" };
+ }
+ return { matched: false, observation: `other change` };
+ });
+ if (matched) { log(` rotate_ccw: ${key}`); break; }
+ }
+ }
+
+ // Final reload so tests start from a clean state.
+ await freshStart();
+
+ this.discoveredControls = map;
+ // Mirror into the cached calibration so bot-side code that reads
+ // cal.controlMap sees the discovery result.
+ if (this.cal) {
+ this.cal.controlMap = map;
+ // Also back-port the discovered keys into the legacy controls field
+ // so any code that still reads cal.controls.<x> keeps working.
+ if (map.move_left.key) this.cal.controls.left = map.move_left.key;
+ if (map.move_right.key) this.cal.controls.right = map.move_right.key;
+ if (map.rotate_cw.key) this.cal.controls.rotate = map.rotate_cw.key;
+ if (map.hard_drop.key) this.cal.controls.drop = map.hard_drop.key;
+ // Only override the legacy `down` if we found a distinct soft drop key.
+ // If soft_drop is not_found, leave `down` alone so legacy callers still
+ // have SOMETHING to press -- pressKey() will intercept and no-op anyway.
+ if (map.soft_drop.key) this.cal.controls.down = map.soft_drop.key;
+ }
+ // Persist into the first-cal baseline too so cache replays preserve the
+ // discovered map.
+ if (this.firstCal) {
+ this.firstCal.controlMap = cloneControlMap(map);
+ if (map.move_left.key) this.firstCal.controls.left = map.move_left.key;
+ if (map.move_right.key) this.firstCal.controls.right = map.move_right.key;
+ if (map.rotate_cw.key) this.firstCal.controls.rotate = map.rotate_cw.key;
+ if (map.hard_drop.key) this.firstCal.controls.drop = map.hard_drop.key;
+ if (map.soft_drop.key) this.firstCal.controls.down = map.soft_drop.key;
+ }
+ return map;
+ }
+
async wait(ms: number): Promise<void> {
await this.page.waitForTimeout(ms);
}
diff --git a/tasks/tetris/eval/gameplay-bot-v2/index.ts b/tasks/tetris/eval/gameplay-bot-v2/index.ts
@@ -89,7 +89,7 @@ test.describe("Tetris Gameplay Bot v2", () => {
});
test("run gameplay bot", async ({ page }) => {
- test.setTimeout(300_000); // 5-minute total timeout
+ test.setTimeout(360_000); // 6-minute total timeout (discovery adds ~35s)
// Measure page load time
let loadTimeMs = -1;
@@ -151,6 +151,34 @@ test.describe("Tetris Gameplay Bot v2", () => {
cleanCompetitivePlay = clean;
}
+ // Build the control_discovery report field from the discovered map
+ // (may be null if discovery never ran because the game didn't start).
+ let controlDiscoveryReport: Record<string, string> | undefined;
+ if (calibration.controlMap) {
+ const cm = calibration.controlMap;
+ controlDiscoveryReport = {};
+ const actionDescriptions: Array<[string, { key: string | null; observation: string; confidence: string }]> = [
+ ["move_left", cm.move_left],
+ ["move_right", cm.move_right],
+ ["soft_drop", cm.soft_drop],
+ ["hard_drop", cm.hard_drop],
+ ["rotate_cw", cm.rotate_cw],
+ ["rotate_ccw", cm.rotate_ccw],
+ ];
+ for (const [name, mapping] of actionDescriptions) {
+ if (mapping.key) {
+ controlDiscoveryReport[name] = `${mapping.key}${mapping.observation ? ` (${mapping.observation})` : ""}`;
+ } else {
+ controlDiscoveryReport[name] = "NOT FOUND";
+ }
+ }
+ // Also report per-key observations so readers can see what every
+ // candidate did during discovery.
+ for (const [key, obs] of Object.entries(cm.key_observations)) {
+ if (obs) controlDiscoveryReport[`key:${key}`] = obs;
+ }
+ }
+
const report: BotReport = {
implementation: {
renderer: calibration.renderer,
@@ -158,6 +186,7 @@ test.describe("Tetris Gameplay Bot v2", () => {
grid_detected_at: calibration.gridDetectedAt || "initial",
grid_bounds: calibration.gridBounds,
controls: calibration.controls as unknown as Record<string, string>,
+ control_discovery: controlDiscoveryReport,
start_mechanism: calibration.startMechanism,
score_element_found: calibration.scoreElementSelector !== null,
grid_confidence: calibration.gridConfidence,
@@ -213,6 +242,14 @@ test.describe("Tetris Gameplay Bot v2", () => {
console.log(`Grid read success rate: ${Math.round(gridSuccessRate * 100)}%`);
console.log(`Start mechanism: ${calibration.startMechanism}`);
console.log(`Score element: ${calibration.scoreElementSelector ?? "none"}`);
+ if (calibration.controlMap) {
+ const cm = calibration.controlMap;
+ console.log(
+ `Controls: left=${cm.move_left.key ?? "?"}, right=${cm.move_right.key ?? "?"}, ` +
+ `rotate=${cm.rotate_cw.key ?? "?"}, hard_drop=${cm.hard_drop.key ?? "?"}, ` +
+ `soft_drop=${cm.soft_drop.key ?? "NONE"}`
+ );
+ }
console.log(`\nTests: ${passed}/${total} passed, ${skipped} skipped, ${failed} failed`);
console.log(`Score: ${report.summary.score} (${passed}/${scorable} scorable)`);
for (const t of testResults) {
diff --git a/tasks/tetris/eval/gameplay-bot-v2/playwright.config.ts b/tasks/tetris/eval/gameplay-bot-v2/playwright.config.ts
@@ -3,7 +3,7 @@ import { defineConfig } from "@playwright/test";
export default defineConfig({
testDir: ".",
testMatch: "index.ts",
- timeout: 60_000,
+ timeout: 360_000,
retries: 0,
workers: 1,
reporter: [["list"]],
diff --git a/tasks/tetris/eval/gameplay-bot-v2/types.ts b/tasks/tetris/eval/gameplay-bot-v2/types.ts
@@ -21,6 +21,50 @@ export interface Controls {
drop: string;
}
+/**
+ * Abstract game action -- the thing the bot wants to do, independent of which
+ * physical key performs it.
+ */
+export type GameAction =
+ | "move_left"
+ | "move_right"
+ | "soft_drop"
+ | "hard_drop"
+ | "rotate_cw"
+ | "rotate_ccw"
+ | "pause"
+ | "hold";
+
+/**
+ * The outcome of control discovery for one action. `confidence` is one of:
+ * - "suspected": the key did something plausible on one trial
+ * - "confirmed": verified on a fresh reload (currently unused, reserved)
+ * - "not_found": no candidate key produced the expected behaviour
+ */
+export interface ControlMapping {
+ /** The discovered key, or null if no candidate matched. */
+ key: string | null;
+ confidence: "suspected" | "confirmed" | "not_found";
+ /** Human-readable description of what was observed. */
+ observation: string;
+}
+
+/**
+ * Discovered control map. Produced by driver.discoverControls() after the
+ * game is started. Unlike the legacy Controls, an action's `key` may be null
+ * (e.g. soft_drop is optional -- some games don't implement it).
+ */
+export interface ControlMap {
+ move_left: ControlMapping;
+ move_right: ControlMapping;
+ soft_drop: ControlMapping;
+ hard_drop: ControlMapping;
+ rotate_cw: ControlMapping;
+ rotate_ccw: ControlMapping;
+ /** Observations for every key tried, keyed by the raw key name. */
+ key_observations: Record<string, string>;
+}
+
/** How the game was started. */
export type StartMechanism =
| "auto"
@@ -87,6 +131,8 @@ export interface DriverCalibration {
cellWidth: number;
cellHeight: number;
controls: Controls;
+ /** Discovered control map; null or absent if discovery has not run yet. */
+ controlMap?: ControlMap | null;
startMechanism: StartMechanism;
scoreElementSelector: string | null;
levelElementSelector: string | null;
@@ -175,6 +221,25 @@ export interface TetrisDriver {
pressRawKey(key: string): Promise<void>;
wait(ms: number): Promise<void>;
+ // -- Control discovery --
+ /**
+ * Run the control discovery loop against the currently-started game.
+ * Tries candidate keys for each abstract game action, observes grid deltas,
+ * and classifies each. Populates the driver's cached control map so
+ * subsequent pressKey() calls use the discovered mapping.
+ *
+ * Expensive (multiple reloads). Callers should invoke this at most once
+ * per session; the result is cached and re-applied across calibration
+ * cache hits.
+ */
+ discoverControls(serverUrl: string): Promise<ControlMap>;
+ /**
+ * Return the discovered key for a given action, or null if not found.
+ * Before discoverControls() has run, returns the legacy default key from
+ * the Controls struct.
+ */
+ getControl(action: GameAction): string | null;
+
// -- Score/Level/Lines Reading --
readScore(): Promise<number | null>;
readLevel(): Promise<number | null>;
@@ -290,6 +355,7 @@ export interface BotReport {
grid_detected_at: string;
grid_bounds: GridBounds | null;
controls: Record<string, string>;
+ control_discovery?: Record<string, string>;
start_mechanism: string;
score_element_found: boolean;
grid_confidence: number;