commit 1d5cce537fb6c78ac946ca42dca010215a97e6fd
parent 4ce8d09103c723f23b4f1d266fe3aef143995996
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Thu, 9 Apr 2026 09:03:26 +0200
Language-agnostic start detection for gameplay bot
Rewrote start mechanism detection to be fully language-agnostic:
- No text string matching (removed btn/start/play selectors)
- Detects clickable elements by cursor:pointer, background color, size
- Sorts candidates by prominence (size, center proximity, contrast)
- Keyboard triggers (Enter, Space) tried before DOM buttons
- Wait times reduced: post-action waits 300ms -> 100ms, visual-change polling interval 200ms -> 100ms, Escape settle wait 100ms -> 50ms
- Overlay detection purely structural (position, z-index, viewport %)
Tested: Spanish DOM game now starts correctly (game_starts: PASS via space).
Spanish DOM game 2 scores 89% (up from 80%).
Canvas games still blocked by GPU pixel readback issue.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
1 file changed, 136 insertions(+), 120 deletions(-)
diff --git a/tasks/tetris/eval/gameplay-bot/calibrate.ts b/tasks/tetris/eval/gameplay-bot/calibrate.ts
@@ -361,12 +361,13 @@ interface StartDetectionResult {
}
/**
- * 5-phase start detection. Language-agnostic, visual-first.
+ * 5-phase start detection. Fully language-agnostic, visual-first.
+ * No text matching of any kind -- detection is purely structural and behavioral.
*
- * Phase 1: Auto-start (1s, no input)
- * Phase 2: DOM button discovery (click all clickable elements by prominence)
- * Phase 3: Canvas click grid (for canvas-rendered buttons)
- * Phase 4: Keyboard triggers (Enter, Space, ArrowDown, Z, combos)
+ * Phase 1: Auto-start (no input, visual change detection)
+ * Phase 2: Keyboard triggers (Enter, Space, ArrowDown, Z -- fast, universal)
+ * Phase 3: DOM button discovery (click all clickable elements by visual prominence)
+ * Phase 4: Canvas click grid (for canvas-rendered buttons)
* Phase 5: Retry phases 2-4 (some games need two interactions)
*
* Total budget: 30 seconds.
@@ -398,37 +399,43 @@ async function detectStartMechanism(page: Page): Promise<StartDetectionResult> {
}
}
- // ---- Phase 2: DOM button discovery (language-agnostic) ----
- {
- log("Phase 2: trying DOM buttons...");
- const phase2Result = await tryDomButtons(page, budgetExceeded);
- log(`Phase 2 result: ${phase2Result ? `found=${phase2Result.mechanism}` : "none"}`);
- if (phase2Result) return phase2Result;
+ // ---- Phase 2: Keyboard triggers (fast, language-agnostic) ----
+ if (!budgetExceeded()) {
+ log("Phase 2: trying keyboard triggers...");
+ const phase2Result = await tryKeyboardTriggers(page, budgetExceeded);
+ if (phase2Result) {
+ log(`Phase 2 result: found=${phase2Result.mechanism}`);
+ return phase2Result;
+ }
+ log("Phase 2 result: none");
}
- // ---- Phase 3: Canvas click grid ----
+ // ---- Phase 3: DOM button discovery (language-agnostic, visual-only) ----
if (!budgetExceeded()) {
- const phase3Result = await tryCanvasClicks(page, budgetExceeded);
+ log("Phase 3: trying DOM buttons...");
+ const phase3Result = await tryDomButtons(page, budgetExceeded);
+ log(`Phase 3 result: ${phase3Result ? `found=${phase3Result.mechanism}` : "none"}`);
if (phase3Result) return phase3Result;
}
- // ---- Phase 4: Keyboard triggers ----
+ // ---- Phase 4: Canvas click grid ----
if (!budgetExceeded()) {
- const phase4Result = await tryKeyboardTriggers(page, budgetExceeded);
+ log("Phase 4: trying canvas clicks...");
+ const phase4Result = await tryCanvasClicks(page, budgetExceeded);
if (phase4Result) return phase4Result;
}
// ---- Phase 5: Retry phases 2-4 (some games need two interactions) ----
if (!budgetExceeded()) {
- const phase2Retry = await tryDomButtons(page, budgetExceeded);
+ const phase2Retry = await tryKeyboardTriggers(page, budgetExceeded);
if (phase2Retry) return phase2Retry;
}
if (!budgetExceeded()) {
- const phase3Retry = await tryCanvasClicks(page, budgetExceeded);
+ const phase3Retry = await tryDomButtons(page, budgetExceeded);
if (phase3Retry) return phase3Retry;
}
if (!budgetExceeded()) {
- const phase4Retry = await tryKeyboardTriggers(page, budgetExceeded);
+ const phase4Retry = await tryCanvasClicks(page, budgetExceeded);
if (phase4Retry) return phase4Retry;
}
@@ -436,95 +443,114 @@ async function detectStartMechanism(page: Page): Promise<StartDetectionResult> {
}
/**
- * Phase 2: Find all clickable DOM elements (language-agnostic, no text matching).
- * Sort by visual prominence (size, centrality). Click each and observe.
+ * Phase 3: Find all clickable DOM elements (fully language-agnostic, no text matching).
+ * Finds buttons, anchors, role=button, onclick, and cursor:pointer elements.
+ * Sort by visual prominence (size, centrality, contrast). Click each and observe.
*/
async function tryDomButtons(
page: Page,
budgetExceeded: () => boolean
): Promise<StartDetectionResult | null> {
try {
- // Collect all clickable elements
- const clickableSelector =
- 'button, a, [role="button"], [onclick], input[type="button"], input[type="submit"]';
- const visualSelector =
- '[class*="btn"], [class*="button"], [class*="start"], [class*="play"], ' +
- '[id*="start"], [id*="play"], [id*="btn"]';
-
- // Gather element info (position, size, text) for sorting
- const elementInfos = await page.evaluate(
- ({ clickableSelector, visualSelector }) => {
- const seen = new Set<Element>();
- const results: Array<{
- index: number;
- text: string;
- x: number;
- y: number;
- width: number;
- height: number;
- area: number;
- centerDist: number;
- selector: string;
- }> = [];
-
- const allElements: Element[] = [];
- for (const el of document.querySelectorAll(clickableSelector)) {
- if (!seen.has(el)) {
- seen.add(el);
- allElements.push(el);
- }
- }
- for (const el of document.querySelectorAll(visualSelector)) {
- if (!seen.has(el)) {
+ // Gather element info (position, size, text) for sorting -- purely structural/visual
+ const elementInfos = await page.evaluate(() => {
+ const seen = new Set<Element>();
+ const results: Array<{
+ index: number;
+ text: string;
+ x: number;
+ y: number;
+ width: number;
+ height: number;
+ area: number;
+ centerDist: number;
+ selector: string;
+ hasBackground: boolean;
+ }> = [];
+
+ // Phase A: structural clickable elements (type-based, no text matching)
+ const clickableSelector =
+ 'button, a, [role="button"], [onclick], input[type="button"], input[type="submit"]';
+ for (const el of document.querySelectorAll(clickableSelector)) {
+ if (!seen.has(el)) seen.add(el);
+ }
+
+ // Phase B: elements with cursor:pointer computed style (catches custom divs/spans acting as buttons)
+ const allEls = document.querySelectorAll("*");
+ for (const el of allEls) {
+ if (seen.has(el)) continue;
+ try {
+ const style = window.getComputedStyle(el);
+ if (style.cursor === "pointer") {
seen.add(el);
- allElements.push(el);
}
- }
+ } catch { /* skip */ }
+ }
- const pageW = window.innerWidth;
- const pageH = window.innerHeight;
- const pageCenterX = pageW / 2;
- const pageCenterY = pageH / 2;
-
- for (let i = 0; i < allElements.length; i++) {
- const el = allElements[i];
- const rect = el.getBoundingClientRect();
- if (rect.width < 5 || rect.height < 5) continue;
- if (rect.top > pageH || rect.left > pageW) continue;
-
- const cx = rect.left + rect.width / 2;
- const cy = rect.top + rect.height / 2;
- const centerDist = Math.sqrt((cx - pageCenterX) ** 2 + (cy - pageCenterY) ** 2);
-
- let selector = "";
- if (el.id) {
- selector = `#${el.id}`;
- } else if ((el as HTMLElement).className) {
- const cls = (el as HTMLElement).className.toString().split(" ")[0];
- if (cls) selector = `${el.tagName.toLowerCase()}.${cls}`;
+ const pageW = window.innerWidth;
+ const pageH = window.innerHeight;
+ const pageCenterX = pageW / 2;
+ const pageCenterY = pageH / 2;
+
+ let idx = 0;
+ for (const el of seen) {
+ const rect = el.getBoundingClientRect();
+ if (rect.width < 5 || rect.height < 5) continue;
+ if (rect.top > pageH || rect.left > pageW) continue;
+ // Skip elements that cover most of the viewport (overlays, not buttons)
+ if (rect.width > pageW * 0.8 && rect.height > pageH * 0.8) continue;
+
+ const cx = rect.left + rect.width / 2;
+ const cy = rect.top + rect.height / 2;
+ const centerDist = Math.sqrt((cx - pageCenterX) ** 2 + (cy - pageCenterY) ** 2);
+
+ // Check if element has a distinct background (high contrast, likely a button)
+ let hasBackground = false;
+ try {
+ const style = window.getComputedStyle(el as HTMLElement);
+ const bg = style.backgroundColor;
+ // transparent or rgba(0,0,0,0) means no background
+ if (bg && bg !== "transparent" && bg !== "rgba(0, 0, 0, 0)") {
+ hasBackground = true;
}
- if (!selector) selector = `${el.tagName.toLowerCase()}:nth-of-type(${i + 1})`;
-
- results.push({
- index: i,
- text: (el.textContent || "").trim().slice(0, 50),
- x: Math.round(cx),
- y: Math.round(cy),
- width: rect.width,
- height: rect.height,
- area: rect.width * rect.height,
- centerDist,
- selector,
- });
+ } catch { /* skip */ }
+
+ let selector = "";
+ if (el.id) {
+ selector = `#${el.id}`;
+ } else if ((el as HTMLElement).className) {
+ const cls = (el as HTMLElement).className.toString().split(" ")[0];
+ if (cls) selector = `${el.tagName.toLowerCase()}.${cls}`;
}
+ if (!selector) selector = `${el.tagName.toLowerCase()}:nth-of-type(${idx + 1})`;
+
+ results.push({
+ index: idx,
+ text: (el.textContent || "").trim().slice(0, 50),
+ x: Math.round(cx),
+ y: Math.round(cy),
+ width: rect.width,
+ height: rect.height,
+ area: rect.width * rect.height,
+ centerDist,
+ selector,
+ hasBackground,
+ });
+ idx++;
+ }
- // Sort by visual prominence: larger elements first, then closer to center
- results.sort((a, b) => b.area - a.area || a.centerDist - b.centerDist);
+ // Sort by visual prominence:
+ // 1. Elements with background first (more likely to be buttons)
+ // 2. Larger elements first
+ // 3. Closer to center preferred
+ results.sort((a, b) => {
+ if (a.hasBackground !== b.hasBackground) return a.hasBackground ? -1 : 1;
+ if (Math.abs(b.area - a.area) > 100) return b.area - a.area;
+ return a.centerDist - b.centerDist;
+ });
- return results;
- },
- { clickableSelector, visualSelector }
- );
+ return results;
+ });
console.log(`[start-detect] Phase 2: found ${elementInfos.length} clickable elements`);
// Click each element and observe for visual change
@@ -546,9 +572,9 @@ async function tryDomButtons(
const before = await page.screenshot();
console.log(`[start-detect] Clicking "${info.text}" (${info.selector}) at (${info.x},${info.y}), before=${before.length} bytes`);
await page.mouse.click(info.x, info.y);
- await page.waitForTimeout(300);
+ await page.waitForTimeout(100);
- const result = await detectVisualChange(page, { frames: 3, intervalMs: 200, before });
+ const result = await detectVisualChange(page, { frames: 3, intervalMs: 100, before });
console.log(`[start-detect] After click "${info.text}": changed=${result.changed}`);
if (result.changed) {
// Check if the element disappeared after clicking
@@ -581,7 +607,7 @@ async function tryDomButtons(
// No change -- try pressing Escape to undo any menu we opened
try {
await page.keyboard.press("Escape");
- await page.waitForTimeout(100);
+ await page.waitForTimeout(50);
} catch { /* ignore */ }
} catch { /* continue to next element */ }
}
@@ -646,9 +672,9 @@ async function tryCanvasClicks(
try {
const before = await page.screenshot();
await page.mouse.click(pos.x, pos.y);
- await page.waitForTimeout(300);
+ await page.waitForTimeout(100);
- const result = await detectVisualChange(page, { frames: 3, intervalMs: 200, before });
+ const result = await detectVisualChange(page, { frames: 3, intervalMs: 100, before });
if (result.changed) {
return {
mechanism: "click_canvas",
@@ -689,9 +715,9 @@ async function tryKeyboardTriggers(
try {
const before = await page.screenshot();
await page.keyboard.press(key);
- await page.waitForTimeout(300);
+ await page.waitForTimeout(100);
- const result = await detectVisualChange(page, { frames: 3, intervalMs: 200, before });
+ const result = await detectVisualChange(page, { frames: 3, intervalMs: 100, before });
if (result.changed) {
return { mechanism: mechanismMap[key] };
}
@@ -715,9 +741,9 @@ async function tryKeyboardTriggers(
}
await page.waitForTimeout(100);
await page.keyboard.press(key);
- await page.waitForTimeout(300);
+ await page.waitForTimeout(100);
- const result = await detectVisualChange(page, { frames: 3, intervalMs: 200, before });
+ const result = await detectVisualChange(page, { frames: 3, intervalMs: 100, before });
if (result.changed) {
return { mechanism: mechanismMap[key] };
}
@@ -786,10 +812,10 @@ async function recalibrateWithRetry(
try {
const before = await page.screenshot();
await attempt.action();
- await page.waitForTimeout(300);
+ await page.waitForTimeout(100);
if (startMechanism === "unknown") {
- const result = await detectVisualChange(page, { frames: 3, intervalMs: 200, before });
+ const result = await detectVisualChange(page, { frames: 3, intervalMs: 100, before });
if (result.changed) {
startMechanism = attempt.name;
}
@@ -1180,7 +1206,7 @@ async function detectScoreElement(page: Page): Promise<string | null> {
export async function surveyPage(page: Page): Promise<SurveyData> {
try {
const data = await page.evaluate(() => {
- // Check for full-screen overlay
+ // Check for full-screen overlay (language-agnostic: purely structural detection)
let hasOverlay = false;
const allEls = document.querySelectorAll("*");
const vw = window.innerWidth;
@@ -1192,20 +1218,10 @@ export async function surveyPage(page: Page): Promise<SurveyData> {
const zIndex = parseInt(style.zIndex, 10);
if (zIndex > 0 || style.zIndex === "auto") {
const rect = (el as HTMLElement).getBoundingClientRect();
- if (rect.width > vw * 0.8 && rect.height > vh * 0.8) {
- const text = ((el as HTMLElement).innerText || "").toLowerCase();
- if (
- text.includes("start") ||
- text.includes("play") ||
- text.includes("enter") ||
- text.includes("press") ||
- text.includes("begin") ||
- text.includes("click") ||
- text.length < 5 // empty overlay
- ) {
- hasOverlay = true;
- break;
- }
+ if (rect.width > vw * 0.5 && rect.height > vh * 0.5) {
+ // Large positioned overlay detected -- no text matching needed
+ hasOverlay = true;
+ break;
}
}
}