loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

types.ts (15128B)


      1 /** A 10x20 boolean grid: true = filled cell, false = empty. Row 0 is the top. */
      2 export type Grid = boolean[][]; // size is not enforced by the type; presumably indexed [row][col] -- confirm
      3 
      4 /** Pixel bounds of the game grid on the page. Used by DriverCalibration.gridBounds and BotReport's grid_bounds. */
      5 export interface GridBounds {
      6   x: number;
      7   y: number;
      8   width: number;
      9   height: number;
     10 }
     11 
     12 /** How the game renders its grid. Reported as DriverCalibration.renderer. */
     13 export type RendererType = "canvas" | "dom" | "svg" | "unknown";
     14 
     15 /** Legacy key mappings for game controls; property names match the actions TetrisDriver.pressKey() accepts. */
     16 export interface Controls {
     17   left: string;
     18   right: string;
     19   down: string;
     20   rotate: string;
     21   drop: string;
     22 }
     23 
     24 /**
     25  * Abstract game action -- the thing the bot wants to do, independent of which
     26  * physical key performs it. Accepted by TetrisDriver.getControl().
     27  */
     28 export type GameAction =
     29   | "move_left"
     30   | "move_right"
     31   | "soft_drop"
     32   | "hard_drop"
     33   | "rotate_cw"
     34   | "rotate_ccw"
     35   | "pause" // no corresponding field in ControlMap
     36   | "hold"; // no corresponding field in ControlMap
     37 
     38 /**
     39  * Outcome of control discovery for one action. `confidence` is one of:
     40  * - "suspected": the key did something plausible on one trial
     41  * - "confirmed": verified on a fresh reload (currently unused, reserved)
     42  * - "not_found": no candidate key produced the expected behaviour
     43  */
     44 export interface ControlMapping {
     45   /** The discovered key, or null if no candidate matched. */
     46   key: string | null;
     47   confidence: "suspected" | "confirmed" | "not_found";
     48   /** Human-readable description of what was observed. */
     49   observation: string;
     50 }
     51 
     52 /**
     53  * Discovered control map. Produced by driver.discoverControls() after the
     54  * game is started. Unlike the legacy Controls, an action may have no key:
     55  * its ControlMapping.key is then null (e.g. soft_drop is optional).
     56  */
     57 export interface ControlMap {
     58   move_left: ControlMapping;
     59   move_right: ControlMapping;
     60   soft_drop: ControlMapping;
     61   hard_drop: ControlMapping;
     62   rotate_cw: ControlMapping;
     63   rotate_ccw: ControlMapping;
     64   /** Observations for every key tried, keyed by the raw key name. */
     65   key_observations: Record<string, string>;
     66 }
     67 
     68 /** How the game was started. Used by DriverCalibration.startMechanism and StartCandidate.mechanism. */
     69 export type StartMechanism =
     70   | "auto"
     71   | "click_canvas"
     72   | "enter"
     73   | "space"
     74   | "button"
     75   | "anykey"
     76   | "unknown";
     77 
     78 /**
     79  * A candidate start mechanism discovered by the driver and verified by the bot.
     80  * The bot iterates candidates, asks the driver to try each, then decides whether
     81  * the result is a started Tetris game. Only mechanism and label are required.
     82  */
     83 export interface StartCandidate {
     84   /** Which mechanism type this candidate represents. */
     85   mechanism: StartMechanism;
     86   /** Human-readable label for logs. */
     87   label: string;
     88   /** CSS selector for buttons. */
     89   selector?: string;
     90   /** Visible text for buttons. */
     91   text?: string;
     92   /** Key to press for keyboard triggers. */
     93   key?: string;
     94   /** Pixel position for clicks. */
     95   position?: { x: number; y: number };
     96   /** Milliseconds to wait before measuring (for auto-start). */
     97   waitMs?: number;
     98 }
     99 
    100 /** What happened when a start mechanism was applied, without committing. Returned by tryStartMechanism(). */
    101 export interface TryStartResult {
    102   /** Did the screenshot pixels change? */
    103   visualChanged: boolean;
    104   /** Did the DOM snapshot change? */
    105   domChanged: boolean;
    106   /** Was there a JS error during the attempt? */
    107   errorOccurred: boolean;
    108   /** Number of clickable elements that appeared after applying. */
    109   newClickableElements: number;
    110   /** Number of clickable elements that disappeared after applying. */
    111   removedElements: number;
    112 }
    113 
    114 /** Standard Tetris piece types (seven letters) plus an "unknown" member. */
    115 export type PieceType = "I" | "O" | "T" | "S" | "Z" | "J" | "L" | "unknown";
    116 
    117 /** Pre-test survey data collected before any tests run. Returned by surveyPage(); embedded in BotReport. */
    118 export interface SurveyData {
    119   has_overlay: boolean;
    120   has_canvas: boolean;
    121   has_dom_grid: boolean;
    122   visible_text: string[];
    123   clickable_elements: number;
    124 }
    125 
    126 /**
    127  * Landmarks detected on the loaded page to determine whether a game has
    128  * actually rendered (as opposed to a blank 200 OK or an empty DOM). Used by
    129  * the game_loads test. A game is considered "loaded" if the body has content
    130  * and at least one game-shaped element is present.
    131  */
    132 export interface GameLandmarks {
    133   bodyHasContent: boolean;
    134   hasCanvas: boolean;
    135   hasDomGrid: boolean;
    136   hasTetrisRatioElement: boolean;
    137   hasManyCellsContainer: boolean;
    138   landmarksFound: string[]; // presumably the names of the landmarks that were detected -- confirm
    139 }
    140 
    141 /** Configuration returned by calibrate(), recalibrate() and getCalibration(). */
    142 export interface DriverCalibration {
    143   renderer: RendererType;
    144   gridDetected: boolean;
    145   gridBounds: GridBounds | null;
    146   cellWidth: number;
    147   cellHeight: number;
    148   controls: Controls;
    149   /** Discovered control map, or null if discovery has not run yet (the property may also be omitted). */
    150   controlMap?: ControlMap | null;
    151   startMechanism: StartMechanism;
    152   scoreElementSelector: string | null;
    153   levelElementSelector: string | null;
    154   backgroundColor: [number, number, number] | null;
    155   consoleErrors: string[];
    156   gridConfidence: number;
    157   gridDetectedAt: "initial" | "after_start";
    158   startButton?: {
    159     selector: string;
    160     text: string;
    161     disappeared: boolean;
    162     position: { x: number; y: number };
    163   };
    164   fromCache?: boolean;
    165 }
    166 
    167 /** Summary of how much the latest calibration differs from the first one. Returned by getCalibrationDrift(). */
    168 export interface CalibrationDrift {
    169   drifted: boolean;
    170   changes: string[];
    171   recalibrations: number;
    172   cacheHits: number;
    173   cacheMisses: number;
    174 }
    175 
    176 /** Grid snapshot: the grid state plus derived information the bot needs. Returned by readGrid(). */
    177 export interface GridSnapshot {
    178   /** The 10x20 boolean grid. null if reading failed. */
    179   grid: Grid | null;
    180   /** Total filled cells. 0 if grid is null. */
    181   filledCount: number;
    182   /** Filled cells in the bottom N rows. */
    183   filledInBottom(rows: number): number;
    184   /** Whether any cell in the top N rows is filled. */
    185   hasFilledInTop(rows: number): boolean;
    186   /** Number of fully complete rows. */
    187   completeRows: number;
    188   /** Active piece cells (diff against settled grid). null if undetectable. */
    189   activePieceCells: [number, number][] | null; // NOTE(review): tuple order ([row, col] vs [x, y]) is not stated -- confirm
    190   /** Identified piece type from active piece cells. null if no active piece. */
    191   activePieceType: PieceType | null;
    192 }
    193 
    194 /** The Driver interface. This is what the Bot sees. */
    195 export interface TetrisDriver {
    196   // -- Lifecycle --
    197   loadPage(url: string): Promise<{ loaded: boolean; detail: string; errorsOnLoad: number }>;
    198   surveyPage(): Promise<SurveyData>;
    199   /**
    200    * Detect high-level "game is present" landmarks on the loaded page. Used by
    201    * the game_loads test to decide pass/fail based on what the user would see
    202    * (canvas, DOM grid, tetris-ratio element, or a many-cells container) rather
    203    * than on whether load-time console errors occurred.
    204    */
    205   detectGameLandmarks(): Promise<GameLandmarks>;
    206   calibrate(): Promise<DriverCalibration>;
    207   recalibrate(): Promise<DriverCalibration>;
    208   /**
    209    * Lightweight, side-effect-free grid re-detection. Does NOT click, press
    210    * keys, or run start-mechanism detection. If the page has since spawned
    211    * its grid (common with DOM games that build cells in requestAnimationFrame
    212    * after a start button click), the cached calibration is updated; otherwise
    213    * the current calibration is left untouched.
    214    */
    215   refreshGridDetection(): Promise<void>;
    216   getCalibration(): DriverCalibration;
    217 
    218   // -- Start mechanism discovery/verification bridge --
    219   /** Return candidate start mechanisms in priority order. Does not apply them. */
    220   discoverStartCandidates(): Promise<StartCandidate[]>;
    221   /** Apply a candidate and report observable deltas. Does NOT commit. */
    222   tryStartMechanism(candidate: StartCandidate): Promise<TryStartResult>;
    223   /** Commit a verified start mechanism so subsequent calibrations reuse it. */
    224   confirmStartMechanism(
    225     candidate: StartCandidate
    226   ): void;
    227   /** Forget the confirmed mechanism (e.g. after reloading to try a different candidate). */
    228   clearConfirmedStartMechanism(): void;
    229   /**
    230    * Tell the driver the bot's bridge verification rejected every candidate.
    231    * This prevents calibrate() from running the legacy fallback detector,
    232    * which historically produced false positives like clicking Pause.
    233    */
    234   rejectStartMechanism(): void;
    235   getCalibrationDrift(): CalibrationDrift;
    236   /**
    237    * Start the inactivity watchdog. After this is called, readGrid() and
    238    * wait() throw InactivityAbortError if 120+ seconds pass without a
    239    * successful grid read. Call once the game is known to be running.
    240    */
    241   armInactivityWatchdog(): void;
    242 
    243   // -- Grid Reading --
    244   readGrid(settledGrid?: Grid | null): Promise<GridSnapshot>;
    245   gridsAreDifferent(a: Grid | null, b: Grid | null): boolean;
    246 
    247   // -- Input --
    248   pressKey(action: "left" | "right" | "down" | "rotate" | "drop"): Promise<void>; // same names as the Controls properties
    249   pressRawKey(key: string): Promise<void>;
    250   wait(ms: number): Promise<void>;
    251 
    252   // -- Control discovery --
    253   /**
    254    * Run the control discovery loop against the currently-started game.
    255    * Tries candidate keys for each abstract game action, observes grid deltas,
    256    * and classifies each. Populates the driver's cached control map so
    257    * subsequent pressKey() calls use the discovered mapping.
    258    *
    259    * Expensive (multiple reloads). Callers should invoke this at most once
    260    * per session; the result is cached and re-applied across calibration
    261    * cache hits.
    262    */
    263   discoverControls(serverUrl: string): Promise<ControlMap>;
    264   /**
    265    * Return the discovered key for a given action, or null if not found.
    266    * Before discoverControls() has run, returns the legacy default key from
    267    * the Controls struct.
    268    */
    269   getControl(action: GameAction): string | null;
    270 
    271   // -- Score/Level Reading (this interface declares no lines-cleared reader) --
    272   readScore(): Promise<number | null>;
    273   readLevel(): Promise<number | null>;
    274 
    275   // -- Page State Queries --
    276   detectGameOverText(): Promise<string | null>;
    277   detectRestartOption(): Promise<boolean>;
    278   detectNextPiecePreview(): Promise<boolean>;
    279   getConsoleErrors(): string[];
    280 
    281   // -- Screenshots --
    282   screenshot(): Promise<Buffer>;
    283   /**
    284    * Screenshot clipped to the grid area (uses current calibration's gridBounds).
    285    * Returns null if no grid has been detected yet. Useful as a fallback for
    286    * verifyGameStarted when the grid reader can't see active pieces because the
    287    * game renders them outside the cell layout (e.g. absolute-positioned divs
    288    * floating over the grid).
    289    */
    290   screenshotGridArea(): Promise<Buffer | null>;
    291   /**
    292    * Compute a string fingerprint of the grid container's DOM state, capturing
    293    * child count, class names, and inline position styles. Used by the start
    294    * verification fallback to detect piece movement for games that render the
    295    * active piece as absolute-positioned divs outside the cell layout -- those
    296    * changes are invisible to the grid reader and may be off-screen for pixel
    297    * diffs, but they always show up in the DOM.
    298    *
    299    * Returns an empty string if no grid container could be located.
    300    */
    301   captureGridDomFingerprint(): Promise<string>;
    302   measureDropInterval(): Promise<number>; // NOTE(review): unit of the result is not stated (presumably ms) -- confirm
    303 }
    304 
    305 /** Competitive play results (Phase 8). Reported as BotReport.competitive_play, which may be null. */
    306 export interface CompetitivePlayResult {
    307   duration_seconds: number;
    308   pieces_placed: number;
    309   total_lines_cleared: number;
    310   single_clears: number;
    311   double_clears: number;
    312   triple_clears: number;
    313   tetris_clears: number; // presumably clears of four lines at once -- confirm
    314   max_combo: number;
    315   score_readings: number[];
    316   score_final: number;
    317   score_increases: number[];
    318   level_readings: number[];
    319   level_final: number;
    320   game_over_reached: boolean;
    321   game_over_text_found: string | null;
    322   restart_available: boolean;
    323   next_piece_visible: boolean;
    324   speed_increased: boolean;
    325   bugs_detected: string[];
    326   rendering_trail_detected?: boolean;
    327 }
    328 
    329 /** Result of an individual test. Entries of BotReport.tests have this same shape. */
    330 export interface TestResult {
    331   name: string;
    332   pass: boolean;
    333   detail: string;
    334 }
    335 
    336 /** Data collected during one continuous observation session. */
    337 export interface GameSession {
    338   started: boolean;
    339   startMechanism: string; // typed as a plain string here, not the StartMechanism union
    340   piecesSpawned: number;
    341   piecesLocked: number;
    342   linesCleared: number;
    343   rotationsObserved: number;
    344   movementsObserved: number;
    345   hardDropsObserved: number;
    346   gameOverDetected: boolean;
    347   /** Game over text captured in Phase 6 immediately after triggering game over. */
    348   gameOverText?: string | null;
    349   /** Whether a restart option was visible in Phase 6 immediately after triggering game over. */
    350   gameOverRestartAvailable?: boolean;
    351   consoleErrors: string[];
    352   durationSeconds: number;
    353   pieceTypes: Set<string>; // element type is string, not PieceType
    354   scoreValues: number[];
    355   /** Score reading taken just before the first detected line clear. */
    356   scoreBeforeClear?: number;
    357   /** Score reading taken just after the first detected line clear. */
    358   scoreAfterClear?: number;
    359   gridReadSuccess: number;
    360   gridReadFail: number;
    361   frames: number;
    362   events: GridEvent[];
    363   skippedPhases: string[];
    364   /**
    365    * Maximum number of distinct normalized shapes observed while pressing
    366    * rotate 4 times on a single piece in Phase 3 (basic mechanics). Used by
    367    * the rotate test to verify the game actually cycles through rotation
    368    * states rather than allowing only one rotation.
    369    */
    370   distinctRotationShapes: number;
    371   /**
    372    * Per-piece-type set of distinct normalized shapes observed during the
    373    * rotation probe in gameplay phases. Used by all_pieces_rotate to verify
    374    * that multiple piece types can each rotate through 2+ distinct shapes.
    375    */
    376   rotationShapesByPiece: Map<string, Set<string>>;
    377   /**
    378    * Landmarks detected immediately after the initial page load. Used by the
    379    * game_loads test to decide whether a game rendered at all, independent of
    380    * any console errors emitted during load.
    381    */
    382   gameLoadLandmarks?: GameLandmarks;
    383 }
    384 
    385 /** An event observed during continuous grid scanning. Discriminated on `type`; every variant carries `frame`. */
    386 export type GridEvent =
    387   | { type: "piece_spawned"; pieceType: PieceType; frame: number }
    388   | { type: "piece_locked"; frame: number; filledDelta: number }
    389   | { type: "line_cleared"; count: number; frame: number }
    390   | { type: "piece_moved"; direction: "left" | "right" | "down"; frame: number }
    391   | { type: "piece_rotated"; frame: number }
    392   | { type: "hard_drop"; frame: number }
    393   | { type: "game_over"; frame: number }
    394   | { type: "grid_read_failed"; frame: number };
    395 
    396 /** Gameplay statistics gathered during the play phase. Reported as BotReport.gameplay. */
    397 export interface GameplayStats {
    398   pieces_placed: number;
    399   lines_cleared: number;
    400   max_score_observed: number;
    401   play_duration_seconds: number;
    402   errors_during_play: number;
    403 }
    404 
    405 /** The full JSON report written at the end. performance, accessibility and calibration_drift are optional. */
    406 export interface BotReport {
    407   implementation: {
    408     renderer: string;
    409     grid_detected: boolean;
    410     grid_detected_at: string;
    411     grid_bounds: GridBounds | null;
    412     controls: Record<string, string>;
    413     control_discovery?: Record<string, string>;
    414     start_mechanism: string;
    415     score_element_found: boolean;
    416     grid_confidence: number;
    417     survey: SurveyData;
    418   };
    419   tests: Array<{ name: string; pass: boolean; detail: string }>; // same shape as TestResult
    420   summary: {
    421     total: number;
    422     passed: number;
    423     failed: number;
    424     skipped: number;
    425     score: number;
    426   };
    427   gameplay: GameplayStats;
    428   competitive_play: CompetitivePlayResult | null;
    429   session: {
    430     frames: number;
    431     events_count: number;
    432     pieces_spawned: number;
    433     pieces_locked: number;
    434     lines_cleared: number;
    435     piece_types_seen: string[];
    436     grid_read_success_rate: number;
    437   };
    438   performance?: {
    439     load_time_ms: number;
    440   };
    441   accessibility?: {
    442     issues: string[];
    443     issue_count: number;
    444     pass: boolean;
    445   };
    446   calibration_drift?: CalibrationDrift;
    447 }

Impressum · Datenschutz