loop-benchmarking

Controlled experiments across agentic coding configurations: same task, one variable changed at a time, to find out what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

commit 4b971780246afdc6e97d3ed1d6c2aa6dcdeaa931
parent e82be6aca0708fad30ff11975bd16e5be13f53ff
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Wed, 15 Apr 2026 15:41:27 +0200

Re-eval 17 calibration runs; fix reeval.py artifact cleanup

The previous artifact-path fix broke the cleanup safety check:
the rmtree guard still matched dashboard/ after artifact_dir moved to
artifacts/, so successful re-evals wiped the agent-generated game code.
Update the guard to match the new artifacts/ path.

Bot vs human agreement on 17 calibration runs: 72/116 (62.1%).
DOM-detected games agree at ~91%.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
Aharness/reeval-calibration.py | 80+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mharness/reeval.py | 2+-
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=js_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json | 248++-----------------------------------------------------------------------------
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=js_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run2/eval_results.json | 224++-----------------------------------------------------------------------------
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=js_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run3/eval_results.json | 252++-----------------------------------------------------------------------------
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=gemma426b_pw=off_prompt=simple_prov=or_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json | 4++--
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=gemma426b_pw=off_prompt=simple_prov=or_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json | 2+-
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=glm51_pw=off_prompt=simple_prov=zai_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json | 4++--
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=glm51_pw=off_prompt=simple_prov=zai_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json | 2+-
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=glm51_pw=off_prompt=simple_prov=zai_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run3/eval_results.json | 2+-
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=detailed_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json | 453+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=uns_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json | 72+++++++++++++++++++++++++++---------------------------------------------
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=uns_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json | 62++++++++++++++++++++++----------------------------------------
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=uns_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run2/gameplay-bot-report.json | 528+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=detailed_rndr=none_strat=none_tst=none_tedit=off_tglob=off_tgrep=on_tread=off_twrite=on_web=off_run2/eval_results.json | 250++-----------------------------------------------------------------------------
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json | 443++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Mresults/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json | 412++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
Mresults/runs/tetris_arch=none_ctx=provided_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=off_budget=low_model=haiku45_pw=avail_prompt=detailed_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=off_tread=off_twrite=off_web=off_run3/eval_results.json | 2+-
Mresults/runs/tetris_arch=none_ctx=provided_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=off_budget=low_model=haiku45_pw=avail_prompt=detailed_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=off_tread=off_twrite=off_web=off_run3/gameplay-bot-report.json | 2+-
19 files changed, 1825 insertions(+), 1219 deletions(-)

diff --git a/harness/reeval-calibration.py b/harness/reeval-calibration.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +"""Re-run eval scripts against the 17 calibration runs only. + +Keeps agent-generated artifacts in place and rewrites eval_results.json +and gameplay-bot-report.json. + +Usage: + python3 harness/reeval-calibration.py [-j N] +""" + +import json +import sys +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path + +SCRIPT_DIR = Path(__file__).resolve().parent +PROJECT_DIR = SCRIPT_DIR.parent + +sys.path.insert(0, str(SCRIPT_DIR)) +from reeval import reeval_single + + +def main(): + parallel = 1 + args = sys.argv[1:] + i = 0 + while i < len(args): + if args[i] == "-j" and i + 1 < len(args): + parallel = int(args[i + 1]) + i += 2 + else: + i += 1 + + calib_dir = PROJECT_DIR / "tasks" / "tetris" / "eval" / "gameplay-bot" / "calibration" + run_ids = [json.loads(f.read_text())["run_id"] for f in sorted(calib_dir.glob("*.json"))] + run_dirs = [PROJECT_DIR / "results" / "runs" / rid for rid in run_ids] + + missing = [rd for rd in run_dirs if not rd.exists()] + if missing: + print(f"ERROR: {len(missing)} calibration run dirs missing:") + for rd in missing: + print(f" {rd.name}") + sys.exit(1) + + print(f"Re-evaluating {len(run_dirs)} calibration runs (parallel={parallel})") + print() + + completed = skipped = errors = 0 + + if parallel <= 1: + for rd in run_dirs: + result = reeval_single(rd, PROJECT_DIR) + if result == "completed": + completed += 1 + elif result.startswith("skip"): + skipped += 1 + else: + errors += 1 + else: + with ThreadPoolExecutor(max_workers=parallel) as ex: + futures = {ex.submit(reeval_single, rd, PROJECT_DIR): rd for rd in run_dirs} + for f in as_completed(futures): + try: + result = f.result() + except Exception as e: + print(f" ERROR {futures[f].name}: {e}") + result = "error" + if result == "completed": + completed += 1 + elif result.startswith("skip"): + skipped += 1 + else: + errors += 1 + + 
print() + print(f"Done. Completed: {completed} | Skipped: {skipped} | Errors: {errors}") + + +if __name__ == "__main__": + main() diff --git a/harness/reeval.py b/harness/reeval.py @@ -77,7 +77,7 @@ def reeval_single(run_dir: Path, project_dir: Path) -> str: evaluate(task_dir, workspace, cell, run_dir) # Clean up temp workspace (but not artifact dirs) - if not str(workspace).startswith(str(project_dir / "dashboard")): + if not str(workspace).startswith(str(project_dir / "artifacts")): shutil.rmtree(workspace, ignore_errors=True) return "completed" diff --git a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=js_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=js_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json @@ -104,251 +104,11 @@ }, "gameplay_bot": { "pass": false, - "score": 0.2, - "total": 26, - "passed": 2, - "failed": 8, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_detected_at": "initial", - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "control_discovery": { - "move_left": "NOT FOUND", - "move_right": "NOT FOUND", - "soft_drop": "NOT FOUND", - "hard_drop": "NOT FOUND", - "rotate_cw": "NOT FOUND", - "rotate_ccw": "NOT FOUND", - "key:ArrowLeft": "grid read failed before press", - "key:a": "grid read failed before press", - "key:h": "grid read failed before press", - "key:ArrowRight": "grid read failed before press", - "key:d": "grid read failed before press", - "key:l": "grid read failed before press", - "key:ArrowUp": "grid read failed 
before press", - "key:x": "grid read failed before press", - "key:w": "grid read failed before press", - "key:Space": "grid read failed before press", - "key:Enter": "grid read failed before press", - "key:ArrowDown": "grid read failed before press" - }, - "start_mechanism": "auto", - "score_element_found": false, - "grid_confidence": 0, - "survey": { - "has_overlay": false, - "has_canvas": true, - "has_dom_grid": false, - "visible_text": [ - "TETRIS", - "Score", - "0", - "Lines", - "0", - "Level", - "1", - "Next Piece", - "Controls", - "\u2190 \u2192 Move", - "\u2193 Soft Drop", - "SPACE Hard Drop", - "Z Rotate Left", - "X Rotate Right", - "P Pause" - ], - "clickable_elements": 1 - } - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "loaded with landmarks: body_content, canvas, tetris_ratio" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "grid reader unreliable, cannot verify auto-drop" - }, - { - "name": "move_left", - "pass": false, - "detail": "grid reader unreliable, cannot verify movement" - }, - { - "name": "move_right", - "pass": false, - "detail": "grid reader unreliable, cannot verify movement" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: no soft_drop key (game has only hard_drop)" - }, - { - "name": "rotate", - "pass": false, - "detail": "grid reader unreliable, cannot verify rotation" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "grid reader unreliable, cannot verify hard drop" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: not enough piece types to verify (saw 0 of J/L/T, need 2)" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "grid reader unreliable, cannot verify piece locking" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece spawning at top via grid reader" - }, - { - "name": "multiple_pieces", - 
"pass": false, - "detail": "skipped: mechanics phase failed" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: mechanics phase failed" - }, - { - "name": "score_increases_on_clear", - "pass": false, - "detail": "skipped: mechanics phase failed" - }, - { - "name": "score_element_visible", - "pass": false, - "detail": "no score display detected" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: piece lifecycle failed" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: gameplay phase failed" - }, - { - "name": "multi_line_clear", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "score_scaling", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "level_progression", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "speed_progression", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "next_piece_preview", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "game_over_display", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "counter_clockwise_rotation", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "soft_drop_distinct", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "rendering_clean", - "pass": false, - "detail": "skipped: competitive play phase did not run" - } - ], - "summary": { - "total": 26, - "passed": 2, - "failed": 8, - "skipped": 16, - "score": 0.2 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "competitive_play": null, - "session": { - "frames": 0, - "events_count": 0, - "pieces_spawned": 0, - "pieces_locked": 0, - "lines_cleared": 0, 
- "piece_types_seen": [], - "grid_read_success_rate": 0 - }, - "performance": { - "load_time_ms": 51 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - }, - "calibration_drift": { - "drifted": true, - "changes": [ - "renderer", - "grid_bounds", - "controls", - "score_element", - "level_element" - ], - "recalibrations": 8, - "cacheHits": 0, - "cacheMisses": 8 - } - } + "score": 0, + "error": "Gameplay bot timed out after 300 seconds" }, - "outcome_score": 0.1, - "score": 0.1, + "outcome_score": 0.0, + "score": 0.0, "sonarqube": { "error": "no SonarQube token found", "score": 0 diff --git a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=js_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run2/eval_results.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=js_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run2/eval_results.json @@ -104,227 +104,11 @@ }, "gameplay_bot": { "pass": false, - "score": 0.18, - "total": 26, - "passed": 2, - "failed": 9, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_detected_at": "initial", - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "button", - "score_element_found": false, - "grid_confidence": 0, - "survey": { - "has_overlay": false, - "has_canvas": true, - "has_dom_grid": false, - "visible_text": [ - "SCORE", - "Points:", - "0", - "Level:", - "1", - "Lines:", - "0", - "Speed increases every", - "10 lines cleared", - "NEXT PIECE", - "\u2191 Rotate", - "\u2190 \u2192 Move", - "\u2193 Soft Drop", - "Space Hard Drop", - "P Pause", - "START GAME", - "PAUSE" - ], - 
"clickable_elements": 3 - } - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "loaded with landmarks: body_content, canvas, dom_grid, tetris_ratio" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via button (#startBtn, \"Start Game\")" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "grid reader unreliable, cannot verify auto-drop" - }, - { - "name": "move_left", - "pass": false, - "detail": "grid reader unreliable, cannot verify movement" - }, - { - "name": "move_right", - "pass": false, - "detail": "grid reader unreliable, cannot verify movement" - }, - { - "name": "move_down", - "pass": false, - "detail": "grid reader unreliable, cannot verify movement" - }, - { - "name": "rotate", - "pass": false, - "detail": "grid reader unreliable, cannot verify rotation" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "grid reader unreliable, cannot verify hard drop" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: not enough piece types to verify (saw 0 of J/L/T, need 2)" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "grid reader unreliable, cannot verify piece locking" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece spawning at top via grid reader" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: mechanics phase failed" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: mechanics phase failed" - }, - { - "name": "score_increases_on_clear", - "pass": false, - "detail": "skipped: mechanics phase failed" - }, - { - "name": "score_element_visible", - "pass": false, - "detail": "no score display detected" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: piece lifecycle failed" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: gameplay phase failed" - }, - { - "name": "multi_line_clear", - "pass": false, - "detail": 
"skipped: competitive play phase did not run" - }, - { - "name": "score_scaling", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "level_progression", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "speed_progression", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "next_piece_preview", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "game_over_display", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "counter_clockwise_rotation", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "soft_drop_distinct", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "rendering_clean", - "pass": false, - "detail": "skipped: competitive play phase did not run" - } - ], - "summary": { - "total": 26, - "passed": 2, - "failed": 9, - "skipped": 15, - "score": 0.18 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "competitive_play": null, - "session": { - "frames": 0, - "events_count": 0, - "pieces_spawned": 0, - "pieces_locked": 0, - "lines_cleared": 0, - "piece_types_seen": [], - "grid_read_success_rate": 0 - }, - "performance": { - "load_time_ms": 47 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - }, - "calibration_drift": { - "drifted": false, - "changes": [], - "recalibrations": 0, - "cacheHits": 0, - "cacheMisses": 0 - } - } + "score": 0, + "error": "Gameplay bot timed out after 300 seconds" }, - "outcome_score": 0.09, - "score": 0.09, + "outcome_score": 0.0, + "score": 0.0, "sonarqube": { "error": "no SonarQube token found", "score": 0 diff --git 
a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=js_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run3/eval_results.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=js_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run3/eval_results.json @@ -104,255 +104,11 @@ }, "gameplay_bot": { "pass": false, - "score": 0.2, - "total": 26, - "passed": 2, - "failed": 8, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_detected_at": "initial", - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "control_discovery": { - "move_left": "NOT FOUND", - "move_right": "NOT FOUND", - "soft_drop": "NOT FOUND", - "hard_drop": "NOT FOUND", - "rotate_cw": "NOT FOUND", - "rotate_ccw": "NOT FOUND", - "key:ArrowLeft": "grid read failed before press", - "key:a": "grid read failed before press", - "key:h": "grid read failed before press", - "key:ArrowRight": "grid read failed before press", - "key:d": "grid read failed before press", - "key:l": "grid read failed before press", - "key:ArrowUp": "grid read failed before press", - "key:x": "grid read failed before press", - "key:w": "grid read failed before press", - "key:Space": "grid read failed before press", - "key:Enter": "grid read failed before press", - "key:ArrowDown": "grid read failed before press" - }, - "start_mechanism": "auto", - "score_element_found": false, - "grid_confidence": 0, - "survey": { - "has_overlay": false, - "has_canvas": false, - "has_dom_grid": true, - "visible_text": [ - "TETRIS", - "PAUSE", - "SCORE", - "0", - "LEVEL", - "1", - "LINES", - "0", - "NEXT PIECE", - "Controls:", - "\u2190 \u2192 
: Move left/right", - "\u2191 : Rotate", - "\u2193 : Fast drop", - "Space : Pause", - "Scoring:", - "1 Line: 100 pts", - "2 Lines: 300 pts", - "3 Lines: 500 pts", - "4 Lines: 800 pts", - "NEW GAME" - ], - "clickable_elements": 3 - } - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "loaded with landmarks: body_content, dom_grid, tetris_ratio, cells_container" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "grid reader unreliable, cannot verify auto-drop" - }, - { - "name": "move_left", - "pass": false, - "detail": "grid reader unreliable, cannot verify movement" - }, - { - "name": "move_right", - "pass": false, - "detail": "grid reader unreliable, cannot verify movement" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: no soft_drop key (game has only hard_drop)" - }, - { - "name": "rotate", - "pass": false, - "detail": "grid reader unreliable, cannot verify rotation" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "grid reader unreliable, cannot verify hard drop" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: not enough piece types to verify (saw 0 of J/L/T, need 2)" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "grid reader unreliable, cannot verify piece locking" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece spawning at top via grid reader" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: mechanics phase failed" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: mechanics phase failed" - }, - { - "name": "score_increases_on_clear", - "pass": false, - "detail": "skipped: mechanics phase failed" - }, - { - "name": "score_element_visible", - "pass": false, - "detail": "no score display detected" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: 
piece lifecycle failed" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: gameplay phase failed" - }, - { - "name": "multi_line_clear", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "score_scaling", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "level_progression", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "speed_progression", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "next_piece_preview", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "game_over_display", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "counter_clockwise_rotation", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "soft_drop_distinct", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "rendering_clean", - "pass": false, - "detail": "skipped: competitive play phase did not run" - } - ], - "summary": { - "total": 26, - "passed": 2, - "failed": 8, - "skipped": 16, - "score": 0.2 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "competitive_play": null, - "session": { - "frames": 0, - "events_count": 0, - "pieces_spawned": 0, - "pieces_locked": 0, - "lines_cleared": 0, - "piece_types_seen": [], - "grid_read_success_rate": 0 - }, - "performance": { - "load_time_ms": 72 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - }, - "calibration_drift": { - "drifted": true, - "changes": [ - "renderer", - "grid_bounds", - "score_element", - "level_element" - ], - "recalibrations": 8, - "cacheHits": 0, - "cacheMisses": 8 - } - } + "score": 0, + "error": "Gameplay bot timed out after 
300 seconds" }, - "outcome_score": 0.1, - "score": 0.1, + "outcome_score": 0.0, + "score": 0.0, "sonarqube": { "error": "no SonarQube token found", "score": 0 diff --git a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=gemma426b_pw=off_prompt=simple_prov=or_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=gemma426b_pw=off_prompt=simple_prov=or_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json @@ -31,7 +31,7 @@ }, "code_analysis": { "files": { - "total": 16, + "total": 17, "code": 10, "docs": 1, "unnecessary": 0, @@ -303,7 +303,7 @@ "grid_read_success_rate": 0 }, "performance": { - "load_time_ms": 37 + "load_time_ms": 24 }, "accessibility": { "issues": [ diff --git a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=gemma426b_pw=off_prompt=simple_prov=or_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=gemma426b_pw=off_prompt=simple_prov=or_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json @@ -188,7 +188,7 @@ "grid_read_success_rate": 0 }, "performance": { - "load_time_ms": 37 + "load_time_ms": 24 }, "accessibility": { "issues": [ diff --git a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=glm51_pw=off_prompt=simple_prov=zai_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json 
b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=glm51_pw=off_prompt=simple_prov=zai_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json @@ -31,7 +31,7 @@ }, "code_analysis": { "files": { - "total": 12, + "total": 13, "code": 7, "docs": 1, "unnecessary": 0, @@ -313,7 +313,7 @@ "grid_read_success_rate": 0 }, "performance": { - "load_time_ms": 54 + "load_time_ms": 44 }, "accessibility": { "issues": [ diff --git a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=glm51_pw=off_prompt=simple_prov=zai_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=glm51_pw=off_prompt=simple_prov=zai_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json @@ -198,7 +198,7 @@ "grid_read_success_rate": 0 }, "performance": { - "load_time_ms": 54 + "load_time_ms": 44 }, "accessibility": { "issues": [ diff --git a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=glm51_pw=off_prompt=simple_prov=zai_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run3/eval_results.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=glm51_pw=off_prompt=simple_prov=zai_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run3/eval_results.json @@ -31,7 +31,7 @@ }, "code_analysis": { "files": { - "total": 12, + "total": 13, "code": 7, "docs": 1, "unnecessary": 0, diff --git 
a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=detailed_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=detailed_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json @@ -1,8 +1,14 @@ { "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, + "renderer": "dom", + "grid_detected": true, + "grid_detected_at": "initial", + "grid_bounds": { + "x": 360.5, + "y": 50, + "width": 329, + "height": 639 + }, "controls": { "left": "ArrowLeft", "right": "ArrowRight", @@ -10,120 +16,459 @@ "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "unknown", - "score_element_found": false, - "grid_confidence": 0 + "control_discovery": { + "move_left": "ArrowLeft (moved 2 col(s) left)", + "move_right": "ArrowRight (moved 4 col(s) right)", + "soft_drop": "ArrowDown (moved 2 row(s) down)", + "hard_drop": "Space (teleported 11 rows to bottom)", + "rotate_cw": "ArrowUp (shape changed (rotation))", + "rotate_ccw": "NOT FOUND", + "key:ArrowLeft": "moved 2 col(s) left", + "key:ArrowRight": "moved 4 col(s) right", + "key:ArrowUp": "shape changed (rotation)", + "key:Space": "teleported 11 rows to bottom", + "key:s": "no change", + "key:ArrowDown": "moved 2 row(s) down", + "key:z": "other change", + "key:Control": "grid empty before press (no piece)" + }, + "start_mechanism": "auto", + "score_element_found": true, + "grid_confidence": 0, + "survey": { + "has_overlay": false, + "has_canvas": false, + "has_dom_grid": true, + "visible_text": [ + "SCORE", + "0", + "LEVEL", + "1", + "LINES", + "0", + "NEXT", + "Controls", + "← → Move", + "↓ Soft Drop", + "↑ Rotate CW", + "Z Rotate CCW", + 
"Space Hard Drop" + ], + "clickable_elements": 1 + } }, "tests": [ { "name": "game_loads", "pass": true, - "detail": "no console errors" + "detail": "loaded with landmarks: body_content, dom_grid, tetris_ratio, cells_container" }, { "name": "game_starts", - "pass": false, - "detail": "could not start game with any mechanism" + "pass": true, + "detail": "started via auto" }, { "name": "auto_drop", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "piece cycled through 4 distinct shapes after 4 rotate presses (grid-verified)" }, { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: game did not start" + "name": "hard_drop", + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" }, { - "name": "hard_drop", - "pass": false, - "detail": "skipped: game did not start" + "name": "all_pieces_rotate", + "pass": true, + "detail": "3 J/L/T piece type(s) rotated to 3+ distinct shapes [T:4 J:4 L:4]" }, { "name": "piece_locks", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "1 new piece(s) detected at top of grid" }, { 
"name": "multiple_pieces", - "pass": false, - "detail": "skipped: mechanics phase not met" + "pass": true, + "detail": "12 pieces placed during play session" }, { "name": "line_clear", + "pass": true, + "detail": "8 line(s) cleared (grid-verified)" + }, + { + "name": "score_increases_on_clear", + "pass": true, + "detail": "score went from 0 to 100 after line clear" + }, + { + "name": "score_element_visible", + "pass": true, + "detail": "score display found (#score)" + }, + { + "name": "game_over", + "pass": true, + "detail": "game stopped after stacking to top (grid-verified)" + }, + { + "name": "playable_30s", + "pass": true, + "detail": "played for 30s, placed 17 pieces, no crashes" + }, + { + "name": "multi_line_clear", "pass": false, - "detail": "skipped: mechanics phase not met" + "detail": "skipped: no multi-line clear opportunity occurred during play" }, { - "name": "score_changes", + "name": "score_scaling", "pass": false, - "detail": "skipped: mechanics phase not met" + "detail": "skipped: no multi-line clear occurred to test scaling" }, { - "name": "game_over", + "name": "level_progression", "pass": false, - "detail": "skipped: gameplay phase not met" + "detail": "skipped: only 1 lines cleared (need 10+)" }, { - "name": "playable_30s", + "name": "speed_progression", + "pass": false, + "detail": "skipped: level did not increase, cannot test speed change" + }, + { + "name": "next_piece_preview", + "pass": true, + "detail": "next piece preview display found" + }, + { + "name": "game_over_display", + "pass": true, + "detail": "overlay detected, restart clickable present (phase6)" + }, + { + "name": "counter_clockwise_rotation", "pass": false, - "detail": "skipped: gameplay phase not met" + "detail": "Z key does same as Up arrow or does not rotate" + }, + { + "name": "soft_drop_distinct", + "pass": false, + "detail": "skipped: could not test soft drop behavior" + }, + { + "name": "rendering_clean", + "pass": false, + "detail": "skipped: not enough data to 
assess rendering trails" } ], "summary": { - "total": 16, - "passed": 1, - "failed": 15, - "score": 0.06 + "total": 26, + "passed": 19, + "failed": 1, + "skipped": 6, + "score": 0.95 }, "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, + "pieces_placed": 17, + "lines_cleared": 8, + "max_score_observed": 300, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "competitive_play": { + "duration_seconds": 60, + "pieces_placed": 2, + "total_lines_cleared": 1, + "single_clears": 1, + "double_clears": 0, + "triple_clears": 0, + "tetris_clears": 0, + "max_combo": 1, + "score_readings": [ + 0, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100 + ], + "score_final": 100, + "score_increases": [ + 100 + ], + "level_readings": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 
1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "level_final": 1, + "game_over_reached": false, + "game_over_text_found": null, + "restart_available": false, + "next_piece_visible": true, + "speed_increased": false, + "bugs_detected": [] + }, "session": { - "frames": 0, - "events_count": 0, - "pieces_spawned": 0, - "pieces_locked": 0, - "lines_cleared": 0, - "piece_types_seen": [], - "grid_read_success_rate": 0 + "frames": 966, + "events_count": 15, + "pieces_spawned": 1, + "pieces_locked": 12, + "lines_cleared": 8, + "piece_types_seen": [ + "I", + "T" + ], + "grid_read_success_rate": 1 }, "performance": { - "load_time_ms": 64 + "load_time_ms": 28 }, "accessibility": { "issues": [], "issue_count": 0, "pass": true + }, + "calibration_drift": { + "drifted": false, + "changes": [], + "recalibrations": 21, + "cacheHits": 21, + "cacheMisses": 0 } } \ No newline at end of file diff --git a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=uns_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=uns_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json @@ -107,10 +107,10 @@ }, "gameplay_bot": { "pass": false, - "score": 0.3, + "score": 0.67, "total": 26, - "passed": 3, - "failed": 7, + "passed": 2, + "failed": 1, "report": { "implementation": { "renderer": "dom", @@ -118,7 +118,7 @@ "grid_detected_at": "initial", "grid_bounds": { "x": 331.140625, - "y": -271, + "y": 97, "width": 300, "height": 600 }, @@ -129,25 +129,7 @@ "rotate": "ArrowUp", "drop": "Space" }, - "control_discovery": { - "move_left": "ArrowLeft (moved 2 col(s) left)", - "move_right": "NOT FOUND", - "soft_drop": 
"NOT FOUND", - "hard_drop": "NOT FOUND", - "rotate_cw": "NOT FOUND", - "rotate_ccw": "NOT FOUND", - "key:ArrowLeft": "moved 2 col(s) left", - "key:ArrowRight": "grid empty before press (no piece)", - "key:d": "grid empty before press (no piece)", - "key:l": "grid empty before press (no piece)", - "key:ArrowUp": "no change", - "key:x": "no change", - "key:w": "no change", - "key:Space": "grid empty before press (no piece)", - "key:Enter": "no change", - "key:ArrowDown": "grid empty before press (no piece)" - }, - "start_mechanism": "button", + "start_mechanism": "unknown", "score_element_found": true, "grid_confidence": 0, "survey": { @@ -187,53 +169,53 @@ }, { "name": "game_starts", - "pass": true, - "detail": "started via button (#startBtn, \"Start Game\")" + "pass": false, + "detail": "could not start game with any mechanism" }, { "name": "auto_drop", "pass": false, - "detail": "piece did not move down in 5 seconds (grid-verified)" + "detail": "skipped: game did not start" }, { "name": "move_left", "pass": false, - "detail": "no grid change detected after key press" + "detail": "skipped: game did not start" }, { "name": "move_right", "pass": false, - "detail": "no grid change detected after key press" + "detail": "skipped: game did not start" }, { "name": "move_down", "pass": false, - "detail": "skipped: no soft_drop key (game has only hard_drop)" + "detail": "skipped: game did not start" }, { "name": "rotate", "pass": false, - "detail": "no shape change detected after rotate key (0 distinct shape(s))" + "detail": "skipped: game did not start" }, { "name": "hard_drop", "pass": false, - "detail": "no grid change with bottom cells detected after hard drop key" + "detail": "skipped: game did not start" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "skipped: not enough piece types to verify (saw 0 of J/L/T, need 2)" + "detail": "skipped: game did not start" }, { "name": "piece_locks", "pass": false, - "detail": "could not verify piece locking via grid 
reader" + "detail": "skipped: game did not start" }, { "name": "new_piece_spawns", "pass": false, - "detail": "could not detect new piece spawning at top via grid reader" + "detail": "skipped: game did not start" }, { "name": "multiple_pieces", @@ -313,10 +295,10 @@ ], "summary": { "total": 26, - "passed": 3, - "failed": 7, - "skipped": 16, - "score": 0.3 + "passed": 2, + "failed": 1, + "skipped": 23, + "score": 0.67 }, "gameplay": { "pieces_placed": 0, @@ -327,16 +309,16 @@ }, "competitive_play": null, "session": { - "frames": 18, + "frames": 0, "events_count": 0, "pieces_spawned": 0, "pieces_locked": 0, "lines_cleared": 0, "piece_types_seen": [], - "grid_read_success_rate": 1 + "grid_read_success_rate": 0 }, "performance": { - "load_time_ms": 34 + "load_time_ms": 28 }, "accessibility": { "issues": [], @@ -346,14 +328,14 @@ "calibration_drift": { "drifted": false, "changes": [], - "recalibrations": 8, - "cacheHits": 8, + "recalibrations": 0, + "cacheHits": 0, "cacheMisses": 0 } } }, - "outcome_score": 0.15, - "score": 0.15, + "outcome_score": 0.335, + "score": 0.335, "sonarqube": { "error": "no SonarQube token found", "score": 0 diff --git a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=uns_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=uns_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json @@ -5,7 +5,7 @@ "grid_detected_at": "initial", "grid_bounds": { "x": 331.140625, - "y": -271, + "y": 97, "width": 300, "height": 600 }, @@ -16,25 +16,7 @@ "rotate": "ArrowUp", "drop": "Space" }, - "control_discovery": { - "move_left": "ArrowLeft (moved 2 col(s) left)", - "move_right": "NOT FOUND", - 
"soft_drop": "NOT FOUND", - "hard_drop": "NOT FOUND", - "rotate_cw": "NOT FOUND", - "rotate_ccw": "NOT FOUND", - "key:ArrowLeft": "moved 2 col(s) left", - "key:ArrowRight": "grid empty before press (no piece)", - "key:d": "grid empty before press (no piece)", - "key:l": "grid empty before press (no piece)", - "key:ArrowUp": "no change", - "key:x": "no change", - "key:w": "no change", - "key:Space": "grid empty before press (no piece)", - "key:Enter": "no change", - "key:ArrowDown": "grid empty before press (no piece)" - }, - "start_mechanism": "button", + "start_mechanism": "unknown", "score_element_found": true, "grid_confidence": 0, "survey": { @@ -74,53 +56,53 @@ }, { "name": "game_starts", - "pass": true, - "detail": "started via button (#startBtn, \"Start Game\")" + "pass": false, + "detail": "could not start game with any mechanism" }, { "name": "auto_drop", "pass": false, - "detail": "piece did not move down in 5 seconds (grid-verified)" + "detail": "skipped: game did not start" }, { "name": "move_left", "pass": false, - "detail": "no grid change detected after key press" + "detail": "skipped: game did not start" }, { "name": "move_right", "pass": false, - "detail": "no grid change detected after key press" + "detail": "skipped: game did not start" }, { "name": "move_down", "pass": false, - "detail": "skipped: no soft_drop key (game has only hard_drop)" + "detail": "skipped: game did not start" }, { "name": "rotate", "pass": false, - "detail": "no shape change detected after rotate key (0 distinct shape(s))" + "detail": "skipped: game did not start" }, { "name": "hard_drop", "pass": false, - "detail": "no grid change with bottom cells detected after hard drop key" + "detail": "skipped: game did not start" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "skipped: not enough piece types to verify (saw 0 of J/L/T, need 2)" + "detail": "skipped: game did not start" }, { "name": "piece_locks", "pass": false, - "detail": "could not verify piece 
locking via grid reader" + "detail": "skipped: game did not start" }, { "name": "new_piece_spawns", "pass": false, - "detail": "could not detect new piece spawning at top via grid reader" + "detail": "skipped: game did not start" }, { "name": "multiple_pieces", @@ -200,10 +182,10 @@ ], "summary": { "total": 26, - "passed": 3, - "failed": 7, - "skipped": 16, - "score": 0.3 + "passed": 2, + "failed": 1, + "skipped": 23, + "score": 0.67 }, "gameplay": { "pieces_placed": 0, @@ -214,16 +196,16 @@ }, "competitive_play": null, "session": { - "frames": 18, + "frames": 0, "events_count": 0, "pieces_spawned": 0, "pieces_locked": 0, "lines_cleared": 0, "piece_types_seen": [], - "grid_read_success_rate": 1 + "grid_read_success_rate": 0 }, "performance": { - "load_time_ms": 34 + "load_time_ms": 28 }, "accessibility": { "issues": [], @@ -233,8 +215,8 @@ "calibration_drift": { "drifted": false, "changes": [], - "recalibrations": 8, - "cacheHits": 8, + "recalibrations": 0, + "cacheHits": 0, "cacheMisses": 0 } } \ No newline at end of file diff --git a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=uns_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run2/gameplay-bot-report.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=uns_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run2/gameplay-bot-report.json @@ -1,129 +1,549 @@ { "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, + "renderer": "dom", + "grid_detected": true, + "grid_detected_at": "initial", + "grid_bounds": { + "x": 380, + "y": -152, + "width": 300, + "height": 600 + }, "controls": { "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "ArrowUp", + "rotate": "x", "drop": "Space" 
}, - "start_mechanism": "unknown", - "score_element_found": false, - "grid_confidence": 0 + "control_discovery": { + "move_left": "ArrowLeft (moved 2 col(s) left)", + "move_right": "NOT FOUND", + "soft_drop": "NOT FOUND", + "hard_drop": "Space (teleported 7 rows to bottom)", + "rotate_cw": "x (shape changed (rotation))", + "rotate_ccw": "z (shape changed (rotation))", + "key:ArrowLeft": "moved 2 col(s) left", + "key:ArrowRight": "other change (disappeared=2, appeared=2)", + "key:d": "no change", + "key:l": "no change", + "key:ArrowUp": "no change", + "key:x": "shape changed (rotation)", + "key:Space": "teleported 7 rows to bottom", + "key:s": "no change", + "key:ArrowDown": "other change (disappeared=3, appeared=3)", + "key:z": "shape changed (rotation)" + }, + "start_mechanism": "button", + "score_element_found": true, + "grid_confidence": 0, + "survey": { + "has_overlay": false, + "has_canvas": false, + "has_dom_grid": false, + "visible_text": [ + "SCORE", + "0", + "LEVEL", + "1", + "LINES", + "0", + "NEXT PIECE", + "CONTROLS", + "← → Move", + "↓ Soft Drop", + "Space Hard Drop", + "Z X Rotate", + "P Pause/Resume", + "START GAME", + "PAUSE" + ], + "clickable_elements": 3 + } }, "tests": [ { "name": "game_loads", "pass": true, - "detail": "no console errors" + "detail": "loaded with landmarks: body_content, dom_grid, tetris_ratio" }, { "name": "game_starts", - "pass": false, - "detail": "could not start game with any mechanism" + "pass": true, + "detail": "started via button (#startBtn, \"Start Game\")" }, { "name": "auto_drop", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "grid state 
changed after key press (grid-verified)" }, { "name": "move_down", "pass": false, - "detail": "skipped: game did not start" + "detail": "skipped: no soft_drop key (game has only hard_drop)" }, { "name": "rotate", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "piece cycled through 4 distinct shapes after 4 rotate presses (grid-verified)" }, { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: game did not start" + "name": "hard_drop", + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" }, { - "name": "hard_drop", - "pass": false, - "detail": "skipped: game did not start" + "name": "all_pieces_rotate", + "pass": true, + "detail": "3 J/L/T piece type(s) rotated to 3+ distinct shapes [J:3 T:4 L:4]" }, { "name": "piece_locks", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "1 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", - "pass": false, - "detail": "skipped: mechanics phase not met" + "pass": true, + "detail": "6 pieces placed during play session" }, { "name": "line_clear", + "pass": true, + "detail": "4 line(s) cleared (grid-verified)" + }, + { + "name": "score_increases_on_clear", + "pass": true, + "detail": "score went from 81 to 200 after line clear" + }, + { + "name": "score_element_visible", + "pass": true, + "detail": "score display found (#score)" + }, + { + "name": "game_over", + "pass": true, + "detail": "game stopped after stacking to top (grid-verified)" + }, + { + "name": "playable_30s", + "pass": true, + "detail": "played for 30s, placed 12 pieces, no crashes" + }, + { + "name": "multi_line_clear", "pass": false, - "detail": "skipped: mechanics phase not met" + "detail": "skipped: no multi-line clear opportunity occurred 
during play" }, { - "name": "score_changes", + "name": "score_scaling", "pass": false, - "detail": "skipped: mechanics phase not met" + "detail": "skipped: no multi-line clear occurred to test scaling" }, { - "name": "game_over", + "name": "level_progression", "pass": false, - "detail": "skipped: gameplay phase not met" + "detail": "skipped: only 3 lines cleared (need 10+)" }, { - "name": "playable_30s", + "name": "speed_progression", "pass": false, - "detail": "skipped: gameplay phase not met" + "detail": "skipped: level did not increase, cannot test speed change" + }, + { + "name": "next_piece_preview", + "pass": true, + "detail": "next piece preview display found" + }, + { + "name": "game_over_display", + "pass": true, + "detail": "overlay detected, restart clickable present (phase6)" + }, + { + "name": "counter_clockwise_rotation", + "pass": true, + "detail": "Z key rotates opposite direction from Up arrow" + }, + { + "name": "soft_drop_distinct", + "pass": false, + "detail": "skipped: no soft_drop key (game has only hard_drop)" + }, + { + "name": "rendering_clean", + "pass": false, + "detail": "skipped: not enough data to assess rendering trails" } ], "summary": { - "total": 16, - "passed": 1, - "failed": 15, - "score": 0.06 + "total": 26, + "passed": 19, + "failed": 0, + "skipped": 7, + "score": 1 }, "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, + "pieces_placed": 12, + "lines_cleared": 4, + "max_score_observed": 244, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "competitive_play": { + "duration_seconds": 60, + "pieces_placed": 10, + "total_lines_cleared": 3, + "single_clears": 3, + "double_clears": 0, + "triple_clears": 0, + "tetris_clears": 0, + "max_combo": 1, + "score_readings": [ + 1, + 3, + 3, + 4, + 4, + 5, + 5, + 6, + 6, + 7, + 7, + 7, + 8, + 8, + 9, + 9, + 10, + 10, + 11, + 11, + 11, + 12, + 12, + 13, + 13, + 14, + 14, + 14, + 15, + 15, + 16, + 16, + 17, + 17, + 18, + 
18, + 18, + 60, + 60, + 61, + 61, + 61, + 179, + 180, + 180, + 181, + 199, + 199, + 200, + 200, + 318, + 318, + 318, + 337, + 338, + 338, + 338, + 357, + 358, + 358, + 376, + 376, + 376, + 377, + 377, + 394, + 394, + 395, + 395, + 396, + 514, + 514, + 515, + 515, + 516, + 516, + 517, + 517, + 518, + 518, + 518, + 519, + 519, + 520, + 520, + 521, + 521, + 522, + 522, + 523, + 523, + 523, + 524, + 524, + 525, + 525, + 526, + 526, + 527, + 527, + 528, + 528, + 528, + 528, + 528, + 529, + 529, + 530, + 530, + 531, + 531, + 532, + 532, + 533, + 533, + 533, + 534, + 534, + 535, + 535, + 536, + 536, + 537, + 537, + 538, + 538, + 539, + 539, + 539, + 540, + 540, + 541, + 541, + 541, + 541, + 542, + 542, + 543, + 543, + 544, + 544, + 544, + 545, + 545, + 545 + ], + "score_final": 545, + "score_increases": [ + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 42, + 1, + 118, + 1, + 1, + 18, + 1, + 118, + 19, + 1, + 19, + 1, + 18, + 1, + 17, + 1, + 1, + 118, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "level_readings": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "level_final": 1, + "game_over_reached": false, + "game_over_text_found": null, + "restart_available": false, + "next_piece_visible": true, + "speed_increased": false, + "bugs_detected": [] + }, "session": { - "frames": 0, - "events_count": 0, - "pieces_spawned": 0, - "pieces_locked": 0, - "lines_cleared": 0, - "piece_types_seen": [], - "grid_read_success_rate": 0 + "frames": 1014, + "events_count": 12, + "pieces_spawned": 1, + "pieces_locked": 6, + 
"lines_cleared": 4, + "piece_types_seen": [ + "unknown", + "L", + "I", + "T", + "Z", + "S", + "J" + ], + "grid_read_success_rate": 1 }, "performance": { - "load_time_ms": 143 + "load_time_ms": 30 }, "accessibility": { "issues": [], "issue_count": 0, "pass": true + }, + "calibration_drift": { + "drifted": false, + "changes": [], + "recalibrations": 20, + "cacheHits": 20, + "cacheMisses": 0 } } \ No newline at end of file diff --git a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=detailed_rndr=none_strat=none_tst=none_tedit=off_tglob=off_tgrep=on_tread=off_twrite=on_web=off_run2/eval_results.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=detailed_rndr=none_strat=none_tst=none_tedit=off_tglob=off_tgrep=on_tread=off_twrite=on_web=off_run2/eval_results.json @@ -106,253 +106,11 @@ }, "gameplay_bot": { "pass": false, - "score": 0.2, - "total": 26, - "passed": 2, - "failed": 8, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_detected_at": "initial", - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "control_discovery": { - "move_left": "ArrowLeft (moved 4 col(s) left)", - "move_right": "NOT FOUND", - "soft_drop": "NOT FOUND", - "hard_drop": "NOT FOUND", - "rotate_cw": "NOT FOUND", - "rotate_ccw": "NOT FOUND", - "key:ArrowLeft": "moved 4 col(s) left", - "key:ArrowRight": "grid read failed before press", - "key:d": "grid read failed before press", - "key:l": "grid read failed before press", - "key:ArrowUp": "grid read failed before press", - "key:x": "grid read failed before press", - "key:w": "grid read failed before press", - "key:Space": "grid read failed before press", - "key:Enter": "grid read failed before press", 
- "key:ArrowDown": "grid read failed before press" - }, - "start_mechanism": "auto", - "score_element_found": false, - "grid_confidence": 0, - "survey": { - "has_overlay": false, - "has_canvas": false, - "has_dom_grid": true, - "visible_text": [ - "TETRIS", - "PUNTUACI\u00d3N", - "0", - "NIVEL", - "1", - "L\u00cdNEAS", - "0", - "SIGUIENTE", - "CONTROLES", - "\u2190\u2192", - "Mover", - "\u2193", - "Ca\u00edda suave", - "\u2191", - "Rotar (CW)", - "Z", - "Rotar (CCW)", - "ESPACIO", - "Ca\u00edda fuerte", - "R" - ], - "clickable_elements": 1 - } - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "loaded with landmarks: body_content, dom_grid, tetris_ratio, cells_container" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "grid reader unreliable, cannot verify auto-drop" - }, - { - "name": "move_left", - "pass": false, - "detail": "grid reader unreliable, cannot verify movement" - }, - { - "name": "move_right", - "pass": false, - "detail": "grid reader unreliable, cannot verify movement" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: no soft_drop key (game has only hard_drop)" - }, - { - "name": "rotate", - "pass": false, - "detail": "grid reader unreliable, cannot verify rotation" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "grid reader unreliable, cannot verify hard drop" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: not enough piece types to verify (saw 0 of J/L/T, need 2)" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "grid reader unreliable, cannot verify piece locking" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece spawning at top via grid reader" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: mechanics phase failed" - }, - { - "name": "line_clear", - "pass": false, - "detail": 
"skipped: mechanics phase failed" - }, - { - "name": "score_increases_on_clear", - "pass": false, - "detail": "skipped: mechanics phase failed" - }, - { - "name": "score_element_visible", - "pass": false, - "detail": "no score display detected" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: piece lifecycle failed" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: gameplay phase failed" - }, - { - "name": "multi_line_clear", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "score_scaling", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "level_progression", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "speed_progression", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "next_piece_preview", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "game_over_display", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "counter_clockwise_rotation", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "soft_drop_distinct", - "pass": false, - "detail": "skipped: competitive play phase did not run" - }, - { - "name": "rendering_clean", - "pass": false, - "detail": "skipped: competitive play phase did not run" - } - ], - "summary": { - "total": 26, - "passed": 2, - "failed": 8, - "skipped": 16, - "score": 0.2 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "competitive_play": null, - "session": { - "frames": 0, - "events_count": 0, - "pieces_spawned": 0, - "pieces_locked": 0, - "lines_cleared": 0, - "piece_types_seen": [], - "grid_read_success_rate": 0 - }, - "performance": { - "load_time_ms": 38 - }, - 
"accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - }, - "calibration_drift": { - "drifted": true, - "changes": [ - "renderer", - "grid_bounds", - "score_element", - "level_element" - ], - "recalibrations": 8, - "cacheHits": 1, - "cacheMisses": 7 - } - } + "score": 0, + "error": "Gameplay bot timed out after 300 seconds" }, - "outcome_score": 0.1, - "score": 0.1, + "outcome_score": 0.0, + "score": 0.0, "sonarqube": { "error": "no SonarQube token found", "score": 0 diff --git a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/eval_results.json @@ -112,11 +112,446 @@ }, "gameplay_bot": { "pass": false, - "score": 0, - "error": "Gameplay bot timed out after 300 seconds" + "score": 0.9, + "total": 26, + "passed": 19, + "failed": 2, + "report": { + "implementation": { + "renderer": "dom", + "grid_detected": true, + "grid_detected_at": "initial", + "grid_bounds": { + "x": 368.921875, + "y": -368, + "width": 300, + "height": 600 + }, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "s", + "rotate": "ArrowUp", + "drop": "Space" + }, + "control_discovery": { + "move_left": "ArrowLeft (moved 2 col(s) left)", + "move_right": "ArrowRight (moved 2 col(s) right)", + "soft_drop": "s (moved 2 row(s) down)", + "hard_drop": "Space (teleported 8 rows to bottom)", + "rotate_cw": "NOT FOUND", + "rotate_ccw": "NOT FOUND", + "key:ArrowLeft": "moved 2 col(s) left", + "key:ArrowRight": "moved 2 col(s) right", + "key:ArrowUp": "no change", + "key:x": "no change", + "key:w": "moved 2 row(s) down", + 
"key:Space": "teleported 8 rows to bottom", + "key:s": "moved 2 row(s) down", + "key:z": "no change", + "key:Control": "no change" + }, + "start_mechanism": "button", + "score_element_found": true, + "grid_confidence": 0, + "survey": { + "has_overlay": false, + "has_canvas": false, + "has_dom_grid": false, + "visible_text": [ + "PUNTUACI\u00d3N", + "0", + "NIVEL", + "1", + "L\u00cdNEAS", + "0", + "SIGUIENTE", + "CONTROLES", + "Izquierda", + "\u2190 A", + "Derecha", + "\u2192 D", + "Bajar", + "\u2193 S", + "Ca\u00edda r\u00e1pida", + "ESPACIO", + "Rotar", + "W \u2191", + "INICIAR JUEGO", + "PAUSAR" + ], + "clickable_elements": 3 + } + }, + "tests": [ + { + "name": "game_loads", + "pass": true, + "detail": "loaded with landmarks: body_content, dom_grid, tetris_ratio" + }, + { + "name": "game_starts", + "pass": true, + "detail": "started via button (#startBtn, \"Iniciar Juego\")" + }, + { + "name": "auto_drop", + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" + }, + { + "name": "move_left", + "pass": true, + "detail": "grid state changed after key press (grid-verified)" + }, + { + "name": "move_right", + "pass": true, + "detail": "grid state changed after key press (grid-verified)" + }, + { + "name": "move_down", + "pass": true, + "detail": "grid state changed after key press (grid-verified)" + }, + { + "name": "rotate", + "pass": true, + "detail": "piece cycled through 4 distinct shapes after 4 rotate presses (grid-verified)" + }, + { + "name": "hard_drop", + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" + }, + { + "name": "all_pieces_rotate", + "pass": true, + "detail": "3 J/L/T piece type(s) rotated to 3+ distinct shapes [L:4 J:4 T:4]" + }, + { + "name": "piece_locks", + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" + }, + { + "name": "new_piece_spawns", + "pass": true, + "detail": "1 new piece(s) detected at top of grid" + }, + { + "name": 
"multiple_pieces", + "pass": true, + "detail": "14 pieces placed during play session" + }, + { + "name": "line_clear", + "pass": true, + "detail": "16 line(s) cleared (grid-verified)" + }, + { + "name": "score_increases_on_clear", + "pass": true, + "detail": "score went from 0 to 100 after line clear" + }, + { + "name": "score_element_visible", + "pass": true, + "detail": "score display found (#score)" + }, + { + "name": "game_over", + "pass": true, + "detail": "game stopped after stacking to top (grid-verified)" + }, + { + "name": "playable_30s", + "pass": true, + "detail": "played for 30s, placed 30 pieces, no crashes" + }, + { + "name": "multi_line_clear", + "pass": false, + "detail": "skipped: no multi-line clear opportunity occurred during play" + }, + { + "name": "score_scaling", + "pass": false, + "detail": "skipped: no multi-line clear occurred to test scaling" + }, + { + "name": "level_progression", + "pass": false, + "detail": "skipped: only 1 lines cleared (need 10+)" + }, + { + "name": "speed_progression", + "pass": false, + "detail": "skipped: level did not increase, cannot test speed change" + }, + { + "name": "next_piece_preview", + "pass": true, + "detail": "next piece preview display found" + }, + { + "name": "game_over_display", + "pass": false, + "detail": "no overlay or restart UI found (phase6)" + }, + { + "name": "counter_clockwise_rotation", + "pass": false, + "detail": "Z key does same as Up arrow or does not rotate" + }, + { + "name": "soft_drop_distinct", + "pass": true, + "detail": "Down arrow moves piece 1 row (distinct from hard drop)" + }, + { + "name": "rendering_clean", + "pass": false, + "detail": "skipped: not enough data to assess rendering trails" + } + ], + "summary": { + "total": 26, + "passed": 19, + "failed": 2, + "skipped": 5, + "score": 0.9 + }, + "gameplay": { + "pieces_placed": 30, + "lines_cleared": 16, + "max_score_observed": 300, + "play_duration_seconds": 30, + "errors_during_play": 0 + }, + "competitive_play": { + 
"duration_seconds": 60, + "pieces_placed": 6, + "total_lines_cleared": 1, + "single_clears": 1, + "double_clears": 0, + "triple_clears": 0, + "tetris_clears": 0, + "max_combo": 1, + "score_readings": [ + 0, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100 + ], + "score_final": 100, + "score_increases": [ + 100 + ], + "level_readings": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "level_final": 1, + "game_over_reached": false, + "game_over_text_found": null, + "restart_available": false, + "next_piece_visible": true, + "speed_increased": false, + "bugs_detected": [] + }, + "session": { + "frames": 811, + "events_count": 15, + "pieces_spawned": 1, + "pieces_locked": 14, + "lines_cleared": 16, + "piece_types_seen": [ + "S", + "Z", + "L", + "T" + ], + "grid_read_success_rate": 1 + }, + "performance": { + "load_time_ms": 36 + }, + "accessibility": { + "issues": [], + "issue_count": 0, + "pass": true + }, + "calibration_drift": { + "drifted": false, + "changes": [], + "recalibrations": 19, + "cacheHits": 19, + "cacheMisses": 0 + } + } }, - "outcome_score": 0.0, - "score": 0.0, + 
"outcome_score": 0.45, + "score": 0.45, "sonarqube": { "error": "no SonarQube token found", "score": 0 diff --git a/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json b/results/runs/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/gameplay-bot-report.json @@ -1,129 +1,433 @@ { "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, + "renderer": "dom", + "grid_detected": true, + "grid_detected_at": "initial", + "grid_bounds": { + "x": 368.921875, + "y": -368, + "width": 300, + "height": 600 + }, "controls": { "left": "ArrowLeft", "right": "ArrowRight", - "down": "ArrowDown", + "down": "s", "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "unknown", + "control_discovery": { + "move_left": "ArrowLeft (moved 2 col(s) left)", + "move_right": "ArrowRight (moved 2 col(s) right)", + "soft_drop": "s (moved 2 row(s) down)", + "hard_drop": "Space (teleported 8 rows to bottom)", + "rotate_cw": "NOT FOUND", + "rotate_ccw": "NOT FOUND", + "key:ArrowLeft": "moved 2 col(s) left", + "key:ArrowRight": "moved 2 col(s) right", + "key:ArrowUp": "no change", + "key:x": "no change", + "key:w": "moved 2 row(s) down", + "key:Space": "teleported 8 rows to bottom", + "key:s": "moved 2 row(s) down", + "key:z": "no change", + "key:Control": "no change" + }, + "start_mechanism": "button", "score_element_found": true, - "grid_confidence": 0 + "grid_confidence": 0, + "survey": { + "has_overlay": false, + "has_canvas": false, + "has_dom_grid": false, + "visible_text": [ + "PUNTUACIÓN", + "0", + "NIVEL", + "1", + "LÍNEAS", + "0", + "SIGUIENTE", + "CONTROLES", + 
"Izquierda", + "← A", + "Derecha", + "→ D", + "Bajar", + "↓ S", + "Caída rápida", + "ESPACIO", + "Rotar", + "W ↑", + "INICIAR JUEGO", + "PAUSAR" + ], + "clickable_elements": 3 + } }, "tests": [ { "name": "game_loads", "pass": true, - "detail": "no console errors" + "detail": "loaded with landmarks: body_content, dom_grid, tetris_ratio" }, { "name": "game_starts", - "pass": false, - "detail": "could not start game with any mechanism" + "pass": true, + "detail": "started via button (#startBtn, \"Iniciar Juego\")" }, { "name": "auto_drop", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "piece cycled through 4 distinct shapes after 4 rotate presses (grid-verified)" }, { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: game did not start" + "name": "hard_drop", + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" }, { - "name": "hard_drop", - "pass": false, - "detail": "skipped: game did not start" + "name": "all_pieces_rotate", + "pass": true, + "detail": "3 J/L/T piece type(s) rotated to 3+ distinct shapes [L:4 J:4 T:4]" }, { "name": "piece_locks", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": 
"new_piece_spawns", - "pass": false, - "detail": "skipped: game did not start" + "pass": true, + "detail": "1 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", - "pass": false, - "detail": "skipped: mechanics phase not met" + "pass": true, + "detail": "14 pieces placed during play session" }, { "name": "line_clear", + "pass": true, + "detail": "16 line(s) cleared (grid-verified)" + }, + { + "name": "score_increases_on_clear", + "pass": true, + "detail": "score went from 0 to 100 after line clear" + }, + { + "name": "score_element_visible", + "pass": true, + "detail": "score display found (#score)" + }, + { + "name": "game_over", + "pass": true, + "detail": "game stopped after stacking to top (grid-verified)" + }, + { + "name": "playable_30s", + "pass": true, + "detail": "played for 30s, placed 30 pieces, no crashes" + }, + { + "name": "multi_line_clear", "pass": false, - "detail": "skipped: mechanics phase not met" + "detail": "skipped: no multi-line clear opportunity occurred during play" }, { - "name": "score_changes", + "name": "score_scaling", "pass": false, - "detail": "skipped: mechanics phase not met" + "detail": "skipped: no multi-line clear occurred to test scaling" }, { - "name": "game_over", + "name": "level_progression", "pass": false, - "detail": "skipped: gameplay phase not met" + "detail": "skipped: only 1 lines cleared (need 10+)" }, { - "name": "playable_30s", + "name": "speed_progression", + "pass": false, + "detail": "skipped: level did not increase, cannot test speed change" + }, + { + "name": "next_piece_preview", + "pass": true, + "detail": "next piece preview display found" + }, + { + "name": "game_over_display", + "pass": false, + "detail": "no overlay or restart UI found (phase6)" + }, + { + "name": "counter_clockwise_rotation", "pass": false, - "detail": "skipped: gameplay phase not met" + "detail": "Z key does same as Up arrow or does not rotate" + }, + { + "name": "soft_drop_distinct", + "pass": true, + "detail": 
"Down arrow moves piece 1 row (distinct from hard drop)" + }, + { + "name": "rendering_clean", + "pass": false, + "detail": "skipped: not enough data to assess rendering trails" } ], "summary": { - "total": 16, - "passed": 1, - "failed": 15, - "score": 0.06 + "total": 26, + "passed": 19, + "failed": 2, + "skipped": 5, + "score": 0.9 }, "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, + "pieces_placed": 30, + "lines_cleared": 16, + "max_score_observed": 300, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "competitive_play": { + "duration_seconds": 60, + "pieces_placed": 6, + "total_lines_cleared": 1, + "single_clears": 1, + "double_clears": 0, + "triple_clears": 0, + "tetris_clears": 0, + "max_combo": 1, + "score_readings": [ + 0, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100, + 100 + ], + "score_final": 100, + "score_increases": [ + 100 + ], + "level_readings": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "level_final": 1, + "game_over_reached": false, + "game_over_text_found": null, + 
"restart_available": false, + "next_piece_visible": true, + "speed_increased": false, + "bugs_detected": [] + }, "session": { - "frames": 0, - "events_count": 0, - "pieces_spawned": 0, - "pieces_locked": 0, - "lines_cleared": 0, - "piece_types_seen": [], - "grid_read_success_rate": 0 + "frames": 811, + "events_count": 15, + "pieces_spawned": 1, + "pieces_locked": 14, + "lines_cleared": 16, + "piece_types_seen": [ + "S", + "Z", + "L", + "T" + ], + "grid_read_success_rate": 1 }, "performance": { - "load_time_ms": 77 + "load_time_ms": 36 }, "accessibility": { "issues": [], "issue_count": 0, "pass": true + }, + "calibration_drift": { + "drifted": false, + "changes": [], + "recalibrations": 19, + "cacheHits": 19, + "cacheMisses": 0 } } \ No newline at end of file diff --git a/results/runs/tetris_arch=none_ctx=provided_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=off_budget=low_model=haiku45_pw=avail_prompt=detailed_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=off_tread=off_twrite=off_web=off_run3/eval_results.json b/results/runs/tetris_arch=none_ctx=provided_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=off_budget=low_model=haiku45_pw=avail_prompt=detailed_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=off_tread=off_twrite=off_web=off_run3/eval_results.json @@ -314,7 +314,7 @@ "grid_read_success_rate": 0 }, "performance": { - "load_time_ms": 42 + "load_time_ms": 31 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_arch=none_ctx=provided_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=off_budget=low_model=haiku45_pw=avail_prompt=detailed_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=off_tread=off_twrite=off_web=off_run3/gameplay-bot-report.json 
b/results/runs/tetris_arch=none_ctx=provided_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=off_budget=low_model=haiku45_pw=avail_prompt=detailed_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=off_tread=off_twrite=off_web=off_run3/gameplay-bot-report.json @@ -196,7 +196,7 @@ "grid_read_success_rate": 0 }, "performance": { - "load_time_ms": 42 + "load_time_ms": 31 }, "accessibility": { "issues": [],

Impressum · Datenschutz