commit fd8274318dc475fe75d10c9b588c4af38d451c91
parent 711df365354d81be00d01bce2428e7c283e0ec2b
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Thu, 16 Apr 2026 12:10:23 +0200
Add human labels for 3 more calibration runs
bbb70053 (haiku-4.5 DOM) flagged as very laggy -- playable but the
lag hurts playability. c1013100 (gemma-4-26b) fails to load.
e047cf3a (haiku-4.5) plays fully.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
3 files changed, 43 insertions(+), 42 deletions(-)
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/bbb70053.json b/tasks/tetris/eval/gameplay-bot/calibration/bbb70053.json
@@ -2,33 +2,33 @@
"run_id": "tetris_arch=none_ctx=provided_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=none_tst=none_tedit=off_tglob=off_tgrep=off_tread=on_twrite=off_web=on_run3",
"short_id": "bbb70053",
"label": "DOM game (haiku-4.5, en)",
- "notes": "",
- "human_tested_at": "",
+ "notes": "Very laggy; hurts playability even though it is playable.",
+ "human_tested_at": "2026-04-16",
"human_tests": {
- "game_loads": null,
- "game_starts": null,
- "auto_drop": null,
- "move_left": null,
- "move_right": null,
- "move_down": null,
- "rotate": null,
- "hard_drop": null,
+ "game_loads": true,
+ "game_starts": true,
+ "auto_drop": true,
+ "move_left": true,
+ "move_right": true,
+ "move_down": true,
+ "rotate": true,
+ "hard_drop": true,
"all_pieces_rotate": null,
- "piece_locks": null,
- "new_piece_spawns": null,
- "multiple_pieces": null,
- "line_clear": null,
+ "piece_locks": true,
+ "new_piece_spawns": true,
+ "multiple_pieces": true,
+ "line_clear": true,
"score_increases_on_clear": null,
"score_element_visible": null,
- "game_over": null,
- "playable_30s": null,
+ "game_over": true,
+ "playable_30s": true,
"multi_line_clear": null,
"score_scaling": null,
"level_progression": null,
"speed_progression": null,
"next_piece_preview": null,
- "game_over_display": null,
- "counter_clockwise_rotation": null,
+ "game_over_display": true,
+ "counter_clockwise_rotation": true,
"soft_drop_distinct": null
}
-}
\ No newline at end of file
+}
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/c1013100.json b/tasks/tetris/eval/gameplay-bot/calibration/c1013100.json
@@ -3,10 +3,10 @@
"short_id": "c1013100",
"label": "Calibration (gemma-4-26b, en)",
"notes": "",
- "human_tested_at": "",
+ "human_tested_at": "2026-04-16",
"human_tests": {
- "game_loads": null,
- "game_starts": null,
+ "game_loads": false,
+ "game_starts": false,
"auto_drop": null,
"move_left": null,
"move_right": null,
@@ -32,4 +32,4 @@
"soft_drop_distinct": null,
"rendering_clean": null
}
-}
\ No newline at end of file
+}
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/e047cf3a.json b/tasks/tetris/eval/gameplay-bot/calibration/e047cf3a.json
@@ -3,33 +3,34 @@
"short_id": "e047cf3a",
"label": "Calibration (haiku-4.5, en)",
"notes": "",
- "human_tested_at": "",
+ "human_tested_at": "2026-04-16",
"human_tests": {
- "game_loads": null,
- "game_starts": null,
- "auto_drop": null,
- "move_left": null,
- "move_right": null,
- "move_down": null,
- "rotate": null,
- "hard_drop": null,
- "all_pieces_rotate": null,
- "piece_locks": null,
- "new_piece_spawns": null,
+ "game_loads": true,
+ "game_starts": true,
+ "auto_drop": true,
+ "move_left": true,
+ "move_right": true,
+ "move_down": true,
+ "rotate": true,
+ "hard_drop": true,
+ "all_pieces_rotate": true,
+ "piece_locks": true,
+ "new_piece_spawns": true,
"multiple_pieces": null,
- "line_clear": null,
+ "line_clear": true,
"score_increases_on_clear": null,
"score_element_visible": null,
- "game_over": null,
- "playable_30s": null,
+ "game_over": true,
+ "playable_30s": true,
"multi_line_clear": null,
"score_scaling": null,
"level_progression": null,
"speed_progression": null,
- "next_piece_preview": null,
- "game_over_display": null,
+ "next_piece_preview": true,
+ "game_over_display": true,
"counter_clockwise_rotation": null,
"soft_drop_distinct": null,
- "rendering_clean": null
+ "rendering_clean": null,
+ "score_changes": true
}
-}
\ No newline at end of file
+}