loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

commit fd8274318dc475fe75d10c9b588c4af38d451c91
parent 711df365354d81be00d01bce2428e7c283e0ec2b
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Thu, 16 Apr 2026 12:10:23 +0200

Add human labels for 3 more calibration runs

bbb70053 (haiku-4.5 DOM) flagged as very laggy -- playable but the
lag hurts playability. c1013100 (gemma-4-26b) fails to load.
e047cf3a (haiku-4.5) plays fully.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M tasks/tetris/eval/gameplay-bot/calibration/bbb70053.json | 39 +++++++++++++++++++--------------------
M tasks/tetris/eval/gameplay-bot/calibration/c1013100.json | 9 ++++-----
M tasks/tetris/eval/gameplay-bot/calibration/e047cf3a.json | 40 ++++++++++++++++++++--------------------
3 files changed, 43 insertions(+), 45 deletions(-)

diff --git a/tasks/tetris/eval/gameplay-bot/calibration/bbb70053.json b/tasks/tetris/eval/gameplay-bot/calibration/bbb70053.json
@@ -2,33 +2,33 @@
 "run_id": "tetris_arch=none_ctx=provided_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=none_tst=none_tedit=off_tglob=off_tgrep=off_tread=on_twrite=off_web=on_run3",
 "short_id": "bbb70053",
 "label": "DOM game (haiku-4.5, en)",
- "notes": "",
- "human_tested_at": "",
+ "notes": "Very laggy; hurts playability even though it is playable.",
+ "human_tested_at": "2026-04-16",
 "human_tests": {
- "game_loads": null,
- "game_starts": null,
- "auto_drop": null,
- "move_left": null,
- "move_right": null,
- "move_down": null,
- "rotate": null,
- "hard_drop": null,
+ "game_loads": true,
+ "game_starts": true,
+ "auto_drop": true,
+ "move_left": true,
+ "move_right": true,
+ "move_down": true,
+ "rotate": true,
+ "hard_drop": true,
 "all_pieces_rotate": null,
- "piece_locks": null,
- "new_piece_spawns": null,
- "multiple_pieces": null,
- "line_clear": null,
+ "piece_locks": true,
+ "new_piece_spawns": true,
+ "multiple_pieces": true,
+ "line_clear": true,
 "score_increases_on_clear": null,
 "score_element_visible": null,
- "game_over": null,
- "playable_30s": null,
+ "game_over": true,
+ "playable_30s": true,
 "multi_line_clear": null,
 "score_scaling": null,
 "level_progression": null,
 "speed_progression": null,
 "next_piece_preview": null,
- "game_over_display": null,
- "counter_clockwise_rotation": null,
+ "game_over_display": true,
+ "counter_clockwise_rotation": true,
 "soft_drop_distinct": null
 }
-}
-\ No newline at end of file
+}
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/c1013100.json b/tasks/tetris/eval/gameplay-bot/calibration/c1013100.json
@@ -3,10 +3,10 @@
 "short_id": "c1013100",
 "label": "Calibration (gemma-4-26b, en)",
 "notes": "",
- "human_tested_at": "",
+ "human_tested_at": "2026-04-16",
 "human_tests": {
- "game_loads": null,
- "game_starts": null,
+ "game_loads": false,
+ "game_starts": false,
 "auto_drop": null,
 "move_left": null,
 "move_right": null,
@@ -32,4 +32,4 @@
 "soft_drop_distinct": null,
 "rendering_clean": null
 }
-}
-\ No newline at end of file
+}
diff --git a/tasks/tetris/eval/gameplay-bot/calibration/e047cf3a.json b/tasks/tetris/eval/gameplay-bot/calibration/e047cf3a.json
@@ -3,33 +3,34 @@
 "short_id": "e047cf3a",
 "label": "Calibration (haiku-4.5, en)",
 "notes": "",
- "human_tested_at": "",
+ "human_tested_at": "2026-04-16",
 "human_tests": {
- "game_loads": null,
- "game_starts": null,
- "auto_drop": null,
- "move_left": null,
- "move_right": null,
- "move_down": null,
- "rotate": null,
- "hard_drop": null,
- "all_pieces_rotate": null,
- "piece_locks": null,
- "new_piece_spawns": null,
+ "game_loads": true,
+ "game_starts": true,
+ "auto_drop": true,
+ "move_left": true,
+ "move_right": true,
+ "move_down": true,
+ "rotate": true,
+ "hard_drop": true,
+ "all_pieces_rotate": true,
+ "piece_locks": true,
+ "new_piece_spawns": true,
 "multiple_pieces": null,
- "line_clear": null,
+ "line_clear": true,
 "score_increases_on_clear": null,
 "score_element_visible": null,
- "game_over": null,
- "playable_30s": null,
+ "game_over": true,
+ "playable_30s": true,
 "multi_line_clear": null,
 "score_scaling": null,
 "level_progression": null,
 "speed_progression": null,
- "next_piece_preview": null,
- "game_over_display": null,
+ "next_piece_preview": true,
+ "game_over_display": true,
 "counter_clockwise_rotation": null,
 "soft_drop_distinct": null,
- "rendering_clean": null
+ "rendering_clean": null,
+ "score_changes": true
 }
-}
-\ No newline at end of file
+}

Impressum · Datenschutz