loop-benchmarking

Controlled experiments across agentic coding configurations: same task, one variable at a time, to find out what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

commit a8683609ce323889201069673c68c03616555eb6
parent da239183820ed8946e7f489fa3abfe7ff2513ba6
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Sun, 12 Apr 2026 18:23:03 +0200

Add 7 new games to calibration page

Adds calibration entries for games generated by opus, qwen, glm-5.1, haiku, and
gemma-4-26b, covering a range of scores (0-44%). Human test results are left unanswered (null), ready for manual testing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
A tasks/tetris/eval/gameplay-bot/calibration/2763232d.json | 36 ++++++++++++++++++++++++++++++++++++
A tasks/tetris/eval/gameplay-bot/calibration/5ae88633.json | 36 ++++++++++++++++++++++++++++++++++++
A tasks/tetris/eval/gameplay-bot/calibration/6f157de1.json | 36 ++++++++++++++++++++++++++++++++++++
A tasks/tetris/eval/gameplay-bot/calibration/7c167ef9.json | 36 ++++++++++++++++++++++++++++++++++++
A tasks/tetris/eval/gameplay-bot/calibration/9b785a51.json | 36 ++++++++++++++++++++++++++++++++++++
A tasks/tetris/eval/gameplay-bot/calibration/c1013100.json | 36 ++++++++++++++++++++++++++++++++++++
A tasks/tetris/eval/gameplay-bot/calibration/e047cf3a.json | 36 ++++++++++++++++++++++++++++++++++++
7 files changed, 252 insertions(+), 0 deletions(-)

diff --git a/tasks/tetris/eval/gameplay-bot/calibration/2763232d.json b/tasks/tetris/eval/gameplay-bot/calibration/2763232d.json @@ -0,0 +1,35 @@ +{ + "run_id": "tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=js_lint=on_budget=low_model=qwen36p_pw=off_prompt=simple_prov=or_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1", + "short_id": "2763232d", + "label": "Calibration (qwen-3.6-plus, en)", + "notes": "", + "human_tested_at": "", + "human_tests": { + "game_loads": null, + "game_starts": null, + "auto_drop": null, + "move_left": null, + "move_right": null, + "move_down": null, + "rotate": null, + "hard_drop": null, + "all_pieces_rotate": null, + "piece_locks": null, + "new_piece_spawns": null, + "multiple_pieces": null, + "line_clear": null, + "score_increases_on_clear": null, + "score_element_visible": null, + "game_over": null, + "playable_30s": null, + "multi_line_clear": null, + "score_scaling": null, + "level_progression": null, + "speed_progression": null, + "next_piece_preview": null, + "game_over_display": null, + "counter_clockwise_rotation": null, + "soft_drop_distinct": null, + "rendering_clean": null + } +} +\ No newline at end of file diff --git a/tasks/tetris/eval/gameplay-bot/calibration/5ae88633.json b/tasks/tetris/eval/gameplay-bot/calibration/5ae88633.json @@ -0,0 +1,35 @@ +{ + "run_id": "tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=js_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run2", + "short_id": "5ae88633", + "label": "Calibration (haiku-4.5, en)", + "notes": "", + "human_tested_at": "", + "human_tests": { + "game_loads": null, + "game_starts": null, + "auto_drop": null, + "move_left": null, + "move_right": null, + "move_down": null, + "rotate": null, + "hard_drop": null, + "all_pieces_rotate": null, + "piece_locks": null, + 
"new_piece_spawns": null, + "multiple_pieces": null, + "line_clear": null, + "score_increases_on_clear": null, + "score_element_visible": null, + "game_over": null, + "playable_30s": null, + "multi_line_clear": null, + "score_scaling": null, + "level_progression": null, + "speed_progression": null, + "next_piece_preview": null, + "game_over_display": null, + "counter_clockwise_rotation": null, + "soft_drop_distinct": null, + "rendering_clean": null + } +} +\ No newline at end of file diff --git a/tasks/tetris/eval/gameplay-bot/calibration/6f157de1.json b/tasks/tetris/eval/gameplay-bot/calibration/6f157de1.json @@ -0,0 +1,35 @@ +{ + "run_id": "tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=js_lint=on_budget=low_model=opus46_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run3", + "short_id": "6f157de1", + "label": "Calibration (opus-4.6, en)", + "notes": "", + "human_tested_at": "", + "human_tests": { + "game_loads": null, + "game_starts": null, + "auto_drop": null, + "move_left": null, + "move_right": null, + "move_down": null, + "rotate": null, + "hard_drop": null, + "all_pieces_rotate": null, + "piece_locks": null, + "new_piece_spawns": null, + "multiple_pieces": null, + "line_clear": null, + "score_increases_on_clear": null, + "score_element_visible": null, + "game_over": null, + "playable_30s": null, + "multi_line_clear": null, + "score_scaling": null, + "level_progression": null, + "speed_progression": null, + "next_piece_preview": null, + "game_over_display": null, + "counter_clockwise_rotation": null, + "soft_drop_distinct": null, + "rendering_clean": null + } +} +\ No newline at end of file diff --git a/tasks/tetris/eval/gameplay-bot/calibration/7c167ef9.json b/tasks/tetris/eval/gameplay-bot/calibration/7c167ef9.json @@ -0,0 +1,35 @@ +{ + "run_id": 
"tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=glm51_pw=off_prompt=simple_prov=zai_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1", + "short_id": "7c167ef9", + "label": "Calibration (glm-5.1, en)", + "notes": "", + "human_tested_at": "", + "human_tests": { + "game_loads": null, + "game_starts": null, + "auto_drop": null, + "move_left": null, + "move_right": null, + "move_down": null, + "rotate": null, + "hard_drop": null, + "all_pieces_rotate": null, + "piece_locks": null, + "new_piece_spawns": null, + "multiple_pieces": null, + "line_clear": null, + "score_increases_on_clear": null, + "score_element_visible": null, + "game_over": null, + "playable_30s": null, + "multi_line_clear": null, + "score_scaling": null, + "level_progression": null, + "speed_progression": null, + "next_piece_preview": null, + "game_over_display": null, + "counter_clockwise_rotation": null, + "soft_drop_distinct": null, + "rendering_clean": null + } +} +\ No newline at end of file diff --git a/tasks/tetris/eval/gameplay-bot/calibration/9b785a51.json b/tasks/tetris/eval/gameplay-bot/calibration/9b785a51.json @@ -0,0 +1,35 @@ +{ + "run_id": "tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=glm51_pw=off_prompt=simple_prov=zai_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run3", + "short_id": "9b785a51", + "label": "Calibration (glm-5.1, en)", + "notes": "", + "human_tested_at": "", + "human_tests": { + "game_loads": null, + "game_starts": null, + "auto_drop": null, + "move_left": null, + "move_right": null, + "move_down": null, + "rotate": null, + "hard_drop": null, + "all_pieces_rotate": null, + "piece_locks": null, + "new_piece_spawns": null, + "multiple_pieces": null, + "line_clear": null, + "score_increases_on_clear": null, + "score_element_visible": null, + "game_over": null, + 
"playable_30s": null, + "multi_line_clear": null, + "score_scaling": null, + "level_progression": null, + "speed_progression": null, + "next_piece_preview": null, + "game_over_display": null, + "counter_clockwise_rotation": null, + "soft_drop_distinct": null, + "rendering_clean": null + } +} +\ No newline at end of file diff --git a/tasks/tetris/eval/gameplay-bot/calibration/c1013100.json b/tasks/tetris/eval/gameplay-bot/calibration/c1013100.json @@ -0,0 +1,35 @@ +{ + "run_id": "tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=off_budget=low_model=gemma426b_pw=off_prompt=simple_prov=or_rndr=none_strat=none_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1", + "short_id": "c1013100", + "label": "Calibration (gemma-4-26b, en)", + "notes": "", + "human_tested_at": "", + "human_tests": { + "game_loads": null, + "game_starts": null, + "auto_drop": null, + "move_left": null, + "move_right": null, + "move_down": null, + "rotate": null, + "hard_drop": null, + "all_pieces_rotate": null, + "piece_locks": null, + "new_piece_spawns": null, + "multiple_pieces": null, + "line_clear": null, + "score_increases_on_clear": null, + "score_element_visible": null, + "game_over": null, + "playable_30s": null, + "multi_line_clear": null, + "score_scaling": null, + "level_progression": null, + "speed_progression": null, + "next_piece_preview": null, + "game_over_display": null, + "counter_clockwise_rotation": null, + "soft_drop_distinct": null, + "rendering_clean": null + } +} +\ No newline at end of file diff --git a/tasks/tetris/eval/gameplay-bot/calibration/e047cf3a.json b/tasks/tetris/eval/gameplay-bot/calibration/e047cf3a.json @@ -0,0 +1,35 @@ +{ + "run_id": "tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=js_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1", + "short_id": "e047cf3a", + "label": 
"Calibration (haiku-4.5, en)", + "notes": "", + "human_tested_at": "", + "human_tests": { + "game_loads": null, + "game_starts": null, + "auto_drop": null, + "move_left": null, + "move_right": null, + "move_down": null, + "rotate": null, + "hard_drop": null, + "all_pieces_rotate": null, + "piece_locks": null, + "new_piece_spawns": null, + "multiple_pieces": null, + "line_clear": null, + "score_increases_on_clear": null, + "score_element_visible": null, + "game_over": null, + "playable_30s": null, + "multi_line_clear": null, + "score_scaling": null, + "level_progression": null, + "speed_progression": null, + "next_piece_preview": null, + "game_over_display": null, + "counter_clockwise_rotation": null, + "soft_drop_distinct": null, + "rendering_clean": null + } +} +\ No newline at end of file

Impressum · Datenschutz