loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

commit b19aa539899396ddc9373afcaa71621210b6e113
parent 9fab5af2106b43a76d5ec6827903ccd666eb3945
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Thu,  9 Apr 2026 08:04:42 +0200

Calibration: copy button instead of JSON block, update human results

Replace inline JSON pre block with a clean "Copy JSON" button.
Updated 4c7db3b9 (Spanish, all mechanics work) and 8fe72fce (English,
19 human passes including multi-line clear, score scaling, CCW rotation).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M dashboard/src/components/Calibrate.tsx | 32 +++++++++++++++++++++++---------
M tasks/tetris/eval/gameplay-bot/calibration/4c7db3b9.json | 8 ++++----
M tasks/tetris/eval/gameplay-bot/calibration/8fe72fce.json | 20 ++++++++++----------
3 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/dashboard/src/components/Calibrate.tsx b/dashboard/src/components/Calibrate.tsx @@ -56,6 +56,27 @@ function TriState({ value, onChange }: { value: boolean | null; onChange: (v: bo ); } +function CopyButton({ text, label }: { text: string; label: string }) { + const [copied, setCopied] = useState(false); + return ( + <button + onClick={() => { navigator.clipboard.writeText(text); setCopied(true); setTimeout(() => setCopied(false), 1500); }} + style={{ + padding: "4px 12px", + fontSize: "0.7rem", + background: copied ? "var(--green)" : "transparent", + color: copied ? "#fff" : "var(--text-muted)", + border: `1px solid ${copied ? "var(--green)" : "var(--border)"}`, + cursor: "pointer", + fontFamily: "var(--font-mono)", + transition: "all 0.15s", + }} + > + {copied ? "Copied!" : label} + </button> + ); +} + function CalibrationCard({ data, editing, onUpdate }: { data: ComparisonData; editing: boolean; onUpdate: (tests: Record<string, boolean | null>, notes: string) => void }) { const { entry, botScore, botTests, artifactUrl } = data; const [humanTests, setHumanTests] = useState<Record<string, boolean | null>>({ ...entry.human_tests }); @@ -190,15 +211,8 @@ function CalibrationCard({ data, editing, onUpdate }: { data: ComparisonData; ed </table> {showEditor && ( - <div style={{ marginTop: "12px", padding: "8px", background: "hsl(var(--bg-secondary))", fontSize: "0.65rem" }}> - <div style={{ fontWeight: 600, marginBottom: "4px", color: "var(--text-muted)" }}>Copy this JSON to update the calibration file:</div> - <pre - style={{ margin: 0, padding: "8px", background: "hsl(var(--bg) / 0.5)", border: "1px solid var(--border)", overflow: "auto", maxHeight: "200px", cursor: "pointer", fontSize: "0.6rem" }} - onClick={e => { navigator.clipboard.writeText(JSON.stringify(exportData, null, 2)); (e.target as HTMLElement).style.outline = "2px solid var(--green)"; setTimeout(() => { (e.target as HTMLElement).style.outline = ""; }, 500); }} - title="Click to copy" - > - 
{JSON.stringify(exportData, null, 2)} - </pre> + <div style={{ marginTop: "12px", display: "flex", justifyContent: "flex-end" }}> + <CopyButton text={JSON.stringify(exportData, null, 2)} label={`Copy ${entry.short_id} JSON`} /> </div> )} </div> diff --git a/tasks/tetris/eval/gameplay-bot/calibration/4c7db3b9.json b/tasks/tetris/eval/gameplay-bot/calibration/4c7db3b9.json @@ -2,7 +2,7 @@ "run_id": "tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=detailed_rndr=none_strat=none_tst=none_tedit=off_tglob=off_tgrep=on_tread=off_twrite=on_web=off_run2", "short_id": "4c7db3b9", "label": "Spanish looks ok", - "notes": "Spanish game. Looks ok, playable.", + "notes": "Spanish game. All basic mechanics work. Down arrow feels laggy. Score did not increase on down press, unsure about line clear scoring. Only tested single line clear. Game is in Spanish.", "human_tested_at": "2026-04-09", "human_tests": { "game_loads": true, @@ -12,14 +12,14 @@ "move_right": true, "move_down": true, "rotate": true, - "hard_drop": null, + "hard_drop": true, "all_pieces_rotate": null, "piece_locks": true, "new_piece_spawns": true, "multiple_pieces": true, - "line_clear": null, + "line_clear": true, "score_changes": null, - "game_over": null, + "game_over": true, "playable_30s": true, "multi_line_clear": null, "score_scaling": null, diff --git a/tasks/tetris/eval/gameplay-bot/calibration/8fe72fce.json b/tasks/tetris/eval/gameplay-bot/calibration/8fe72fce.json @@ -2,7 +2,7 @@ "run_id": "tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=uns_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run2", "short_id": "8fe72fce", "label": "English playable", - "notes": "Playable English game.", + "notes": "Playable English game. Has a pause button. 
Multi-line clear works, score scales, counter-clockwise rotation works, next piece preview and game over display present.", "human_tested_at": "2026-04-09", "human_tests": { "game_loads": true, @@ -12,22 +12,22 @@ "move_right": true, "move_down": true, "rotate": true, - "hard_drop": null, + "hard_drop": true, "all_pieces_rotate": null, "piece_locks": true, "new_piece_spawns": true, "multiple_pieces": true, - "line_clear": null, - "score_changes": null, - "game_over": null, + "line_clear": true, + "score_changes": true, + "game_over": true, "playable_30s": true, - "multi_line_clear": null, - "score_scaling": null, + "multi_line_clear": true, + "score_scaling": true, "level_progression": null, "speed_progression": null, - "next_piece_preview": null, - "game_over_display": null, - "counter_clockwise_rotation": null, + "next_piece_preview": true, + "game_over_display": true, + "counter_clockwise_rotation": true, "soft_drop_distinct": null } }

Impressum · Datenschutz