commit 1cc1aa1c2d533cc2b3737893c84456794cc80568 parent 4c1ca47d2ad560fc2037407b1b702e3ec33d9497 Author: Brian Graham <brian@buildingbetterteams.de> Date: Mon, 6 Apr 2026 11:44:18 +0200 Re-eval 159 runs (57 haiku, 51 opus, 51 sonnet) Diffstat:
199 files changed, 7489 insertions(+), 17294 deletions(-)
diff --git a/results/analysis/main_effects_build_quality.json b/results/analysis/main_effects_build_quality.json @@ -0,0 +1,267 @@ +{ + "language": { + "values": { + "javascript": { + "mean": 1.0, + "effect": 0.1164, + "n": 9 + }, + "typescript": { + "mean": 0.8899, + "effect": 0.0063, + "n": 105 + }, + "unspecified": { + "mean": 0.67, + "effect": -0.2136, + "n": 8 + } + }, + "spread": 0.33 + }, + "tool_edit": { + "values": { + "off": { + "mean": 0.67, + "effect": -0.2136, + "n": 6 + }, + "on": { + "mean": 0.8947, + "effect": 0.011, + "n": 116 + } + }, + "spread": 0.2247 + }, + "prompt_style": { + "values": { + "detailed": { + "mean": 0.67, + "effect": -0.2136, + "n": 5 + }, + "simple": { + "mean": 0.8927, + "effect": 0.0091, + "n": 117 + } + }, + "spread": 0.2227 + }, + "model": { + "values": { + "haiku": { + "mean": 0.8167, + "effect": -0.0669, + "n": 54 + }, + "opus": { + "mean": 0.9633, + "effect": 0.0797, + "n": 27 + }, + "sonnet": { + "mean": 0.9193, + "effect": 0.0357, + "n": 41 + } + }, + "spread": 0.1466 + }, + "tool_glob": { + "values": { + "off": { + "mean": 0.7512, + "effect": -0.1324, + "n": 8 + }, + "on": { + "mean": 0.8929, + "effect": 0.0093, + "n": 114 + } + }, + "spread": 0.1417 + }, + "sub_agents": { + "values": { + "off": { + "mean": 0.7525, + "effect": -0.1311, + "n": 8 + }, + "on": { + "mean": 0.8928, + "effect": 0.0092, + "n": 114 + } + }, + "spread": 0.1403 + }, + "claude_version": { + "values": { + "2.1.91 (Claude Code)": { + "mean": 0.8144, + "effect": -0.0692, + "n": 48 + }, + "2.1.92 (Claude Code)": { + "mean": 0.9285, + "effect": 0.0449, + "n": 74 + } + }, + "spread": 0.1141 + }, + "tool_read": { + "values": { + "off": { + "mean": 0.78, + "effect": -0.1036, + "n": 9 + }, + "on": { + "mean": 0.8919, + "effect": 0.0083, + "n": 113 + } + }, + "spread": 0.1119 + }, + "tool_write": { + "values": { + "off": { + "mean": 0.8167, + "effect": -0.0669, + "n": 9 + }, + "on": { + "mean": 0.8889, + "effect": 0.0053, + "n": 113 + } + }, + "spread": 0.0722 + }, + "context_file": { + "values": { + "none": { + "mean": 0.8769, + "effect": -0.0067, + "n": 110 + }, + "provided": { + "mean": 0.945, + "effect": 0.0614, + "n": 12 + } + }, + "spread": 0.0681 + }, + "effort": { + "values": { + "high": { + "mean": 0.8802, + "effect": -0.0034, + "n": 113 + }, + "max": { + "mean": 0.9267, + "effect": 0.0431, + "n": 9 + } + }, + "spread": 0.0465 + }, + "tool_grep": { + "values": { + "off": { + "mean": 0.8586, + "effect": -0.025, + "n": 7 + }, + "on": { + "mean": 0.8851, + "effect": 0.0015, + "n": 115 + } + }, + "spread": 0.0265 + }, + "playwright": { + "values": { + "off": { + "mean": 0.8625, + "effect": -0.0211, + "n": 12 + }, + "on": { + "mean": 0.8859, + "effect": 0.0023, + "n": 110 + } + }, + "spread": 0.0234 + }, + "web_search": { + "values": { + "off": { + "mean": 0.868, + "effect": -0.0156, + "n": 10 + }, + "on": { + "mean": 0.885, + "effect": 0.0014, + "n": 112 + } + }, + "spread": 0.017 + }, + "max_budget": { + "values": { + "high": { + "mean": 0.89, + "effect": 0.0064, + "n": 12 + }, + "low": { + "mean": 0.8829, + "effect": -0.0007, + "n": 110 + } + }, + "spread": 0.0071 + }, + "human_language": { + "values": { + "en": { + "mean": 0.8831, + "effect": -0.0005, + "n": 113 + }, + "es": { + "mean": 0.89, + "effect": 0.0064, + "n": 9 + } + }, + "spread": 0.0069 + }, + "linter": { + "values": { + "off": { + "mean": 0.8821, + "effect": -0.0015, + "n": 14 + }, + "on": { + "mean": 0.8838, + "effect": 0.0002, + "n": 108 + } + }, + "spread": 0.0017 + } +} +\ No newline at end of file diff --git a/results/analysis/main_effects_code_quality.json b/results/analysis/main_effects_code_quality.json @@ -1,267 +1,267 @@ { - "human_language": { + "max_budget": { "values": { - "en": { - "mean": 0.715, - "effect": 0.0158, - "n": 150 + "high": { + "mean": 0.365, + "effect": -0.1907, + "n": 12 }, - "es": { - "mean": 0.4367, - "effect": -0.2626, - "n": 9 + "low": { + "mean": 0.5713, + "effect": 0.0156, + "n": 147 } }, - "spread": 0.2783 + "spread": 0.2063 }, - "model": { + "language": { "values": { - "haiku": { - "mean": 0.5918, - "effect": -0.1075, - "n": 57 + "javascript": { + "mean": 0.7389, + "effect": 0.1832, + "n": 9 }, - "opus": { - "mean": 0.8029, - "effect": 0.1037, - "n": 51 + "typescript": { + "mean": 0.543, + "effect": -0.0127, + "n": 142 }, - "sonnet": { - "mean": 0.7157, - "effect": 0.0164, - "n": 51 + "unspecified": { + "mean": 0.575, + "effect": 0.0193, + "n": 8 } }, - "spread": 0.2111 + "spread": 0.1959 }, - "prompt_style": { + "tool_edit": { "values": { - "detailed": { - "mean": 0.525, - "effect": -0.1742, - "n": 10 + "off": { + "mean": 0.7217, + "effect": 0.1659, + "n": 12 }, - "simple": { - "mean": 0.7109, - "effect": 0.0117, - "n": 149 + "on": { + "mean": 0.5422, + "effect": -0.0135, + "n": 147 } }, - "spread": 0.1859 + "spread": 0.1795 }, - "language": { + "effort": { "values": { - "javascript": { - "mean": 0.7389, - "effect": 0.0396, - "n": 9 - }, - "typescript": { - "mean": 0.7037, - "effect": 0.0045, - "n": 142 + "high": { + "mean": 0.5471, + "effect": -0.0086, + "n": 150 }, - "unspecified": { - "mean": 0.575, - "effect": -0.1242, - "n": 8 + "max": { + "mean": 0.6989, + "effect": 0.1432, + "n": 9 } }, - "spread": 0.1639 + "spread": 0.1518 }, "claude_version": { "values": { "2.1.91 (Claude Code)": { - "mean": 0.5935, - "effect": -0.1057, + "mean": 0.4559, + "effect": -0.0998, "n": 51 }, "2.1.92 (Claude Code)": { - "mean": 0.7492, - "effect": 0.0499, + "mean": 0.6029, + "effect": 0.0471, "n": 108 } }, - "spread": 0.1557 + "spread": 0.147 }, - "tool_grep": { + "model": { + "values": { + "haiku": { + "mean": 0.4686, + "effect": -0.0871, + "n": 57 + }, + "opus": { + "mean": 0.6108, + "effect": 0.0551, + "n": 51 + }, + "sonnet": { + "mean": 0.598, + "effect": 0.0423, + "n": 51 + } + }, + "spread": 0.1422 + }, + "playwright": { "values": { "off": { - "mean": 0.7675, - "effect": 0.0683, - "n": 12 + "mean": 0.4286, + "effect": -0.1272, + "n": 14 }, "on": { - "mean": 0.6937, - "effect": -0.0056, - "n": 147 + "mean": 0.568, + "effect": 0.0123, + "n": 145 } }, - "spread": 0.0738 + "spread": 0.1394 }, - "tool_read": { + "sub_agents": { "values": { "off": { - "mean": 0.7667, - "effect": 0.0674, - "n": 12 + "mean": 0.6807, + "effect": 0.125, + "n": 14 }, "on": { - "mean": 0.6937, - "effect": -0.0055, - "n": 147 + "mean": 0.5437, + "effect": -0.0121, + "n": 145 } }, - "spread": 0.073 + "spread": 0.137 }, - "tool_glob": { + "tool_grep": { "values": { "off": { - "mean": 0.7625, - "effect": 0.0633, + "mean": 0.65, + "effect": 0.0943, "n": 12 }, "on": { - "mean": 0.6941, - "effect": -0.0052, + "mean": 0.548, + "effect": -0.0077, "n": 147 } }, - "spread": 0.0684 + "spread": 0.102 }, - "context_file": { + "tool_glob": { "values": { - "none": { - "mean": 0.7042, - "effect": 0.005, - "n": 147 - }, - "provided": { - "mean": 0.6383, - "effect": -0.0609, + "off": { + "mean": 0.6458, + "effect": 0.0901, "n": 12 + }, + "on": { + "mean": 0.5484, + "effect": -0.0074, + "n": 147 } }, - "spread": 0.0659 + "spread": 0.0974 }, - "effort": { + "human_language": { "values": { - "high": { - "mean": 0.6969, - "effect": -0.0023, + "en": { + "mean": 0.5505, + "effect": -0.0053, "n": 150 }, - "max": { - "mean": 0.7378, - "effect": 0.0385, + "es": { + "mean": 0.6433, + "effect": 0.0876, "n": 9 } }, - "spread": 0.0409 + "spread": 0.0928 }, - "playwright": { + "context_file": { "values": { - "off": { - "mean": 0.6714, - "effect": -0.0278, - "n": 14 + "none": { + "mean": 0.549, + "effect": -0.0067, + "n": 147 }, - "on": { - "mean": 0.7019, - "effect": 0.0027, - "n": 145 + "provided": { + "mean": 0.6383, + "effect": 0.0826, + "n": 12 } }, - "spread": 0.0305 + "spread": 0.0893 }, - "web_search": { + "linter": { "values": { "off": { - "mean": 0.7246, - "effect": 0.0254, - "n": 13 + "mean": 0.4893, + "effect": -0.0664, + "n": 14 }, "on": { - "mean": 0.697, - "effect": -0.0023, - "n": 146 + "mean": 0.5621, + "effect": 0.0064, + "n": 145 } }, - "spread": 0.0276 + "spread": 0.0728 }, - "max_budget": { + "prompt_style": { "values": { - "high": { - "mean": 0.6775, - "effect": -0.0217, - "n": 12 + "detailed": { + "mean": 0.505, + "effect": -0.0507, + "n": 10 }, - "low": { - "mean": 0.701, - "effect": 0.0018, - "n": 147 + "simple": { + "mean": 0.5591, + "effect": 0.0034, + "n": 149 } }, - "spread": 0.0235 + "spread": 0.0541 }, - "tool_write": { + "tool_read": { "values": { "off": { - "mean": 0.7192, - "effect": 0.0199, + "mean": 0.575, + "effect": 0.0193, "n": 12 }, "on": { - "mean": 0.6976, + "mean": 0.5541, "effect": -0.0016, "n": 147 } }, - "spread": 0.0216 - }, - "sub_agents": { - "values": { - "off": { - "mean": 0.7164, - "effect": 0.0172, - "n": 14 - }, - "on": { - "mean": 0.6976, - "effect": -0.0017, - "n": 145 - } - }, - "spread": 0.0188 + "spread": 0.0209 }, - "linter": { + "web_search": { "values": { "off": { - "mean": 0.7107, - "effect": 0.0115, - "n": 14 + "mean": 0.5423, + "effect": -0.0134, + "n": 13 }, "on": { - "mean": 0.6981, - "effect": -0.0011, - "n": 145 + "mean": 0.5569, + "effect": 0.0012, + "n": 146 } }, - "spread": 0.0126 + "spread": 0.0146 }, - "tool_edit": { + "tool_write": { "values": { "off": { - "mean": 0.6925, - "effect": -0.0067, + "mean": 0.5542, + "effect": -0.0016, "n": 12 }, "on": { - "mean": 0.6998, - "effect": 0.0006, + "mean": 0.5559, + "effect": 0.0001, "n": 147 } }, - "spread": 0.0073 + "spread": 0.0017 } } \ No newline at end of file diff --git a/results/analysis/main_effects_gameplay.json b/results/analysis/main_effects_gameplay.json @@ -1,267 +1,267 @@ { + "context_file": { + "values": { + "none": { + "mean": 0.0802, + "effect": -0.0396, + "n": 147 + }, + "provided": { + "mean": 0.605, + "effect": 0.4852, + "n": 12 + } + }, + "spread": 0.5248 + }, "prompt_style": { "values": { "detailed": { - "mean": 0.784, - "effect": 0.4004, + "mean": 0.27, + "effect": 0.1502, "n": 10 }, "simple": { - "mean": 0.3568, - "effect": -0.0269, + "mean": 0.1097, + "effect": -0.0101, "n": 149 } }, - "spread": 0.4272 + "spread": 0.1603 }, - "effort": { + "max_budget": { "values": { "high": { - "mean": 0.4067, - "effect": 0.023, - "n": 150 - }, - "max": { - "mean": 0.0, - "effect": -0.3836, - "n": 9 - } - }, - "spread": 0.4067 - }, - "human_language": { - "values": { - "en": { - "mean": 0.4067, - "effect": 0.023, - "n": 150 + "mean": 0.25, + "effect": 0.1302, + "n": 12 }, - "es": { - "mean": 0.0, - "effect": -0.3836, - "n": 9 + "low": { + "mean": 0.1092, + "effect": -0.0106, + "n": 147 } }, - "spread": 0.4067 + "spread": 0.1408 }, "language": { "values": { "javascript": { - "mean": 0.1044, - "effect": -0.2792, + "mean": 0.0211, + "effect": -0.0987, "n": 9 }, "typescript": { - "mean": 0.4216, - "effect": 0.038, + "mean": 0.1328, + "effect": 0.013, "n": 142 }, "unspecified": { - "mean": 0.0238, - "effect": -0.3599, + "mean": 0.0, + "effect": -0.1198, "n": 8 } }, - "spread": 0.3978 - }, - "model": { - "values": { - "haiku": { - "mean": 0.2077, - "effect": -0.1759, - "n": 57 - }, - "opus": { - "mean": 0.5976, - "effect": 0.214, - "n": 51 - }, - "sonnet": { - "mean": 0.3663, - "effect": -0.0174, - "n": 51 - } - }, - "spread": 0.3899 + "spread": 0.1328 }, - "claude_version": { + "tool_edit": { "values": { - "2.1.91 (Claude Code)": { - "mean": 0.2322, - "effect": -0.1515, - "n": 51 + "off": { + "mean": 0.0, + "effect": -0.1198, + "n": 12 }, - "2.1.92 (Claude Code)": { - "mean": 0.4552, - "effect": 0.0715, - "n": 108 + "on": { + "mean": 0.1296, + "effect": 0.0098, + "n": 147 } }, - "spread": 0.223 + "spread": 0.1296 }, "tool_read": { "values": { "off": { - "mean": 0.1875, - "effect": -0.1961, + "mean": 0.0, + "effect": -0.1198, "n": 12 }, "on": { - "mean": 0.3997, - "effect": 0.016, + "mean": 0.1296, + "effect": 0.0098, "n": 147 } }, - "spread": 0.2122 + "spread": 0.1296 }, "tool_write": { "values": { "off": { - "mean": 0.2192, - "effect": -0.1645, + "mean": 0.0, + "effect": -0.1198, "n": 12 }, "on": { - "mean": 0.3971, - "effect": 0.0134, + "mean": 0.1296, + "effect": 0.0098, "n": 147 } }, - "spread": 0.1779 + "spread": 0.1296 }, - "web_search": { + "human_language": { + "values": { + "en": { + "mean": 0.127, + "effect": 0.0072, + "n": 150 + }, + "es": { + "mean": 0.0, + "effect": -0.1198, + "n": 9 + } + }, + "spread": 0.127 + }, + "playwright": { "values": { "off": { - "mean": 0.2554, - "effect": -0.1283, - "n": 13 + "mean": 0.0179, + "effect": -0.102, + "n": 14 }, "on": { - "mean": 0.3951, - "effect": 0.0114, - "n": 146 + "mean": 0.1297, + "effect": 0.0098, + "n": 145 } }, - "spread": 0.1397 + "spread": 0.1118 }, - "max_budget": { + "effort": { "values": { "high": { - "mean": 0.5008, - "effect": 0.1172, - "n": 12 + "mean": 0.1145, + "effect": -0.0053, + "n": 150 }, - "low": { - "mean": 0.3741, - "effect": -0.0096, - "n": 147 + "max": { + "mean": 0.2089, + "effect": 0.0891, + "n": 9 } }, - "spread": 0.1267 + "spread": 0.0944 }, - "playwright": { + "tool_grep": { "values": { "off": { - "mean": 0.4879, - "effect": 0.1042, - "n": 14 + "mean": 0.0467, + "effect": -0.0731, + "n": 12 }, "on": { - "mean": 0.3736, - "effect": -0.0101, - "n": 145 + "mean": 0.1258, + "effect": 0.006, + "n": 147 } }, - "spread": 0.1143 + "spread": 0.0791 }, - "context_file": { + "model": { "values": { - "none": { - "mean": 0.392, - "effect": 0.0083, - "n": 147 + "haiku": { + "mean": 0.1395, + "effect": 0.0197, + "n": 57 }, - "provided": { - "mean": 0.2817, - "effect": -0.102, - "n": 12 + "opus": { + "mean": 0.0739, + "effect": -0.0459, + "n": 51 + }, + "sonnet": { + "mean": 0.1437, + "effect": 0.0239, + "n": 51 } }, - "spread": 0.1103 + "spread": 0.0698 }, - "linter": { + "tool_glob": { "values": { "off": { - "mean": 0.3007, - "effect": -0.0829, - "n": 14 + "mean": 0.0625, + "effect": -0.0573, + "n": 12 }, "on": { - "mean": 0.3917, - "effect": 0.008, - "n": 145 + "mean": 0.1245, + "effect": 0.0047, + "n": 147 } }, - "spread": 0.091 + "spread": 0.062 }, "sub_agents": { "values": { "off": { - "mean": 0.4243, - "effect": 0.0406, + "mean": 0.0807, + "effect": -0.0391, "n": 14 }, "on": { - "mean": 0.3797, - "effect": -0.0039, + "mean": 0.1236, + "effect": 0.0038, "n": 145 } }, - "spread": 0.0446 + "spread": 0.0429 }, - "tool_glob": { + "claude_version": { "values": { - "off": { - "mean": 0.4083, - "effect": 0.0247, - "n": 12 + "2.1.91 (Claude Code)": { + "mean": 0.1363, + "effect": 0.0165, + "n": 51 }, - "on": { - "mean": 0.3816, - "effect": -0.002, - "n": 147 + "2.1.92 (Claude Code)": { + "mean": 0.112, + "effect": -0.0078, + "n": 108 } }, - "spread": 0.0267 + "spread": 0.0243 }, - "tool_edit": { + "web_search": { "values": { "off": { - "mean": 0.4033, - "effect": 0.0197, - "n": 12 + "mean": 0.14, + "effect": 0.0202, + "n": 13 }, "on": { - "mean": 0.382, - "effect": -0.0016, - "n": 147 + "mean": 0.118, + "effect": -0.0018, + "n": 146 } }, - "spread": 0.0213 + "spread": 0.022 }, - "tool_grep": { + "linter": { "values": { "off": { - "mean": 0.3867, - "effect": 0.003, - "n": 12 + "mean": 0.1343, + "effect": 0.0145, + "n": 14 }, "on": { - "mean": 0.3834, - "effect": -0.0002, - "n": 147 + "mean": 0.1184, + "effect": -0.0014, + "n": 145 } }, - "spread": 0.0033 + "spread": 0.0159 } } \ No newline at end of file diff --git a/results/analysis/main_effects_score.json b/results/analysis/main_effects_score.json @@ -1,267 +1,267 @@ { - "human_language": { + "context_file": { "values": { - "en": { - "mean": 0.6381, - "effect": 0.0148, - "n": 150 + "none": { + "mean": 0.0638, + "effect": -0.0408, + "n": 147 }, - "es": { - "mean": 0.3763, - "effect": -0.2469, - "n": 9 + "provided": { + "mean": 0.6046, + "effect": 0.4999, + "n": 12 } }, - "spread": 0.2618 + "spread": 0.5408 }, - "model": { + "effort": { "values": { - "haiku": { - "mean": 0.5018, - "effect": -0.1214, - "n": 57 - }, - "opus": { - "mean": 0.7379, - "effect": 0.1146, - "n": 51 + "high": { + "mean": 0.0955, + "effect": -0.0092, + "n": 150 }, - "sonnet": { - "mean": 0.6444, - "effect": 0.0211, - "n": 51 + "max": { + "mean": 0.2572, + "effect": 0.1526, + "n": 9 } }, - "spread": 0.2361 + "spread": 0.1617 }, "language": { "values": { "javascript": { - "mean": 0.5887, - "effect": -0.0346, + "mean": 0.0211, + "effect": -0.0835, "n": 9 }, "typescript": { - "mean": 0.635, - "effect": 0.0118, + "mean": 0.1158, + "effect": 0.0112, "n": 142 }, "unspecified": { - "mean": 0.4534, - "effect": -0.1699, + "mean": 0.0, + "effect": -0.1047, "n": 8 } }, - "spread": 0.1816 + "spread": 0.1158 }, - "claude_version": { - "values": { - "2.1.91 (Claude Code)": { - "mean": 0.5065, - "effect": -0.1168, - "n": 51 - }, - "2.1.92 (Claude Code)": { - "mean": 0.6784, - "effect": 0.0551, - "n": 108 - } - }, - "spread": 0.1719 - }, - "effort": { + "tool_edit": { "values": { - "high": { - "mean": 0.6299, - "effect": 0.0066, - "n": 150 + "off": { + "mean": 0.0, + "effect": -0.1047, + "n": 12 }, - "max": { - "mean": 0.5126, - "effect": -0.1106, - "n": 9 + "on": { + "mean": 0.1132, + "effect": 0.0085, + "n": 147 } }, - "spread": 0.1173 + "spread": 0.1132 }, "tool_read": { "values": { "off": { - "mean": 0.5435, - "effect": -0.0798, + "mean": 0.0, + "effect": -0.1047, "n": 12 }, "on": { - "mean": 0.6298, - "effect": 0.0065, + "mean": 0.1132, + "effect": 0.0085, "n": 147 } }, - "spread": 0.0863 + "spread": 0.1132 }, "tool_write": { "values": { "off": { - "mean": 0.5573, - "effect": -0.066, + "mean": 0.0, + "effect": -0.1047, "n": 12 }, "on": { - "mean": 0.6287, - "effect": 0.0054, + "mean": 0.1132, + "effect": 0.0085, "n": 147 } }, - "spread": 0.0714 + "spread": 0.1132 }, - "prompt_style": { + "max_budget": { "values": { - "detailed": { - "mean": 0.6642, - "effect": 0.0409, - "n": 10 + "high": { + "mean": 0.2063, + "effect": 0.1016, + "n": 12 }, - "simple": { - "mean": 0.6205, - "effect": -0.0027, - "n": 149 + "low": { + "mean": 0.0964, + "effect": -0.0083, + "n": 147 } }, - "spread": 0.0437 + "spread": 0.1099 }, - "linter": { + "playwright": { "values": { "off": { - "mean": 0.5889, - "effect": -0.0343, + "mean": 0.0179, + "effect": -0.0868, "n": 14 }, "on": { - "mean": 0.6266, - "effect": 0.0033, + "mean": 0.113, + "effect": 0.0084, "n": 145 } }, - "spread": 0.0377 + "spread": 0.0951 }, - "web_search": { + "human_language": { "values": { - "off": { - "mean": 0.591, - "effect": -0.0323, - "n": 13 + "en": { + "mean": 0.1092, + "effect": 0.0045, + "n": 150 }, - "on": { - "mean": 0.6261, - "effect": 0.0029, - "n": 146 + "es": { + "mean": 0.0289, + "effect": -0.0758, + "n": 9 } }, - "spread": 0.0351 + "spread": 0.0803 }, - "tool_edit": { + "tool_glob": { "values": { "off": { - "mean": 0.5924, - "effect": -0.0309, + "mean": 0.0312, + "effect": -0.0734, "n": 12 }, "on": { - "mean": 0.6258, - "effect": 0.0025, + "mean": 0.1106, + "effect": 0.006, "n": 147 } }, - "spread": 0.0334 + "spread": 0.0794 }, - "context_file": { + "prompt_style": { "values": { - "none": { - "mean": 0.6257, - "effect": 0.0024, - "n": 147 + "detailed": { + "mean": 0.179, + "effect": 0.0743, + "n": 10 }, - "provided": { - "mean": 0.5936, - "effect": -0.0297, - "n": 12 + "simple": { + "mean": 0.0997, + "effect": -0.005, + "n": 149 } }, - "spread": 0.0321 + "spread": 0.0793 }, - "playwright": { + "tool_grep": { "values": { "off": { - "mean": 0.6438, - "effect": 0.0205, - "n": 14 + "mean": 0.0467, + "effect": -0.058, + "n": 12 }, "on": { - "mean": 0.6213, - "effect": -0.002, - "n": 145 + "mean": 0.1094, + "effect": 0.0047, + "n": 147 } }, - "spread": 0.0225 + "spread": 0.0627 }, - "tool_glob": { + "sub_agents": { "values": { "off": { - "mean": 0.6418, - "effect": 0.0186, - "n": 12 + "mean": 0.0493, + "effect": -0.0554, + "n": 14 }, "on": { - "mean": 0.6218, - "effect": -0.0015, - "n": 147 + "mean": 0.11, + "effect": 0.0053, + "n": 145 } }, - "spread": 0.02 + "spread": 0.0607 }, - "max_budget": { + "model": { "values": { - "high": { - "mean": 0.6406, - "effect": 0.0173, - "n": 12 + "haiku": { + "mean": 0.1214, + "effect": 0.0167, + "n": 57 }, - "low": { - "mean": 0.6219, - "effect": -0.0014, - "n": 147 + "opus": { + "mean": 0.0674, + "effect": -0.0373, + "n": 51 + }, + "sonnet": { + "mean": 0.1232, + "effect": 0.0186, + "n": 51 } }, - "spread": 0.0187 + "spread": 0.0558 }, - "tool_grep": { + "linter": { "values": { "off": { - "mean": 0.6216, - "effect": -0.0017, - "n": 12 + "mean": 0.0761, + "effect": -0.0286, + "n": 14 }, "on": { - "mean": 0.6234, - "effect": 0.0001, - "n": 147 + "mean": 0.1074, + "effect": 0.0028, + "n": 145 } }, - "spread": 0.0018 + "spread": 0.0313 }, - "sub_agents": { + "claude_version": { + "values": { + "2.1.91 (Claude Code)": { + "mean": 0.1234, + "effect": 0.0188, + "n": 51 + }, + "2.1.92 (Claude Code)": { + "mean": 0.0958, + "effect": -0.0089, + "n": 108 + } + }, + "spread": 0.0276 + }, + "web_search": { "values": { "off": { - "mean": 0.6244, - "effect": 0.0011, - "n": 14 + "mean": 0.1158, + "effect": 0.0111, + "n": 13 }, "on": { - "mean": 0.6232, - "effect": -0.0001, - "n": 145 + "mean": 0.1037, + "effect": -0.001, + "n": 146 } }, - "spread": 0.0012 + "spread": 0.0121 } } \ No newline at end of file diff --git a/results/analysis/main_effects_sonarqube.json b/results/analysis/main_effects_sonarqube.json @@ -0,0 +1,217 @@ +{ + "context_file": { + "values": { + "none": { + "mean": 0.1089, + "effect": -0.1213, + "n": 37 + }, + "provided": { + "mean": 0.6042, + "effect": 0.374, + "n": 12 + } + }, + "spread": 0.4953 + }, + "effort": { + "values": { + "high": { + "mean": 0.1939, + "effect": -0.0363, + "n": 44 + }, + "max": { + "mean": 0.55, + "effect": 0.3198, + "n": 5 + } + }, + "spread": 0.3561 + }, + "human_language": { + "values": { + "en": { + "mean": 0.2242, + "effect": -0.006, + "n": 48 + }, + "es": { + "mean": 0.52, + "effect": 0.2898, + "n": 1 + } + }, + "spread": 0.2958 + }, + "linter": { + "values": { + "off": { + "mean": 0.0, + "effect": -0.2302, + "n": 8 + }, + "on": { + "mean": 0.2751, + "effect": 0.0449, + "n": 41 + } + }, + "spread": 0.2751 + }, + "language": { + "values": { + "javascript": { + "mean": 0.0, + "effect": -0.2302, + "n": 6 + }, + "typescript": { + "mean": 0.2623, + "effect": 0.0321, + "n": 43 + } + }, + "spread": 0.2623 + }, + "prompt_style": { + "values": { + "detailed": { + "mean": 0.0, + "effect": -0.2302, + "n": 4 + }, + "simple": { + "mean": 0.2507, + "effect": 0.0205, + "n": 45 + } + }, + "spread": 0.2507 + }, + "playwright": { + "values": { + "off": { + "mean": 0.0, + "effect": -0.2302, + "n": 2 + }, + "on": { + "mean": 0.24, + "effect": 0.0098, + "n": 47 + } + }, + "spread": 0.24 + }, + "sub_agents": { + "values": { + "off": { + "mean": 0.0, + "effect": -0.2302, + "n": 2 + }, + "on": { + "mean": 0.24, + "effect": 0.0098, + "n": 47 + } + }, + "spread": 0.24 + }, + "tool_glob": { + "values": { + "off": { + "mean": 0.0, + "effect": -0.2302, + "n": 2 + }, + "on": { + "mean": 0.24, + "effect": 0.0098, + "n": 47 + } + }, + "spread": 0.24 + }, + "web_search": { + "values": { + "off": { + "mean": 0.0, + "effect": -0.2302, + "n": 2 + }, + "on": { + "mean": 0.24, + "effect": 0.0098, + "n": 47 + } + }, + "spread": 0.24 + }, + "tool_edit": { + "values": { + "off": { + "mean": 0.0, + "effect": -0.2302, + "n": 1 + }, + "on": { + "mean": 0.235, + "effect": 0.0048, + "n": 48 + } + }, + "spread": 0.235 + }, + "claude_version": { + "values": { + "2.1.91 (Claude Code)": { + "mean": 0.2692, + "effect": 0.039, + "n": 13 + }, + "2.1.92 (Claude Code)": { + "mean": 0.2161, + "effect": -0.0141, + "n": 36 + } + }, + "spread": 0.0531 + }, + "max_budget": { + "values": { + "high": { + "mean": 0.195, + "effect": -0.0352, + "n": 10 + }, + "low": { + "mean": 0.2392, + "effect": 0.009, + "n": 39 + } + }, + "spread": 0.0442 + }, + "model": { + "values": { + "haiku": { + "mean": 0.2188, + "effect": -0.0115, + "n": 16 + }, + "opus": { + "mean": 0.2214, + "effect": -0.0088, + "n": 14 + }, + "sonnet": { + "mean": 0.2463, + "effect": 0.0161, + "n": 19 + } + }, + "spread": 0.0275 + } +} +\ No newline at end of file diff --git a/results/analysis/main_effects_structural.json b/results/analysis/main_effects_structural.json @@ -1,267 +1,267 @@ { - "tool_edit": { + "tool_read": { "values": { "off": { - "mean": 0.8125, - "effect": -0.109, - "n": 12 + "mean": 0.6944, + "effect": -0.1649, + "n": 9 }, "on": { - "mean": 0.9306, - "effect": 0.0091, - "n": 144 + "mean": 0.8724, + "effect": 0.013, + "n": 114 } }, - "spread": 0.1181 + "spread": 0.178 }, - "claude_version": { + "model": { "values": { - "2.1.91 (Claude Code)": { - "mean": 0.8529, - "effect": -0.0685, + "haiku": { + "mean": 0.7798, + "effect": -0.0795, "n": 51 }, - "2.1.92 (Claude Code)": { - "mean": 0.9548, - "effect": 0.0333, - "n": 105 + "opus": { + "mean": 0.9507, + "effect": 0.0914, + "n": 27 + }, + "sonnet": { + "mean": 0.8947, + "effect": 0.0353, + "n": 45 } }, - "spread": 0.1019 + "spread": 0.1709 }, - "model": { + "tool_edit": { "values": { - "haiku": { - "mean": 0.8684, - "effect": -0.0531, - "n": 57 - }, - "opus": { - "mean": 0.9688, - "effect": 0.0473, - "n": 48 + "off": { + "mean": 0.7188, + "effect": -0.1406, + "n": 8 }, - "sonnet": { - "mean": 0.9363, - "effect": 0.0148, - "n": 51 + "on": { + "mean": 0.8691, + "effect": 0.0098, + "n": 115 } }, - "spread": 0.1004 + "spread": 0.1503 }, - "human_language": { + "claude_version": { "values": { - "en": { - "mean": 0.925, - "effect": 0.0035, - "n": 150 + "2.1.91 (Claude Code)": { + "mean": 0.7836, + "effect": -0.0758, + "n": 45 }, - "es": { - "mean": 0.8333, - "effect": -0.0881, - "n": 6 + "2.1.92 (Claude Code)": { + "mean": 0.9031, + "effect": 0.0437, + "n": 78 } }, - "spread": 0.0917 + "spread": 0.1195 }, - "language": { + "tool_grep": { "values": { - "javascript": { - "mean": 1.0, - "effect": 0.0785, + "off": { + "mean": 0.75, + "effect": -0.1093, "n": 9 }, - "typescript": { - "mean": 0.9119, - "effect": -0.0096, - "n": 139 + "on": { + "mean": 0.868, + "effect": 0.0086, + "n": 114 + } + }, + "spread": 0.118 + }, + "tool_glob": { + "values": { + "off": { + "mean": 0.75, + "effect": -0.1093, + "n": 7 }, - "unspecified": { - "mean": 1.0, - "effect": 0.0785, - "n": 8 + "on": { + "mean": 0.8659, + "effect": 0.0066, + "n": 116 } }, - "spread": 0.0881 + "spread": 0.1159 }, - "max_budget": { + "prompt_style": { "values": { - "high": { - "mean": 0.8542, - "effect": -0.0673, - "n": 12 + "detailed": { + "mean": 0.759, + "effect": -0.1003, + "n": 10 }, - "low": { - "mean": 0.9271, - "effect": 0.0056, - "n": 144 + "simple": { + "mean": 0.8682, + "effect": 0.0089, + "n": 113 } }, - "spread": 0.0729 + "spread": 0.1092 }, - "tool_grep": { + "playwright": { "values": { "off": { - "mean": 0.8542, - "effect": -0.0673, - "n": 12 + "mean": 0.767, + "effect": -0.0923, + "n": 10 }, "on": { - "mean": 0.9271, - "effect": 0.0056, - "n": 144 + "mean": 0.8675, + "effect": 0.0082, + "n": 113 } }, - "spread": 0.0729 + "spread": 0.1005 }, - "tool_read": { + "language": { "values": { - "off": { - "mean": 0.8542, - "effect": -0.0673, - "n": 12 + "javascript": { + "mean": 0.89, + "effect": 0.0307, + "n": 9 }, - "on": { - "mean": 0.9271, - "effect": 0.0056, - "n": 144 + "typescript": { + "mean": 0.8617, + "effect": 0.0023, + "n": 106 + }, + "unspecified": { + "mean": 0.7938, + "effect": -0.0656, + "n": 8 } }, - "spread": 0.0729 + "spread": 0.0962 }, - "prompt_style": { + "tool_write": { "values": { - "detailed": { - "mean": 0.875, - "effect": -0.0465, - "n": 10 + "off": { + "mean": 0.7917, + "effect": -0.0677, + "n": 6 }, - "simple": { - "mean": 0.9247, - "effect": 0.0032, - "n": 146 + "on": { + "mean": 0.8628, + "effect": 0.0035, + "n": 117 } }, - "spread": 0.0497 + "spread": 0.0711 }, "effort": { "values": { "high": { - "mean": 0.9235, - "effect": 0.002, - "n": 147 + "mean": 0.8548, + "effect": -0.0045, + "n": 114 }, "max": { - "mean": 0.8889, - "effect": -0.0326, + "mean": 0.9167, + "effect": 0.0573, "n": 9 } }, - "spread": 0.0346 + "spread": 0.0619 }, "sub_agents": { "values": { "off": { - "mean": 0.8929, - "effect": -0.0286, - "n": 14 + "mean": 0.8109, + "effect": -0.0484, + "n": 11 }, "on": { - "mean": 0.9243, - "effect": 0.0028, - "n": 142 + "mean": 0.8641, + "effect": 0.0048, + "n": 112 } }, - "spread": 0.0314 + "spread": 0.0532 }, - "tool_write": { + "linter": { "values": { "off": { - "mean": 0.8958, - "effect": -0.0256, - "n": 12 + "mean": 0.8157, + "effect": -0.0436, + "n": 14 }, "on": { - "mean": 0.9236, - "effect": 0.0021, - "n": 144 + "mean": 0.865, + "effect": 0.0056, + "n": 109 } }, - "spread": 0.0278 + "spread": 0.0493 }, "web_search": { "values": { "off": { - "mean": 0.9423, - "effect": 0.0208, - "n": 13 + "mean": 0.817, + "effect": -0.0423, + "n": 10 }, "on": { - "mean": 0.9196, - "effect": -0.0019, - "n": 143 + "mean": 0.8631, + "effect": 0.0037, + "n": 113 } }, - "spread": 0.0227 + "spread": 0.0461 }, "context_file": { "values": { "none": { - "mean": 0.9201, - "effect": -0.0013, - "n": 144 + "mean": 0.8554, + "effect": -0.0039, + "n": 111 }, "provided": { - "mean": 0.9375, - "effect": 0.016, + "mean": 0.8958, + "effect": 0.0365, "n": 12 } }, - "spread": 0.0174 + "spread": 0.0404 }, - "linter": { - "values": { - "off": { - "mean": 0.9286, - "effect": 0.0071, - "n": 14 - }, - "on": { - "mean": 0.9208, - "effect": -0.0007, - "n": 142 - } - }, - "spread": 0.0078 - }, - "playwright": { + "human_language": { "values": { - "off": { - "mean": 0.9286, - "effect": 0.0071, - "n": 14 + "en": { + "mean": 0.8614, + "effect": 0.0021, + "n": 114 }, - "on": { - "mean": 0.9208, - "effect": -0.0007, - "n": 142 + "es": { + "mean": 0.8333, + "effect": -0.026, + "n": 9 } }, - "spread": 0.0078 + "spread": 0.0281 }, - "tool_glob": { + "max_budget": { "values": { - "off": { - "mean": 0.9167, - "effect": -0.0048, + "high": { + "mean": 0.8542, + "effect": -0.0052, "n": 12 }, - "on": { - "mean": 0.9219, - "effect": 0.0004, - "n": 144 + "low": { + "mean": 0.8599, + "effect": 0.0006, + "n": 111 } }, - "spread": 0.0052 + "spread": 0.0057 } } \ No newline at end of file diff --git a/results/analysis/main_effects_transcript.json b/results/analysis/main_effects_transcript.json @@ -2,266 +2,266 @@ "model": { "values": { "haiku": { - "mean": 0.7763, - "effect": -0.1385, + "mean": 0.7368, + "effect": -0.1638, "n": 57 }, "opus": { "mean": 1.0, - "effect": 0.0852, + "effect": 0.0994, "n": 51 }, "sonnet": { "mean": 0.9843, - "effect": 0.0695, + "effect": 0.0837, "n": 51 } }, - "spread": 0.2237 + "spread": 0.2632 }, "claude_version": { "values": { "2.1.91 (Claude Code)": { - "mean": 0.7696, - "effect": -0.1452, + "mean": 0.7255, + "effect": -0.1751, "n": 51 }, "2.1.92 (Claude Code)": { "mean": 0.9833, - "effect": 0.0686, + "effect": 0.0827, "n": 108 } }, - "spread": 0.2137 + "spread": 0.2578 + }, + "tool_read": { + "values": { + "off": { + "mean": 0.6875, + "effect": -0.2131, + "n": 12 + }, + "on": { + "mean": 0.918, + "effect": 0.0174, + "n": 147 + } + }, + "spread": 0.2305 }, "language": { "values": { "javascript": { "mean": 1.0, - "effect": 0.0852, + "effect": 0.0994, "n": 9 }, "typescript": { - "mean": 0.9063, - "effect": -0.0084, + "mean": 0.8905, + "effect": -0.0101, "n": 142 }, "unspecified": { "mean": 0.9688, - "effect": 0.054, + "effect": 0.0681, "n": 8 } }, - "spread": 0.0937 - }, - "human_language": { - "values": { - "en": { - "mean": 0.9193, - "effect": 0.0046, - "n": 150 - }, - "es": { - "mean": 0.8389, - "effect": -0.0759, - "n": 9 - } - }, - "spread": 0.0804 + "spread": 0.1095 }, "playwright": { "values": { "off": { "mean": 0.8643, - "effect": -0.0505, + "effect": -0.0363, "n": 14 }, "on": { - "mean": 0.9197, - "effect": 0.0049, + "mean": 0.9041, + "effect": 0.0035, "n": 145 } }, - "spread": 0.0554 + "spread": 0.0398 }, "linter": { "values": { "off": { "mean": 0.8679, - "effect": -0.0469, + "effect": -0.0328, "n": 14 }, "on": { - "mean": 0.9193, - "effect": 0.0045, - "n": 145 - } - }, - "spread": 0.0514 - }, - "tool_read": { - "values": { - "off": { - "mean": 0.875, - "effect": -0.0398, - "n": 12 - }, - "on": { - "mean": 0.918, + "mean": 0.9038, "effect": 0.0032, - "n": 147 + "n": 145 } }, - "spread": 0.043 + "spread": 0.0359 }, "max_budget": { "values": { "high": { "mean": 0.8792, - "effect": -0.0356, + "effect": -0.0215, "n": 12 }, "low": { - "mean": 0.9177, - "effect": 0.0029, + "mean": 0.9024, + "effect": 0.0018, "n": 147 } }, - "spread": 0.0385 + "spread": 0.0232 }, "tool_edit": { "values": { "off": { "mean": 0.8792, - "effect": -0.0356, + "effect": -0.0215, "n": 12 }, "on": { - "mean": 0.9177, - "effect": 0.0029, + "mean": 0.9024, + "effect": 0.0018, "n": 147 } }, - "spread": 0.0385 + "spread": 0.0232 }, "tool_write": { "values": { "off": { "mean": 0.8792, - "effect": -0.0356, + "effect": -0.0215, "n": 12 }, "on": { - "mean": 0.9177, - "effect": 0.0029, + "mean": 0.9024, + "effect": 0.0018, "n": 147 } }, - "spread": 0.0385 + "spread": 0.0232 + }, + "human_language": { + "values": { + "en": { + "mean": 0.8993, + "effect": -0.0013, + "n": 150 + }, + "es": { + "mean": 0.9222, + "effect": 0.0216, + "n": 9 + } + }, + "spread": 0.0229 }, "sub_agents": { "values": { "off": { "mean": 0.8821, - "effect": -0.0326, + "effect": -0.0185, "n": 14 }, "on": { - "mean": 0.9179, - "effect": 0.0032, + "mean": 0.9024, + "effect": 0.0018, "n": 145 } }, - "spread": 0.0358 + "spread": 0.0203 }, "tool_glob": { "values": { "off": { "mean": 0.8833, - "effect": -0.0314, + "effect": -0.0173, "n": 12 }, "on": { - "mean": 0.9173, - "effect": 0.0026, + "mean": 0.902, + "effect": 0.0014, "n": 147 } }, - "spread": 0.034 + "spread": 0.0187 + }, + "effort": { + "values": { + "high": { + "mean": 0.8997, + "effect": -0.001, + "n": 150 + }, + "max": { + "mean": 0.9167, + "effect": 0.016, + "n": 9 + } + }, + "spread": 0.017 }, "tool_grep": { "values": { "off": { "mean": 0.8875, - "effect": -0.0273, + "effect": -0.0131, "n": 12 }, "on": { - "mean": 0.917, - "effect": 0.0022, + "mean": 0.9017, + "effect": 0.0011, "n": 147 } }, - "spread": 0.0295 + "spread": 0.0142 }, "context_file": { "values": { "none": { - "mean": 0.9167, - "effect": 0.0019, + "mean": 0.9014, + "effect": 0.0007, "n": 147 }, "provided": { "mean": 0.8917, - "effect": -0.0231, + "effect": -0.009, "n": 12 } }, - "spread": 0.025 + "spread": 0.0097 }, "web_search": { "values": { "off": { "mean": 0.8923, - "effect": -0.0225, + "effect": -0.0083, "n": 13 }, "on": { - "mean": 0.9168, - "effect": 0.002, + "mean": 0.9014, + "effect": 0.0007, "n": 146 } }, - "spread": 0.0245 + "spread": 0.0091 }, "prompt_style": { "values": { "detailed": { "mean": 0.895, - "effect": -0.0198, + "effect": -0.0056, "n": 10 }, "simple": { - "mean": 0.9161, - "effect": 0.0013, + "mean": 0.901, + "effect": 0.0004, "n": 149 } }, - "spread": 0.0211 - }, - "effort": { - "values": { - "high": { - "mean": 0.9147, - "effect": -0.0001, - "n": 150 - }, - "max": { - "mean": 0.9167, - "effect": 0.0019, - "n": 9 - } - }, - "spread": 0.002 + "spread": 0.006 } } \ No newline at end of file diff --git a/results/index.jsonl b/results/index.jsonl @@ -1,159 +1,159 @@ -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T19:59:11.076296+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T19:59:32.351290+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T19:59:55.659323+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:46:14.389007+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:46:36.359954+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:47:20.748761+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:42:12.786363+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:58:29.900236+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:57:36.969571+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=detailed_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=detailed_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off","completed_at":"2026-04-04T20:24:23.986627+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off","completed_at":"2026-04-03T18:37:03.014208+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off","completed_at":"2026-04-03T18:37:23.108082+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off","completed_at":"2026-04-03T18:41:13.333121+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off","completed_at":"2026-04-04T20:17:47.627694+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:37:10.970114+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:37:09.900301+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:35:10.729588+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:43:50.904766+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:45:00.368788+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:48:01.764611+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:43:46.980046+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:44:43.169610+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T19:39:10.870002+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:46:26.400041+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:44:48.686432+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:45:11.072141+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T22:00:04.904763+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T19:56:29.452347+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T19:56:26.634211+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:15:30.738305+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:29:48.276358+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:34:46.712959+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:36:23.056914+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:39:17.839627+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:41:00.673494+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T19:53:09.020668+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T19:58:22.294680+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T19:58:45.116530+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:38:46.002214+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:40:37.227520+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:42:05.384929+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:24:31.353521+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:28:08.059700+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:27:54.919444+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T08:51:19.169046+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T08:49:47.576163+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:36:54.110927+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:36:43.094997+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:37:39.835213+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:36:55.359971+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on","completed_at":"2026-04-03T20:09:11.289803+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on","completed_at":"2026-04-03T20:14:20.080371+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on","completed_at":"2026-04-03T20:13:47.758489+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on","completed_at":"2026-04-03T20:17:45.103375+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on","completed_at":"2026-04-03T20:19:18.568908+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on","completed_at":"2026-04-03T20:22:17.063412+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off","completed_at":"2026-04-03T20:43:03.409815+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off","completed_at":"2026-04-03T20:43:40.282743+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T19:14:26.851817+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T19:11:09.488782+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T19:12:36.135542+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:48:37.261266+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:49:18.421793+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:49:30.178832+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:41:08.954745+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:41:10.237903+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:43:29.313548+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:53:52.919817+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:56:10.524909+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:56:01.783667+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:39:08.458092+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T19:51:57.541240+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:39:11.755473+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:39:35.240914+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:39:36.296175+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:38:59.162109+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:43:19.539022+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:44:34.164279+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:45:47.860125+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on","completed_at":"2026-04-05T19:47:23.981479+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on","completed_at":"2026-04-05T19:48:02.042012+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on","completed_at":"2026-04-05T19:46:32.890087+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on","completed_at":"2026-04-05T19:47:15.615085+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on","completed_at":"2026-04-05T19:50:03.614395+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on","completed_at":"2026-04-05T21:43:37.223458+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off","completed_at":"2026-04-05T21:55:43.014791+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off","completed_at":"2026-04-05T21:59:19.246820+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off","completed_at":"2026-04-05T22:02:47.959579+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:00:53.756188+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:22:28.664145+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:00:10.672556+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T19:38:54.914546+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T19:38:55.252420+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:58:46.410223+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:05:35.109321+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:42:14.345914+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:47:38.333466+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:05:12.119803+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:15:31.567926+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:13:05.102138+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:17:55.215229+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:23:27.382997+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:13:05.553241+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:22:22.970786+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:24:29.542398+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:36:44.507712+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:36:50.072667+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:27:37.222404+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:42:22.469041+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on","completed_at":"2026-04-04T22:17:05.206931+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on","completed_at":"2026-04-05T19:29:03.407545+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on","completed_at":"2026-04-04T22:18:43.621863+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on","completed_at":"2026-04-05T05:18:02.207544+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on","completed_at":"2026-04-05T05:17:28.821271+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on","completed_at":"2026-04-05T05:04:55.065204+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off","completed_at":"2026-04-05T06:17:00.182240+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off","completed_at":"2026-04-05T06:20:34.617227+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off","completed_at":"2026-04-05T06:24:33.410085+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:04:34.885122+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:36:41.721861+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:11:17.256644+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T08:31:26.255450+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T08:28:47.315460+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:48:25.182566+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:48:58.365562+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:50:57.420728+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:47:01.274994+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T22:00:40.897695+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:56:38.633500+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:03:41.571216+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:05:07.170256+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:09:11.887288+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T19:45:24.239737+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:52:50.645410+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T19:46:38.063853+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:12:08.035213+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:53:14.676366+00:00"} -{"run_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T22:11:07.374754+00:00"} -{"run_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T19:50:12.863462+00:00"} -{"run_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T19:52:07.304632+00:00"} -{"run_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T19:54:56.953646+00:00"} -{"run_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:42:14.154311+00:00"} -{"run_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:46:45.312262+00:00"} -{"run_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:43:08.609342+00:00"} -{"run_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:34:54.215164+00:00"} -{"run_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:37:35.722817+00:00"} -{"run_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-04T21:36:09.154037+00:00"} -{"run_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T18:49:38.321463+00:00"} -{"run_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T18:49:48.529132+00:00"} -{"run_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T18:53:35.752453+00:00"} -{"run_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"haiku","cell_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:40:57.297570+00:00"} -{"run_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"haiku","cell_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:41:10.072835+00:00"} -{"run_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"haiku","cell_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-03T20:38:44.044823+00:00"} -{"run_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"opus","cell_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:58:12.019082+00:00"} -{"run_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"opus","cell_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:50:07.704550+00:00"} -{"run_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"opus","cell_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T21:54:23.048656+00:00"} -{"run_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:02:26.660332+00:00"} -{"run_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T06:03:58.503906+00:00"} -{"run_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3","task":"tetris","model":"sonnet","cell_id":"tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on","completed_at":"2026-04-05T05:58:32.199764+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T19:59:11.076296+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T19:59:32.351290+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T19:59:55.659323+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:46:14.389007+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:46:36.359954+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:47:20.748761+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:42:12.786363+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:58:29.900236+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:57:36.969571+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=detailed_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=detailed_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off", "completed_at": "2026-04-04T20:24:23.986627+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off", "completed_at": "2026-04-03T18:37:03.014208+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off", "completed_at": "2026-04-03T18:37:23.108082+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off", "completed_at": "2026-04-03T18:41:13.333121+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off", "completed_at": "2026-04-04T20:17:47.627694+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:37:10.970114+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:37:09.900301+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:35:10.729588+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:43:50.904766+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:45:00.368788+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:48:01.764611+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:43:46.980046+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:44:43.169610+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T19:39:10.870002+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:46:26.400041+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:44:48.686432+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:45:11.072141+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T22:00:04.904763+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T19:56:29.452347+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T19:56:26.634211+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:15:30.738305+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:29:48.276358+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:34:46.712959+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:36:23.056914+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:39:17.839627+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:41:00.673494+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T19:53:09.020668+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T19:58:22.294680+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T19:58:45.116530+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:38:46.002214+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:40:37.227520+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:42:05.384929+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:24:31.353521+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:28:08.059700+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:27:54.919444+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T08:51:19.169046+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T08:49:47.576163+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:36:54.110927+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:36:43.094997+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:37:39.835213+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:36:55.359971+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:09:11.289803+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:14:20.080371+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:13:47.758489+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on", "completed_at": "2026-04-03T20:17:45.103375+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on", "completed_at": "2026-04-03T20:19:18.568908+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on", "completed_at": "2026-04-03T20:22:17.063412+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off", "completed_at": "2026-04-03T20:43:03.409815+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off", "completed_at": "2026-04-03T20:43:40.282743+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T19:14:26.851817+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T19:11:09.488782+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T19:12:36.135542+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:48:37.261266+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:49:18.421793+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:49:30.178832+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:41:08.954745+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:41:10.237903+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:43:29.313548+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:53:52.919817+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:56:10.524909+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:56:01.783667+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:39:08.458092+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T19:51:57.541240+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:39:11.755473+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:39:35.240914+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:39:36.296175+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:38:59.162109+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:43:19.539022+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:44:34.164279+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:45:47.860125+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on", "completed_at": "2026-04-05T19:47:23.981479+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on", "completed_at": "2026-04-05T19:48:02.042012+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on", "completed_at": "2026-04-05T19:46:32.890087+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on", "completed_at": "2026-04-05T19:47:15.615085+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on", "completed_at": "2026-04-05T19:50:03.614395+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on", "completed_at": "2026-04-05T21:43:37.223458+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off", "completed_at": "2026-04-05T21:55:43.014791+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off", "completed_at": "2026-04-05T21:59:19.246820+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off", "completed_at": "2026-04-05T22:02:47.959579+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:00:53.756188+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:22:28.664145+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:00:10.672556+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T19:38:54.914546+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T19:38:55.252420+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:58:46.410223+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:05:35.109321+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:42:14.345914+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:47:38.333466+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:05:12.119803+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:15:31.567926+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:13:05.102138+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:17:55.215229+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:23:27.382997+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:13:05.553241+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:22:22.970786+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:24:29.542398+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:36:44.507712+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:36:50.072667+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:27:37.222404+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:42:22.469041+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on", "completed_at": "2026-04-04T22:17:05.206931+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on", "completed_at": "2026-04-05T19:29:03.407545+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on", "completed_at": "2026-04-04T22:18:43.621863+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on", "completed_at": "2026-04-05T05:18:02.207544+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on", "completed_at": "2026-04-05T05:17:28.821271+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on", "completed_at": "2026-04-05T05:04:55.065204+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off", "completed_at": "2026-04-05T06:17:00.182240+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off", "completed_at": "2026-04-05T06:20:34.617227+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off", "completed_at": "2026-04-05T06:24:33.410085+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:04:34.885122+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:36:41.721861+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:11:17.256644+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T08:31:26.255450+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T08:28:47.315460+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:48:25.182566+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:48:58.365562+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:50:57.420728+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:47:01.274994+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T22:00:40.897695+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:56:38.633500+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:03:41.571216+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:05:07.170256+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:09:11.887288+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T19:45:24.239737+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:52:50.645410+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T19:46:38.063853+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:12:08.035213+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:53:14.676366+00:00"} +{"run_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T22:11:07.374754+00:00"} +{"run_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T19:50:12.863462+00:00"} +{"run_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T19:52:07.304632+00:00"} +{"run_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T19:54:56.953646+00:00"} +{"run_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:42:14.154311+00:00"} +{"run_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:46:45.312262+00:00"} +{"run_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:43:08.609342+00:00"} +{"run_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:34:54.215164+00:00"} +{"run_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:37:35.722817+00:00"} +{"run_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-04T21:36:09.154037+00:00"} +{"run_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T18:49:38.321463+00:00"} +{"run_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T18:49:48.529132+00:00"} +{"run_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T18:53:35.752453+00:00"} +{"run_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:40:57.297570+00:00"} +{"run_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:41:10.072835+00:00"} +{"run_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "haiku", "cell_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-03T20:38:44.044823+00:00"} +{"run_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:58:12.019082+00:00"} +{"run_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:50:07.704550+00:00"} +{"run_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "opus", "cell_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T21:54:23.048656+00:00"} +{"run_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:02:26.660332+00:00"} +{"run_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T06:03:58.503906+00:00"} +{"run_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3", "task": "tetris", "model": "sonnet", "cell_id": "tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on", "completed_at": "2026-04-05T05:58:32.199764+00:00"} diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -20,11 +20,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -36,14 +31,15 @@ "note": "not applicable for javascript" }, "performance": { - "bundle_size_bytes": 114140, + "pass": true, + "bundle_size_bytes": 53769, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 5, + "total": 4, "code": 2, "docs": 0, "unnecessary": 0, @@ -52,8 +48,8 @@ "lines_of_code": 694, "dependencies": { "production": 0, - "dev": 5, - "total": 5 + "dev": 3, + "total": 3 }, "complexity": "minimal", "console_logs": 0, @@ -121,137 +117,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 16 -> 36" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [186] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 43 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 94, - "lines_cleared": 1, - "max_score_observed": 120, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 1146 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.88 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json @@ -16,7 +16,8 @@ "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -32,62 +33,62 @@ { "name": "auto_drop", "pass": true, - "detail": "grid state changed after 5s with no input" + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": true, - "detail": "piece shape changed after rotate key" + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "detail": "rotation confirmed but could not identify individual piece types" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", "pass": true, - "detail": "filled cells persist at bottom" + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", "pass": true, - "detail": "new piece detected at top of grid" + "detail": "2 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", "pass": true, - "detail": "grid accumulated cells: 16 -> 36" + "detail": "11 pieces placed during play session" }, { "name": "line_clear", "pass": true, - "detail": "line cleared via strategic placement" + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [186] -> no change after polling" + "detail": "score stayed at 250" }, { "name": "game_over", @@ -97,7 +98,7 @@ { "name": "playable_30s", "pass": true, - "detail": "played for 30s, placed 43 pieces, no crashes" + "detail": "played for 30s, placed 39 pieces, no crashes" } ], "summary": { @@ -107,14 +108,25 @@ "score": 0.94 }, "gameplay": { - "pieces_placed": 94, + "pieces_placed": 39, "lines_cleared": 1, - "max_score_observed": 120, + "max_score_observed": 250, "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 305, + "events_count": 10, + "pieces_spawned": 2, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 1146 + "load_time_ms": 30 }, "accessibility": { "issues": [ diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,11 +1,11 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -18,12 +18,7 @@ "detail": "no build script defined (static project)" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.67 }, "quality": { "lint": { @@ -36,24 +31,25 @@ "note": "not applicable for javascript" }, "performance": { - "bundle_size_bytes": 158345, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 5, - "code": 2, + "total": 3, + "code": 1, "docs": 0, "unnecessary": 0, "unnecessary_list": [] }, - "lines_of_code": 1264, + "lines_of_code": 632, "dependencies": { "production": 0, - "dev": 5, - "total": 5 + "dev": 3, + "total": 3 }, "complexity": "minimal", "console_logs": 0, @@ -85,7 +81,7 @@ "ratio_pct": 0.0 }, "separation_of_concerns": { - "verdict": "unclear", + "verdict": "single-file", "files_with_rendering": 0, "files_with_logic": 0, "files_with_both": 0 @@ -124,5 +120,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.53 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json @@ -1,13 +1,8 @@ { "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, "controls": { "left": "ArrowLeft", "right": "ArrowRight", @@ -15,8 +10,9 @@ "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "space", - "score_element_found": true + "start_mechanism": "click_canvas", + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { @@ -27,94 +23,103 @@ { "name": "game_starts", "pass": true, - "detail": "started via space" + "detail": "started via click_canvas" }, { "name": "auto_drop", "pass": false, - "detail": "piece did not move in 5 seconds" + "detail": "grid reader unreliable, cannot verify auto-drop" }, { "name": "move_left", "pass": false, - "detail": "no change detected after key press" + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "move_right", "pass": false, - "detail": "no change detected after key press" + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "grid reader unreliable, cannot verify rotation" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "could not detect any piece rotations" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", "pass": false, - "detail": "no change detected after hard drop key" + "detail": "grid reader unreliable, cannot verify hard drop" }, { "name": "piece_locks", "pass": false, - "detail": "could not verify piece locking at bottom" + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" }, { "name": "new_piece_spawns", "pass": false, - "detail": "could not detect new piece at top" + "detail": "could not detect new piece spawning at top via grid reader" }, { "name": "multiple_pieces", "pass": false, - "detail": "grid did not accumulate filled cells" + "detail": "only 10 piece(s) detected, need at least 3" }, { "name": "line_clear", "pass": false, - "detail": "exception: keyboard.press: Test timeout of 180000ms exceeded." + "detail": "could not trigger or detect a line clear via grid reader" }, { "name": "score_changes", "pass": false, - "detail": "could not read score element" + "detail": "no score element found" }, { "name": "game_over", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "only played for 7s" } ], "summary": { "total": 16, - "passed": 4, - "failed": 12, - "score": 0.25 + "passed": 3, + "failed": 13, + "score": 0.19 }, "gameplay": { - "pieces_placed": 100, + "pieces_placed": 26, "lines_cleared": 0, "max_score_observed": 0, - "play_duration_seconds": 0, + "play_duration_seconds": 7, "errors_during_play": 0 }, + "session": { + "frames": 33, + "events_count": 1, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 1087 + "load_time_ms": 47 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,11 +1,11 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -18,12 +18,7 @@ "detail": "no build script defined (static project)" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.67 }, "quality": { "lint": { @@ -36,24 +31,25 @@ "note": "not applicable for javascript" }, "performance": { - "bundle_size_bytes": 160196, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 5, - "code": 2, + "total": 3, + "code": 1, "docs": 0, "unnecessary": 0, "unnecessary_list": [] }, - "lines_of_code": 1362, + "lines_of_code": 681, "dependencies": { "production": 0, - "dev": 5, - "total": 5 + "dev": 3, + "total": 3 }, "complexity": "minimal", "console_logs": 0, @@ -85,7 +81,7 @@ "ratio_pct": 0.0 }, "separation_of_concerns": { - "verdict": "unclear", + "verdict": "single-file", "files_with_rendering": 0, "files_with_logic": 0, "files_with_both": 0 @@ -121,8 +117,144 @@ }, "gameplay_bot": { "pass": false, - "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "score": 0.19, + "total": 16, + "passed": 3, + "failed": 13, + "report": { + "implementation": { + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "ArrowDown", + "rotate": "ArrowUp", + "drop": "Space" + }, + "start_mechanism": "click_canvas", + "score_element_found": false, + "grid_confidence": 0 + }, + "tests": [ + { + "name": "game_loads", + "pass": true, + "detail": "no console errors" + }, + { + "name": "game_starts", + "pass": true, + "detail": "started via click_canvas" + }, + { + "name": "auto_drop", + "pass": false, + "detail": "grid reader unreliable, cannot verify auto-drop" + }, + { + "name": "move_left", + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" + }, + { + "name": "move_right", + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" + }, + { + "name": "move_down", + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" + }, + { + "name": "rotate", + "pass": false, + "detail": "grid reader unreliable, cannot verify rotation" + }, + { + "name": "all_pieces_rotate", + "pass": false, + "detail": "could not detect any piece rotations via grid reader" + }, + { + "name": "hard_drop", + "pass": false, + "detail": "grid reader unreliable, cannot verify hard drop" + }, + { + "name": "piece_locks", + "pass": false, + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" + }, + { + "name": "new_piece_spawns", + "pass": false, + "detail": "could not detect new piece spawning at top via grid reader" + }, + { + "name": "multiple_pieces", + "pass": false, + "detail": "only 10 piece(s) detected, need at least 3" + }, + { + "name": "line_clear", + "pass": false, + "detail": "could not trigger or detect a line clear via grid reader" + }, + { + "name": "score_changes", + "pass": false, + "detail": "no score element found" + }, + { + "name": "game_over", + "pass": true, + "detail": "game stopped after stacking to top" + }, + { + "name": "playable_30s", + "pass": false, + "detail": "only played for 7s" + } + ], + "summary": { + "total": 16, + "passed": 3, + "failed": 13, + "score": 0.19 + }, + "gameplay": { + "pieces_placed": 26, + "lines_cleared": 0, + "max_score_observed": 0, + "play_duration_seconds": 7, + "errors_during_play": 0 + }, + "session": { + "frames": 33, + "events_count": 1, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, + "performance": { + "load_time_ms": 42 + }, + "accessibility": { + "issues": [], + "issue_count": 0, + "pass": true + } + } }, - "score": 0.53 + "outcome_score": 0.19, + "score": 0.19, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json @@ -1,13 +1,8 @@ { "implementation": { - "renderer": "dom", - "grid_detected": true, - "grid_bounds": { - "x": 362.5, - "y": 125, - "width": 325, - "height": 635 - }, + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, "controls": { "left": "ArrowLeft", "right": "ArrowRight", @@ -15,8 +10,9 @@ "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "auto", - "score_element_found": true + "start_mechanism": "click_canvas", + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { @@ -27,94 +23,103 @@ { "name": "game_starts", "pass": true, - "detail": "started via auto" + "detail": "started via click_canvas" }, { "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" + "pass": false, + "detail": "grid reader unreliable, cannot verify auto-drop" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "grid reader unreliable, cannot verify rotation" }, { "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other, unknown] failed: [] (tested 2 piece types in 60 attempts)" + "pass": false, + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "pass": false, + "detail": "grid reader unreliable, cannot verify hard drop" }, { "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" + "pass": false, + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" }, { "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" + "pass": false, + "detail": "could not detect new piece spawning at top via grid reader" }, { "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" + "pass": false, + "detail": "only 10 piece(s) detected, need at least 3" }, { "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" + "pass": false, + "detail": "could not trigger or detect a line clear via grid reader" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [0] -> no change after polling" + "detail": "no score element found" }, { "name": "game_over", - "pass": false, - "detail": "exception: keyboard.press: Test timeout of 180000ms exceeded." + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "only played for 7s" } ], "summary": { "total": 16, - "passed": 13, - "failed": 3, - "score": 0.81 + "passed": 3, + "failed": 13, + "score": 0.19 }, "gameplay": { - "pieces_placed": 110, - "lines_cleared": 1, + "pieces_placed": 26, + "lines_cleared": 0, "max_score_observed": 0, - "play_duration_seconds": 0, + "play_duration_seconds": 7, "errors_during_play": 0 }, + "session": { + "frames": 33, + "events_count": 1, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 1076 + "load_time_ms": 42 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -20,11 +20,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -36,10 +31,11 @@ "note": "not applicable for javascript" }, "performance": { - "bundle_size_bytes": 125289, + "pass": true, + "bundle_size_bytes": 124933, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -124,5 +120,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5862 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json @@ -12,11 +12,12 @@ "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "z", + "rotate": "x", "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -31,94 +32,107 @@ }, { "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" + "pass": false, + "detail": "piece did not move down in 5 seconds (grid-verified)" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "move_down", "pass": false, - "detail": "no change detected after key press" + "detail": "no grid change detected after key press" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "no shape change detected after rotate key" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "could not detect any piece rotations" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", "pass": false, - "detail": "no change detected after hard drop key" + "detail": "no grid change with bottom cells detected after hard drop key" }, { "name": "piece_locks", "pass": false, - "detail": "could not verify piece locking at bottom" + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" }, { "name": "new_piece_spawns", "pass": false, - "detail": "could not detect new piece at top" + "detail": "could not detect new piece spawning at top via grid reader" }, { "name": "multiple_pieces", "pass": false, - "detail": "grid did not accumulate filled cells" + "detail": "only 10 piece(s) detected, need at least 3" }, { "name": "line_clear", "pass": true, - "detail": "line cleared via strategic placement" + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", - "pass": false, - "detail": "score did not increase: [186] -> no change after polling" + "pass": true, + "detail": "score changed from 194 to 222" }, { "name": "game_over", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "played for 30s, placed 20 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 7, - "failed": 9, - "score": 0.44 + "passed": 6, + "failed": 10, + "score": 0.38 }, "gameplay": { - "pieces_placed": 101, + "pieces_placed": 20, "lines_cleared": 1, - "max_score_observed": 0, - "play_duration_seconds": 0, + "max_score_observed": 222, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 349, + "events_count": 2, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 1, + "piece_types_seen": [], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 1212 + "load_time_ms": 50 }, "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true + "issues": [ + "canvas without aria-label or role", + "canvas without aria-label or role", + "canvas without aria-label or role" + ], + "issue_count": 3, + "pass": false } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -20,11 +20,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -36,10 +31,11 @@ "note": "not applicable for javascript" }, "performance": { - "bundle_size_bytes": 112984, + "pass": true, + "bundle_size_bytes": 112631, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -122,131 +118,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "1 console error(s): Cannot read properties of undefined (reading '2')" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 4164 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.53 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -20,11 +20,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -36,10 +31,11 @@ "note": "not applicable for javascript" }, "performance": { - "bundle_size_bytes": 115820, + "pass": true, + "bundle_size_bytes": 115465, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -122,131 +118,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "1 console error(s): Cannot read properties of undefined (reading '0')" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 2636 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5956 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -20,11 +20,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -36,10 +31,11 @@ "note": "not applicable for javascript" }, "performance": { - "bundle_size_bytes": 116153, + "pass": true, + "bundle_size_bytes": 115797, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -122,131 +118,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "1 console error(s): Cannot read properties of undefined (reading '0')" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 5201 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5862 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,11 +1,11 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -18,12 +18,7 @@ "detail": "no build script defined (static project)" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.67 }, "quality": { "lint": { @@ -36,20 +31,21 @@ "note": "not applicable for javascript" }, "performance": { - "bundle_size_bytes": 188835, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 5, - "code": 2, + "total": 4, + "code": 1, "docs": 0, "unnecessary": 0, "unnecessary_list": [] }, - "lines_of_code": 1778, + "lines_of_code": 889, "dependencies": { "production": 0, "dev": 5, @@ -85,7 +81,7 @@ "ratio_pct": 0.0 }, "separation_of_concerns": { - "verdict": "unclear", + "verdict": "single-file", "files_with_rendering": 0, "files_with_logic": 0, "files_with_both": 0 @@ -124,5 +120,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.53 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json @@ -1,13 +1,8 @@ { "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, "controls": { "left": "ArrowLeft", "right": "ArrowRight", @@ -15,8 +10,9 @@ "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "enter", - "score_element_found": false + "start_mechanism": "click_canvas", + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { @@ -27,94 +23,103 @@ { "name": "game_starts", "pass": true, - "detail": "started via enter" + "detail": "started via click_canvas" }, { "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" + "pass": false, + "detail": "grid reader unreliable, cannot verify auto-drop" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "grid reader unreliable, cannot verify rotation" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "rotated: [] failed: [unknown] (tested 1 piece types in 60 attempts)" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "pass": false, + "detail": "grid reader unreliable, cannot verify hard drop" }, { "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" + "pass": false, + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" }, { "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" + "pass": false, + "detail": "could not detect new piece spawning at top via grid reader" }, { "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" + "pass": false, + "detail": "only 10 piece(s) detected, need at least 3" }, { "name": "line_clear", - "pass": true, - "detail": "5 line(s) cleared during AI play" + "pass": false, + "detail": "could not trigger or detect a line clear via grid reader" }, { "name": "score_changes", "pass": false, - "detail": "no score element found and no number changed" + "detail": "no score element found" }, { "name": "game_over", - "pass": false, - "detail": "exception: page.screenshot: Test timeout of 180000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "only played for 7s" } ], "summary": { "total": 16, - "passed": 12, - "failed": 4, - "score": 0.75 + "passed": 3, + "failed": 13, + "score": 0.19 }, "gameplay": { - "pieces_placed": 25, - "lines_cleared": 5, + "pieces_placed": 26, + "lines_cleared": 0, "max_score_observed": 0, - "play_duration_seconds": 0, + "play_duration_seconds": 7, "errors_during_play": 0 }, + "session": { + "frames": 33, + "events_count": 1, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 2636 + "load_time_ms": 20 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=javascript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -20,11 +20,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -36,10 +31,11 @@ "note": "not applicable for javascript" }, "performance": { - "bundle_size_bytes": 105425, + "pass": true, + "bundle_size_bytes": 105072, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -124,5 +120,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.53 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=detailed_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=detailed_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1/eval_results.json @@ -1,11 +1,11 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -18,12 +18,7 @@ "detail": "no build script defined (static project)" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.67 }, "quality": { "lint": { @@ -36,15 +31,16 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 214327, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 9, - "code": 2, + "total": 8, + "code": 1, "docs": 4, "unnecessary": 2, "unnecessary_list": [ @@ -52,7 +48,7 @@ "IMPLEMENTATION.md" ] }, - "lines_of_code": 1896, + "lines_of_code": 948, "dependencies": { "production": 0, "dev": 6, @@ -88,7 +84,7 @@ "ratio_pct": 0.0 }, "separation_of_concerns": { - "verdict": "unclear", + "verdict": "single-file", "files_with_rendering": 0, "files_with_logic": 0, "files_with_both": 0 @@ -127,5 +123,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3763 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=detailed_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=detailed_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1/gameplay-bot-report.json @@ -1,22 +1,18 @@ { "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, "controls": { "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "z", + "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "auto", - "score_element_found": true + "start_mechanism": "click_canvas", + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { @@ -27,94 +23,103 @@ { "name": "game_starts", "pass": true, - "detail": "started via auto" + "detail": "started via click_canvas" }, { "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" + "pass": false, + "detail": "grid reader unreliable, cannot verify auto-drop" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "grid reader unreliable, cannot verify rotation" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" + "detail": "grid reader unreliable, cannot verify hard drop" }, { "name": "piece_locks", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" }, { "name": "new_piece_spawns", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "could not detect new piece spawning at top via grid reader" }, { "name": "multiple_pieces", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "only 10 piece(s) detected, need at least 3" }, { "name": "line_clear", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "could not trigger or detect a line clear via grid reader" }, { "name": "score_changes", "pass": false, - "detail": "could not read score element" + "detail": "no score element found" }, { "name": "game_over", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "only played for 7s" } ], "summary": { "total": 16, - "passed": 7, - "failed": 9, - "score": 0.44 + "passed": 3, + "failed": 13, + "score": 0.19 }, "gameplay": { - "pieces_placed": 47, + "pieces_placed": 26, "lines_cleared": 0, "max_score_observed": 0, - "play_duration_seconds": 0, + "play_duration_seconds": 7, "errors_during_play": 0 }, + "session": { + "frames": 33, + "events_count": 1, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 6329 + "load_time_ms": 59 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,14 +36,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 162873, + "pass": true, + "bundle_size_bytes": 96636, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 13, + "total": 12, "code": 3, "docs": 6, "unnecessary": 1, @@ -59,8 +55,8 @@ "lines_of_code": 867, "dependencies": { "production": 0, - "dev": 6, - "total": 6 + "dev": 4, + "total": 4 }, "complexity": "moderate", "console_logs": 0, @@ -131,5 +127,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.4294 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run1/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run1/gameplay-bot-report.json @@ -12,11 +12,12 @@ "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "x", + "rotate": "ArrowUp", "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -32,93 +33,106 @@ { "name": "auto_drop", "pass": true, - "detail": "grid state changed after 5s with no input" + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "no shape change detected after rotate key" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "rotated: [] failed: [unknown] (tested 1 piece types in 60 attempts)" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", "pass": true, - "detail": "filled cells persist at bottom" + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", "pass": true, - "detail": "new piece appeared at top after delay" + "detail": "1 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", "pass": true, - "detail": "game still responding after 10 piece drops" + "detail": "11 pieces placed during play session" }, { "name": "line_clear", "pass": true, - "detail": "4 line(s) cleared during AI play" + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [155] -> no change after polling" + "detail": "score stayed at 155" }, { "name": "game_over", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "could not trigger or detect game over" }, { "name": "playable_30s", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "played for 30s, placed 33 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 12, - "failed": 4, - "score": 0.75 + "passed": 10, + "failed": 6, + "score": 0.63 }, "gameplay": { - "pieces_placed": 30, - "lines_cleared": 4, - "max_score_observed": 0, - "play_duration_seconds": 0, + "pieces_placed": 33, + "lines_cleared": 1, + "max_score_observed": 155, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 319, + "events_count": 7, + "pieces_spawned": 1, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "I" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 6258 + "load_time_ms": 204 }, "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true + "issues": [ + "canvas without aria-label or role" + ], + "issue_count": 1, + "pass": false } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run2/eval_results.json @@ -4,8 +4,8 @@ "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -23,12 +23,7 @@ "detail": "TypeScript files found but no tsconfig.json" } ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.5 }, "quality": { "lint": { @@ -41,26 +36,27 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 215459, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 12, - "code": 4, + "total": 10, + "code": 3, "docs": 5, "unnecessary": 1, "unnecessary_list": [ "README.md" ] }, - "lines_of_code": 2018, + "lines_of_code": 1387, "dependencies": { "production": 0, - "dev": 6, - "total": 6 + "dev": 4, + "total": 4 }, "complexity": "moderate", "console_logs": 0, @@ -131,5 +127,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3606 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run2/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run2/gameplay-bot-report.json @@ -1,13 +1,8 @@ { "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 240, - "height": 400 - }, + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, "controls": { "left": "ArrowLeft", "right": "ArrowRight", @@ -15,8 +10,9 @@ "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "auto", - "score_element_found": false + "start_mechanism": "click_canvas", + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { @@ -27,67 +23,67 @@ { "name": "game_starts", "pass": true, - "detail": "started via auto" + "detail": "started via click_canvas" }, { "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" + "pass": false, + "detail": "grid reader unreliable, cannot verify auto-drop" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "grid reader unreliable, cannot verify rotation" }, { "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "pass": false, + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "pass": false, + "detail": "grid reader unreliable, cannot verify hard drop" }, { "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" + "pass": false, + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" }, { "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" + "pass": false, + "detail": "could not detect new piece spawning at top via grid reader" }, { "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 13 -> 44" + "pass": false, + "detail": "only 10 piece(s) detected, need at least 3" }, { "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" + "pass": false, + "detail": "could not trigger or detect a line clear via grid reader" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [278] -> no change after polling" + "detail": "no score element found" }, { "name": "game_over", @@ -97,24 +93,33 @@ { "name": "playable_30s", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "only played for 7s" } ], "summary": { "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 + "passed": 3, + "failed": 13, + "score": 0.19 }, "gameplay": { - "pieces_placed": 33, - "lines_cleared": 1, + "pieces_placed": 26, + "lines_cleared": 0, "max_score_observed": 0, - "play_duration_seconds": 0, + "play_duration_seconds": 7, "errors_during_play": 0 }, + "session": { + "frames": 33, + "events_count": 1, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 819 + "load_time_ms": 20 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run3/eval_results.json @@ -4,8 +4,8 @@ "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -23,12 +23,7 @@ "detail": "TypeScript files found but no tsconfig.json" } ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.5 }, "quality": { "lint": { @@ -41,26 +36,27 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 177581, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 16, - "code": 5, + "total": 14, + "code": 4, "docs": 5, "unnecessary": 1, "unnecessary_list": [ "README.md" ] }, - "lines_of_code": 1249, + "lines_of_code": 1180, "dependencies": { "production": 0, - "dev": 6, - "total": 6 + "dev": 4, + "total": 4 }, "complexity": "moderate", "console_logs": 0, @@ -129,130 +125,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "1 console error(s): Unexpected token '<'" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 416 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role" - ], - "issue_count": 1, - "pass": false - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.42 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run3/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=off_tool_glob=off_tool_grep=off_tool_read=off_tool_write=off_web_search=off_run3/gameplay-bot-report.json @@ -10,112 +10,120 @@ "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "unknown", - "score_element_found": false + "start_mechanism": "click_canvas", + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { "name": "game_loads", - "pass": false, - "detail": "1 console error(s): Unexpected token '<'" + "pass": true, + "detail": "no console errors" }, { "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "started via click_canvas" }, { "name": "auto_drop", "pass": false, - "detail": "skipped: page did not load" + "detail": "grid reader unreliable, cannot verify auto-drop" }, { "name": "move_left", "pass": false, - "detail": "skipped: page did not load" + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "move_right", "pass": false, - "detail": "skipped: page did not load" + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "move_down", "pass": false, - "detail": "skipped: page did not load" + "detail": "grid reader unreliable, cannot verify movement" }, { "name": "rotate", "pass": false, - "detail": "skipped: page did not load" + "detail": "grid reader unreliable, cannot verify rotation" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "skipped: page did not load" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", "pass": false, - "detail": "skipped: page did not load" + "detail": "grid reader unreliable, cannot verify hard drop" }, { "name": "piece_locks", "pass": false, - "detail": "skipped: page did not load" + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" }, { "name": "new_piece_spawns", "pass": false, - "detail": "skipped: page did not load" + "detail": "could not detect new piece spawning at top via grid reader" }, { "name": "multiple_pieces", "pass": false, - "detail": "skipped: page did not load" + "detail": "only 10 piece(s) detected, need at least 3" }, { "name": "line_clear", "pass": false, - "detail": "skipped: page did not load" + "detail": "could not trigger or detect a line clear via grid reader" }, { "name": "score_changes", "pass": false, - "detail": "skipped: page did not load" + "detail": "no score element found" }, { "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", "pass": false, - "detail": "skipped: page did not load" + "detail": "only played for 7s" } ], "summary": { "total": 16, - "passed": 0, - "failed": 16, - "score": 0 + "passed": 3, + "failed": 13, + "score": 0.19 }, "gameplay": { - "pieces_placed": 0, + "pieces_placed": 26, "lines_cleared": 0, "max_score_observed": 0, - "play_duration_seconds": 0, + "play_duration_seconds": 7, "errors_during_play": 0 }, + "session": { + "frames": 33, + "events_count": 1, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 416 + "load_time_ms": 21 }, "accessibility": { - "issues": [ - "canvas without aria-label or role" - ], - "issue_count": 1, - "pass": false + "issues": [], + "issue_count": 0, + "pass": true } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1/eval_results.json @@ -1,6 +1,6 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", @@ -19,16 +19,11 @@ }, { "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" + "pass": false, + "detail": "tsc --noEmit failed" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.75 }, "quality": { "lint": { @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 31208, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -127,8 +123,139 @@ }, "gameplay_bot": { "pass": false, - "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "score": 0.25, + "total": 16, + "passed": 4, + "failed": 12, + "report": { + "implementation": { + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 50, + "height": 100 + }, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "ArrowDown", + "rotate": "ArrowUp", + "drop": "Space" + }, + "start_mechanism": "click_canvas", + "score_element_found": true + }, + "tests": [ + { + "name": "game_loads", + "pass": true, + "detail": "no console errors" + }, + { + "name": "game_starts", + "pass": true, + "detail": "started via click_canvas" + }, + { + "name": "auto_drop", + "pass": false, + "detail": "piece did not move in 5 seconds" + }, + { + "name": "move_left", + "pass": false, + "detail": "no change detected after key press" + }, + { + "name": "move_right", + "pass": false, + "detail": "no change detected after key press" + }, + { + "name": "move_down", + "pass": true, + "detail": "grid state changed after key press" + }, + { + "name": "rotate", + "pass": true, + "detail": "piece shape changed after rotate key" + }, + { + "name": "all_pieces_rotate", + "pass": false, + "detail": "could not detect any piece rotations" + }, + { + "name": "hard_drop", + "pass": false, + "detail": "no change detected after hard drop key" + }, + { + "name": "piece_locks", + "pass": false, + "detail": "exception: page.screenshot: Target page, context or browser has been closed" + }, + { + "name": "new_piece_spawns", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + }, + { + "name": "multiple_pieces", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + }, + { + "name": "line_clear", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + }, + { + "name": "score_changes", + "pass": false, + "detail": "could not read score element" + }, + { + "name": "game_over", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + }, + { + "name": "playable_30s", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + } + ], + "summary": { + "total": 16, + "passed": 4, + "failed": 12, + "score": 0.25 + }, + "gameplay": { + "pieces_placed": 60, + "lines_cleared": 0, + "max_score_observed": 0, + "play_duration_seconds": 0, + "errors_during_play": 0 + }, + "performance": { + "load_time_ms": 750 + }, + "accessibility": { + "issues": [], + "issue_count": 0, + "pass": true + } + } }, - "score": 0.5238 + "outcome_score": 0.25, + "score": 0.25, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,14 +35,15 @@ "pass": true }, "performance": { - "bundle_size_bytes": 186325, + "pass": true, + "bundle_size_bytes": 112434, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 18, + "total": 17, "code": 5, "docs": 6, "unnecessary": 1, @@ -58,8 +54,8 @@ "lines_of_code": 1206, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 0, @@ -130,5 +126,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5363 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -4,8 +4,8 @@ "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -23,12 +23,7 @@ "detail": "TypeScript files found but no tsconfig.json" } ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.5 }, "quality": { "lint": { @@ -41,15 +36,16 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 234393, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 31, - "code": 14, + "total": 29, + "code": 13, "docs": 10, "unnecessary": 2, "unnecessary_list": [ @@ -57,11 +53,11 @@ "README.md" ] }, - "lines_of_code": 1708, + "lines_of_code": 1648, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "over-engineered", "console_logs": 0, @@ -130,128 +126,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:38155/\", waiting until \"domcontentloaded\"\u001b[22m\n" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 4052 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3731 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,14 +35,15 @@ "pass": true }, "performance": { - "bundle_size_bytes": 174721, + "pass": true, + "bundle_size_bytes": 105700, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 16, + "total": 15, "code": 5, "docs": 5, "unnecessary": 1, @@ -58,8 +54,8 @@ "lines_of_code": 1195, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 10, @@ -258,5 +254,10 @@ } } }, - "score": 0.7925 + "outcome_score": 0.44, + "score": 0.44, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 35223, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -125,134 +121,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.75, - "total": 16, - "passed": 12, - "failed": 4, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "enter", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via enter" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" - }, - { - "name": "line_clear", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - }, - { - "name": "score_changes", - "pass": false, - "detail": "could not read score element" - }, - { - "name": "game_over", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - } - ], - "summary": { - "total": 16, - "passed": 12, - "failed": 4, - "score": 0.75 - }, - "gameplay": { - "pieces_placed": 11, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 1031 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8112 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,66 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 27064, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 8, - "code": 3, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 943, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 26, - "excessive": true - }, - "function_length": { - "count": 55, - "average": 6.8, - "max": 26, - "long_functions": 0 - }, - "max_nesting_depth": 8, - "global_declarations": 62, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 297, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 39, - "source_lines": 681, - "ratio_pct": 5.7 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 2 - }, - "duplication_percentage": 0.0, - "score": 0.9 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-pf5imcma/loop-bench-y7z0amb9', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 46, @@ -125,134 +69,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.13, - "total": 16, - "passed": 2, - "failed": 14, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" - }, - { - "name": "move_left", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" - }, - { - "name": "move_right", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" - }, - { - "name": "move_down", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" - }, - { - "name": "rotate", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "exception: page.reload: Target page, context or browser has been closed" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - }, - { - "name": "line_clear", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - }, - { - "name": "score_changes", - "pass": false, - "detail": "could not read score element" - }, - { - "name": "game_over", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - } - ], - "summary": { - "total": 16, - "passed": 2, - "failed": 14, - "score": 0.13 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": -1 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.6269 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,66 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 52194, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 8, - "code": 3, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1526, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 36, - "excessive": true - }, - "function_length": { - "count": 67, - "average": 7.4, - "max": 48, - "long_functions": 0 - }, - "max_nesting_depth": 14, - "global_declarations": 38, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 526, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 86, - "source_lines": 1180, - "ratio_pct": 7.3 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 8 - }, - "duplication_percentage": 0.0, - "score": 0.85 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-traf_u58/loop-bench-drho0eka', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 47, @@ -125,134 +69,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.13, - "total": 16, - "passed": 2, - "failed": 14, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 60, - "height": 120 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" - }, - { - "name": "move_left", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" - }, - { - "name": "move_right", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" - }, - { - "name": "move_down", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" - }, - { - "name": "rotate", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "exception: page.reload: Target page, context or browser has been closed" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - }, - { - "name": "line_clear", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - }, - { - "name": "score_changes", - "pass": false, - "detail": "could not read score element" - }, - { - "name": "game_over", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" - } - ], - "summary": { - "total": 16, - "passed": 2, - "failed": 14, - "score": 0.13 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 75 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.6175 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,6 +1,6 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", @@ -19,16 +19,11 @@ }, { "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" + "pass": false, + "detail": "tsc --noEmit failed" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.75 }, "quality": { "lint": { @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 19331, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -259,5 +255,10 @@ } } }, - "score": 0.8112 + "outcome_score": 0.375, + "score": 0.375, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,66 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 25214, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 7, - "code": 3, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1723, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 36, - "excessive": true - }, - "function_length": { - "count": 59, - "average": 9.5, - "max": 45, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 32, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 556, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 132, - "source_lines": 1194, - "ratio_pct": 11.1 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 0 - }, - "duplication_percentage": 0.0, - "score": 0.5 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-ivkll6nh/loop-bench-e35s4mjy', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 57, @@ -125,138 +69,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.63, - "total": 16, - "passed": 10, - "failed": 6, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 50, - "height": 100 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [268] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 10, - "failed": 6, - "score": 0.63 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 200, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 44 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7081 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=off_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,66 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 19141, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 7, - "code": 3, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1376, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 58, - "excessive": true - }, - "function_length": { - "count": 69, - "average": 5.9, - "max": 17, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 84, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 538, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 71, - "source_lines": 995, - "ratio_pct": 7.1 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 8 - }, - "duplication_percentage": 0.0, - "score": 0.85 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-1mwrka44/loop-bench-294r5yrr', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 41, @@ -125,137 +69,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" - }, - { - "name": "line_clear", - "pass": true, - "detail": "11 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [0] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 86 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 127, - "lines_cleared": 41, - "max_score_observed": 0, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 79 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8581 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -14,8 +14,8 @@ }, { "name": "build_succeeds", - "pass": false, - "detail": "npm run build failed" + "pass": true, + "detail": "npm run build completed successfully" }, { "name": "typescript_compiles", @@ -23,12 +23,7 @@ "detail": "TypeScript files found but no tsconfig.json" } ], - "score": 0.5 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.75 }, "quality": { "lint": { @@ -41,14 +36,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 220260, + "pass": true, + "bundle_size_bytes": 149685, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 18, + "total": 17, "code": 5, "docs": 10, "unnecessary": 2, @@ -60,8 +56,8 @@ "lines_of_code": 1183, "dependencies": { "production": 0, - "dev": 6, - "total": 6 + "dev": 4, + "total": 4 }, "complexity": "moderate", "console_logs": 3, @@ -261,5 +257,10 @@ } } }, - "score": 0.5337 + "outcome_score": 0.28, + "score": 0.28, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,14 +36,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 174551, + "pass": true, + "bundle_size_bytes": 107311, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 16, + "total": 15, "code": 4, "docs": 7, "unnecessary": 2, @@ -60,8 +56,8 @@ "lines_of_code": 989, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 0, @@ -129,20 +125,15 @@ }, "gameplay_bot": { "pass": false, - "score": 0.81, + "score": 0, "total": 16, - "passed": 13, - "failed": 3, + "passed": 0, + "failed": 16, "report": { "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, "controls": { "left": "ArrowLeft", "right": "ArrowRight", @@ -150,116 +141,128 @@ "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "auto", - "score_element_found": true + "start_mechanism": "unknown", + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { "name": "game_loads", - "pass": true, - "detail": "no console errors" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_starts", - "pass": true, - "detail": "started via auto" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_right", "pass": false, - "detail": "no change detected after key press" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "rotate", "pass": false, - "detail": "no change detected after rotate key" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other, unknown] failed: [] (tested 2 piece types in 60 attempts)" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "new_piece_spawns", - "pass": true, - "detail": "visual change suggests new piece spawned" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [0] -> no change after polling" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "playable_30s", - "pass": true, - "detail": "played for 26s, placed 100 pieces, no crashes" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" } ], "summary": { "total": 16, - "passed": 13, - "failed": 3, - "score": 0.81 + "passed": 0, + "failed": 16, + "score": 0 }, "gameplay": { - "pieces_placed": 191, - "lines_cleared": 1, + "pieces_placed": 0, + "lines_cleared": 0, "max_score_observed": 0, - "play_duration_seconds": 26, + "play_duration_seconds": 0, "errors_during_play": 0 }, + "session": { + "frames": 0, + "events_count": 0, + "pieces_spawned": 0, + "pieces_locked": 0, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 436 + "load_time_ms": -1 }, "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false + "issues": [], + "issue_count": 0, + "pass": true } } }, - "score": 0.645 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json @@ -1,13 +1,8 @@ { "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, "controls": { "left": "ArrowLeft", "right": "ArrowRight", @@ -15,113 +10,120 @@ "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "auto", - "score_element_found": true + "start_mechanism": "unknown", + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { "name": "game_loads", - "pass": true, - "detail": "no console errors" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_starts", - "pass": true, - "detail": "started via auto" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_right", "pass": false, - "detail": "no change detected after key press" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "rotate", "pass": false, - "detail": "no change detected after rotate key" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other, unknown] failed: [] (tested 2 piece types in 60 attempts)" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "new_piece_spawns", - "pass": true, - "detail": "visual change suggests new piece spawned" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [0] -> no change after polling" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "playable_30s", - "pass": true, - "detail": "played for 26s, placed 100 pieces, no crashes" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40141/\", waiting until \"domcontentloaded\"\u001b[22m\n" } ], "summary": { "total": 16, - "passed": 13, - "failed": 3, - "score": 0.81 + "passed": 0, + "failed": 16, + "score": 0 }, "gameplay": { - "pieces_placed": 191, - "lines_cleared": 1, + "pieces_placed": 0, + "lines_cleared": 0, "max_score_observed": 0, - "play_duration_seconds": 26, + "play_duration_seconds": 0, "errors_during_play": 0 }, + "session": { + "frames": 0, + "events_count": 0, + "pieces_spawned": 0, + "pieces_locked": 0, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 436 + "load_time_ms": -1 }, "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false + "issues": [], + "issue_count": 0, + "pass": true } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -4,8 +4,8 @@ "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -23,12 +23,7 @@ "detail": "TypeScript files found but no tsconfig.json" } ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.5 }, "quality": { "lint": { @@ -41,15 +36,16 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 292649, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 32, - "code": 20, + "total": 30, + "code": 19, "docs": 6, "unnecessary": 3, "unnecessary_list": [ @@ -58,11 +54,11 @@ "README.md" ] }, - "lines_of_code": 2694, + "lines_of_code": 2429, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "over-engineered", "console_logs": 3, @@ -131,131 +127,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "1 console error(s): Unexpected token '<'" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 165 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.2775 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,66 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 26774, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 7, - "code": 3, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1043, - "dependencies": { - "production": 0, - "dev": 5, - "total": 5 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 28, - "excessive": true - }, - "function_length": { - "count": 53, - "average": 7.2, - "max": 26, - "long_functions": 0 - }, - "max_nesting_depth": 10, - "global_declarations": 54, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 367, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 67, - "source_lines": 756, - "ratio_pct": 8.9 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 6 - }, - "duplication_percentage": 0.0, - "score": 0.85 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-trbyqbmh/loop-bench-dwwdc2bs', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 42, @@ -125,137 +69,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.88, - "total": 16, - "passed": 14, - "failed": 2, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 49" - }, - { - "name": "line_clear", - "pass": true, - "detail": "1 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [158] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 80 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 - }, - "gameplay": { - "pieces_placed": 121, - "lines_cleared": 2, - "max_score_observed": 164, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 32 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8665 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,6 +1,6 @@ { "structural": { - "pass": false, + "pass": true, "checks": [ { "name": "entry_point_exists", @@ -19,16 +19,11 @@ }, { "name": "typescript_compiles", - "pass": false, - "detail": "tsc --noEmit failed" + "pass": true, + "detail": "tsc --noEmit passed" } ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 1.0 }, "quality": { "lint": { @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 25309, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -125,137 +121,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 16 -> 40" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [160] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 130, - "lines_cleared": 1, - "max_score_observed": 222, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 31 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8581 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,66 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 41673, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 11, - "code": 5, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1339, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 40, - "excessive": true - }, - "function_length": { - "count": 53, - "average": 8.5, - "max": 48, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 16, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 532, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 99, - "source_lines": 1000, - "ratio_pct": 9.9 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 3, - "files_with_logic": 3, - "files_with_both": 3 - }, - "html_validation": { - "valid": false, - "errors": 2 - }, - "duplication_percentage": 0.0, - "score": 0.85 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-8enzhjux/loop-bench-qyww_eoe', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 49, @@ -125,137 +69,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 43" - }, - { - "name": "line_clear", - "pass": true, - "detail": "1 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [214] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 120, - "lines_cleared": 1, - "max_score_observed": 182, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 35 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8706 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,66 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 22161, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 7, - "code": 3, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1441, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 34, - "excessive": true - }, - "function_length": { - "count": 77, - "average": 6.8, - "max": 50, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 18, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 576, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 115, - "source_lines": 1022, - "ratio_pct": 11.3 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 2 - }, - "duplication_percentage": 0.0, - "score": 0.85 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-82_506qo/loop-bench-5vcsw5m5', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 98, @@ -125,138 +69,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.63, - "total": 16, - "passed": 10, - "failed": 6, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 60, - "height": 120 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "enter", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via enter" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [210] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 10, - "failed": 6, - "score": 0.63 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 176, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 60 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7675 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,66 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 49430, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 9, - "code": 4, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1471, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 72, - "excessive": true - }, - "function_length": { - "count": 60, - "average": 7.9, - "max": 44, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 18, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 464, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 136, - "source_lines": 983, - "ratio_pct": 13.8 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 5 - }, - "duplication_percentage": 0.0, - "score": 0.85 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-9ibz17n0/loop-bench-fffti9wy', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 46, @@ -125,138 +69,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.56, - "total": 16, - "passed": 9, - "failed": 7, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 52, - "height": 104 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "enter", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via enter" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": false, - "detail": "no change detected after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [208] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 9, - "failed": 7, - "score": 0.56 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 190, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 82 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7519 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 132652, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -256,5 +252,10 @@ } } }, - "score": 0.7612 + "outcome_score": 0.345, + "score": 0.345, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,14 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 30693, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 17, + "total": 16, "code": 4, "docs": 6, "unnecessary": 2, @@ -59,8 +55,8 @@ "lines_of_code": 1100, "dependencies": { "production": 0, - "dev": 6, - "total": 6 + "dev": 4, + "total": 4 }, "complexity": "moderate", "console_logs": 0, @@ -128,137 +124,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [I, other] failed: [] (tested 2 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "visual change suggests new piece spawned" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [0] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 80 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 167, - "lines_cleared": 1, - "max_score_observed": 0, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 29 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8113 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,14 +36,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 186790, + "pass": true, + "bundle_size_bytes": 119010, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 14, + "total": 13, "code": 3, "docs": 7, "unnecessary": 1, @@ -59,8 +55,8 @@ "lines_of_code": 1097, "dependencies": { "production": 0, - "dev": 6, - "total": 6 + "dev": 4, + "total": 4 }, "complexity": "moderate", "console_logs": 0, @@ -128,137 +124,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 16 -> 38" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [0] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 26s, placed 100 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 151, - "lines_cleared": 1, - "max_score_observed": 0, - "play_duration_seconds": 26, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 49 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7231 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,14 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 17118, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 21, + "total": 20, "code": 5, "docs": 10, "unnecessary": 2, @@ -59,8 +55,8 @@ "lines_of_code": 1373, "dependencies": { "production": 0, - "dev": 6, - "total": 6 + "dev": 4, + "total": 4 }, "complexity": "moderate", "console_logs": 0, @@ -131,5 +127,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.4519 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,11 +1,11 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -18,12 +18,7 @@ "detail": "no build script defined (static project)" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.67 }, "quality": { "lint": { @@ -36,24 +31,25 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 173186, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 5, - "code": 2, + "total": 3, + "code": 1, "docs": 0, "unnecessary": 0, "unnecessary_list": [] }, - "lines_of_code": 1522, + "lines_of_code": 761, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "minimal", "console_logs": 0, @@ -85,7 +81,7 @@ "ratio_pct": 0.0 }, "separation_of_concerns": { - "verdict": "unclear", + "verdict": "single-file", "files_with_rendering": 0, "files_with_logic": 0, "files_with_both": 0 @@ -121,136 +117,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.69, - "total": 16, - "passed": 11, - "failed": 5, - "report": { - "implementation": { - "renderer": "dom", - "grid_detected": true, - "grid_bounds": { - "x": 360.5, - "y": 50, - "width": 329, - "height": 639 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [0] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "390 console errors, 0 play errors during 30s" - } - ], - "summary": { - "total": 16, - "passed": 11, - "failed": 5, - "score": 0.69 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 0, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 27 - }, - "accessibility": { - "issues": [ - "no headings found" - ], - "issue_count": 1, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.6544 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,14 +36,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 170486, + "pass": true, + "bundle_size_bytes": 102950, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 12, + "total": 11, "code": 3, "docs": 6, "unnecessary": 1, @@ -59,8 +55,8 @@ "lines_of_code": 1346, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 0, @@ -99,10 +95,10 @@ }, "html_validation": { "valid": false, - "errors": 0 + "errors": 1 }, "duplication_percentage": 0.0, - "score": 0.45 + "score": 0.8 }, "transcript_analysis": { "total_events": 91, @@ -260,5 +256,10 @@ } } }, - "score": 0.6294 + "outcome_score": 0.88, + "score": 0.88, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,14 +36,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 178224, + "pass": true, + "bundle_size_bytes": 114939, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 13, + "total": 12, "code": 3, "docs": 6, "unnecessary": 2, @@ -60,8 +56,8 @@ "lines_of_code": 1352, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 0, @@ -100,10 +96,10 @@ }, "html_validation": { "valid": false, - "errors": 0 + "errors": 2 }, "duplication_percentage": 0.0, - "score": 0.35 + "score": 0.7 }, "transcript_analysis": { "total_events": 73, @@ -129,138 +125,144 @@ }, "gameplay_bot": { "pass": false, - "score": 0.88, + "score": 0, "total": 16, - "passed": 14, - "failed": 2, + "passed": 0, + "failed": 16, "report": { "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, "controls": { "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "z", + "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "auto", - "score_element_found": true + "start_mechanism": "unknown", + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { "name": "game_loads", - "pass": true, - "detail": "no console errors" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_starts", - "pass": true, - "detail": "started via auto" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [unknown] failed: [] (tested 1 piece types in 60 attempts)" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected after drop" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "line_clear", - "pass": true, - "detail": "9 line(s) cleared during AI play" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [0] -> no change after polling" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_over", "pass": false, - "detail": "could not trigger or detect game over" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "playable_30s", - "pass": true, - "detail": "played for 28s, placed 100 pieces, no crashes" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" } ], "summary": { "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 + "passed": 0, + "failed": 16, + "score": 0 }, "gameplay": { - "pieces_placed": 152, - "lines_cleared": 45, + "pieces_placed": 0, + "lines_cleared": 0, "max_score_observed": 0, - "play_duration_seconds": 28, + "play_duration_seconds": 0, "errors_during_play": 0 }, + "session": { + "frames": 0, + "events_count": 0, + "pieces_spawned": 0, + "pieces_locked": 0, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 31 + "load_time_ms": -1 }, "accessibility": { - "issues": [ - "no headings found", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false + "issues": [], + "issue_count": 0, + "pass": true } } }, - "score": 0.6106 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json @@ -1,128 +1,129 @@ { "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, "controls": { "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "z", + "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "auto", - "score_element_found": true + "start_mechanism": "unknown", + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { "name": "game_loads", - "pass": true, - "detail": "no console errors" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_starts", - "pass": true, - "detail": "started via auto" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [unknown] failed: [] (tested 1 piece types in 60 attempts)" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected after drop" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "line_clear", - "pass": true, - "detail": "9 line(s) cleared during AI play" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [0] -> no change after polling" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_over", "pass": false, - "detail": "could not trigger or detect game over" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "playable_30s", - "pass": true, - "detail": "played for 28s, placed 100 pieces, no crashes" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:34125/\", waiting until \"domcontentloaded\"\u001b[22m\n" } ], "summary": { "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 + "passed": 0, + "failed": 16, + "score": 0 }, "gameplay": { - "pieces_placed": 152, - "lines_cleared": 45, + "pieces_placed": 0, + "lines_cleared": 0, "max_score_observed": 0, - "play_duration_seconds": 28, + "play_duration_seconds": 0, "errors_during_play": 0 }, + "session": { + "frames": 0, + "events_count": 0, + "pieces_spawned": 0, + "pieces_locked": 0, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 31 + "load_time_ms": -1 }, "accessibility": { - "issues": [ - "no headings found", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false + "issues": [], + "issue_count": 0, + "pass": true } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,6 +1,6 @@ { "structural": { - "pass": false, + "pass": true, "checks": [ { "name": "entry_point_exists", @@ -19,16 +19,11 @@ }, { "name": "typescript_compiles", - "pass": false, - "detail": "tsc --noEmit failed" + "pass": true, + "detail": "tsc --noEmit passed" } ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 1.0 }, "quality": { "lint": { @@ -41,14 +36,15 @@ "errors": 2 }, "performance": { - "bundle_size_bytes": 139236, + "pass": true, + "bundle_size_bytes": 76033, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 10, + "total": 9, "code": 4, "docs": 2, "unnecessary": 1, @@ -59,8 +55,8 @@ "lines_of_code": 1214, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 0, @@ -131,5 +127,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.4325 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,14 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 13946, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 16, + "total": 15, "code": 3, "docs": 8, "unnecessary": 2, @@ -59,8 +55,8 @@ "lines_of_code": 1171, "dependencies": { "production": 0, - "dev": 5, - "total": 5 + "dev": 3, + "total": 3 }, "complexity": "moderate", "console_logs": 0, @@ -128,136 +124,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.81, - "total": 16, - "passed": 13, - "failed": 3, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": false, - "detail": "no change detected after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 16 -> 51" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [0] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 13, - "failed": 3, - "score": 0.81 - }, - "gameplay": { - "pieces_placed": 129, - "lines_cleared": 1, - "max_score_observed": 0, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 531 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role" - ], - "issue_count": 1, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7706 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,6 +1,6 @@ { "structural": { - "pass": false, + "pass": true, "checks": [ { "name": "entry_point_exists", @@ -19,16 +19,11 @@ }, { "name": "typescript_compiles", - "pass": false, - "detail": "tsc --noEmit failed" + "pass": true, + "detail": "tsc --noEmit passed" } ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 1.0 }, "quality": { "lint": { @@ -41,14 +36,15 @@ "errors": 1 }, "performance": { - "bundle_size_bytes": 190896, + "pass": true, + "bundle_size_bytes": 123979, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 16, + "total": 15, "code": 4, "docs": 8, "unnecessary": 3, @@ -61,8 +57,8 @@ "lines_of_code": 1268, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 1, @@ -133,5 +129,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3788 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,14 +36,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 208459, + "pass": true, + "bundle_size_bytes": 144461, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 15, + "total": 14, "code": 3, "docs": 8, "unnecessary": 1, @@ -59,8 +55,8 @@ "lines_of_code": 1024, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 0, @@ -99,10 +95,10 @@ }, "html_validation": { "valid": false, - "errors": 0 + "errors": 1 }, "duplication_percentage": 0.0, - "score": 0.45 + "score": 0.8 }, "transcript_analysis": { "total_events": 115, @@ -128,20 +124,15 @@ }, "gameplay_bot": { "pass": false, - "score": 0.88, + "score": 0, "total": 16, - "passed": 14, - "failed": 2, + "passed": 0, + "failed": 16, "report": { "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, "controls": { "left": "ArrowLeft", "right": "ArrowRight", @@ -149,116 +140,128 @@ "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "auto", - "score_element_found": true + "start_mechanism": "unknown", + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { "name": "game_loads", - "pass": true, - "detail": "no console errors" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_starts", - "pass": true, - "detail": "started via auto" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "rotate", "pass": false, - "detail": "no change detected after rotate key" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 16 -> 41" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [0] -> no change after polling" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "playable_30s", - "pass": true, - "detail": "played for 33s, placed 56 pieces, no crashes" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" } ], "summary": { "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 + "passed": 0, + "failed": 16, + "score": 0 }, "gameplay": { - "pieces_placed": 107, - "lines_cleared": 1, + "pieces_placed": 0, + "lines_cleared": 0, "max_score_observed": 0, - "play_duration_seconds": 33, + "play_duration_seconds": 0, "errors_during_play": 0 }, + "session": { + "frames": 0, + "events_count": 0, + "pieces_spawned": 0, + "pieces_locked": 0, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 757 + "load_time_ms": -1 }, "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false + "issues": [], + "issue_count": 0, + "pass": true } } }, - "score": 0.6294 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json @@ -1,13 +1,8 @@ { "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, "controls": { "left": "ArrowLeft", "right": "ArrowRight", @@ -15,113 +10,120 @@ "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "auto", - "score_element_found": true + "start_mechanism": "unknown", + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { "name": "game_loads", - "pass": true, - "detail": "no console errors" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_starts", - "pass": true, - "detail": "started via auto" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "rotate", "pass": false, - "detail": "no change detected after rotate key" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 16 -> 41" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [0] -> no change after polling" + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "playable_30s", - "pass": true, - "detail": "played for 33s, placed 56 pieces, no crashes" + "pass": false, + "detail": "page load failed: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37589/\", waiting until \"domcontentloaded\"\u001b[22m\n" } ], "summary": { "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 + "passed": 0, + "failed": 16, + "score": 0 }, "gameplay": { - "pieces_placed": 107, - "lines_cleared": 1, + "pieces_placed": 0, + "lines_cleared": 0, "max_score_observed": 0, - "play_duration_seconds": 33, + "play_duration_seconds": 0, "errors_during_play": 0 }, + "session": { + "frames": 0, + "events_count": 0, + "pieces_spawned": 0, + "pieces_locked": 0, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 757 + "load_time_ms": -1 }, "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false + "issues": [], + "issue_count": 0, + "pass": true } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,14 +36,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 206431, + "pass": true, + "bundle_size_bytes": 138507, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 17, + "total": 16, "code": 4, "docs": 8, "unnecessary": 2, @@ -60,8 +56,8 @@ "lines_of_code": 1056, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 2, @@ -132,5 +128,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3281 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,14 +36,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 179840, + "pass": true, + "bundle_size_bytes": 110726, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 13, + "total": 12, "code": 3, "docs": 5, "unnecessary": 4, @@ -62,8 +58,8 @@ "lines_of_code": 1273, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 0, @@ -102,10 +98,10 @@ }, "html_validation": { "valid": false, - "errors": 0 + "errors": 2 }, "duplication_percentage": 0.0, - "score": 0.25 + "score": 0.6 }, "transcript_analysis": { "total_events": 79, @@ -134,5 +130,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3169 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,6 +1,6 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", @@ -19,16 +19,11 @@ }, { "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" + "pass": false, + "detail": "tsc --noEmit failed" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.75 }, "quality": { "lint": { @@ -40,10 +35,11 @@ "pass": true }, "performance": { - "bundle_size_bytes": 169612, + "pass": true, + "bundle_size_bytes": 169255, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -128,8 +124,139 @@ }, "gameplay_bot": { "pass": false, - "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "score": 0.25, + "total": 16, + "passed": 4, + "failed": 12, + "report": { + "implementation": { + "renderer": "dom", + "grid_detected": true, + "grid_bounds": { + "x": 240, + "y": 110, + "width": 520, + "height": 1040 + }, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "ArrowDown", + "rotate": "ArrowUp", + "drop": "Space" + }, + "start_mechanism": "click_canvas", + "score_element_found": true + }, + "tests": [ + { + "name": "game_loads", + "pass": true, + "detail": "no console errors" + }, + { + "name": "game_starts", + "pass": true, + "detail": "started via click_canvas" + }, + { + "name": "auto_drop", + "pass": false, + "detail": "piece did not move in 5 seconds" + }, + { + "name": "move_left", + "pass": false, + "detail": "no change detected after key press" + }, + { + "name": "move_right", + "pass": false, + "detail": "no change detected after key press" + }, + { + "name": "move_down", + "pass": true, + "detail": "grid state changed after key press" + }, + { + "name": "rotate", + "pass": true, + "detail": "piece shape changed after rotate key" + }, + { + "name": "all_pieces_rotate", + "pass": false, + "detail": "could not detect any piece rotations" + }, + { + "name": "hard_drop", + "pass": false, + "detail": "no change detected after hard drop key" + }, + { + "name": "piece_locks", + "pass": false, + "detail": "could not verify piece locking at bottom" + }, + { + "name": "new_piece_spawns", + "pass": false, + "detail": "could not detect new piece at top" + }, + { + "name": "multiple_pieces", + "pass": false, + "detail": "exception: page.waitForTimeout: Test timeout of 180000ms exceeded." + }, + { + "name": "line_clear", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + }, + { + "name": "score_changes", + "pass": false, + "detail": "could not read score element" + }, + { + "name": "game_over", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + }, + { + "name": "playable_30s", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + } + ], + "summary": { + "total": 16, + "passed": 4, + "failed": 12, + "score": 0.25 + }, + "gameplay": { + "pieces_placed": 60, + "lines_cleared": 0, + "max_score_observed": 0, + "play_duration_seconds": 0, + "errors_during_play": 0 + }, + "performance": { + "load_time_ms": 9051 + }, + "accessibility": { + "issues": [], + "issue_count": 0, + "pass": true + } + } }, - "score": 0.51 + "outcome_score": 0.125, + "score": 0.125, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { - "bundle_size_bytes": 135674, + "pass": true, + "bundle_size_bytes": 135317, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -127,8 +123,139 @@ }, "gameplay_bot": { "pass": false, - "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "score": 0.5, + "total": 16, + "passed": 8, + "failed": 8, + "report": { + "implementation": { + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 300, + "height": 600 + }, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "ArrowDown", + "rotate": "ArrowUp", + "drop": "Space" + }, + "start_mechanism": "auto", + "score_element_found": false + }, + "tests": [ + { + "name": "game_loads", + "pass": true, + "detail": "no console errors" + }, + { + "name": "game_starts", + "pass": true, + "detail": "started via auto" + }, + { + "name": "auto_drop", + "pass": true, + "detail": "pixels changed after 5s with no input" + }, + { + "name": "move_left", + "pass": true, + "detail": "grid state changed after key press" + }, + { + "name": "move_right", + "pass": true, + "detail": "grid state changed after key press" + }, + { + "name": "move_down", + "pass": true, + "detail": "grid state changed after key press" + }, + { + "name": "rotate", + "pass": true, + "detail": "piece shape changed after rotate key" + }, + { + "name": "all_pieces_rotate", + "pass": false, + "detail": "could not detect any piece rotations" + }, + { + "name": "hard_drop", + "pass": true, + "detail": "visual change detected after hard drop" + }, + { + "name": "piece_locks", + "pass": false, + "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n\u001b[2m - fonts loaded\u001b[22m\n" + }, + { + "name": "new_piece_spawns", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + }, + { + "name": "multiple_pieces", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + }, + { + "name": "line_clear", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + }, + { + "name": "score_changes", + "pass": false, + "detail": "exception: page.evaluate: Target page, context or browser has been closed" + }, + { + "name": "game_over", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + }, + { + "name": "playable_30s", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + } + ], + "summary": { + "total": 16, + "passed": 8, + "failed": 8, + "score": 0.5 + }, + "gameplay": { + "pieces_placed": 60, + "lines_cleared": 0, + "max_score_observed": 0, + "play_duration_seconds": 0, + "errors_during_play": 0 + }, + "performance": { + "load_time_ms": 2815 + }, + "accessibility": { + "issues": [], + "issue_count": 0, + "pass": true + } + } }, - "score": 0.5425 + "outcome_score": 0.25, + "score": 0.25, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,14 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 38556, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 19, + "total": 18, "code": 6, "docs": 6, "unnecessary": 2, @@ -59,8 +55,8 @@ "lines_of_code": 1488, "dependencies": { "production": 0, - "dev": 5, - "total": 5 + "dev": 3, + "total": 3 }, "complexity": "over-engineered", "console_logs": 3, @@ -128,137 +124,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.44, - "total": 16, - "passed": 7, - "failed": 9, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 240, - "height": 400 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "click_canvas", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via click_canvas" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "piece did not move in 5 seconds" - }, - { - "name": "move_left", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "move_right", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [0] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 7, - "failed": 9, - "score": 0.44 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 0, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 39 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.625 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,6 +1,6 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", @@ -19,16 +19,11 @@ }, { "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" + "pass": false, + "detail": "tsc --noEmit failed" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.75 }, "quality": { "lint": { @@ -40,14 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 13766, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 15, + "total": 14, "code": 4, "docs": 6, "unnecessary": 1, @@ -58,8 +54,8 @@ "lines_of_code": 1169, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 0, @@ -130,5 +126,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5487 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,83 +25,13 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 203276, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-4v0qdx8e/loop-bench-ktrmyvlb', 'typescript']' timed out after 120 seconds" }, "code_analysis": { - "files": { - "total": 15, - "code": 3, - "docs": 7, - "unnecessary": 1, - "unnecessary_list": [ - "README.md" - ] - }, - "lines_of_code": 1308, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 22, - "excessive": true - }, - "function_length": { - "count": 52, - "average": 7.1, - "max": 30, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 0, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 629, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 32, - "source_lines": 1083, - "ratio_pct": 3.0 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 2 - }, - "duplication_percentage": 0.0, - "score": 0.75 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-4v0qdx8e/loop-bench-ktrmyvlb', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 120, @@ -130,5 +60,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5269 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,85 +25,13 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": false, - "error": "no tsconfig.json" - }, - "performance": { - "bundle_size_bytes": 160372, - "size_under_512kb": true - }, - "score": 0.33 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-g_6vs4uj/loop-bench-v_scnslz', 'typescript']' timed out after 120 seconds" }, "code_analysis": { - "files": { - "total": 13, - "code": 4, - "docs": 6, - "unnecessary": 2, - "unnecessary_list": [ - "server.js", - "README.md" - ] - }, - "lines_of_code": 984, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 2, - "magic_numbers": { - "count": 17, - "excessive": false - }, - "function_length": { - "count": 54, - "average": 5.1, - "max": 12, - "long_functions": 0 - }, - "max_nesting_depth": 10, - "global_declarations": 20, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 311, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 32, - "source_lines": 711, - "ratio_pct": 4.5 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 2 - }, - "duplication_percentage": 0.0, - "score": 0.66 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-g_6vs4uj/loop-bench-v_scnslz', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 95, @@ -132,5 +60,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3938 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,95 +36,29 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 170400, + "pass": true, + "bundle_size_bytes": 106524, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { - "files": { - "total": 12, - "code": 3, - "docs": 5, - "unnecessary": 1, - "unnecessary_list": [ - "README.md" - ] - }, - "lines_of_code": 1145, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 12, - "excessive": false - }, - "function_length": { - "count": 50, - "average": 7.5, - "max": 30, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 12, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 748, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 26, - "source_lines": 823, - "ratio_pct": 3.2 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 2 - }, - "duplication_percentage": 0.0, - "score": 0.8 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-u6jryf6_/loop-bench-fx9t030l', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { - "total_events": 110, - "tool_calls": { - "total": 28, - "bash": 21, - "write": 0, - "edit": 3, - "read": 4 - }, - "wasted_turns": { - "total": 18, - "docs": 3, - "ascii_art": 5, - "server_starts": 10 - }, - "errors_encountered": 0, - "thinking_blocks": 29, - "text_blocks": 21, - "productivity_ratio": 0.36, - "self_tested": false, - "score": 0.75 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/transcript-analysis.py', '/root/loop-benchmarking/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run1']' timed out after 30 seconds", + "score": 0 }, "gameplay_bot": { "pass": false, "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.42 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run2/eval_results.json @@ -4,8 +4,8 @@ "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -23,12 +23,7 @@ "detail": "TypeScript files found but no tsconfig.json" } ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.5 }, "quality": { "lint": { @@ -41,96 +36,29 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 227740, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { - "files": { - "total": 17, - "code": 4, - "docs": 6, - "unnecessary": 2, - "unnecessary_list": [ - "QUICK_START.txt", - "README.md" - ] - }, - "lines_of_code": 1322, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 14, - "excessive": false - }, - "function_length": { - "count": 47, - "average": 7.6, - "max": 28, - "long_functions": 0 - }, - "max_nesting_depth": 7, - "global_declarations": 8, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 564, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 60, - "source_lines": 766, - "ratio_pct": 7.8 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 5 - }, - "duplication_percentage": 0.0, - "score": 0.75 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-23oypf3d/loop-bench-v114tywh', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { - "total_events": 124, - "tool_calls": { - "total": 35, - "bash": 32, - "write": 0, - "edit": 2, - "read": 1 - }, - "wasted_turns": { - "total": 14, - "docs": 6, - "ascii_art": 3, - "server_starts": 5 - }, - "errors_encountered": 0, - "thinking_blocks": 36, - "text_blocks": 14, - "productivity_ratio": 0.6, - "self_tested": false, - "score": 0.75 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/transcript-analysis.py', '/root/loop-benchmarking/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run2']' timed out after 30 seconds", + "score": 0 }, "gameplay_bot": { "pass": false, "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.4106 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run3/eval_results.json @@ -4,8 +4,8 @@ "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -23,12 +23,7 @@ "detail": "TypeScript files found but no tsconfig.json" } ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.5 }, "quality": { "lint": { @@ -41,219 +36,29 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 214253, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { - "files": { - "total": 22, - "code": 5, - "docs": 8, - "unnecessary": 2, - "unnecessary_list": [ - "QUICK_START.txt", - "README.md" - ] - }, - "lines_of_code": 1407, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 10, - "excessive": false - }, - "function_length": { - "count": 55, - "average": 4.9, - "max": 17, - "long_functions": 0 - }, - "max_nesting_depth": 14, - "global_declarations": 2, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 520, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 20, - "source_lines": 813, - "ratio_pct": 2.5 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 3, - "files_with_logic": 3, - "files_with_both": 3 - }, - "html_validation": { - "valid": true, - "errors": 0 - }, - "duplication_percentage": 0.0, - "score": 0.75 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-oyxhf1b6/loop-bench-n9n73btm', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { - "total_events": 134, - "tool_calls": { - "total": 36, - "bash": 32, - "write": 0, - "edit": 2, - "read": 2 - }, - "wasted_turns": { - "total": 13, - "docs": 3, - "ascii_art": 5, - "server_starts": 5 - }, - "errors_encountered": 0, - "thinking_blocks": 37, - "text_blocks": 21, - "productivity_ratio": 0.64, - "self_tested": false, - "score": 0.75 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/transcript-analysis.py', '/root/loop-benchmarking/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run3']' timed out after 30 seconds", + "score": 0 }, "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "1 console error(s): Unexpected token '<'" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 1915 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role" - ], - "issue_count": 1, - "pass": false - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.4106 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run1/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-2crbjsnw/loop-bench-npum8frs', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,69 +13,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 68823, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 43, - "code": 20, - "docs": 7, - "unnecessary": 2, - "unnecessary_list": [ - "FEATURES.md", - "README.md" - ] - }, - "lines_of_code": 2098, - "dependencies": { - "production": 0, - "dev": 8, - "total": 8 - }, - "complexity": "over-engineered", - "console_logs": 0, - "magic_numbers": { - "count": 36, - "excessive": true - }, - "function_length": { - "count": 125, - "average": 6.1, - "max": 34, - "long_functions": 0 - }, - "max_nesting_depth": 10, - "global_declarations": 14, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 98.8, - "camel_case": 1165, - "snake_case": 14 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 50, - "source_lines": 1736, - "ratio_pct": 2.9 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 6, - "files_with_logic": 10, - "files_with_both": 5 - }, - "html_validation": { - "valid": true, - "errors": 0 - }, - "duplication_percentage": 0.0, - "score": 0.6 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-2crbjsnw/loop-bench-npum8frs', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 152, @@ -131,5 +50,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.4988 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run2/eval_results.json @@ -1,34 +1,7 @@ { "structural": { "pass": false, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": false, - "detail": "npm run build failed" - }, - { - "name": "typescript_compiles", - "pass": false, - "detail": "TypeScript files found but no tsconfig.json" - } - ], - "score": 0.5 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-h0p8kpvx/loop-bench-5dl5qz_l', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -41,70 +14,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 160868, + "pass": true, + "bundle_size_bytes": 96268, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { - "files": { - "total": 12, - "code": 4, - "docs": 5, - "unnecessary": 3, - "unnecessary_list": [ - "FEATURES.md", - "server.js", - "README.md" - ] - }, - "lines_of_code": 1092, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 1, - "magic_numbers": { - "count": 18, - "excessive": false - }, - "function_length": { - "count": 52, - "average": 6.6, - "max": 28, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 17, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 457, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 37, - "source_lines": 749, - "ratio_pct": 4.9 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 3, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 3 - }, - "duplication_percentage": 0.0, - "score": 0.58 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-h0p8kpvx/loop-bench-5dl5qz_l', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 94, @@ -133,5 +51,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3475 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-bdk2kxg3/loop-bench-opdim046', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,68 +13,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 34913, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 19, - "code": 5, - "docs": 7, - "unnecessary": 1, - "unnecessary_list": [ - "README.md" - ] - }, - "lines_of_code": 1363, - "dependencies": { - "production": 0, - "dev": 5, - "total": 5 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 10, - "excessive": false - }, - "function_length": { - "count": 63, - "average": 5.3, - "max": 15, - "long_functions": 0 - }, - "max_nesting_depth": 10, - "global_declarations": 10, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 573, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 40, - "source_lines": 968, - "ratio_pct": 4.1 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 3, - "files_with_logic": 3, - "files_with_both": 3 - }, - "html_validation": { - "valid": false, - "errors": 8 - }, - "duplication_percentage": 0.0, - "score": 0.8 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-bdk2kxg3/loop-bench-opdim046', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 120, @@ -127,136 +47,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.31, - "total": 16, - "passed": 5, - "failed": 11, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "click_canvas", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via click_canvas" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "piece did not move in 5 seconds" - }, - { - "name": "move_left", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "move_right", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": false, - "detail": "no change detected after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [0] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 5, - "failed": 11, - "score": 0.31 - }, - "gameplay": { - "pieces_placed": 189, - "lines_cleared": 1, - "max_score_observed": 0, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 40 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role" - ], - "issue_count": 1, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.6331 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-33yz4i_u/loop-bench-jvbn5i1r', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,70 +13,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 18068, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 21, - "code": 8, - "docs": 9, - "unnecessary": 3, - "unnecessary_list": [ - "FEATURES.md", - "server.js", - "README.md" - ] - }, - "lines_of_code": 1516, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "over-engineered", - "console_logs": 2, - "magic_numbers": { - "count": 13, - "excessive": false - }, - "function_length": { - "count": 52, - "average": 5.2, - "max": 15, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 15, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 393, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 20, - "source_lines": 784, - "ratio_pct": 2.6 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 3 - }, - "duplication_percentage": 0.0, - "score": 0.46 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-33yz4i_u/loop-bench-jvbn5i1r', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 107, @@ -132,5 +50,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.485 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2/eval_results.json @@ -25,84 +25,13 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 12744, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-0x3y75v_/loop-bench-tirri_jo', 'typescript']' timed out after 120 seconds" }, "code_analysis": { - "files": { - "total": 16, - "code": 4, - "docs": 7, - "unnecessary": 2, - "unnecessary_list": [ - "server.js", - "README.md" - ] - }, - "lines_of_code": 1098, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 2, - "magic_numbers": { - "count": 16, - "excessive": false - }, - "function_length": { - "count": 45, - "average": 8.0, - "max": 35, - "long_functions": 0 - }, - "max_nesting_depth": 11, - "global_declarations": 10, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 396, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 34, - "source_lines": 744, - "ratio_pct": 4.6 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": true, - "errors": 0 - }, - "duplication_percentage": 0.0, - "score": 0.71 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-0x3y75v_/loop-bench-tirri_jo', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 134, @@ -260,5 +189,10 @@ } } }, - "score": 0.8131 + "outcome_score": 0.94, + "score": 0.94, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-me5_pdo3/loop-bench-sdqv7jk5', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,69 +13,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 50297, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 39, - "code": 20, - "docs": 9, - "unnecessary": 2, - "unnecessary_list": [ - "server.js", - "README.md" - ] - }, - "lines_of_code": 1642, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "over-engineered", - "console_logs": 2, - "magic_numbers": { - "count": 19, - "excessive": false - }, - "function_length": { - "count": 75, - "average": 6.1, - "max": 35, - "long_functions": 0 - }, - "max_nesting_depth": 10, - "global_declarations": 29, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 685, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 42, - "source_lines": 1296, - "ratio_pct": 3.2 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 5, - "files_with_logic": 9, - "files_with_both": 4 - }, - "html_validation": { - "valid": false, - "errors": 1 - }, - "duplication_percentage": 0.0, - "score": 0.56 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-me5_pdo3/loop-bench-sdqv7jk5', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 123, @@ -128,8 +47,139 @@ }, "gameplay_bot": { "pass": false, - "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "score": 0.13, + "total": 16, + "passed": 2, + "failed": 14, + "report": { + "implementation": { + "renderer": "unknown", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 75, + "height": 150 + }, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "ArrowDown", + "rotate": "ArrowUp", + "drop": "Space" + }, + "start_mechanism": "unknown", + "score_element_found": false + }, + "tests": [ + { + "name": "game_loads", + "pass": true, + "detail": "no console errors" + }, + { + "name": "game_starts", + "pass": false, + "detail": "could not start game with any mechanism" + }, + { + "name": "auto_drop", + "pass": false, + "detail": "piece did not move in 5 seconds" + }, + { + "name": "move_left", + "pass": false, + "detail": "no change detected after key press" + }, + { + "name": "move_right", + "pass": false, + "detail": "no change detected after key press" + }, + { + "name": "move_down", + "pass": false, + "detail": "no change detected after key press" + }, + { + "name": "rotate", + "pass": false, + "detail": "no change detected after rotate key" + }, + { + "name": "all_pieces_rotate", + "pass": false, + "detail": "could not detect any piece rotations" + }, + { + "name": "hard_drop", + "pass": false, + "detail": "no change detected after hard drop key" + }, + { + "name": "piece_locks", + "pass": false, + "detail": "could not verify piece locking at bottom" + }, + { + "name": "new_piece_spawns", + "pass": false, + "detail": "could not detect new piece at top" + }, + { + "name": "multiple_pieces", + "pass": false, + "detail": "grid did not accumulate filled cells" + }, + { + "name": "line_clear", + "pass": true, + "detail": "line cleared via strategic placement" + }, + { + "name": "score_changes", + "pass": false, + "detail": "score did not increase: [0] -> no change after polling" + }, + { + "name": "game_over", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + }, + { + "name": "playable_30s", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + } + ], + "summary": { + "total": 16, + "passed": 2, + "failed": 14, + "score": 0.13 + }, + "gameplay": { + "pieces_placed": 110, + "lines_cleared": 1, + "max_score_observed": 0, + "play_duration_seconds": 0, + "errors_during_play": 0 + }, + "performance": { + "load_time_ms": 117 + }, + "accessibility": { + "issues": [], + "issue_count": 0, + "pass": true + } + } }, - "score": 0.4913 + "outcome_score": 0.13, + "score": 0.13, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,96 +36,29 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 159576, + "pass": true, + "bundle_size_bytes": 92139, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { - "files": { - "total": 12, - "code": 5, - "docs": 3, - "unnecessary": 2, - "unnecessary_list": [ - "server.js", - "README.md" - ] - }, - "lines_of_code": 1390, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 2, - "magic_numbers": { - "count": 15, - "excessive": false - }, - "function_length": { - "count": 50, - "average": 8.1, - "max": 40, - "long_functions": 0 - }, - "max_nesting_depth": 10, - "global_declarations": 16, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 830, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 33, - "source_lines": 921, - "ratio_pct": 3.6 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 4 - }, - "duplication_percentage": 0.0, - "score": 0.66 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-dg9g3bx_/loop-bench-eenai5fl', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { - "total_events": 69, - "tool_calls": { - "total": 18, - "bash": 13, - "write": 0, - "edit": 0, - "read": 5 - }, - "wasted_turns": { - "total": 6, - "docs": 2, - "ascii_art": 0, - "server_starts": 4 - }, - "errors_encountered": 0, - "thinking_blocks": 19, - "text_blocks": 10, - "productivity_ratio": 0.67, - "self_tested": false, - "score": 0.75 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/transcript-analysis.py', '/root/loop-benchmarking/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2']' timed out after 30 seconds", + "score": 0 }, "gameplay_bot": { "pass": false, "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3938 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-jz1t_adw/loop-bench-ud3ikw5f', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,68 +13,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 29718, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 13, - "code": 5, - "docs": 2, - "unnecessary": 1, - "unnecessary_list": [ - "README.md" - ] - }, - "lines_of_code": 1207, - "dependencies": { - "production": 0, - "dev": 5, - "total": 5 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 14, - "excessive": false - }, - "function_length": { - "count": 50, - "average": 6.3, - "max": 26, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 16, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 455, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 31, - "source_lines": 848, - "ratio_pct": 3.7 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 3, - "files_with_logic": 3, - "files_with_both": 3 - }, - "html_validation": { - "valid": false, - "errors": 5 - }, - "duplication_percentage": 0.0, - "score": 0.8 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-jz1t_adw/loop-bench-ud3ikw5f', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 52, @@ -127,136 +47,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [I, other] failed: [] (tested 2 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "visual change suggests new piece spawned" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 24" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [0] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 28s, placed 100 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 210, - "lines_cleared": 1, - "max_score_observed": 0, - "play_duration_seconds": 28, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 159 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role" - ], - "issue_count": 1, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8363 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-loagbwpu/loop-bench-xt9_gk40', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,66 +13,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 25687, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 7, - "code": 3, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 895, - "dependencies": { - "production": 0, - "dev": 4, - "total": 4 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 16, - "excessive": false - }, - "function_length": { - "count": 61, - "average": 6.6, - "max": 22, - "long_functions": 0 - }, - "max_nesting_depth": 8, - "global_declarations": 48, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 275, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 37, - "source_lines": 688, - "ratio_pct": 5.4 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 7 - }, - "duplication_percentage": 0.0, - "score": 0.95 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-loagbwpu/loop-bench-xt9_gk40', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 40, @@ -125,137 +47,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 40" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [210] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 76 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 127, - "lines_cleared": 1, - "max_score_observed": 158, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 22 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.9065 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-de89ue8t/loop-bench-3ur6qash', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,66 +13,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 43767, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 7, - "code": 3, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1292, - "dependencies": { - "production": 0, - "dev": 4, - "total": 4 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 30, - "excessive": true - }, - "function_length": { - "count": 59, - "average": 7.3, - "max": 27, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 34, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 435, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 96, - "source_lines": 991, - "ratio_pct": 9.7 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 8 - }, - "duplication_percentage": 0.0, - "score": 0.85 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-de89ue8t/loop-bench-3ur6qash', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 41, @@ -125,137 +47,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 46" - }, - { - "name": "line_clear", - "pass": true, - "detail": "1 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [132] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 119, - "lines_cleared": 2, - "max_score_observed": 200, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 26 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8815 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,6 +1,6 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", @@ -19,35 +19,19 @@ }, { "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" + "pass": false, + "detail": "tsc --noEmit failed" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.75 }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 17548, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-kdaj92n_/loop-bench-afsfv6ek', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { - "total": 6, + "total": 7, "code": 3, "docs": 0, "unnecessary": 0, @@ -56,8 +40,8 @@ "lines_of_code": 1323, "dependencies": { "production": 0, - "dev": 4, - "total": 4 + "dev": 6, + "total": 6 }, "complexity": "moderate", "console_logs": 0, @@ -96,10 +80,10 @@ }, "html_validation": { "valid": false, - "errors": 2 + "errors": 0 }, "duplication_percentage": 0.0, - "score": 0.85 + "score": 0.5 }, "transcript_analysis": { "total_events": 44, @@ -125,138 +109,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.63, - "total": 16, - "passed": 10, - "failed": 6, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 60, - "height": 120 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [128] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 10, - "failed": 6, - "score": 0.63 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 160, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 34 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.804 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,25 +25,9 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 145626, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-_3aet_ww/loop-bench-ztizwjzj', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,137 +109,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" - }, - { - "name": "line_clear", - "pass": true, - "detail": "10 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": true, - "detail": "score changed from 0 to 122" - }, - { - "name": "game_over", - "pass": false, - "detail": "could not trigger or detect game over" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 28s, placed 100 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 141, - "lines_cleared": 46, - "max_score_observed": 0, - "play_duration_seconds": 28, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 617 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7737 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,11 +1,11 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -23,38 +23,21 @@ "detail": "tsc --noEmit passed" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.75 }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": false, - "errors": 1 - }, - "performance": { - "bundle_size_bytes": 15037, - "size_under_512kb": true - }, - "score": 0.33 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-3a9i8rmm/loop-bench-jymd_531', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { - "total": 8, - "code": 4, + "total": 7, + "code": 3, "docs": 0, "unnecessary": 0, "unnecessary_list": [] }, - "lines_of_code": 2086, + "lines_of_code": 1526, "dependencies": { "production": 0, "dev": 7, @@ -126,137 +109,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.81, - "total": 16, - "passed": 13, - "failed": 3, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "rotated: [] failed: [I] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 44" - }, - { - "name": "line_clear", - "pass": true, - "detail": "2 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [0] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 13, - "failed": 3, - "score": 0.81 - }, - "gameplay": { - "pieces_placed": 119, - "lines_cleared": 4, - "max_score_observed": 0, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 497 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.6887 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,6 +1,6 @@ { "structural": { - "pass": false, + "pass": true, "checks": [ { "name": "entry_point_exists", @@ -19,32 +19,15 @@ }, { "name": "typescript_compiles", - "pass": false, - "detail": "tsc --noEmit failed" + "pass": true, + "detail": "tsc --noEmit passed" } ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 1.0 }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": false, - "errors": 2 - }, - "performance": { - "bundle_size_bytes": 151804, - "size_under_512kb": true - }, - "score": 0.33 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-mzmf8qdj/loop-bench-amrtiyou', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -96,11 +79,11 @@ "files_with_both": 2 }, "html_validation": { - "valid": true, + "valid": false, "errors": 0 }, "duplication_percentage": 0.0, - "score": 0.9 + "score": 0.5 }, "transcript_analysis": { "total_events": 56, @@ -126,137 +109,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 16 -> 40" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [130] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 129, - "lines_cleared": 1, - "max_score_observed": 162, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 27 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7637 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,53 +1,15 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-38mshnpy/loop-bench-dzgab_lb', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 49197, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-38mshnpy/loop-bench-dzgab_lb', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { - "total": 7, + "total": 8, "code": 3, "docs": 0, "unnecessary": 0, @@ -56,8 +18,8 @@ "lines_of_code": 1367, "dependencies": { "production": 0, - "dev": 5, - "total": 5 + "dev": 7, + "total": 7 }, "complexity": "moderate", "console_logs": 0, @@ -125,138 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.56, - "total": 16, - "passed": 9, - "failed": 7, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 25, - "height": 50 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": false, - "detail": "no change detected after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [240] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 77 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 9, - "failed": 7, - "score": 0.56 - }, - "gameplay": { - "pieces_placed": 187, - "lines_cleared": 1, - "max_score_observed": 186, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 60 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7865 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,53 +1,15 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-0n45h12w/loop-bench-p92ynomw', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 48079, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-0n45h12w/loop-bench-p92ynomw', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { - "total": 10, + "total": 11, "code": 5, "docs": 0, "unnecessary": 0, @@ -56,8 +18,8 @@ "lines_of_code": 1617, "dependencies": { "production": 0, - "dev": 5, - "total": 5 + "dev": 7, + "total": 7 }, "complexity": "moderate", "console_logs": 0, @@ -257,5 +219,10 @@ } } }, - "score": 0.8665 + "outcome_score": 0.44, + "score": 0.44, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,53 +1,15 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-ex80o7g1/loop-bench-mjng5_fj', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 28619, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-ex80o7g1/loop-bench-mjng5_fj', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { - "total": 10, + "total": 11, "code": 5, "docs": 0, "unnecessary": 0, @@ -56,8 +18,8 @@ "lines_of_code": 1246, "dependencies": { "production": 0, - "dev": 5, - "total": 5 + "dev": 7, + "total": 7 }, "complexity": "moderate", "console_logs": 0, @@ -125,137 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other, unknown] failed: [] (tested 2 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 32 -> 44" - }, - { - "name": "line_clear", - "pass": true, - "detail": "1 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [200] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 77 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 122, - "lines_cleared": 2, - "max_score_observed": 182, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 348 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.894 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,49 +1,11 @@ { "structural": { "pass": false, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "no build script defined (static project)" - }, - { - "name": "typescript_compiles", - "pass": false, - "detail": "tsc --noEmit failed" - } - ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-9ieawh2c/loop-bench-6s2flshh', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 50041, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-9ieawh2c/loop-bench-6s2flshh', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,138 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.63, - "total": 16, - "passed": 10, - "failed": 6, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 40, - "height": 80 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [154] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 10, - "failed": 6, - "score": 0.63 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 222, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 34 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7425 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,49 +1,11 @@ { "structural": { "pass": false, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "no build script defined (static project)" - }, - { - "name": "typescript_compiles", - "pass": false, - "detail": "tsc --noEmit failed" - } - ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-f54mliyx/loop-bench-pc2ap4ay', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 44999, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-f54mliyx/loop-bench-pc2ap4ay', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,138 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.63, - "total": 16, - "passed": 10, - "failed": 6, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 40, - "height": 80 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "enter", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via enter" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [0] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 10, - "failed": 6, - "score": 0.63 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 0, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 35 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7425 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,49 +1,11 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-0ywraxje/loop-bench-q46nchnj', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 47466, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-0ywraxje/loop-bench-q46nchnj', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,137 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.88, - "total": 16, - "passed": 14, - "failed": 2, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 41" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [272] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 80 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 - }, - "gameplay": { - "pieces_placed": 131, - "lines_cleared": 1, - "max_score_observed": 174, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 21 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8519 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,49 +1,11 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-egdg4quh/loop-bench-amkr12d_', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 38300, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-egdg4quh/loop-bench-amkr12d_', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,137 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.88, - "total": 16, - "passed": 14, - "failed": 2, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 41" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [162] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 - }, - "gameplay": { - "pieces_placed": 130, - "lines_cleared": 1, - "max_score_observed": 192, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 21 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8519 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,49 +1,11 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-cy1ouzsu/loop-bench-t_rwtfd6', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 38664, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-cy1ouzsu/loop-bench-t_rwtfd6', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -96,10 +58,10 @@ }, "html_validation": { "valid": false, - "errors": 3 + "errors": 0 }, "duplication_percentage": 0.0, - "score": 0.75 + "score": 0.4 }, "transcript_analysis": { "total_events": 73, @@ -125,137 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.88, - "total": 16, - "passed": 14, - "failed": 2, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 45" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [154] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 80 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 - }, - "gameplay": { - "pieces_placed": 131, - "lines_cleared": 1, - "max_score_observed": 154, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 58 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8331 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json @@ -16,7 +16,8 @@ "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -32,62 +33,62 @@ { "name": "auto_drop", "pass": true, - "detail": "grid state changed after 5s with no input" + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", - "pass": false, - "detail": "no change detected after key press" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": true, - "detail": "piece shape changed after rotate key" + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "detail": "rotation confirmed but could not identify individual piece types" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", "pass": true, - "detail": "filled cells persist at bottom" + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", "pass": true, - "detail": "new piece detected at top of grid" + "detail": "5 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", "pass": true, - "detail": "grid accumulated cells: 20 -> 45" + "detail": "11 pieces placed during play session" }, { "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" + "pass": false, + "detail": "could not trigger or detect a line clear via grid reader" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [154] -> no change after polling" + "detail": "score stayed at 308" }, { "name": "game_over", @@ -97,7 +98,7 @@ { "name": "playable_30s", "pass": true, - "detail": "played for 30s, placed 80 pieces, no crashes" + "detail": "played for 30s, placed 41 pieces, no crashes" } ], "summary": { @@ -107,14 +108,25 @@ "score": 0.88 }, "gameplay": { - "pieces_placed": 131, - "lines_cleared": 1, - "max_score_observed": 154, + "pieces_placed": 41, + "lines_cleared": 0, + "max_score_observed": 308, "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 179, + "events_count": 10, + "pieces_spawned": 5, + "pieces_locked": 11, + "lines_cleared": 0, + "piece_types_seen": [ + "unknown" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 58 + "load_time_ms": 1458 }, "accessibility": { "issues": [ diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,49 +1,11 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-1ryr7fh0/loop-bench-g913qn5g', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 42269, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-1ryr7fh0/loop-bench-g913qn5g', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,137 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.88, - "total": 16, - "passed": 14, - "failed": 2, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 44" - }, - { - "name": "line_clear", - "pass": true, - "detail": "1 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [178] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 - }, - "gameplay": { - "pieces_placed": 120, - "lines_cleared": 2, - "max_score_observed": 188, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 26 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8612 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,49 +1,11 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-dr_6_3of/loop-bench-xwpci4l5', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 42258, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-dr_6_3of/loop-bench-xwpci4l5', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,136 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 43" - }, - { - "name": "line_clear", - "pass": true, - "detail": "1 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "no score element found and no number changed" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 80 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 121, - "lines_cleared": 2, - "max_score_observed": 0, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 22 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role" - ], - "issue_count": 1, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8706 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,49 +1,11 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-vqyy1j1m/loop-bench-io7ugig5', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 45625, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-vqyy1j1m/loop-bench-io7ugig5', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,137 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 16 -> 41" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "no score element found and no number changed" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 130, - "lines_cleared": 1, - "max_score_observed": 0, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 21 - }, - "accessibility": { - "issues": [ - "no headings found", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.88 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,25 +25,9 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 36796, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-_e4j35hj/loop-bench-wyg6e789', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,137 +109,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 44" - }, - { - "name": "line_clear", - "pass": true, - "detail": "1 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [240] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 80 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 121, - "lines_cleared": 2, - "max_score_observed": 152, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 35 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8394 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run1/eval_results.json @@ -1,49 +1,11 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "no build script defined (static project)" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-60ygqx7w/loop-bench-l6roqn5z', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 36505, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-60ygqx7w/loop-bench-l6roqn5z', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,137 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 44" - }, - { - "name": "line_clear", - "pass": true, - "detail": "1 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [154] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 120, - "lines_cleared": 2, - "max_score_observed": 182, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 28 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8706 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run2/eval_results.json @@ -1,49 +1,11 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "no build script defined (static project)" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-3gvujmf9/loop-bench-k3paesnr', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 48614, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-3gvujmf9/loop-bench-k3paesnr', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,138 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.56, - "total": 16, - "passed": 9, - "failed": 7, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 64, - "height": 128 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [138] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 9, - "failed": 7, - "score": 0.56 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 168, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 24 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7519 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run3/eval_results.json @@ -1,49 +1,11 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-9kxwf7dz/loop-bench-j7p5ruqx', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 45822, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-9kxwf7dz/loop-bench-j7p5ruqx', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,137 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.75, - "total": 16, - "passed": 12, - "failed": 4, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [I] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "1 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [154] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 80 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 12, - "failed": 4, - "score": 0.75 - }, - "gameplay": { - "pieces_placed": 180, - "lines_cleared": 2, - "max_score_observed": 172, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 29 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8112 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run1/eval_results.json @@ -1,49 +1,11 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-fw_9cpgu/loop-bench-sx5ep9jz', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 11827, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-fw_9cpgu/loop-bench-sx5ep9jz', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,137 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.75, - "total": 16, - "passed": 12, - "failed": 4, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [156] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 12, - "failed": 4, - "score": 0.75 - }, - "gameplay": { - "pieces_placed": 189, - "lines_cleared": 1, - "max_score_observed": 230, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 28 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8206 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run2/eval_results.json @@ -1,49 +1,11 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-5j34pz8r/loop-bench-t34c4nm0', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 32578, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-5j34pz8r/loop-bench-t34c4nm0', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,137 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 16 -> 44" - }, - { - "name": "line_clear", - "pass": true, - "detail": "1 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [164] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 119, - "lines_cleared": 2, - "max_score_observed": 186, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 26 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.88 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3/eval_results.json @@ -1,49 +1,11 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-0_q053vs/loop-bench-1oqz1n2q', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 47946, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-0_q053vs/loop-bench-1oqz1n2q', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,139 +87,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.63, - "total": 16, - "passed": 10, - "failed": 6, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 64, - "height": 128 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [168] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 10, - "failed": 6, - "score": 0.63 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 232, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 21 - }, - "accessibility": { - "issues": [ - "no headings found", - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 4, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7737 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1/eval_results.json @@ -1,53 +1,15 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-iuedekw4/loop-bench-ste9dw4l', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 46717, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-iuedekw4/loop-bench-ste9dw4l', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { - "total": 7, + "total": 8, "code": 3, "docs": 0, "unnecessary": 0, @@ -56,8 +18,8 @@ "lines_of_code": 1454, "dependencies": { "production": 0, - "dev": 5, - "total": 5 + "dev": 7, + "total": 7 }, "complexity": "moderate", "console_logs": 0, @@ -96,10 +58,10 @@ }, "html_validation": { "valid": false, - "errors": 2 + "errors": 0 }, "duplication_percentage": 0.0, - "score": 0.85 + "score": 0.5 }, "transcript_analysis": { "total_events": 42, @@ -258,5 +220,10 @@ } } }, - "score": 0.804 + "outcome_score": 0.315, + "score": 0.315, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-r8vyc4_o/loop-bench-s2o7c9uv', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,66 +13,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 48181, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 8, - "code": 4, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1468, - "dependencies": { - "production": 0, - "dev": 5, - "total": 5 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 26, - "excessive": true - }, - "function_length": { - "count": 59, - "average": 6.4, - "max": 23, - "long_functions": 0 - }, - "max_nesting_depth": 14, - "global_declarations": 18, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 517, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 71, - "source_lines": 1176, - "ratio_pct": 6.0 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 3, - "files_with_logic": 3, - "files_with_both": 3 - }, - "html_validation": { - "valid": false, - "errors": 7 - }, - "duplication_percentage": 0.0, - "score": 0.85 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-r8vyc4_o/loop-bench-s2o7c9uv', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 52, @@ -125,137 +47,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "enter", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via enter" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 40" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [162] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 129, - "lines_cleared": 2, - "max_score_observed": 180, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 125 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8815 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run3/eval_results.json @@ -25,29 +25,13 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 36162, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-dzqt_mrd/loop-bench-exsc6g82', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { - "total": 9, + "total": 10, "code": 4, "docs": 0, "unnecessary": 0, @@ -56,8 +40,8 @@ "lines_of_code": 1190, "dependencies": { "production": 0, - "dev": 5, - "total": 5 + "dev": 7, + "total": 7 }, "complexity": "moderate", "console_logs": 0, @@ -125,137 +109,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.81, - "total": 16, - "passed": 13, - "failed": 3, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": false, - "detail": "no change detected after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 44" - }, - { - "name": "line_clear", - "pass": true, - "detail": "1 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [160] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 80 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 13, - "failed": 3, - "score": 0.81 - }, - "gameplay": { - "pieces_placed": 121, - "lines_cleared": 2, - "max_score_observed": 144, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 28 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.849 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-05r17q0m/loop-bench-doqfpofi', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,66 +13,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 51159, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 11, - "code": 5, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1624, - "dependencies": { - "production": 0, - "dev": 8, - "total": 8 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 42, - "excessive": true - }, - "function_length": { - "count": 68, - "average": 7.4, - "max": 43, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 20, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 549, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 84, - "source_lines": 1269, - "ratio_pct": 6.6 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 3, - "files_with_logic": 3, - "files_with_both": 3 - }, - "html_validation": { - "valid": false, - "errors": 9 - }, - "duplication_percentage": 0.0, - "score": 0.85 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-05r17q0m/loop-bench-doqfpofi', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 48, @@ -125,138 +47,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.63, - "total": 16, - "passed": 10, - "failed": 6, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 40, - "height": 80 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "enter", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via enter" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [166] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 10, - "failed": 6, - "score": 0.63 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 208, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 85 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7737 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-idtri8by/loop-bench-9b42xbl4', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,66 +13,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 37747, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 8, - "code": 3, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1199, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 54, - "excessive": true - }, - "function_length": { - "count": 69, - "average": 6.0, - "max": 27, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 24, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 459, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 80, - "source_lines": 892, - "ratio_pct": 9.0 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 9 - }, - "duplication_percentage": 0.0, - "score": 0.85 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-idtri8by/loop-bench-9b42xbl4', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 53, @@ -125,138 +47,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.63, - "total": 16, - "passed": 10, - "failed": 6, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 36, - "height": 72 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [210] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 10, - "failed": 6, - "score": 0.63 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 140, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 30 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7737 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-dvwvbo88/loop-bench-amzysv_v', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,66 +13,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 36714, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 8, - "code": 3, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1134, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 20, - "excessive": false - }, - "function_length": { - "count": 56, - "average": 7.4, - "max": 43, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 66, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 385, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 55, - "source_lines": 899, - "ratio_pct": 6.1 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 8 - }, - "duplication_percentage": 0.0, - "score": 0.9 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-dvwvbo88/loop-bench-amzysv_v', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 43, @@ -125,137 +47,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.88, - "total": 16, - "passed": 14, - "failed": 2, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [I] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 42" - }, - { - "name": "line_clear", - "pass": true, - "detail": "1 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [226] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 - }, - "gameplay": { - "pieces_placed": 120, - "lines_cleared": 2, - "max_score_observed": 196, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 23 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8612 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,25 +25,9 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 15602, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-cfq1fa7e/loop-bench-kncx228p', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,137 +109,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "enter", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via enter" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected after drop" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 48" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [186] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 130, - "lines_cleared": 1, - "max_score_observed": 174, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 79 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.805 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-le3rczwj/loop-bench-6c3p0wr6', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,66 +13,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 56061, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 9, - "code": 4, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1647, - "dependencies": { - "production": 0, - "dev": 6, - "total": 6 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 58, - "excessive": true - }, - "function_length": { - "count": 89, - "average": 7.7, - "max": 45, - "long_functions": 0 - }, - "max_nesting_depth": 15, - "global_declarations": 24, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 567, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 119, - "source_lines": 1195, - "ratio_pct": 10.0 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 7 - }, - "duplication_percentage": 0.0, - "score": 0.85 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-le3rczwj/loop-bench-6c3p0wr6', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 64, @@ -125,139 +47,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.69, - "total": 16, - "passed": 11, - "failed": 5, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 60, - "height": 120 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "space", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via space" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "visual change detected after hard drop" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "visual change suggests new piece spawned" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [214] -> no change after polling" - }, - { - "name": "game_over", - "pass": false, - "detail": "could not trigger or detect game over" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 11, - "failed": 5, - "score": 0.69 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 72, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 41 - }, - "accessibility": { - "issues": [ - "no headings found", - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 4, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7925 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=off_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-2zfhi3g8/loop-bench-9r05i_lq', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,68 +13,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 25387, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { - "files": { - "total": 8, - "code": 3, - "docs": 1, - "unnecessary": 1, - "unnecessary_list": [ - "README.md" - ] - }, - "lines_of_code": 1535, - "dependencies": { - "production": 0, - "dev": 6, - "total": 6 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 64, - "excessive": true - }, - "function_length": { - "count": 75, - "average": 6.7, - "max": 30, - "long_functions": 0 - }, - "max_nesting_depth": 12, - "global_declarations": 22, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 534, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 136, - "source_lines": 1078, - "ratio_pct": 12.6 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 0 - }, - "duplication_percentage": 0.0, - "score": 0.4 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-2zfhi3g8/loop-bench-9r05i_lq', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 82, @@ -127,140 +47,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.81, - "total": 16, - "passed": 13, - "failed": 3, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 42, - "height": 84 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "visual change detected after hard drop" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "visual change suggests new piece spawned" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": true, - "detail": "score changed from 184 to 284" - }, - { - "name": "game_over", - "pass": false, - "detail": "could not trigger or detect game over" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 13, - "failed": 3, - "score": 0.81 - }, - "gameplay": { - "pieces_placed": 188, - "lines_cleared": 1, - "max_score_observed": 100, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 25 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 5, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7269 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,11 +1,11 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -23,38 +23,21 @@ "detail": "tsc --noEmit passed" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.75 }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": false, - "errors": 1 - }, - "performance": { - "bundle_size_bytes": 184192, - "size_under_512kb": true - }, - "score": 0.33 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-oqdanj22/loop-bench-zhl3wjhq', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { - "total": 8, - "code": 4, + "total": 7, + "code": 3, "docs": 0, "unnecessary": 0, "unnecessary_list": [] }, - "lines_of_code": 2477, + "lines_of_code": 1777, "dependencies": { "production": 0, "dev": 7, @@ -258,5 +241,10 @@ } } }, - "score": 0.695 + "outcome_score": 0.44, + "score": 0.44, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,11 +1,11 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -23,37 +23,21 @@ "detail": "tsc --noEmit passed" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.75 }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 20120, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-eb26nye_/loop-bench-_a_0urqo', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { - "total": 8, - "code": 4, + "total": 7, + "code": 3, "docs": 0, "unnecessary": 0, "unnecessary_list": [] }, - "lines_of_code": 2856, + "lines_of_code": 1999, "dependencies": { "production": 0, "dev": 7, @@ -257,5 +241,10 @@ } } }, - "score": 0.7862 + "outcome_score": 0.47, + "score": 0.47, + "sonarqube": { + "error": "SonarQube scan timed out", + "score": 0 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,66 +36,15 @@ "error": "no tsconfig.json" }, "performance": { + "pass": true, "bundle_size_bytes": 162000, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { - "files": { - "total": 7, - "code": 3, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1657, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 28, - "excessive": true - }, - "function_length": { - "count": 55, - "average": 6.6, - "max": 26, - "long_functions": 0 - }, - "max_nesting_depth": 10, - "global_declarations": 18, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 100.0, - "camel_case": 328, - "snake_case": 0 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 101, - "source_lines": 781, - "ratio_pct": 12.9 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 0 - }, - "duplication_percentage": 0.0, - "score": 0.5 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-x_a00wq5/loop-bench-gmilst_1', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 74, @@ -126,137 +70,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.88, - "total": 16, - "passed": 14, - "failed": 2, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "z", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [unknown] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" - }, - { - "name": "line_clear", - "pass": true, - "detail": "11 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [0] -> no change after polling" - }, - { - "name": "game_over", - "pass": false, - "detail": "could not trigger or detect game over" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 83 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 - }, - "gameplay": { - "pieces_placed": 125, - "lines_cleared": 39, - "max_score_observed": 0, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 31 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.6637 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=detailed_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json @@ -16,7 +16,8 @@ "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -32,62 +33,62 @@ { "name": "auto_drop", "pass": true, - "detail": "grid state changed after 5s with no input" + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": true, - "detail": "piece shape changed after rotate key" + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", "pass": true, - "detail": "rotated: [unknown] failed: [] (tested 1 piece types in 60 attempts)" + "detail": "rotation confirmed but could not identify individual piece types" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", "pass": true, - "detail": "filled cells persist at bottom" + "detail": "filled cells persist at bottom (grid-verified, 1 lock event(s))" }, { "name": "new_piece_spawns", "pass": true, - "detail": "new piece detected at top of grid" + "detail": "5 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", "pass": true, - "detail": "game still responding after 10 piece drops" + "detail": "11 pieces placed during play session" }, { "name": "line_clear", "pass": true, - "detail": "11 line(s) cleared during AI play" + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [0] -> no change after polling" + "detail": "score stayed at 0" }, { "name": "game_over", @@ -97,7 +98,7 @@ { "name": "playable_30s", "pass": true, - "detail": "played for 30s, placed 83 pieces, no crashes" + "detail": "played for 31s, placed 37 pieces, no crashes" } ], "summary": { @@ -107,14 +108,25 @@ "score": 0.88 }, "gameplay": { - "pieces_placed": 125, - "lines_cleared": 39, + "pieces_placed": 37, + "lines_cleared": 1, "max_score_observed": 0, - "play_duration_seconds": 30, + "play_duration_seconds": 31, "errors_during_play": 0 }, + "session": { + "frames": 145, + "events_count": 9, + "pieces_spawned": 5, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 31 + "load_time_ms": 447 }, "accessibility": { "issues": [ diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,81 +25,13 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 16947, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-t3fuj7u6/loop-bench-_7ks5zgn', 'typescript']' timed out after 120 seconds" }, "code_analysis": { - "files": { - "total": 7, - "code": 3, - "docs": 0, - "unnecessary": 0, - "unnecessary_list": [] - }, - "lines_of_code": 1375, - "dependencies": { - "production": 0, - "dev": 7, - "total": 7 - }, - "complexity": "moderate", - "console_logs": 0, - "magic_numbers": { - "count": 66, - "excessive": true - }, - "function_length": { - "count": 58, - "average": 7.6, - "max": 27, - "long_functions": 0 - }, - "max_nesting_depth": 14, - "global_declarations": 30, - "naming": { - "dominant_style": "camelCase", - "consistency_pct": 92.4, - "camel_case": 363, - "snake_case": 30 - }, - "error_handling": { - "try_catch_blocks": 0, - "has_error_handling": false - }, - "comments": { - "comment_lines": 72, - "source_lines": 1035, - "ratio_pct": 7.0 - }, - "separation_of_concerns": { - "verdict": "mixed", - "files_with_rendering": 2, - "files_with_logic": 2, - "files_with_both": 2 - }, - "html_validation": { - "valid": false, - "errors": 2 - }, - "duplication_percentage": 0.0, - "score": 0.85 + "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/tmp/reeval-t3fuj7u6/loop-bench-_7ks5zgn', 'typescript']' timed out after 120 seconds", + "score": 0 }, "transcript_analysis": { "total_events": 55, @@ -125,138 +57,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 320, - "height": 640 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 33" - }, - { - "name": "line_clear", - "pass": true, - "detail": "2 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [148] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 120, - "lines_cleared": 5, - "max_score_observed": 238, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 103 - }, - "accessibility": { - "issues": [ - "no headings found", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8394 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json @@ -12,11 +12,12 @@ "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "ArrowUp", + "rotate": "x", "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -32,62 +33,62 @@ { "name": "auto_drop", "pass": true, - "detail": "grid state changed after 5s with no input" + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": true, - "detail": "piece shape changed after rotate key" + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "detail": "rotation confirmed but could not identify individual piece types" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", "pass": true, - "detail": "filled cells persist at bottom" + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", "pass": true, - "detail": "new piece detected at top of grid" + "detail": "6 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", "pass": true, - "detail": "grid accumulated cells: 20 -> 33" + "detail": "11 pieces placed during play session" }, { "name": "line_clear", "pass": true, - "detail": "2 line(s) cleared during AI play" + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [148] -> no change after polling" + "detail": "score stayed at 296" }, { "name": "game_over", @@ -97,7 +98,7 @@ { "name": "playable_30s", "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" + "detail": "played for 30s, placed 31 pieces, no crashes" } ], "summary": { @@ -107,22 +108,29 @@ "score": 0.94 }, "gameplay": { - "pieces_placed": 120, - "lines_cleared": 5, - "max_score_observed": 238, + "pieces_placed": 31, + "lines_cleared": 1, + "max_score_observed": 296, "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 159, + "events_count": 11, + "pieces_spawned": 6, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 103 + "load_time_ms": 4276 }, "accessibility": { - "issues": [ - "no headings found", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false + "issues": [], + "issue_count": 0, + "pass": true } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,25 +25,9 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 25381, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-i4ymkfiq/loop-bench-hdcmmx_h', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -96,10 +80,10 @@ }, "html_validation": { "valid": false, - "errors": 0 + "errors": 6 }, "duplication_percentage": 0.0, - "score": 0.5 + "score": 0.85 }, "transcript_analysis": { "total_events": 57, @@ -125,138 +109,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.81, - "total": 16, - "passed": 13, - "failed": 3, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 40, - "height": 80 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "visual change detected after hard drop" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "visual change suggests new piece spawned" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": true, - "detail": "score changed from 138 to 246" - }, - { - "name": "game_over", - "pass": false, - "detail": "could not trigger or detect game over" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 77 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 13, - "failed": 3, - "score": 0.81 - }, - "gameplay": { - "pieces_placed": 187, - "lines_cleared": 1, - "max_score_observed": 66, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 28 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7644 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=off_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,25 +25,9 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 17708, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-d59z4ylb/loop-bench-udnn4aje', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -124,138 +108,14 @@ "score": 1.0 }, "gameplay_bot": { - "pass": true, - "score": 1, - "total": 16, - "passed": 16, - "failed": 0, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "enter", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via enter" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" - }, - { - "name": "line_clear", - "pass": true, - "detail": "11 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": true, - "detail": "score changed from 96 to 192" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 83 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 16, - "failed": 0, - "score": 1 - }, - "gameplay": { - "pieces_placed": 124, - "lines_cleared": 42, - "max_score_observed": 216, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 40 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "pass": false, + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8987 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,25 +25,9 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 13233, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-wnxcksi7/loop-bench-ymvhffsl', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -125,137 +109,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.88, - "total": 16, - "passed": 14, - "failed": 2, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "click_canvas", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via button" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": false, - "detail": "no change detected after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "game still responding after 10 piece drops" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [0] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 29s, placed 100 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 - }, - "gameplay": { - "pieces_placed": 151, - "lines_cleared": 1, - "max_score_observed": 0, - "play_duration_seconds": 29, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 77 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8706 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,25 +25,9 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 15345, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-4g5vr4fy/loop-bench-9osp3kim', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -96,10 +80,10 @@ }, "html_validation": { "valid": false, - "errors": 2 + "errors": 0 }, "duplication_percentage": 0.0, - "score": 0.85 + "score": 0.5 }, "transcript_analysis": { "total_events": 52, @@ -125,137 +109,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 44" - }, - { - "name": "line_clear", - "pass": true, - "detail": "1 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [166] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 120, - "lines_cleared": 2, - "max_score_observed": 142, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 76 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8394 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=off_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,49 +1,11 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-0_h4aplp/loop-bench-5xgkq0xm', 'typescript']' timed out after 120 seconds" }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 22016, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-0_h4aplp/loop-bench-5xgkq0xm', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -128,5 +90,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5769 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,25 +25,9 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "lint": { - "pass": true, - "errors": 0, - "warnings": 0 - }, - "typecheck": { - "pass": true - }, - "performance": { - "bundle_size_bytes": 16576, - "size_under_512kb": true - }, - "score": 0.67 + "pass": false, + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/tmp/reeval-6xzte1vr/loop-bench-05evwwhn', 'typescript']' timed out after 120 seconds" }, "code_analysis": { "files": { @@ -96,10 +80,10 @@ }, "html_validation": { "valid": false, - "errors": 2 + "errors": 0 }, "duplication_percentage": 0.0, - "score": 0.85 + "score": 0.5 }, "transcript_analysis": { "total_events": 61, @@ -125,137 +109,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected after drop" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 40" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [162] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 26s, placed 100 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 151, - "lines_cleared": 1, - "max_score_observed": 132, - "play_duration_seconds": 26, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 937 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8394 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,49 +1,25 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-ldeahnr5/loop-bench-_vpevx35', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { - "pass": true, - "errors": 0, - "warnings": 0 + "pass": false, + "errors": -1, + "warnings": 0, + "error": "eslint failed to run" }, "typecheck": { - "pass": true + "pass": false, + "errors": 0 }, "performance": { + "pass": true, "bundle_size_bytes": 21434, "size_under_512kb": true }, - "score": 0.67 + "score": 0.33 }, "code_analysis": { "files": { @@ -96,10 +72,10 @@ }, "html_validation": { "valid": false, - "errors": 2 + "errors": 0 }, "duplication_percentage": 0.0, - "score": 0.85 + "score": 0.5 }, "transcript_analysis": { "total_events": 63, @@ -125,137 +101,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.88, - "total": 16, - "passed": 14, - "failed": 2, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "enter", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via enter" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [I] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 16 -> 40" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [214] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 37s, placed 20 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 - }, - "gameplay": { - "pieces_placed": 71, - "lines_cleared": 1, - "max_score_observed": 174, - "play_duration_seconds": 37, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 745 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8519 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=off_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,34 +1,7 @@ { "structural": { - "pass": true, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "npm run build completed successfully" - }, - { - "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" - } - ], - "score": 1.0 - }, - "functional": { "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-r14ej_nz/loop-bench-ofmivkls', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -37,9 +10,11 @@ "warnings": 0 }, "typecheck": { - "pass": true + "pass": false, + "errors": 0 }, "performance": { + "pass": true, "bundle_size_bytes": 15383, "size_under_512kb": true }, @@ -96,10 +71,10 @@ }, "html_validation": { "valid": false, - "errors": 3 + "errors": 0 }, "duplication_percentage": 0.0, - "score": 0.85 + "score": 0.5 }, "transcript_analysis": { "total_events": 49, @@ -128,5 +103,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5769 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,34 +1,7 @@ { "structural": { "pass": false, - "checks": [ - { - "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" - }, - { - "name": "package_json_exists", - "pass": true, - "detail": "package.json found" - }, - { - "name": "build_succeeds", - "pass": true, - "detail": "no build script defined (static project)" - }, - { - "name": "typescript_compiles", - "pass": false, - "detail": "tsc --noEmit failed" - } - ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/tmp/reeval-wuh6htkd/loop-bench-q0okqwr1', 'typescript']' timed out after 120 seconds" }, "quality": { "lint": { @@ -40,10 +13,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 35118, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -125,138 +99,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.88, - "total": 16, - "passed": 14, - "failed": 2, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "x", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": false, - "detail": "no change detected after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 16 -> 49" - }, - { - "name": "line_clear", - "pass": true, - "detail": "2 line(s) cleared during AI play" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [222] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 14, - "failed": 2, - "score": 0.88 - }, - "gameplay": { - "pieces_placed": 120, - "lines_cleared": 2, - "max_score_observed": 222, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 2944 - }, - "accessibility": { - "issues": [ - "no headings found", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.8206 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 32045, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -125,137 +121,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.94, - "total": 16, - "passed": 15, - "failed": 1, - "report": { - "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "auto", - "score_element_found": true - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": true, - "detail": "started via auto" - }, - { - "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" - }, - { - "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" - }, - { - "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [I] failed: [] (tested 1 piece types in 60 attempts)" - }, - { - "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" - }, - { - "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" - }, - { - "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" - }, - { - "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 16 -> 36" - }, - { - "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" - }, - { - "name": "score_changes", - "pass": false, - "detail": "score did not increase: [294] -> no change after polling" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 70 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 15, - "failed": 1, - "score": 0.94 - }, - "gameplay": { - "pieces_placed": 121, - "lines_cleared": 1, - "max_score_observed": 186, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 358 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 2, - "pass": false - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.7831 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=off_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 18674, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -125,8 +121,139 @@ }, "gameplay_bot": { "pass": false, - "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "score": 0.56, + "total": 16, + "passed": 9, + "failed": 7, + "report": { + "implementation": { + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 45, + "height": 90 + }, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "ArrowDown", + "rotate": "x", + "drop": "Space" + }, + "start_mechanism": "auto", + "score_element_found": true + }, + "tests": [ + { + "name": "game_loads", + "pass": true, + "detail": "no console errors" + }, + { + "name": "game_starts", + "pass": true, + "detail": "started via auto" + }, + { + "name": "auto_drop", + "pass": true, + "detail": "pixels changed after 5s with no input" + }, + { + "name": "move_left", + "pass": true, + "detail": "grid state changed after key press" + }, + { + "name": "move_right", + "pass": true, + "detail": "grid state changed after key press" + }, + { + "name": "move_down", + "pass": true, + "detail": "grid state changed after key press" + }, + { + "name": "rotate", + "pass": true, + "detail": "piece shape changed after rotate key" + }, + { + "name": "all_pieces_rotate", + "pass": false, + "detail": "could not detect any piece rotations" + }, + { + "name": "hard_drop", + "pass": false, + "detail": "no change detected after hard drop key" + }, + { + "name": "piece_locks", + "pass": false, + "detail": "could not verify piece locking at bottom" + }, + { + "name": "new_piece_spawns", + "pass": false, + "detail": "could not detect new piece at top" + }, + { + "name": "multiple_pieces", + "pass": false, + "detail": "grid did not accumulate filled cells" + }, + { + "name": "line_clear", + "pass": true, + "detail": "line cleared via strategic placement" + }, + { + "name": "score_changes", + "pass": false, + "detail": "score did not increase: [176] -> no change after polling" + }, + { + "name": "game_over", + "pass": true, + "detail": "game stopped after stacking to top" + }, + { + "name": "playable_30s", + "pass": false, + "detail": "exception: keyboard.press: Target page, context or browser has been closed" + } + ], + "summary": { + "total": 16, + "passed": 9, + "failed": 7, + "score": 0.56 + }, + "gameplay": { + "pieces_placed": 104, + "lines_cleared": 1, + "max_score_observed": 0, + "play_duration_seconds": 0, + "errors_during_play": 0 + }, + "performance": { + "load_time_ms": 1405 + }, + "accessibility": { + "issues": [], + "issue_count": 0, + "pass": true + } + } }, - "score": 0.5862 + "outcome_score": 0.56, + "score": 0.56, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 15813, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -128,5 +124,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.555 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 24905, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -128,5 +124,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.505 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=off_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 16835, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -128,5 +124,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5769 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 16066, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -128,5 +124,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5206 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,10 +36,11 @@ "errors": 2 }, "performance": { + "pass": true, "bundle_size_bytes": 150310, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { @@ -129,5 +125,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.4262 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 45421, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -126,7 +122,143 @@ "gameplay_bot": { "pass": false, "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "total": 16, + "passed": 0, + "failed": 16, + "report": { + "implementation": { + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "ArrowDown", + "rotate": "ArrowUp", + "drop": "Space" + }, + "start_mechanism": "unknown", + "score_element_found": false, + "grid_confidence": 0 + }, + "tests": [ + { + "name": "game_loads", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "game_starts", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "auto_drop", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "move_left", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "move_right", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "move_down", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "rotate", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "all_pieces_rotate", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "hard_drop", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "piece_locks", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "new_piece_spawns", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "multiple_pieces", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "line_clear", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "score_changes", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "game_over", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "playable_30s", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" + } + ], + "summary": { + "total": 16, + "passed": 0, + "failed": 16, + "score": 0 + }, + "gameplay": { + "pieces_placed": 0, + "lines_cleared": 0, + "max_score_observed": 0, + "play_duration_seconds": 0, + "errors_during_play": 0 + }, + "session": { + "frames": 0, + "events_count": 0, + "pieces_spawned": 0, + "pieces_locked": 0, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, + "performance": { + "load_time_ms": -1 + }, + "accessibility": { + "issues": [], + "issue_count": 0, + "pass": true + } + } }, - "score": 0.5769 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=off_web_search=on_run3/gameplay-bot-report.json @@ -1,13 +1,8 @@ { "implementation": { - "renderer": "canvas", - "grid_detected": true, - "grid_bounds": { - "x": 0, - "y": 0, - "width": 300, - "height": 600 - }, + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, "controls": { "left": "ArrowLeft", "right": "ArrowRight", @@ -15,106 +10,116 @@ "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "auto", - "score_element_found": false + "start_mechanism": "unknown", + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { "name": "game_loads", - "pass": true, - "detail": "no console errors" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_starts", - "pass": true, - "detail": "started via auto" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "auto_drop", - "pass": true, - "detail": "grid state changed after 5s with no input" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_down", "pass": false, - "detail": "no change detected after key press" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "rotate", "pass": false, - "detail": "no change detected after rotate key" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "all_pieces_rotate", - "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "hard_drop", - "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "piece_locks", - "pass": true, - "detail": "filled cells persist at bottom" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "new_piece_spawns", - "pass": true, - "detail": "new piece detected at top of grid" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "multiple_pieces", - "pass": true, - "detail": "grid accumulated cells: 20 -> 42" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "line_clear", - "pass": true, - "detail": "line cleared via strategic placement" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "score_changes", "pass": false, - "detail": "no score element found and no number changed" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "playable_30s", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:33953/\" is interrupted by another navigation to \"http://127.0.0.1:33953/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33953/\", waiting until \"domcontentloaded\"\u001b[22m\n" } ], "summary": { "total": 16, - "passed": 12, - "failed": 4, - "score": 0.75 + "passed": 0, + "failed": 16, + "score": 0 }, "gameplay": { - "pieces_placed": 36, - "lines_cleared": 1, + "pieces_placed": 0, + "lines_cleared": 0, "max_score_observed": 0, "play_duration_seconds": 0, "errors_during_play": 0 }, + "session": { + "frames": 0, + "events_count": 0, + "pieces_spawned": 0, + "pieces_locked": 0, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 1514 + "load_time_ms": -1 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 65248, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -126,136 +122,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "1 console error(s): Cannot read properties of undefined (reading '0')" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 5864 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 7, - "pass": false - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5862 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 51756, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -126,7 +122,143 @@ "gameplay_bot": { "pass": false, "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "total": 16, + "passed": 0, + "failed": 16, + "report": { + "implementation": { + "renderer": "unknown", + "grid_detected": false, + "grid_bounds": null, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "ArrowDown", + "rotate": "ArrowUp", + "drop": "Space" + }, + "start_mechanism": "unknown", + "score_element_found": false, + "grid_confidence": 0 + }, + "tests": [ + { + "name": "game_loads", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "game_starts", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "auto_drop", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "move_left", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "move_right", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "move_down", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "rotate", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "all_pieces_rotate", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "hard_drop", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "piece_locks", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "new_piece_spawns", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "multiple_pieces", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "line_clear", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "score_changes", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "game_over", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + }, + { + "name": "playable_30s", + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" + } + ], + "summary": { + "total": 16, + "passed": 0, + "failed": 16, + "score": 0 + }, + "gameplay": { + "pieces_placed": 0, + "lines_cleared": 0, + "max_score_observed": 0, + "play_duration_seconds": 0, + "errors_during_play": 0 + }, + "session": { + "frames": 0, + "events_count": 0, + "pieces_spawned": 0, + "pieces_locked": 0, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, + "performance": { + "load_time_ms": -1 + }, + "accessibility": { + "issues": [], + "issue_count": 0, + "pass": true + } + } }, - "score": 0.5769 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run2/gameplay-bot-report.json @@ -11,105 +11,115 @@ "drop": "Space" }, "start_mechanism": "unknown", - "score_element_found": false + "score_element_found": false, + "grid_confidence": 0 }, "tests": [ { "name": "game_loads", - "pass": true, - "detail": "no console errors" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_starts", "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "hard_drop", "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "piece_locks", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "new_piece_spawns", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "multiple_pieces", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "line_clear", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "score_changes", "pass": false, - "detail": "exception: page.evaluate: Target page, context or browser has been closed" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "game_over", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" }, { "name": "playable_30s", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "page load failed: page.goto: Navigation to \"http://127.0.0.1:41317/\" is interrupted by another navigation to \"http://127.0.0.1:41317/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:41317/\", waiting until \"domcontentloaded\"\u001b[22m\n" } ], "summary": { "total": 16, - "passed": 6, - "failed": 10, - "score": 0.38 + "passed": 0, + "failed": 16, + "score": 0 }, "gameplay": { - "pieces_placed": 16, + "pieces_placed": 0, "lines_cleared": 0, "max_score_observed": 0, "play_duration_seconds": 0, "errors_during_play": 0 }, + "session": { + "frames": 0, + "events_count": 0, + "pieces_spawned": 0, + "pieces_locked": 0, + "lines_cleared": 0, + "piece_types_seen": [], + "grid_read_success_rate": 0 + }, "performance": { - "load_time_ms": 136 + "load_time_ms": -1 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=off_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 22236, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -126,128 +122,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:36761/\", waiting until \"domcontentloaded\"\u001b[22m\n" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 4258 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5769 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,10 +36,11 @@ "errors": 2 }, "performance": { + "pass": true, "bundle_size_bytes": 165944, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { @@ -129,5 +125,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.4919 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 42635, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -96,10 +92,10 @@ }, "html_validation": { "valid": false, - "errors": 0 + "errors": 9 }, "duplication_percentage": 0.0, - "score": 0.5 + "score": 0.85 }, "transcript_analysis": { "total_events": 45, @@ -128,5 +124,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.48 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 23112, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -96,10 +92,10 @@ }, "html_validation": { "valid": false, - "errors": 3 + "errors": 0 }, "duplication_percentage": 0.0, - "score": 0.85 + "score": 0.5 }, "transcript_analysis": { "total_events": 74, @@ -128,5 +124,19 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5706 + "outcome_score": 0.38, + "score": 0.38, + "sonarqube": { + "bugs": 0, + "vulnerabilities": 0, + "code_smells": 3, + "cognitive_complexity": 161, + "lines_of_code": 695, + "duplication_pct": 0.0, + "tech_debt_minutes": 70, + "maintainability": "A", + "reliability": "A", + "security": "A", + "score": 0.76 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,11 +1,11 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -18,12 +18,7 @@ "detail": "no build script defined (static project)" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.67 }, "quality": { "lint": { @@ -36,15 +31,16 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 198885, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 10, - "code": 2, + "total": 9, + "code": 1, "docs": 4, "unnecessary": 2, "unnecessary_list": [ @@ -52,7 +48,7 @@ "README.md" ] }, - "lines_of_code": 1540, + "lines_of_code": 770, "dependencies": { "production": 0, "dev": 5, @@ -88,7 +84,7 @@ "ratio_pct": 0.0 }, "separation_of_concerns": { - "verdict": "unclear", + "verdict": "single-file", "files_with_rendering": 0, "files_with_logic": 0, "files_with_both": 0 @@ -127,5 +123,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3763 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,11 +1,11 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -18,12 +18,7 @@ "detail": "no build script defined (static project)" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.67 }, "quality": { "lint": { @@ -36,20 +31,21 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 158945, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 5, - "code": 2, + "total": 4, + "code": 1, "docs": 0, "unnecessary": 0, "unnecessary_list": [] }, - "lines_of_code": 1422, + "lines_of_code": 711, "dependencies": { "production": 0, "dev": 5, @@ -85,7 +81,7 @@ "ratio_pct": 0.0 }, "separation_of_concerns": { - "verdict": "unclear", + "verdict": "single-file", "files_with_rendering": 0, "files_with_logic": 0, "files_with_both": 0 @@ -122,128 +118,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "1 console error(s): Cannot read properties of undefined (reading '0')" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 1604 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.445 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,11 +1,11 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -18,12 +18,7 @@ "detail": "no build script defined (static project)" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.67 }, "quality": { "lint": { @@ -36,20 +31,21 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 126680, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 5, - "code": 2, + "total": 4, + "code": 1, "docs": 0, "unnecessary": 0, "unnecessary_list": [] }, - "lines_of_code": 1220, + "lines_of_code": 610, "dependencies": { "production": 0, "dev": 5, @@ -85,7 +81,7 @@ "ratio_pct": 0.0 }, "separation_of_concerns": { - "verdict": "unclear", + "verdict": "single-file", "files_with_rendering": 0, "files_with_logic": 0, "files_with_both": 0 @@ -122,132 +118,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "1 console error(s): Cannot read properties of undefined (reading '0')" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 1604 - }, - "accessibility": { - "issues": [ - "canvas without aria-label or role", - "canvas without aria-label or role", - "canvas without aria-label or role" - ], - "issue_count": 3, - "pass": false - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.445 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -20,11 +20,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -36,10 +31,11 @@ "error": "no tsconfig.json" }, "performance": { + "pass": true, "bundle_size_bytes": 108129, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { @@ -124,5 +120,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.445 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -20,11 +20,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -36,10 +31,11 @@ "error": "no tsconfig.json" }, "performance": { + "pass": true, "bundle_size_bytes": 115343, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { @@ -124,5 +120,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.445 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -20,14 +20,22 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { - "pass": false, - "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/root/loop-benchmarking/dashboard/public/artifacts/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1', 'unspecified']' timed out after 120 seconds" + "lint": { + "pass": true, + "errors": 0, + "warnings": 0 + }, + "typecheck": { + "pass": false, + "error": "no tsconfig.json" + }, + "performance": { + "pass": true, + "bundle_size_bytes": 114345, + "size_under_512kb": true + }, + "score": 0.67 }, "code_analysis": { "files": { @@ -112,5 +120,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.4833 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,11 +1,11 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -18,26 +18,34 @@ "detail": "no build script defined (static project)" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.67 }, "quality": { - "pass": false, - "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/root/loop-benchmarking/dashboard/public/artifacts/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2', 'unspecified']' timed out after 120 seconds" + "lint": { + "pass": true, + "errors": 0, + "warnings": 0 + }, + "typecheck": { + "pass": false, + "error": "no tsconfig.json" + }, + "performance": { + "pass": true, + "bundle_size_bytes": 0, + "size_under_512kb": true + }, + "score": 0.67 }, "code_analysis": { "files": { - "total": 5, - "code": 2, + "total": 4, + "code": 1, "docs": 0, "unnecessary": 0, "unnecessary_list": [] }, - "lines_of_code": 1582, + "lines_of_code": 791, "dependencies": { "production": 0, "dev": 5, @@ -73,7 +81,7 @@ "ratio_pct": 0.0 }, "separation_of_concerns": { - "verdict": "unclear", + "verdict": "single-file", "files_with_rendering": 0, "files_with_logic": 0, "files_with_both": 0 @@ -112,5 +120,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.4833 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=en_language=unspecified_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,11 +1,11 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -18,12 +18,7 @@ "detail": "no build script defined (static project)" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.67 }, "quality": { "lint": { @@ -36,20 +31,21 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 152745, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 5, - "code": 2, + "total": 4, + "code": 1, "docs": 0, "unnecessary": 0, "unnecessary_list": [] }, - "lines_of_code": 2144, + "lines_of_code": 1072, "dependencies": { "production": 0, "dev": 5, @@ -85,7 +81,7 @@ "ratio_pct": 0.0 }, "separation_of_concerns": { - "verdict": "unclear", + "verdict": "single-file", "files_with_rendering": 0, "files_with_logic": 0, "files_with_both": 0 @@ -121,129 +117,13 @@ }, "gameplay_bot": { "pass": false, - "score": 0.19, - "total": 16, - "passed": 3, - "failed": 13, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": true, - "detail": "no console errors" - }, - { - "name": "game_starts", - "pass": false, - "detail": "could not start game with any mechanism" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "piece did not move in 5 seconds" - }, - { - "name": "move_left", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "move_right", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "move_down", - "pass": false, - "detail": "no change detected after key press" - }, - { - "name": "rotate", - "pass": false, - "detail": "no change detected after rotate key" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "could not detect any piece rotations" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "no change detected after hard drop key" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "could not detect new piece at top" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "grid did not accumulate filled cells" - }, - { - "name": "line_clear", - "pass": false, - "detail": "could not trigger or detect a line clear" - }, - { - "name": "score_changes", - "pass": false, - "detail": "no score element found and no number changed" - }, - { - "name": "game_over", - "pass": true, - "detail": "game stopped after stacking to top" - }, - { - "name": "playable_30s", - "pass": true, - "detail": "played for 30s, placed 11 pieces, no crashes" - } - ], - "summary": { - "total": 16, - "passed": 3, - "failed": 13, - "score": 0.19 - }, - "gameplay": { - "pieces_placed": 102, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 30, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 19 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "score": 0, + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5044 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,14 +36,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 176510, + "pass": true, + "bundle_size_bytes": 107072, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 12, + "total": 11, "code": 4, "docs": 5, "unnecessary": 2, @@ -60,8 +56,8 @@ "lines_of_code": 1243, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 1, @@ -100,10 +96,10 @@ }, "html_validation": { "valid": false, - "errors": 0 + "errors": 3 }, "duplication_percentage": 0.0, - "score": 0.33 + "score": 0.68 }, "transcript_analysis": { "total_events": 84, @@ -132,5 +128,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3319 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -4,8 +4,8 @@ "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -23,12 +23,7 @@ "detail": "tsc --noEmit failed" } ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.5 }, "quality": { "lint": { @@ -41,28 +36,29 @@ "errors": 3 }, "performance": { - "bundle_size_bytes": 240829, + "pass": true, + "bundle_size_bytes": 0, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 15, - "code": 3, + "total": 13, + "code": 2, "docs": 8, "unnecessary": 1, "unnecessary_list": [ "README.md" ] }, - "lines_of_code": 1679, + "lines_of_code": 1086, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, - "complexity": "moderate", + "complexity": "minimal", "console_logs": 0, "magic_numbers": { "count": 8, @@ -129,128 +125,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Navigation to \"http://127.0.0.1:44091/\" is interrupted by another navigation to \"http://127.0.0.1:44091/public/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:44091/\", waiting until \"domcontentloaded\"\u001b[22m\n" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": -1 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3669 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -4,8 +4,8 @@ "checks": [ { "name": "entry_point_exists", - "pass": true, - "detail": "index.html found" + "pass": false, + "detail": "no index.html found in workspace root, dist/, or public/" }, { "name": "package_json_exists", @@ -23,150 +23,116 @@ "detail": "TypeScript files found but no tsconfig.json" } ], - "score": 0.75 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.5 }, "quality": { - "pass": false, - "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/root/loop-benchmarking/dashboard/public/artifacts/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3', 'typescript']' timed out after 120 seconds" + "lint": { + "pass": true, + "errors": 0, + "warnings": 0 + }, + "typecheck": { + "pass": false, + "error": "no tsconfig.json" + }, + "performance": { + "pass": true, + "bundle_size_bytes": 0, + "size_under_512kb": true + }, + "score": 0.67 }, "code_analysis": { - "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/code-analysis.py', '/root/loop-benchmarking/dashboard/public/artifacts/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3', 'typescript']' timed out after 120 seconds", - "score": 0 + "files": { + "total": 16, + "code": 6, + "docs": 7, + "unnecessary": 3, + "unnecessary_list": [ + "server.js", + "server.js", + "README.md" + ] + }, + "lines_of_code": 1184, + "dependencies": { + "production": 0, + "dev": 5, + "total": 5 + }, + "complexity": "over-engineered", + "console_logs": 2, + "magic_numbers": { + "count": 20, + "excessive": false + }, + "function_length": { + "count": 48, + "average": 6.4, + "max": 22, + "long_functions": 0 + }, + "max_nesting_depth": 12, + "global_declarations": 24, + "naming": { + "dominant_style": "camelCase", + "consistency_pct": 100.0, + "camel_case": 616, + "snake_case": 0 + }, + "error_handling": { + "try_catch_blocks": 0, + "has_error_handling": false + }, + "comments": { + "comment_lines": 28, + "source_lines": 832, + "ratio_pct": 3.4 + }, + "separation_of_concerns": { + "verdict": "mixed", + "files_with_rendering": 2, + "files_with_logic": 2, + "files_with_both": 2 + }, + "html_validation": { + "valid": false, + "errors": 2 + }, + "duplication_percentage": 0.0, + "score": 0.46 }, "transcript_analysis": { - "error": "Command '['python3', '/root/loop-benchmarking/tasks/tetris/eval/transcript-analysis.py', '/root/loop-benchmarking/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3']' timed out after 30 seconds", - "score": 0 + "total_events": 150, + "tool_calls": { + "total": 45, + "bash": 37, + "write": 0, + "edit": 4, + "read": 4 + }, + "wasted_turns": { + "total": 20, + "docs": 6, + "ascii_art": 3, + "server_starts": 11 + }, + "errors_encountered": 0, + "thinking_blocks": 46, + "text_blocks": 10, + "productivity_ratio": 0.56, + "self_tested": false, + "score": 0.75 }, "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "1 console error(s): Unexpected token '<'" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 97 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.125 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,16 +1,45 @@ { "structural": { - "pass": false, - "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/root/loop-benchmarking/dashboard/public/artifacts/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1', 'typescript']' timed out after 120 seconds" - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "pass": true, + "checks": [ + { + "name": "entry_point_exists", + "pass": true, + "detail": "index.html found" + }, + { + "name": "package_json_exists", + "pass": true, + "detail": "package.json found" + }, + { + "name": "build_succeeds", + "pass": true, + "detail": "no build script defined (static project)" + }, + { + "name": "typescript_compiles", + "pass": true, + "detail": "tsc --noEmit passed" + } + ], + "score": 1.0 }, "quality": { - "pass": false, - "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/root/loop-benchmarking/dashboard/public/artifacts/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1', 'typescript']' timed out after 120 seconds" + "lint": { + "pass": true, + "errors": 0, + "warnings": 0 + }, + "typecheck": { + "pass": true + }, + "performance": { + "pass": true, + "bundle_size_bytes": 14468, + "size_under_512kb": true + }, + "score": 1.0 }, "code_analysis": { "files": { @@ -63,10 +92,10 @@ }, "html_validation": { "valid": false, - "errors": 0 + "errors": 2 }, "duplication_percentage": 0.0, - "score": 0.5 + "score": 0.85 }, "transcript_analysis": { "total_events": 39, @@ -95,5 +124,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.35 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,16 +1,45 @@ { "structural": { - "pass": false, - "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/root/loop-benchmarking/dashboard/public/artifacts/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2', 'typescript']' timed out after 120 seconds" - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "pass": true, + "checks": [ + { + "name": "entry_point_exists", + "pass": true, + "detail": "index.html found" + }, + { + "name": "package_json_exists", + "pass": true, + "detail": "package.json found" + }, + { + "name": "build_succeeds", + "pass": true, + "detail": "no build script defined (static project)" + }, + { + "name": "typescript_compiles", + "pass": true, + "detail": "tsc --noEmit passed" + } + ], + "score": 1.0 }, "quality": { - "pass": false, - "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/root/loop-benchmarking/dashboard/public/artifacts/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2', 'typescript']' timed out after 120 seconds" + "lint": { + "pass": true, + "errors": 0, + "warnings": 0 + }, + "typecheck": { + "pass": true + }, + "performance": { + "pass": true, + "bundle_size_bytes": 38881, + "size_under_512kb": true + }, + "score": 1.0 }, "code_analysis": { "files": { @@ -63,10 +92,10 @@ }, "html_validation": { "valid": false, - "errors": 0 + "errors": 3 }, "duplication_percentage": 0.0, - "score": 0.5 + "score": 0.85 }, "transcript_analysis": { "total_events": 67, @@ -95,5 +124,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.35 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -1,16 +1,45 @@ { "structural": { - "pass": false, - "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/structural.sh', '/root/loop-benchmarking/dashboard/public/artifacts/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3', 'typescript']' timed out after 120 seconds" - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "pass": true, + "checks": [ + { + "name": "entry_point_exists", + "pass": true, + "detail": "index.html found" + }, + { + "name": "package_json_exists", + "pass": true, + "detail": "package.json found" + }, + { + "name": "build_succeeds", + "pass": true, + "detail": "no build script defined (static project)" + }, + { + "name": "typescript_compiles", + "pass": true, + "detail": "tsc --noEmit passed" + } + ], + "score": 1.0 }, "quality": { - "pass": false, - "error": "Command '['bash', '/root/loop-benchmarking/tasks/tetris/eval/quality.sh', '/root/loop-benchmarking/dashboard/public/artifacts/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3', 'typescript']' timed out after 120 seconds" + "lint": { + "pass": true, + "errors": 0, + "warnings": 0 + }, + "typecheck": { + "pass": true + }, + "performance": { + "pass": true, + "bundle_size_bytes": 45268, + "size_under_512kb": true + }, + "score": 1.0 }, "code_analysis": { "files": { @@ -63,10 +92,10 @@ }, "html_validation": { "valid": false, - "errors": 0 + "errors": 3 }, "duplication_percentage": 0.0, - "score": 0.4 + "score": 0.75 }, "transcript_analysis": { "total_events": 51, @@ -95,5 +124,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.32 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 68649, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -128,5 +124,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5519 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 24975, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -126,128 +122,21 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:38503/\", waiting until \"domcontentloaded\"\u001b[22m\n" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": -1 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5112 + "outcome_score": 0.26, + "score": 0.26, + "sonarqube": { + "bugs": 1, + "vulnerabilities": 0, + "code_smells": 6, + "cognitive_complexity": 156, + "lines_of_code": 813, + "duplication_pct": 0.0, + "tech_debt_minutes": 60, + "maintainability": "A", + "reliability": "C", + "security": "A", + "score": 0.52 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=high_human_language=es_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 21723, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -126,128 +122,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:33367/\", waiting until \"domcontentloaded\"\u001b[22m\n" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 7033 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.48 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,14 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 35209, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 19, + "total": 18, "code": 4, "docs": 8, "unnecessary": 2, @@ -59,8 +55,8 @@ "lines_of_code": 1114, "dependencies": { "production": 0, - "dev": 4, - "total": 4 + "dev": 2, + "total": 2 }, "complexity": "moderate", "console_logs": 0, @@ -129,128 +125,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:46525/\", waiting until \"domcontentloaded\"\u001b[22m\n" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 7299 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5081 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -14,8 +14,8 @@ }, { "name": "build_succeeds", - "pass": false, - "detail": "npm run build failed" + "pass": true, + "detail": "npm run build completed successfully" }, { "name": "typescript_compiles", @@ -23,12 +23,7 @@ "detail": "TypeScript files found but no tsconfig.json" } ], - "score": 0.5 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.75 }, "quality": { "lint": { @@ -41,14 +36,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 204210, + "pass": true, + "bundle_size_bytes": 137831, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 16, + "total": 15, "code": 3, "docs": 9, "unnecessary": 1, @@ -59,8 +55,8 @@ "lines_of_code": 1206, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 0, @@ -129,128 +125,12 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:37507/\", waiting until \"domcontentloaded\"\u001b[22m\n" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": -1 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3887 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,14 +36,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 195519, + "pass": true, + "bundle_size_bytes": 127262, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 16, + "total": 15, "code": 5, "docs": 7, "unnecessary": 3, @@ -61,8 +57,8 @@ "lines_of_code": 1360, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 3, @@ -133,5 +129,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.3619 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 55143, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -128,5 +124,10 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5769 + "outcome_score": 0.0, + "score": 0.0, + "sonarqube": { + "pass": false, + "error": "no output" + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 58894, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -96,10 +92,10 @@ }, "html_validation": { "valid": false, - "errors": 8 + "errors": 0 }, "duplication_percentage": 0.0, - "score": 0.75 + "score": 0.4 }, "transcript_analysis": { "total_events": 112, @@ -126,128 +122,21 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:35703/\", waiting until \"domcontentloaded\"\u001b[22m\n" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": 5999 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5581 + "outcome_score": 0.325, + "score": 0.325, + "sonarqube": { + "bugs": 0, + "vulnerabilities": 0, + "code_smells": 13, + "cognitive_complexity": 152, + "lines_of_code": 1149, + "duplication_pct": 0.0, + "tech_debt_minutes": 60, + "maintainability": "A", + "reliability": "A", + "security": "A", + "score": 0.65 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 46436, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -96,10 +92,10 @@ }, "html_validation": { "valid": false, - "errors": 8 + "errors": 0 }, "duplication_percentage": 0.0, - "score": 0.85 + "score": 0.5 }, "transcript_analysis": { "total_events": 71, @@ -128,5 +124,19 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5769 + "outcome_score": 0.25, + "score": 0.25, + "sonarqube": { + "bugs": 1, + "vulnerabilities": 0, + "code_smells": 15, + "cognitive_complexity": 137, + "lines_of_code": 753, + "duplication_pct": 4.6, + "tech_debt_minutes": 80, + "maintainability": "A", + "reliability": "C", + "security": "A", + "score": 0.5 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json @@ -16,7 +16,8 @@ "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -32,93 +33,107 @@ { "name": "auto_drop", "pass": true, - "detail": "grid state changed after 5s with no input" + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "no shape change detected after rotate key" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n" + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", "pass": true, - "detail": "filled cells persist at bottom" + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", "pass": true, - "detail": "new piece detected at top of grid" + "detail": "6 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", - "pass": false, - "detail": "exception: keyboard.press: Test timeout of 180000ms exceeded." + "pass": true, + "detail": "11 pieces placed during play session" }, { "name": "line_clear", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "could not read score element" + "detail": "score stayed at 212" }, { "name": "game_over", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "played for 30s, placed 41 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 9, - "failed": 7, - "score": 0.56 + "passed": 13, + "failed": 3, + "score": 0.81 }, "gameplay": { - "pieces_placed": 1, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, + "pieces_placed": 41, + "lines_cleared": 1, + "max_score_observed": 212, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 204, + "events_count": 10, + "pieces_spawned": 6, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 2793 + "load_time_ms": 280 }, "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true + "issues": [ + "canvas without aria-label or role", + "canvas without aria-label or role" + ], + "issue_count": 2, + "pass": false } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 15762, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -125,129 +121,163 @@ }, "gameplay_bot": { "pass": false, - "score": 0, + "score": 0.94, "total": 16, - "passed": 0, - "failed": 16, + "passed": 15, + "failed": 1, "report": { "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 300, + "height": 600 + }, "controls": { "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "ArrowUp", + "rotate": "z", "drop": "Space" }, - "start_mechanism": "unknown", - "score_element_found": false + "start_mechanism": "auto", + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:39551/\", waiting until \"domcontentloaded\"\u001b[22m\n" + "pass": true, + "detail": "no console errors" }, { "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "started via auto" }, { "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "rotation confirmed but could not identify individual piece types" }, { "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "2 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "11 pieces placed during play session" }, { "name": "line_clear", "pass": false, - "detail": "skipped: page did not load" + "detail": "could not trigger or detect a line clear via grid reader" }, { "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "score changed from 362 to 372" }, { "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "played for 30s, placed 39 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 0, - "failed": 16, - "score": 0 + "passed": 15, + "failed": 1, + "score": 0.94 }, "gameplay": { - "pieces_placed": 0, + "pieces_placed": 39, "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, + "max_score_observed": 372, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 298, + "events_count": 9, + "pieces_spawned": 2, + "pieces_locked": 11, + "lines_cleared": 0, + "piece_types_seen": [ + "unknown" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": -1 + "load_time_ms": 181 }, "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true + "issues": [ + "canvas without aria-label or role", + "canvas without aria-label or role" + ], + "issue_count": 2, + "pass": false } } }, - "score": 0.555 + "outcome_score": 0.72, + "score": 0.72, + "sonarqube": { + "bugs": 2, + "vulnerabilities": 0, + "code_smells": 5, + "cognitive_complexity": 83, + "lines_of_code": 528, + "duplication_pct": 0.0, + "tech_debt_minutes": 13, + "maintainability": "A", + "reliability": "C", + "security": "A", + "score": 0.5 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json @@ -1,119 +1,139 @@ { "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 300, + "height": 600 + }, "controls": { "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "ArrowUp", + "rotate": "z", "drop": "Space" }, - "start_mechanism": "unknown", - "score_element_found": false + "start_mechanism": "auto", + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:39551/\", waiting until \"domcontentloaded\"\u001b[22m\n" + "pass": true, + "detail": "no console errors" }, { "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "started via auto" }, { "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "rotation confirmed but could not identify individual piece types" }, { "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "2 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "11 pieces placed during play session" }, { "name": "line_clear", "pass": false, - "detail": "skipped: page did not load" + "detail": "could not trigger or detect a line clear via grid reader" }, { "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "score changed from 362 to 372" }, { "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "played for 30s, placed 39 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 0, - "failed": 16, - "score": 0 + "passed": 15, + "failed": 1, + "score": 0.94 }, "gameplay": { - "pieces_placed": 0, + "pieces_placed": 39, "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, + "max_score_observed": 372, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 298, + "events_count": 9, + "pieces_spawned": 2, + "pieces_locked": 11, + "lines_cleared": 0, + "piece_types_seen": [ + "unknown" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": -1 + "load_time_ms": 181 }, "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true + "issues": [ + "canvas without aria-label or role", + "canvas without aria-label or role" + ], + "issue_count": 2, + "pass": false } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 36784, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -126,128 +122,21 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Navigation to \"http://127.0.0.1:40031/\" is interrupted by another navigation to \"http://127.0.0.1:40031/public/index.html\"\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:40031/\", waiting until \"domcontentloaded\"\u001b[22m\n" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": -1 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.5769 + "outcome_score": 0.3, + "score": 0.3, + "sonarqube": { + "bugs": 1, + "vulnerabilities": 0, + "code_smells": 9, + "cognitive_complexity": 74, + "lines_of_code": 562, + "duplication_pct": 0.0, + "tech_debt_minutes": 21, + "maintainability": "A", + "reliability": "C", + "security": "A", + "score": 0.6 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 24709, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -96,10 +92,10 @@ }, "html_validation": { "valid": false, - "errors": 0 + "errors": 3 }, "duplication_percentage": 0.0, - "score": 0.5 + "score": 0.85 }, "transcript_analysis": { "total_events": 50, @@ -125,8 +121,164 @@ }, "gameplay_bot": { "pass": false, - "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "score": 0.94, + "total": 16, + "passed": 15, + "failed": 1, + "report": { + "implementation": { + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 300, + "height": 600 + }, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "ArrowDown", + "rotate": "x", + "drop": "Space" + }, + "start_mechanism": "auto", + "score_element_found": true, + "grid_confidence": 1 + }, + "tests": [ + { + "name": "game_loads", + "pass": true, + "detail": "no console errors" + }, + { + "name": "game_starts", + "pass": true, + "detail": "started via auto" + }, + { + "name": "auto_drop", + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" + }, + { + "name": "move_left", + "pass": true, + "detail": "grid state changed after key press (grid-verified)" + }, + { + "name": "move_right", + "pass": true, + "detail": "grid state changed after key press (grid-verified)" + }, + { + "name": "move_down", + "pass": true, + "detail": "grid state changed after key press (grid-verified)" + }, + { + "name": "rotate", + "pass": true, + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" + }, + { + "name": "all_pieces_rotate", + "pass": true, + "detail": "rotation confirmed but could not identify individual piece types" + }, + { + "name": "hard_drop", + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" + }, + { + "name": "piece_locks", + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" + }, + { + "name": "new_piece_spawns", + "pass": true, + "detail": "5 new piece(s) detected at top of grid" + }, + { + "name": "multiple_pieces", + "pass": true, + "detail": "11 pieces placed during play session" + }, + { + "name": "line_clear", + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" + }, + { + "name": "score_changes", + "pass": true, + "detail": "score changed from 0 to 144" + }, + { + "name": "game_over", + "pass": false, + "detail": "could not trigger or detect game over" + }, + { + "name": "playable_30s", + "pass": true, + "detail": "played for 30s, placed 43 pieces, no crashes" + } + ], + "summary": { + "total": 16, + "passed": 15, + "failed": 1, + "score": 0.94 + }, + "gameplay": { + "pieces_placed": 43, + "lines_cleared": 1, + "max_score_observed": 144, + "play_duration_seconds": 30, + "errors_during_play": 0 + }, + "session": { + "frames": 288, + "events_count": 10, + "pieces_spawned": 5, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown" + ], + "grid_read_success_rate": 1 + }, + "performance": { + "load_time_ms": 231 + }, + "accessibility": { + "issues": [ + "no headings found", + "canvas without aria-label or role", + "canvas without aria-label or role" + ], + "issue_count": 3, + "pass": false + } + } }, - "score": 0.5112 + "outcome_score": 0.72, + "score": 0.72, + "sonarqube": { + "bugs": 1, + "vulnerabilities": 0, + "code_smells": 10, + "cognitive_complexity": 122, + "lines_of_code": 753, + "duplication_pct": 0.0, + "tech_debt_minutes": 49, + "maintainability": "A", + "reliability": "C", + "security": "A", + "score": 0.5 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json b/results/runs/tetris_context_file=none_effort=max_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json @@ -16,7 +16,8 @@ "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -31,94 +32,109 @@ }, { "name": "auto_drop", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n" + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": true, - "detail": "piece shape changed after rotate key" + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "detail": "rotation confirmed but could not identify individual piece types" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", "pass": true, - "detail": "filled cells persist at bottom" + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", "pass": true, - "detail": "new piece detected at top of grid" + "detail": "5 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", - "pass": false, - "detail": "exception: keyboard.press: Test timeout of 180000ms exceeded." + "pass": true, + "detail": "11 pieces placed during play session" }, { "name": "line_clear", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", - "pass": false, - "detail": "could not read score element" + "pass": true, + "detail": "score changed from 0 to 144" }, { "name": "game_over", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "could not trigger or detect game over" }, { "name": "playable_30s", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "played for 30s, placed 43 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 8, - "failed": 8, - "score": 0.5 + "passed": 15, + "failed": 1, + "score": 0.94 }, "gameplay": { - "pieces_placed": 1, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, + "pieces_placed": 43, + "lines_cleared": 1, + "max_score_observed": 144, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 288, + "events_count": 10, + "pieces_spawned": 5, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": -1 + "load_time_ms": 231 }, "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true + "issues": [ + "no headings found", + "canvas without aria-label or role", + "canvas without aria-label or role" + ], + "issue_count": 3, + "pass": false } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,14 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 13181, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 10, + "total": 9, "code": 3, "docs": 3, "unnecessary": 1, @@ -58,8 +54,8 @@ "lines_of_code": 1150, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 0, @@ -127,8 +123,166 @@ }, "gameplay_bot": { "pass": false, - "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "score": 0.81, + "total": 16, + "passed": 13, + "failed": 3, + "report": { + "implementation": { + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 240, + "height": 400 + }, + "controls": { + "left": "a", + "right": "d", + "down": "s", + "rotate": "z", + "drop": "Space" + }, + "start_mechanism": "auto", + "score_element_found": true, + "grid_confidence": 1 + }, + "tests": [ + { + "name": "game_loads", + "pass": true, + "detail": "no console errors" + }, + { + "name": "game_starts", + "pass": true, + "detail": "started via auto" + }, + { + "name": "auto_drop", + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" + }, + { + "name": "move_left", + "pass": false, + "detail": "no grid change detected after key press" + }, + { + "name": "move_right", + "pass": false, + "detail": "no grid change detected after key press" + }, + { + "name": "move_down", + "pass": true, + "detail": "grid state changed after key press (grid-verified)" + }, + { + "name": "rotate", + "pass": true, + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" + }, + { + "name": "all_pieces_rotate", + "pass": true, + "detail": "rotation observed, piece types seen: [O, L, unknown]" + }, + { + "name": "hard_drop", + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" + }, + { + "name": "piece_locks", + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" + }, + { + "name": "new_piece_spawns", + "pass": true, + "detail": "4 new piece(s) detected at top of grid" + }, + { + "name": "multiple_pieces", + "pass": true, + "detail": "11 pieces placed during play session" + }, + { + "name": "line_clear", + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" + }, + { + "name": "score_changes", + "pass": false, + "detail": "score stayed at 0" + }, + { + "name": "game_over", + "pass": true, + "detail": "game stopped after stacking to top" + }, + { + "name": "playable_30s", + "pass": true, + "detail": "played for 30s, placed 35 pieces, no crashes" + } + ], + "summary": { + "total": 16, + "passed": 13, + "failed": 3, + "score": 0.81 + }, + "gameplay": { + "pieces_placed": 35, + "lines_cleared": 1, + "max_score_observed": 0, + "play_duration_seconds": 30, + "errors_during_play": 0 + }, + "session": { + "frames": 304, + "events_count": 9, + "pieces_spawned": 4, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "O", + "L", + "unknown" + ], + "grid_read_success_rate": 1 + }, + "performance": { + "load_time_ms": 216 + }, + "accessibility": { + "issues": [ + "no headings found", + "canvas without aria-label or role", + "canvas without aria-label or role" + ], + "issue_count": 3, + "pass": false + } + } }, - "score": 0.4831 + "outcome_score": 0.63, + "score": 0.63, + "sonarqube": { + "bugs": 2, + "vulnerabilities": 0, + "code_smells": 9, + "cognitive_complexity": 85, + "lines_of_code": 653, + "duplication_pct": 0.0, + "tech_debt_minutes": 18, + "maintainability": "A", + "reliability": "C", + "security": "A", + "score": 0.45 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json @@ -1,17 +1,23 @@ { "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 240, + "height": 400 + }, "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", + "left": "a", + "right": "d", + "down": "s", + "rotate": "z", "drop": "Space" }, - "start_mechanism": "unknown", - "score_element_found": false + "start_mechanism": "auto", + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -21,99 +27,116 @@ }, { "name": "game_starts", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n\u001b[2m - fonts loaded\u001b[22m\n" + "pass": true, + "detail": "started via auto" }, { "name": "auto_drop", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n\u001b[2m - fonts loaded\u001b[22m\n" + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n" + "detail": "no grid change detected after key press" }, { "name": "move_right", "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n" + "detail": "no grid change detected after key press" }, { "name": "move_down", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n" + "pass": true, + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", - "pass": false, - "detail": "exception: page.reload: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - waiting for navigation until \"load\"\u001b[22m\n" + "pass": true, + "detail": "rotation observed, piece types seen: [O, L, unknown]" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", - "pass": false, - "detail": "could not verify piece locking at bottom" + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n" + "pass": true, + "detail": "4 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "11 pieces placed during play session" }, { "name": "line_clear", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "exception: page.evaluate: Target page, context or browser has been closed" + "detail": "score stayed at 0" }, { "name": "game_over", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "played for 30s, placed 35 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 2, - "failed": 14, - "score": 0.13 + "passed": 13, + "failed": 3, + "score": 0.81 }, "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, + "pieces_placed": 35, + "lines_cleared": 1, "max_score_observed": 0, - "play_duration_seconds": 0, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 304, + "events_count": 9, + "pieces_spawned": 4, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "O", + "L", + "unknown" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 924 + "load_time_ms": 216 }, "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true + "issues": [ + "no headings found", + "canvas without aria-label or role", + "canvas without aria-label or role" + ], + "issue_count": 3, + "pass": false } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -1,6 +1,6 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", @@ -19,16 +19,11 @@ }, { "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" + "pass": false, + "detail": "tsc --noEmit failed" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.75 }, "quality": { "lint": { @@ -40,14 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 12904, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 10, + "total": 9, "code": 3, "docs": 3, "unnecessary": 1, @@ -58,8 +54,8 @@ "lines_of_code": 1102, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 0, @@ -127,8 +123,160 @@ }, "gameplay_bot": { "pass": false, - "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "score": 0.94, + "total": 16, + "passed": 15, + "failed": 1, + "report": { + "implementation": { + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 300, + "height": 600 + }, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "ArrowDown", + "rotate": "z", + "drop": "Space" + }, + "start_mechanism": "auto", + "score_element_found": true, + "grid_confidence": 1 + }, + "tests": [ + { + "name": "game_loads", + "pass": true, + "detail": "no console errors" + }, + { + "name": "game_starts", + "pass": true, + "detail": "started via auto" + }, + { + "name": "auto_drop", + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" + }, + { + "name": "move_left", + "pass": true, + "detail": "grid state changed after key press (grid-verified)" + }, + { + "name": "move_right", + "pass": true, + "detail": "grid state changed after key press (grid-verified)" + }, + { + "name": "move_down", + "pass": true, + "detail": "grid state changed after key press (grid-verified)" + }, + { + "name": "rotate", + "pass": true, + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" + }, + { + "name": "all_pieces_rotate", + "pass": true, + "detail": "rotation confirmed but could not identify individual piece types" + }, + { + "name": "hard_drop", + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" + }, + { + "name": "piece_locks", + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" + }, + { + "name": "new_piece_spawns", + "pass": true, + "detail": "4 new piece(s) detected at top of grid" + }, + { + "name": "multiple_pieces", + "pass": true, + "detail": "11 pieces placed during play session" + }, + { + "name": "line_clear", + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" + }, + { + "name": "score_changes", + "pass": false, + "detail": "score stayed at 0" + }, + { + "name": "game_over", + "pass": true, + "detail": "game stopped after stacking to top" + }, + { + "name": "playable_30s", + "pass": true, + "detail": "played for 30s, placed 39 pieces, no crashes" + } + ], + "summary": { + "total": 16, + "passed": 15, + "failed": 1, + "score": 0.94 + }, + "gameplay": { + "pieces_placed": 39, + "lines_cleared": 1, + "max_score_observed": 0, + "play_duration_seconds": 30, + "errors_during_play": 0 + }, + "session": { + "frames": 298, + "events_count": 11, + "pieces_spawned": 4, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown" + ], + "grid_read_success_rate": 1 + }, + "performance": { + "load_time_ms": 303 + }, + "accessibility": { + "issues": [], + "issue_count": 0, + "pass": true + } + } }, - "score": 0.4613 + "outcome_score": 0.845, + "score": 0.845, + "sonarqube": { + "bugs": 0, + "vulnerabilities": 0, + "code_smells": 9, + "cognitive_complexity": 96, + "lines_of_code": 618, + "duplication_pct": 4.1, + "tech_debt_minutes": 39, + "maintainability": "A", + "reliability": "A", + "security": "A", + "score": 0.75 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json @@ -1,17 +1,23 @@ { "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 300, + "height": 600 + }, "controls": { "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "ArrowUp", + "rotate": "z", "drop": "Space" }, - "start_mechanism": "unknown", - "score_element_found": false + "start_mechanism": "auto", + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -21,95 +27,106 @@ }, { "name": "game_starts", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n\u001b[2m - fonts loaded\u001b[22m\n" + "pass": true, + "detail": "started via auto" }, { "name": "auto_drop", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n" + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": true, - "detail": "piece shape changed after rotate key" + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "rotation confirmed but could not identify individual piece types" }, { "name": "hard_drop", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "4 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "11 pieces placed during play session" }, { "name": "line_clear", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "exception: page.evaluate: Target page, context or browser has been closed" + "detail": "score stayed at 0" }, { "name": "game_over", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "played for 30s, placed 39 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 3, - "failed": 13, - "score": 0.19 + "passed": 15, + "failed": 1, + "score": 0.94 }, "gameplay": { - "pieces_placed": 12, - "lines_cleared": 0, + "pieces_placed": 39, + "lines_cleared": 1, "max_score_observed": 0, - "play_duration_seconds": 0, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 298, + "events_count": 11, + "pieces_spawned": 4, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 5543 + "load_time_ms": 303 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,14 +36,15 @@ "error": "no tsconfig.json" }, "performance": { - "bundle_size_bytes": 172481, + "pass": true, + "bundle_size_bytes": 108495, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { - "total": 14, + "total": 13, "code": 4, "docs": 6, "unnecessary": 1, @@ -59,8 +55,8 @@ "lines_of_code": 1312, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 0, @@ -99,10 +95,10 @@ }, "html_validation": { "valid": false, - "errors": 14 + "errors": 0 }, "duplication_percentage": 0.0, - "score": 0.75 + "score": 0.4 }, "transcript_analysis": { "total_events": 124, @@ -131,5 +127,19 @@ "score": 0, "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.4106 + "outcome_score": 0.375, + "score": 0.375, + "sonarqube": { + "bugs": 0, + "vulnerabilities": 0, + "code_smells": 9, + "cognitive_complexity": 96, + "lines_of_code": 618, + "duplication_pct": 4.1, + "tech_debt_minutes": 39, + "maintainability": "A", + "reliability": "A", + "security": "A", + "score": 0.75 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=high_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json @@ -1,17 +1,23 @@ { "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 240, + "height": 480 + }, "controls": { "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "ArrowUp", + "rotate": "x", "drop": "Space" }, - "start_mechanism": "unknown", - "score_element_found": false + "start_mechanism": "click_canvas", + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -21,99 +27,113 @@ }, { "name": "game_starts", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n" + "pass": true, + "detail": "started via click_canvas" }, { "name": "auto_drop", "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n\u001b[2m - fonts loaded\u001b[22m\n" + "detail": "piece did not move down in 5 seconds (grid-verified)" }, { "name": "move_left", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n" + "detail": "no shape change detected after rotate key" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "exception: page.reload: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - waiting for navigation until \"load\"\u001b[22m\n" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "4 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "11 pieces placed during play session" }, { "name": "line_clear", "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "detail": "could not trigger or detect a line clear via grid reader" }, { "name": "score_changes", - "pass": false, - "detail": "exception: page.evaluate: Target page, context or browser has been closed" + "pass": true, + "detail": "score changed from 304 to 318" }, { "name": "game_over", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "played for 30s, placed 22 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 2, - "failed": 14, - "score": 0.13 + "passed": 12, + "failed": 4, + "score": 0.75 }, "gameplay": { - "pieces_placed": 0, + "pieces_placed": 22, "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, + "max_score_observed": 318, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 341, + "events_count": 7, + "pieces_spawned": 4, + "pieces_locked": 11, + "lines_cleared": 0, + "piece_types_seen": [ + "unknown" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 270 + "load_time_ms": 214 }, "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true + "issues": [ + "canvas without aria-label or role", + "canvas without aria-label or role" + ], + "issue_count": 2, + "pass": false } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -1,6 +1,6 @@ { "structural": { - "pass": true, + "pass": false, "checks": [ { "name": "entry_point_exists", @@ -19,16 +19,11 @@ }, { "name": "typescript_compiles", - "pass": true, - "detail": "tsc --noEmit passed" + "pass": false, + "detail": "tsc --noEmit failed" } ], - "score": 1.0 - }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 + "score": 0.75 }, "quality": { "lint": { @@ -40,14 +35,15 @@ "pass": true }, "performance": { - "bundle_size_bytes": 183952, + "pass": true, + "bundle_size_bytes": 110276, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 14, + "total": 13, "code": 3, "docs": 6, "unnecessary": 3, @@ -60,8 +56,8 @@ "lines_of_code": 1259, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 0, @@ -129,8 +125,164 @@ }, "gameplay_bot": { "pass": false, - "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "score": 0.81, + "total": 16, + "passed": 13, + "failed": 3, + "report": { + "implementation": { + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 300, + "height": 600 + }, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "ArrowDown", + "rotate": "ArrowUp", + "drop": "Space" + }, + "start_mechanism": "click_canvas", + "score_element_found": true, + "grid_confidence": 1 + }, + "tests": [ + { + "name": "game_loads", + "pass": true, + "detail": "no console errors" + }, + { + "name": "game_starts", + "pass": true, + "detail": "started via click_canvas" + }, + { + "name": "auto_drop", + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" + }, + { + "name": "move_left", + "pass": true, + "detail": "grid state changed after key press (grid-verified)" + }, + { + "name": "move_right", + "pass": true, + "detail": "grid state changed after key press (grid-verified)" + }, + { + "name": "move_down", + "pass": true, + "detail": "grid state changed after key press (grid-verified)" + }, + { + "name": "rotate", + "pass": false, + "detail": "no shape change detected after rotate key" + }, + { + "name": "all_pieces_rotate", + "pass": false, + "detail": "could not detect any piece rotations via grid reader" + }, + { + "name": "hard_drop", + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" + }, + { + "name": "piece_locks", + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" + }, + { + "name": "new_piece_spawns", + "pass": true, + "detail": "2 new piece(s) detected at top of grid" + }, + { + "name": "multiple_pieces", + "pass": true, + "detail": "11 pieces placed during play session" + }, + { + "name": "line_clear", + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" + }, + { + "name": "score_changes", + "pass": false, + "detail": "score stayed at 0" + }, + { + "name": "game_over", + "pass": true, + "detail": "game stopped after stacking to top" + }, + { + "name": "playable_30s", + "pass": true, + "detail": "played for 30s, placed 22 pieces, no crashes" + } + ], + "summary": { + "total": 16, + "passed": 13, + "failed": 3, + "score": 0.81 + }, + "gameplay": { + "pieces_placed": 22, + "lines_cleared": 1, + "max_score_observed": 0, + "play_duration_seconds": 30, + "errors_during_play": 0 + }, + "session": { + "frames": 331, + "events_count": 9, + "pieces_spawned": 2, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "I" + ], + "grid_read_success_rate": 1 + }, + "performance": { + "load_time_ms": 222 + }, + "accessibility": { + "issues": [ + "no headings found", + "canvas without aria-label or role", + "canvas without aria-label or role" + ], + "issue_count": 3, + "pass": false + } + } }, - "score": 0.5112 + "outcome_score": 0.63, + "score": 0.63, + "sonarqube": { + "bugs": 2, + "vulnerabilities": 0, + "code_smells": 7, + "cognitive_complexity": 97, + "lines_of_code": 654, + "duplication_pct": 0.0, + "tech_debt_minutes": 39, + "maintainability": "A", + "reliability": "C", + "security": "A", + "score": 0.45 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json @@ -1,8 +1,13 @@ { "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 300, + "height": 600 + }, "controls": { "left": "ArrowLeft", "right": "ArrowRight", @@ -10,8 +15,9 @@ "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "unknown", - "score_element_found": false + "start_mechanism": "click_canvas", + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -21,99 +27,114 @@ }, { "name": "game_starts", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n\u001b[2m - fonts loaded\u001b[22m\n" + "pass": true, + "detail": "started via click_canvas" }, { "name": "auto_drop", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n\u001b[2m - fonts loaded\u001b[22m\n" + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n\u001b[2m - fonts loaded\u001b[22m\n" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", - "pass": false, - "detail": "exception: page.screenshot: Timeout 10000ms exceeded.\nCall log:\n\u001b[2m - taking page screenshot\u001b[22m\n\u001b[2m - waiting for fonts to load...\u001b[22m\n\u001b[2m - fonts loaded\u001b[22m\n" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "no shape change detected after rotate key" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "exception: page.waitForTimeout: Test timeout of 180000ms exceeded." + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", - "pass": false, - "detail": "exception: page.screenshot: Target page, context or browser has been closed" + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "2 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "11 pieces placed during play session" }, { "name": "line_clear", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "exception: page.evaluate: Target page, context or browser has been closed" + "detail": "score stayed at 0" }, { "name": "game_over", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "played for 30s, placed 22 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 3, - "failed": 13, - "score": 0.19 + "passed": 13, + "failed": 3, + "score": 0.81 }, "gameplay": { - "pieces_placed": 9, - "lines_cleared": 0, + "pieces_placed": 22, + "lines_cleared": 1, "max_score_observed": 0, - "play_duration_seconds": 0, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 331, + "events_count": 9, + "pieces_spawned": 2, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "I" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 305 + "load_time_ms": 222 }, "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true + "issues": [ + "no headings found", + "canvas without aria-label or role", + "canvas without aria-label or role" + ], + "issue_count": 3, + "pass": false } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,14 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 31455, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 17, + "total": 16, "code": 5, "docs": 6, "unnecessary": 3, @@ -60,8 +56,8 @@ "lines_of_code": 1260, "dependencies": { "production": 0, - "dev": 7, - "total": 7 + "dev": 5, + "total": 5 }, "complexity": "moderate", "console_logs": 2, @@ -130,128 +126,21 @@ "gameplay_bot": { "pass": false, "score": 0, - "total": 16, - "passed": 0, - "failed": 16, - "report": { - "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, - "controls": { - "left": "ArrowLeft", - "right": "ArrowRight", - "down": "ArrowDown", - "rotate": "ArrowUp", - "drop": "Space" - }, - "start_mechanism": "unknown", - "score_element_found": false - }, - "tests": [ - { - "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:36895/\", waiting until \"domcontentloaded\"\u001b[22m\n" - }, - { - "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "all_pieces_rotate", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "score_changes", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" - }, - { - "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" - } - ], - "summary": { - "total": 16, - "passed": 0, - "failed": 16, - "score": 0 - }, - "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, - "max_score_observed": 0, - "play_duration_seconds": 0, - "errors_during_play": 0 - }, - "performance": { - "load_time_ms": -1 - }, - "accessibility": { - "issues": [], - "issue_count": 0, - "pass": true - } - } + "error": "Gameplay bot timed out after 180 seconds" }, - "score": 0.4944 + "outcome_score": 0.225, + "score": 0.225, + "sonarqube": { + "bugs": 2, + "vulnerabilities": 0, + "code_smells": 7, + "cognitive_complexity": 97, + "lines_of_code": 654, + "duplication_pct": 0.0, + "tech_debt_minutes": 39, + "maintainability": "A", + "reliability": "C", + "security": "A", + "score": 0.45 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json @@ -1,8 +1,13 @@ { "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 75, + "height": 150 + }, "controls": { "left": "ArrowLeft", "right": "ArrowRight", @@ -10,106 +15,116 @@ "rotate": "ArrowUp", "drop": "Space" }, - "start_mechanism": "unknown", - "score_element_found": false + "start_mechanism": "space", + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { "name": "game_loads", "pass": false, - "detail": "exception: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:36895/\", waiting until \"domcontentloaded\"\u001b[22m\n" + "detail": "1 console error(s): Unexpected token 'export'" }, { "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "started via space" }, { "name": "auto_drop", "pass": false, - "detail": "skipped: page did not load" + "detail": "piece did not move down in 5 seconds (grid-verified)" }, { "name": "move_left", "pass": false, - "detail": "skipped: page did not load" + "detail": "no grid change detected after key press" }, { "name": "move_right", "pass": false, - "detail": "skipped: page did not load" + "detail": "no grid change detected after key press" }, { "name": "move_down", "pass": false, - "detail": "skipped: page did not load" + "detail": "no grid change detected after key press" }, { "name": "rotate", "pass": false, - "detail": "skipped: page did not load" + "detail": "no shape change detected after rotate key" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "skipped: page did not load" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", "pass": false, - "detail": "skipped: page did not load" + "detail": "no grid change with bottom cells detected after hard drop key" }, { "name": "piece_locks", "pass": false, - "detail": "skipped: page did not load" + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" }, { "name": "new_piece_spawns", "pass": false, - "detail": "skipped: page did not load" + "detail": "could not detect new piece spawning at top via grid reader" }, { "name": "multiple_pieces", "pass": false, - "detail": "skipped: page did not load" + "detail": "only 10 piece(s) detected, need at least 3" }, { "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "skipped: page did not load" + "detail": "score stayed at 0" }, { "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", "pass": false, - "detail": "skipped: page did not load" + "detail": "5 console error(s), 0 play errors" } ], "summary": { "total": 16, - "passed": 0, - "failed": 16, - "score": 0 + "passed": 3, + "failed": 13, + "score": 0.19 }, "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, + "pieces_placed": 20, + "lines_cleared": 1, "max_score_observed": 0, - "play_duration_seconds": 0, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 323, + "events_count": 2, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 1, + "piece_types_seen": [], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": -1 + "load_time_ms": 290 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,14 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 55608, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 22, + "total": 21, "code": 11, "docs": 1, "unnecessary": 1, @@ -58,8 +54,8 @@ "lines_of_code": 1909, "dependencies": { "production": 1, - "dev": 7, - "total": 8 + "dev": 5, + "total": 6 }, "complexity": "over-engineered", "console_logs": 0, @@ -127,122 +123,140 @@ }, "gameplay_bot": { "pass": false, - "score": 0, + "score": 0.81, "total": 16, - "passed": 0, - "failed": 16, + "passed": 13, + "failed": 3, "report": { "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 300, + "height": 600 + }, "controls": { "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "ArrowUp", + "rotate": "z", "drop": "Space" }, - "start_mechanism": "unknown", - "score_element_found": false + "start_mechanism": "auto", + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:46579/\", waiting until \"domcontentloaded\"\u001b[22m\n" + "pass": true, + "detail": "no console errors" }, { "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "started via auto" }, { "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": false, - "detail": "skipped: page did not load" + "detail": "no shape change detected after rotate key" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "skipped: page did not load" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 1 lock event(s))" }, { "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "9 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "11 pieces placed during play session" }, { "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "skipped: page did not load" + "detail": "score stayed at 0" }, { "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "played for 30s, placed 36 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 0, - "failed": 16, - "score": 0 + "passed": 13, + "failed": 3, + "score": 0.81 }, "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, + "pieces_placed": 36, + "lines_cleared": 1, "max_score_observed": 0, - "play_duration_seconds": 0, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 306, + "events_count": 8, + "pieces_spawned": 9, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown", + "O" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": -1 + "load_time_ms": 163 }, "accessibility": { "issues": [], @@ -251,5 +265,19 @@ } } }, - "score": 0.5081 + "outcome_score": 0.73, + "score": 0.73, + "sonarqube": { + "bugs": 1, + "vulnerabilities": 0, + "code_smells": 5, + "cognitive_complexity": 78, + "lines_of_code": 698, + "duplication_pct": 0.0, + "tech_debt_minutes": 25, + "maintainability": "A", + "reliability": "C", + "security": "A", + "score": 0.65 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=haiku_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json @@ -1,115 +1,133 @@ { "implementation": { - "renderer": "unknown", - "grid_detected": false, - "grid_bounds": null, + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 300, + "height": 600 + }, "controls": { "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "ArrowUp", + "rotate": "z", "drop": "Space" }, - "start_mechanism": "unknown", - "score_element_found": false + "start_mechanism": "auto", + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { "name": "game_loads", - "pass": false, - "detail": "exception: page.goto: Timeout 5000ms exceeded.\nCall log:\n\u001b[2m - navigating to \"http://127.0.0.1:46579/\", waiting until \"domcontentloaded\"\u001b[22m\n" + "pass": true, + "detail": "no console errors" }, { "name": "game_starts", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "started via auto" }, { "name": "auto_drop", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": false, - "detail": "skipped: page did not load" + "detail": "no shape change detected after rotate key" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "skipped: page did not load" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "filled cells persist at bottom (grid-verified, 1 lock event(s))" }, { "name": "new_piece_spawns", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "9 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "11 pieces placed during play session" }, { "name": "line_clear", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "skipped: page did not load" + "detail": "score stayed at 0" }, { "name": "game_over", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "game stopped after stacking to top" }, { "name": "playable_30s", - "pass": false, - "detail": "skipped: page did not load" + "pass": true, + "detail": "played for 30s, placed 36 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 0, - "failed": 16, - "score": 0 + "passed": 13, + "failed": 3, + "score": 0.81 }, "gameplay": { - "pieces_placed": 0, - "lines_cleared": 0, + "pieces_placed": 36, + "lines_cleared": 1, "max_score_observed": 0, - "play_duration_seconds": 0, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 306, + "events_count": 8, + "pieces_spawned": 9, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown", + "O" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": -1 + "load_time_ms": 163 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,14 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 56923, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 23, + "total": 24, "code": 13, "docs": 0, "unnecessary": 0, @@ -56,8 +52,8 @@ "lines_of_code": 1767, "dependencies": { "production": 0, - "dev": 5, - "total": 5 + "dev": 7, + "total": 7 }, "complexity": "over-engineered", "console_logs": 0, @@ -125,10 +121,10 @@ }, "gameplay_bot": { "pass": false, - "score": 0.56, + "score": 0.38, "total": 16, - "passed": 9, - "failed": 7, + "passed": 6, + "failed": 10, "report": { "implementation": { "renderer": "canvas", @@ -147,7 +143,8 @@ "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -162,63 +159,63 @@ }, { "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" + "pass": false, + "detail": "piece did not move down in 5 seconds (grid-verified)" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "move_down", "pass": false, - "detail": "no change detected after key press" + "detail": "no grid change detected after key press" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "no shape change detected after rotate key" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "could not detect any piece rotations" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", "pass": false, - "detail": "no change detected after hard drop key" + "detail": "no grid change with bottom cells detected after hard drop key" }, { "name": "piece_locks", "pass": false, - "detail": "could not verify piece locking at bottom" + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" }, { "name": "new_piece_spawns", "pass": false, - "detail": "could not detect new piece at top" + "detail": "could not detect new piece spawning at top via grid reader" }, { "name": "multiple_pieces", "pass": false, - "detail": "grid did not accumulate filled cells" + "detail": "only 10 piece(s) detected, need at least 3" }, { "name": "line_clear", "pass": true, - "detail": "line cleared via strategic placement" + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", - "pass": false, - "detail": "score did not increase: [266] -> no change after polling" + "pass": true, + "detail": "score changed from 160 to 162" }, { "name": "game_over", @@ -228,24 +225,33 @@ { "name": "playable_30s", "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" + "detail": "played for 30s, placed 20 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 9, - "failed": 7, - "score": 0.56 + "passed": 6, + "failed": 10, + "score": 0.38 }, "gameplay": { - "pieces_placed": 188, + "pieces_placed": 20, "lines_cleared": 1, - "max_score_observed": 390, + "max_score_observed": 162, "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 336, + "events_count": 2, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 1, + "piece_types_seen": [], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 41 + "load_time_ms": 402 }, "accessibility": { "issues": [], @@ -254,5 +260,19 @@ } } }, - "score": 0.7615 + "outcome_score": 0.515, + "score": 0.515, + "sonarqube": { + "bugs": 0, + "vulnerabilities": 0, + "code_smells": 17, + "cognitive_complexity": 123, + "lines_of_code": 958, + "duplication_pct": 2.8, + "tech_debt_minutes": 63, + "maintainability": "A", + "reliability": "A", + "security": "A", + "score": 0.65 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json @@ -16,7 +16,8 @@ "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -31,63 +32,63 @@ }, { "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" + "pass": false, + "detail": "piece did not move down in 5 seconds (grid-verified)" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "move_down", "pass": false, - "detail": "no change detected after key press" + "detail": "no grid change detected after key press" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "no shape change detected after rotate key" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "could not detect any piece rotations" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", "pass": false, - "detail": "no change detected after hard drop key" + "detail": "no grid change with bottom cells detected after hard drop key" }, { "name": "piece_locks", "pass": false, - "detail": "could not verify piece locking at bottom" + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" }, { "name": "new_piece_spawns", "pass": false, - "detail": "could not detect new piece at top" + "detail": "could not detect new piece spawning at top via grid reader" }, { "name": "multiple_pieces", "pass": false, - "detail": "grid did not accumulate filled cells" + "detail": "only 10 piece(s) detected, need at least 3" }, { "name": "line_clear", "pass": true, - "detail": "line cleared via strategic placement" + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", - "pass": false, - "detail": "score did not increase: [266] -> no change after polling" + "pass": true, + "detail": "score changed from 160 to 162" }, { "name": "game_over", @@ -97,24 +98,33 @@ { "name": "playable_30s", "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" + "detail": "played for 30s, placed 20 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 9, - "failed": 7, - "score": 0.56 + "passed": 6, + "failed": 10, + "score": 0.38 }, "gameplay": { - "pieces_placed": 188, + "pieces_placed": 20, "lines_cleared": 1, - "max_score_observed": 390, + "max_score_observed": 162, "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 336, + "events_count": 2, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 1, + "piece_types_seen": [], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 41 + "load_time_ms": 402 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,14 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 18951, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 6, + "total": 7, "code": 3, "docs": 0, "unnecessary": 0, @@ -56,8 +52,8 @@ "lines_of_code": 1220, "dependencies": { "production": 0, - "dev": 5, - "total": 5 + "dev": 7, + "total": 7 }, "complexity": "moderate", "console_logs": 0, @@ -143,11 +139,12 @@ "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "z", + "rotate": "ArrowUp", "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -163,62 +160,62 @@ { "name": "auto_drop", "pass": true, - "detail": "grid state changed after 5s with no input" + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": true, - "detail": "piece shape changed after rotate key" + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "detail": "rotation observed, piece types seen: [unknown, S]" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", "pass": true, - "detail": "filled cells persist at bottom" + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", "pass": true, - "detail": "new piece detected at top of grid" + "detail": "4 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", "pass": true, - "detail": "grid accumulated cells: 16 -> 45" + "detail": "11 pieces placed during play session" }, { "name": "line_clear", "pass": true, - "detail": "line cleared via strategic placement" + "detail": "2 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [202] -> no change after polling" + "detail": "score stayed at 282" }, { "name": "game_over", @@ -228,7 +225,7 @@ { "name": "playable_30s", "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" + "detail": "played for 30s, placed 37 pieces, no crashes" } ], "summary": { @@ -238,14 +235,26 @@ "score": 0.94 }, "gameplay": { - "pieces_placed": 130, + "pieces_placed": 37, "lines_cleared": 2, - "max_score_observed": 228, + "max_score_observed": 282, "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 296, + "events_count": 11, + "pieces_spawned": 4, + "pieces_locked": 11, + "lines_cleared": 2, + "piece_types_seen": [ + "unknown", + "S" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 51 + "load_time_ms": 141 }, "accessibility": { "issues": [ @@ -256,5 +265,19 @@ } } }, - "score": 0.894 + "outcome_score": 0.795, + "score": 0.795, + "sonarqube": { + "bugs": 0, + "vulnerabilities": 0, + "code_smells": 17, + "cognitive_complexity": 123, + "lines_of_code": 958, + "duplication_pct": 2.8, + "tech_debt_minutes": 63, + "maintainability": "A", + "reliability": "A", + "security": "A", + "score": 0.65 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json @@ -12,11 +12,12 @@ "left": "ArrowLeft", "right": "ArrowRight", "down": "ArrowDown", - "rotate": "z", + "rotate": "ArrowUp", "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -32,62 +33,62 @@ { "name": "auto_drop", "pass": true, - "detail": "grid state changed after 5s with no input" + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": true, - "detail": "piece shape changed after rotate key" + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "detail": "rotation observed, piece types seen: [unknown, S]" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", "pass": true, - "detail": "filled cells persist at bottom" + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", "pass": true, - "detail": "new piece detected at top of grid" + "detail": "4 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", "pass": true, - "detail": "grid accumulated cells: 16 -> 45" + "detail": "11 pieces placed during play session" }, { "name": "line_clear", "pass": true, - "detail": "line cleared via strategic placement" + "detail": "2 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [202] -> no change after polling" + "detail": "score stayed at 282" }, { "name": "game_over", @@ -97,7 +98,7 @@ { "name": "playable_30s", "pass": true, - "detail": "played for 30s, placed 79 pieces, no crashes" + "detail": "played for 30s, placed 37 pieces, no crashes" } ], "summary": { @@ -107,14 +108,26 @@ "score": 0.94 }, "gameplay": { - "pieces_placed": 130, + "pieces_placed": 37, "lines_cleared": 2, - "max_score_observed": 228, + "max_score_observed": 282, "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 296, + "events_count": 11, + "pieces_spawned": 4, + "pieces_locked": 11, + "lines_cleared": 2, + "piece_types_seen": [ + "unknown", + "S" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 51 + "load_time_ms": 141 }, "accessibility": { "issues": [ diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,14 +35,15 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 63464, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { - "total": 12, + "total": 13, "code": 8, "docs": 0, "unnecessary": 0, @@ -56,8 +52,8 @@ "lines_of_code": 1718, "dependencies": { "production": 0, - "dev": 6, - "total": 6 + "dev": 8, + "total": 8 }, "complexity": "over-engineered", "console_logs": 0, @@ -147,7 +143,8 @@ "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -163,62 +160,62 @@ { "name": "auto_drop", "pass": true, - "detail": "grid state changed after 5s with no input" + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": true, - "detail": "piece shape changed after rotate key" + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "detail": "rotation observed, piece types seen: [unknown, T, S, I]" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", "pass": true, - "detail": "filled cells persist at bottom" + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", "pass": true, - "detail": "new piece detected at top of grid" + "detail": "8 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", "pass": true, - "detail": "grid accumulated cells: 20 -> 34" + "detail": "11 pieces placed during play session" }, { "name": "line_clear", "pass": true, - "detail": "3 line(s) cleared during AI play" + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [286] -> no change after polling" + "detail": "score stayed at 350" }, { "name": "game_over", @@ -228,7 +225,7 @@ { "name": "playable_30s", "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" + "detail": "played for 30s, placed 41 pieces, no crashes" } ], "summary": { @@ -238,14 +235,28 @@ "score": 0.94 }, "gameplay": { - "pieces_placed": 119, - "lines_cleared": 6, - "max_score_observed": 232, + "pieces_placed": 41, + "lines_cleared": 1, + "max_score_observed": 350, "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 289, + "events_count": 10, + "pieces_spawned": 8, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown", + "T", + "S", + "I" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 52 + "load_time_ms": 100 }, "accessibility": { "issues": [], @@ -254,5 +265,19 @@ } } }, - "score": 0.8565 + "outcome_score": 0.795, + "score": 0.795, + "sonarqube": { + "bugs": 0, + "vulnerabilities": 0, + "code_smells": 17, + "cognitive_complexity": 123, + "lines_of_code": 958, + "duplication_pct": 2.8, + "tech_debt_minutes": 63, + "maintainability": "A", + "reliability": "A", + "security": "A", + "score": 0.65 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=opus_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json @@ -16,7 +16,8 @@ "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -32,62 +33,62 @@ { "name": "auto_drop", "pass": true, - "detail": "grid state changed after 5s with no input" + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": true, - "detail": "piece shape changed after rotate key" + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "detail": "rotation observed, piece types seen: [unknown, T, S, I]" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", "pass": true, - "detail": "filled cells persist at bottom" + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", "pass": true, - "detail": "new piece detected at top of grid" + "detail": "8 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", "pass": true, - "detail": "grid accumulated cells: 20 -> 34" + "detail": "11 pieces placed during play session" }, { "name": "line_clear", "pass": true, - "detail": "3 line(s) cleared during AI play" + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [286] -> no change after polling" + "detail": "score stayed at 350" }, { "name": "game_over", @@ -97,7 +98,7 @@ { "name": "playable_30s", "pass": true, - "detail": "played for 30s, placed 78 pieces, no crashes" + "detail": "played for 30s, placed 41 pieces, no crashes" } ], "summary": { @@ -107,14 +108,28 @@ "score": 0.94 }, "gameplay": { - "pieces_placed": 119, - "lines_cleared": 6, - "max_score_observed": 232, + "pieces_placed": 41, + "lines_cleared": 1, + "max_score_observed": 350, "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 289, + "events_count": 10, + "pieces_spawned": 8, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown", + "T", + "S", + "I" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 52 + "load_time_ms": 100 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 23654, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -125,8 +121,158 @@ }, "gameplay_bot": { "pass": false, - "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "score": 0.31, + "total": 16, + "passed": 5, + "failed": 11, + "report": { + "implementation": { + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 48, + "height": 96 + }, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "ArrowDown", + "rotate": "x", + "drop": "Space" + }, + "start_mechanism": "auto", + "score_element_found": true, + "grid_confidence": 1 + }, + "tests": [ + { + "name": "game_loads", + "pass": true, + "detail": "no console errors" + }, + { + "name": "game_starts", + "pass": true, + "detail": "started via auto" + }, + { + "name": "auto_drop", + "pass": false, + "detail": "piece did not move down in 5 seconds (grid-verified)" + }, + { + "name": "move_left", + "pass": false, + "detail": "no grid change detected after key press" + }, + { + "name": "move_right", + "pass": false, + "detail": "no grid change detected after key press" + }, + { + "name": "move_down", + "pass": false, + "detail": "no grid change detected after key press" + }, + { + "name": "rotate", + "pass": false, + "detail": "no shape change detected after rotate key" + }, + { + "name": "all_pieces_rotate", + "pass": false, + "detail": "could not detect any piece rotations via grid reader" + }, + { + "name": "hard_drop", + "pass": false, + "detail": "no grid change with bottom cells detected after hard drop key" + }, + { + "name": "piece_locks", + "pass": false, + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" + }, + { + "name": "new_piece_spawns", + "pass": false, + "detail": "could not detect new piece spawning at top via grid reader" + }, + { + "name": "multiple_pieces", + "pass": false, + "detail": "only 10 piece(s) detected, need at least 3" + }, + { + "name": "line_clear", + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" + }, + { + "name": "score_changes", + "pass": false, + "detail": "score stayed at 1" + }, + { + "name": "game_over", + "pass": true, + "detail": "game stopped after stacking to top" + }, + { + "name": "playable_30s", + "pass": true, + "detail": "played for 30s, placed 20 pieces, no crashes" + } + ], + "summary": { + "total": 16, + "passed": 5, + "failed": 11, + "score": 0.31 + }, + "gameplay": { + "pieces_placed": 20, + "lines_cleared": 1, + "max_score_observed": 1, + "play_duration_seconds": 30, + "errors_during_play": 0 + }, + "session": { + "frames": 327, + "events_count": 2, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 1, + "piece_types_seen": [], + "grid_read_success_rate": 1 + }, + "performance": { + "load_time_ms": 133 + }, + "accessibility": { + "issues": [], + "issue_count": 0, + "pass": true + } + } }, - "score": 0.48 + "outcome_score": 0.405, + "score": 0.405, + "sonarqube": { + "bugs": 1, + "vulnerabilities": 0, + "code_smells": 22, + "cognitive_complexity": 120, + "lines_of_code": 811, + "duplication_pct": 0.0, + "tech_debt_minutes": 63, + "maintainability": "A", + "reliability": "C", + "security": "A", + "score": 0.5 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run1/gameplay-bot-report.json @@ -16,7 +16,8 @@ "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -31,63 +32,63 @@ }, { "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" + "pass": false, + "detail": "piece did not move down in 5 seconds (grid-verified)" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "no shape change detected after rotate key" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "could not detect any piece rotations" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", "pass": false, - "detail": "no change detected after hard drop key" + "detail": "no grid change with bottom cells detected after hard drop key" }, { "name": "piece_locks", "pass": false, - "detail": "could not verify piece locking at bottom" + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" }, { "name": "new_piece_spawns", "pass": false, - "detail": "could not detect new piece at top" + "detail": "could not detect new piece spawning at top via grid reader" }, { "name": "multiple_pieces", "pass": false, - "detail": "grid did not accumulate filled cells" + "detail": "only 10 piece(s) detected, need at least 3" }, { "name": "line_clear", "pass": true, - "detail": "line cleared via strategic placement" + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": false, - "detail": "score did not increase: [1] -> no change after polling" + "detail": "score stayed at 1" }, { "name": "game_over", @@ -96,25 +97,34 @@ }, { "name": "playable_30s", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "played for 30s, placed 20 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 9, - "failed": 7, - "score": 0.56 + "passed": 5, + "failed": 11, + "score": 0.31 }, "gameplay": { - "pieces_placed": 110, + "pieces_placed": 20, "lines_cleared": 1, - "max_score_observed": 0, - "play_duration_seconds": 0, + "max_score_observed": 1, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 327, + "events_count": 2, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 1, + "piece_types_seen": [], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 173 + "load_time_ms": 133 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/eval_results.json @@ -25,11 +25,6 @@ ], "score": 0.75 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -40,10 +35,11 @@ "pass": true }, "performance": { + "pass": true, "bundle_size_bytes": 16701, "size_under_512kb": true }, - "score": 0.67 + "score": 1.0 }, "code_analysis": { "files": { @@ -147,7 +143,8 @@ "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -163,62 +160,62 @@ { "name": "auto_drop", "pass": true, - "detail": "grid state changed after 5s with no input" + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": true, - "detail": "piece shape changed after rotate key" + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "detail": "rotation observed, piece types seen: [unknown, J]" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", "pass": true, - "detail": "filled cells persist at bottom" + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", "pass": true, - "detail": "new piece detected at top of grid" + "detail": "8 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", "pass": true, - "detail": "game still responding after 10 piece drops" + "detail": "11 pieces placed during play session" }, { "name": "line_clear", "pass": true, - "detail": "9 line(s) cleared during AI play" + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": true, - "detail": "score changed from 126 to 236" + "detail": "score changed from 126 to 266" }, { "name": "game_over", @@ -228,7 +225,7 @@ { "name": "playable_30s", "pass": true, - "detail": "played for 31s, placed 73 pieces, no crashes" + "detail": "played for 30s, placed 43 pieces, no crashes" } ], "summary": { @@ -238,14 +235,26 @@ "score": 0.94 }, "gameplay": { - "pieces_placed": 114, - "lines_cleared": 32, - "max_score_observed": 220, - "play_duration_seconds": 31, + "pieces_placed": 43, + "lines_cleared": 1, + "max_score_observed": 266, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 281, + "events_count": 10, + "pieces_spawned": 8, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown", + "J" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 162 + "load_time_ms": 146 }, "accessibility": { "issues": [], @@ -254,5 +263,19 @@ } } }, - "score": 0.8487 + "outcome_score": 0.87, + "score": 0.87, + "sonarqube": { + "bugs": 0, + "vulnerabilities": 0, + "code_smells": 5, + "cognitive_complexity": 94, + "lines_of_code": 623, + "duplication_pct": 0.0, + "tech_debt_minutes": 25, + "maintainability": "A", + "reliability": "A", + "security": "A", + "score": 0.8 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run2/gameplay-bot-report.json @@ -16,7 +16,8 @@ "drop": "Space" }, "start_mechanism": "auto", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -32,62 +33,62 @@ { "name": "auto_drop", "pass": true, - "detail": "grid state changed after 5s with no input" + "detail": "grid state changed after 5s with no input (grid-verified)" }, { "name": "move_left", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_right", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "move_down", "pass": true, - "detail": "grid state changed after key press" + "detail": "grid state changed after key press (grid-verified)" }, { "name": "rotate", "pass": true, - "detail": "piece shape changed after rotate key" + "detail": "piece shape changed after rotate key (grid-verified, 1 rotation(s))" }, { "name": "all_pieces_rotate", "pass": true, - "detail": "rotated: [other] failed: [] (tested 1 piece types in 60 attempts)" + "detail": "rotation observed, piece types seen: [unknown, J]" }, { "name": "hard_drop", "pass": true, - "detail": "piece immediately dropped and new piece appeared" + "detail": "piece immediately dropped to bottom (grid-verified)" }, { "name": "piece_locks", "pass": true, - "detail": "filled cells persist at bottom" + "detail": "filled cells persist at bottom (grid-verified, 2 lock event(s))" }, { "name": "new_piece_spawns", "pass": true, - "detail": "new piece detected at top of grid" + "detail": "8 new piece(s) detected at top of grid" }, { "name": "multiple_pieces", "pass": true, - "detail": "game still responding after 10 piece drops" + "detail": "11 pieces placed during play session" }, { "name": "line_clear", "pass": true, - "detail": "9 line(s) cleared during AI play" + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", "pass": true, - "detail": "score changed from 126 to 236" + "detail": "score changed from 126 to 266" }, { "name": "game_over", @@ -97,7 +98,7 @@ { "name": "playable_30s", "pass": true, - "detail": "played for 31s, placed 73 pieces, no crashes" + "detail": "played for 30s, placed 43 pieces, no crashes" } ], "summary": { @@ -107,14 +108,26 @@ "score": 0.94 }, "gameplay": { - "pieces_placed": 114, - "lines_cleared": 32, - "max_score_observed": 220, - "play_duration_seconds": 31, + "pieces_placed": 43, + "lines_cleared": 1, + "max_score_observed": 266, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 281, + "events_count": 10, + "pieces_spawned": 8, + "pieces_locked": 11, + "lines_cleared": 1, + "piece_types_seen": [ + "unknown", + "J" + ], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 162 + "load_time_ms": 146 }, "accessibility": { "issues": [], diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/eval_results.json @@ -25,11 +25,6 @@ ], "score": 1.0 }, - "functional": { - "pass": false, - "error": "playwright eval not yet wired", - "score": 0 - }, "quality": { "lint": { "pass": true, @@ -41,10 +36,11 @@ "errors": 1 }, "performance": { + "pass": true, "bundle_size_bytes": 24321, "size_under_512kb": true }, - "score": 0.33 + "score": 0.67 }, "code_analysis": { "files": { @@ -97,10 +93,10 @@ }, "html_validation": { "valid": false, - "errors": 0 + "errors": 4 }, "duplication_percentage": 0.0, - "score": 0.5 + "score": 0.85 }, "transcript_analysis": { "total_events": 86, @@ -126,8 +122,158 @@ }, "gameplay_bot": { "pass": false, - "score": 0, - "error": "Gameplay bot timed out after 180 seconds" + "score": 0.38, + "total": 16, + "passed": 6, + "failed": 10, + "report": { + "implementation": { + "renderer": "canvas", + "grid_detected": true, + "grid_bounds": { + "x": 0, + "y": 0, + "width": 44, + "height": 88 + }, + "controls": { + "left": "ArrowLeft", + "right": "ArrowRight", + "down": "ArrowDown", + "rotate": "x", + "drop": "Space" + }, + "start_mechanism": "enter", + "score_element_found": true, + "grid_confidence": 1 + }, + "tests": [ + { + "name": "game_loads", + "pass": true, + "detail": "no console errors" + }, + { + "name": "game_starts", + "pass": true, + "detail": "started via enter" + }, + { + "name": "auto_drop", + "pass": false, + "detail": "piece did not move down in 5 seconds (grid-verified)" + }, + { + "name": "move_left", + "pass": false, + "detail": "no grid change detected after key press" + }, + { + "name": "move_right", + "pass": false, + "detail": "no grid change detected after key press" + }, + { + "name": "move_down", + "pass": false, + "detail": "no grid change detected after key press" + }, + { + "name": "rotate", + "pass": false, + "detail": "no shape change detected after rotate key" + }, + { + "name": "all_pieces_rotate", + "pass": false, + "detail": "could not detect any piece rotations via grid reader" + }, + { + "name": "hard_drop", + "pass": false, + "detail": "no grid change with bottom cells detected after hard drop key" + }, + { + "name": "piece_locks", + "pass": false, + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" + }, + { + "name": "new_piece_spawns", + "pass": false, + "detail": "could not detect new piece spawning at top via grid reader" + }, + { + "name": "multiple_pieces", + "pass": false, + "detail": "only 10 piece(s) detected, need at least 3" + }, + { + "name": "line_clear", + "pass": true, + "detail": "1 line(s) cleared (grid-verified)" + }, + { + "name": "score_changes", + "pass": true, + "detail": "score changed from 196 to 216" + }, + { + "name": "game_over", + "pass": true, + "detail": "game stopped after stacking to top" + }, + { + "name": "playable_30s", + "pass": true, + "detail": "played for 30s, placed 20 pieces, no crashes" + } + ], + "summary": { + "total": 16, + "passed": 6, + "failed": 10, + "score": 0.38 + }, + "gameplay": { + "pieces_placed": 20, + "lines_cleared": 1, + "max_score_observed": 216, + "play_duration_seconds": 30, + "errors_during_play": 0 + }, + "session": { + "frames": 325, + "events_count": 2, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 1, + "piece_types_seen": [], + "grid_read_success_rate": 1 + }, + "performance": { + "load_time_ms": 160 + }, + "accessibility": { + "issues": [], + "issue_count": 0, + "pass": true + } + } }, - "score": 0.4138 + "outcome_score": 0.44, + "score": 0.44, + "sonarqube": { + "bugs": 1, + "vulnerabilities": 0, + "code_smells": 22, + "cognitive_complexity": 120, + "lines_of_code": 811, + "duplication_pct": 0.0, + "tech_debt_minutes": 63, + "maintainability": "A", + "reliability": "C", + "security": "A", + "score": 0.5 + } } \ No newline at end of file diff --git a/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json b/results/runs/tetris_context_file=provided_effort=high_human_language=en_language=typescript_linter=on_max_budget=low_model=sonnet_playwright=on_prompt_style=simple_sub_agents=on_tool_edit=on_tool_glob=on_tool_grep=on_tool_read=on_tool_write=on_web_search=on_run3/gameplay-bot-report.json @@ -16,7 +16,8 @@ "drop": "Space" }, "start_mechanism": "enter", - "score_element_found": true + "score_element_found": true, + "grid_confidence": 1 }, "tests": [ { @@ -31,63 +32,63 @@ }, { "name": "auto_drop", - "pass": true, - "detail": "pixels changed after 5s with no input" + "pass": false, + "detail": "piece did not move down in 5 seconds (grid-verified)" }, { "name": "move_left", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "move_right", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "move_down", - "pass": true, - "detail": "grid state changed after key press" + "pass": false, + "detail": "no grid change detected after key press" }, { "name": "rotate", - "pass": true, - "detail": "piece shape changed after rotate key" + "pass": false, + "detail": "no shape change detected after rotate key" }, { "name": "all_pieces_rotate", "pass": false, - "detail": "could not detect any piece rotations" + "detail": "could not detect any piece rotations via grid reader" }, { "name": "hard_drop", "pass": false, - "detail": "no change detected after hard drop key" + "detail": "no grid change with bottom cells detected after hard drop key" }, { "name": "piece_locks", "pass": false, - "detail": "could not verify piece locking at bottom" + "detail": "10 lock event(s) but 0 spawns detected - likely false positive from UI misread" }, { "name": "new_piece_spawns", "pass": false, - "detail": "could not detect new piece at top" + "detail": "could not detect new piece spawning at top via grid reader" }, { "name": "multiple_pieces", "pass": false, - "detail": "grid did not accumulate filled cells" + "detail": "only 10 piece(s) detected, need at least 3" }, { "name": "line_clear", "pass": true, - "detail": "line cleared via strategic placement" + "detail": "1 line(s) cleared (grid-verified)" }, { "name": "score_changes", - "pass": false, - "detail": "score did not increase: [202] -> no change after polling" + "pass": true, + "detail": "score changed from 196 to 216" }, { "name": "game_over", @@ -96,25 +97,34 @@ }, { "name": "playable_30s", - "pass": false, - "detail": "exception: keyboard.press: Target page, context or browser has been closed" + "pass": true, + "detail": "played for 30s, placed 20 pieces, no crashes" } ], "summary": { "total": 16, - "passed": 9, - "failed": 7, - "score": 0.56 + "passed": 6, + "failed": 10, + "score": 0.38 }, "gameplay": { - "pieces_placed": 110, + "pieces_placed": 20, "lines_cleared": 1, - "max_score_observed": 0, - "play_duration_seconds": 0, + "max_score_observed": 216, + "play_duration_seconds": 30, "errors_during_play": 0 }, + "session": { + "frames": 325, + "events_count": 2, + "pieces_spawned": 0, + "pieces_locked": 10, + "lines_cleared": 1, + "piece_types_seen": [], + "grid_read_success_rate": 1 + }, "performance": { - "load_time_ms": 170 + "load_time_ms": 160 }, "accessibility": { "issues": [],