---
# grid.yaml
#
# Experiment grid configuration. Top-level sections:
#   defaults   - per-run settings and the budget table
#   axes       - every dimension of the grid and its allowed values
#   providers  - per-provider connection settings and model name mappings
#   exclusions - axis-value combinations that are removed from the grid
#   tasks      - task identifiers
#   profiles   - named subsets of the grid
version: 1

defaults:
  runs_per_cell: 3
  timeout_seconds: 1200
  # Keys match the values of the `max_budget` axis.
  # NOTE(review): presumably a per-run spend cap in USD -- confirm with the runner.
  budget:
    low: 2.00
    high: 10.00

# Full axis definitions. Profiles below override these with narrower lists.
# "on"/"off" stay quoted so YAML 1.1 parsers do not read them as booleans.
axes:
  model:
    values:
      - "haiku-4.5"
      - "sonnet-4.6"
      - "opus-4.6"
      - "glm-4.5-air"
      - "glm-4.7"
      - "glm-5.1"
      - "qwen-3.6-plus"
      - "gemma-4-26b"
      - "minimax-m2.7"
      - "kimi-k2.5"
  effort:
    values: [high, max]
  prompt_style:
    values: [simple, detailed]
  language:
    values: [typescript, javascript, unspecified]
  human_language:
    values: [en, es]
  tool_read:
    values: ["on", "off"]
  tool_write:
    values: ["on", "off"]
  tool_edit:
    values: ["on", "off"]
  tool_glob:
    values: ["on", "off"]
  tool_grep:
    values: ["on", "off"]
  linter:
    values: ["on", "off"]
  playwright:
    values: ["off", "available", "instructed"]
  context_file:
    values: [none, provided]
  web_search:
    values: ["on", "off"]
  max_budget:
    values: [low, high]
  tests_provided:
    values: ["none", "a_few", "many"]
  strategy:
    values:
      - "none"
      - "plan_first"
      - "iterate"
      - "creative_validate"
      - "use_subagents"
      - "delegate"
      - "review"
      - "compete"
      - "split_work"
  design_guidance:
    values: ["none", "vague", "specific"]
  architecture:
    values: ["none", "separation", "best_practices"]
  error_checking:
    values: ["none", "self_verify"]
  context_noise:
    values:
      - "clean"
      - "wikipedia_1k"
      - "wikipedia_10k"
      - "wikipedia_50k"
      - "wikipedia_100k"
      - "wikipedia_25"
      - "wikipedia_50"
      - "wikipedia_75"
      - "lorem_1k"
      - "lorem_10k"
      - "lorem_50k"
      - "lorem_100k"
      - "lorem_25"
      - "lorem_50"
      - "lorem_75"
  renderer:
    values: ["none", "canvas", "svg", "dom", "webgl"]
  provider:
    values: ["anthropic", "zai", "openrouter"]

# Keys match the values of the `provider` axis.
providers:
  anthropic:
    # Maps display names to CLI --model args
    cli_model_map:
      "haiku-4.5": "haiku"
      "sonnet-4.6": "sonnet"
      "opus-4.6": "opus"
  zai:
    base_url: "https://api.z.ai/api/anthropic"
    # Name of the environment variable holding the key, not the key itself.
    api_key_env: "ZAI_API_KEY"
    models: ["glm-4.5-air", "glm-4.7", "glm-5.1"]
  openrouter:
    # NOTE(review): localhost endpoint with a placeholder token -- presumably
    # a local proxy in front of OpenRouter; confirm.
    base_url: "http://localhost:4000"
    auth_token: "dummy"
    cli_model_map:
      "qwen-3.6-plus": "openrouter/qwen/qwen3.6-plus"
      "gemma-4-26b": "openrouter/google/gemma-4-26b-a4b-it"
      "minimax-m2.7": "openrouter/minimax/minimax-m2.7"
      "kimi-k2.5": "openrouter/moonshotai/kimi-k2.5"

# Each entry removes the cells whose axis values match every key under `when`.
exclusions:
  # Haiku does not support extended thinking
  - when:
      model: "haiku-4.5"
      effort: max
  # NOTE(review): provided tests presumably need Playwright to run -- confirm.
  - when:
      tests_provided: a_few
      playwright: "off"
  - when:
      tests_provided: many
      playwright: "off"
  # NOTE(review): this removes every cell that uses the `compete` strategy,
  # although `compete` is still listed in axes.strategy -- confirm intended.
  - when:
      strategy: compete
  # GLM models only with zai provider
  - when: { provider: anthropic, model: "glm-4.5-air" }
  - when: { provider: anthropic, model: "glm-4.7" }
  - when: { provider: anthropic, model: "glm-5.1" }
  # Anthropic models only with anthropic provider
  - when: { provider: zai, model: "haiku-4.5" }
  - when: { provider: zai, model: "sonnet-4.6" }
  - when: { provider: zai, model: "opus-4.6" }
  # OpenRouter models only with openrouter
  - when: { provider: zai, model: "qwen-3.6-plus" }
  - when: { provider: anthropic, model: "qwen-3.6-plus" }
  - when: { provider: anthropic, model: "gemma-4-26b" }
  - when: { provider: anthropic, model: "minimax-m2.7" }
  - when: { provider: anthropic, model: "kimi-k2.5" }
  - when: { provider: zai, model: "gemma-4-26b" }
  - when: { provider: zai, model: "minimax-m2.7" }
  - when: { provider: zai, model: "kimi-k2.5" }
  # Remaining cross-provider pairs: Anthropic and GLM models on openrouter
  - when: { provider: openrouter, model: "haiku-4.5" }
  - when: { provider: openrouter, model: "sonnet-4.6" }
  - when: { provider: openrouter, model: "opus-4.6" }
  - when: { provider: openrouter, model: "glm-4.5-air" }
  - when: { provider: openrouter, model: "glm-4.7" }
  - when: { provider: openrouter, model: "glm-5.1" }

tasks:
  - tetris

# Named grid subsets. A profile's `axes` lists values directly (no `values:`
# wrapper, unlike the top-level `axes` section).
# NOTE(review): smoke, core, all-on and all-off list provider "zai" next to
# Anthropic-only models; the exclusions above match every such pairing, so
# "zai" presumably contributes no cells there -- confirm.
profiles:
  smoke:
    description: "Quick validation -- minimal grid"
    axes:
      model: ["haiku-4.5"]
      effort: [high]
      prompt_style: [simple, detailed]
      language: [typescript]
      human_language: [en]
      tool_read: ["on"]
      tool_write: ["on"]
      tool_edit: ["on"]
      tool_glob: ["on"]
      tool_grep: ["on"]
      linter: ["off"]
      playwright: ["off"]
      context_file: [none]
      web_search: ["off"]
      max_budget: [low]
      tests_provided: ["none"]
      strategy: ["none"]
      design_guidance: ["none"]
      architecture: ["none"]
      error_checking: ["none"]
      context_noise: ["clean"]
      renderer: ["none"]
      provider: ["anthropic", "zai"]
    runs_per_cell: 1

  zai-smoke:
    description: "Quick validation for Z.AI GLM models"
    axes:
      model: ["glm-4.5-air", "glm-4.7", "glm-5.1"]
      effort: [high]
      prompt_style: [simple, detailed]
      language: [typescript]
      human_language: [en]
      tool_read: ["on"]
      tool_write: ["on"]
      tool_edit: ["on"]
      tool_glob: ["on"]
      tool_grep: ["on"]
      linter: ["off"]
      playwright: ["off"]
      context_file: [none]
      web_search: ["off"]
      max_budget: [low]
      tests_provided: ["none"]
      strategy: ["none"]
      design_guidance: ["none"]
      architecture: ["none"]
      error_checking: ["none"]
      context_noise: ["clean"]
      renderer: ["none"]
      provider: ["zai"]
    runs_per_cell: 1

  core:
    description: "Core comparison -- models and effort levels"
    axes:
      model: ["haiku-4.5", "sonnet-4.6", "opus-4.6"]
      effort: [high, max]
      prompt_style: [simple, detailed]
      language: [typescript]
      human_language: [en]
      tool_read: ["on"]
      tool_write: ["on"]
      tool_edit: ["on"]
      tool_glob: ["on"]
      tool_grep: ["on"]
      linter: ["off"]
      playwright: ["off"]
      context_file: [none]
      web_search: ["off"]
      max_budget: [high]
      tests_provided: ["none"]
      strategy: ["none"]
      design_guidance: ["none"]
      architecture: ["none"]
      error_checking: ["none"]
      context_noise: ["clean"]
      renderer: ["none"]
      provider: ["anthropic", "zai"]
    runs_per_cell: 3

  all-on:
    description: "Everything enabled -- max tooling"
    axes:
      model: ["haiku-4.5"]
      effort: [high]
      prompt_style: [simple]
      language: [typescript]
      human_language: [en]
      tool_read: ["on"]
      tool_write: ["on"]
      tool_edit: ["on"]
      tool_glob: ["on"]
      tool_grep: ["on"]
      linter: ["on"]
      playwright: ["instructed"]
      context_file: [provided]
      web_search: ["on"]
      max_budget: [high]
      tests_provided: ["many"]
      strategy: ["delegate"]
      design_guidance: ["specific"]
      architecture: ["best_practices"]
      error_checking: ["self_verify"]
      context_noise: ["clean"]
      renderer: ["canvas"]
      provider: ["anthropic", "zai"]
    runs_per_cell: 3

  all-off:
    description: "Everything disabled -- bare minimum (Bash only)"
    axes:
      model: ["haiku-4.5"]
      effort: [high]
      prompt_style: [simple]
      language: [typescript]
      human_language: [en]
      tool_read: ["off"]
      tool_write: ["off"]
      tool_edit: ["off"]
      tool_glob: ["off"]
      tool_grep: ["off"]
      linter: ["off"]
      playwright: ["off"]
      context_file: [none]
      web_search: ["off"]
      max_budget: [low]
      tests_provided: ["none"]
      strategy: ["none"]
      design_guidance: ["none"]
      architecture: ["none"]
      error_checking: ["none"]
      context_noise: ["clean"]
      renderer: ["none"]
      provider: ["anthropic", "zai"]
    runs_per_cell: 3

  full:
    description: "Full grid -- all dimensions"
    # Uses top-level axes definition
    runs_per_cell: 3