main_effects_transcript.json (8268B)
1 { 2 "model": { 3 "values": { 4 "gemma-4-26b": { 5 "mean": 0.9814, 6 "effect": 0.0449, 7 "n": 43 8 }, 9 "glm-4.5-air": { 10 "mean": 0.9488, 11 "effect": 0.0123, 12 "n": 41 13 }, 14 "glm-4.7": { 15 "mean": 0.8926, 16 "effect": -0.0439, 17 "n": 81 18 }, 19 "glm-5.1": { 20 "mean": 0.9898, 21 "effect": 0.0534, 22 "n": 123 23 }, 24 "haiku-4.5": { 25 "mean": 0.7949, 26 "effect": -0.1415, 27 "n": 89 28 }, 29 "kimi-k2.5": { 30 "mean": 0.9333, 31 "effect": -0.0031, 32 "n": 3 33 }, 34 "minimax-m2.7": { 35 "mean": 1.0, 36 "effect": 0.0635, 37 "n": 3 38 }, 39 "opus-4.6": { 40 "mean": 1.0, 41 "effect": 0.0635, 42 "n": 52 43 }, 44 "qwen-3.6-plus": { 45 "mean": 0.9864, 46 "effect": 0.0499, 47 "n": 22 48 }, 49 "sonnet-4.6": { 50 "mean": 0.9849, 51 "effect": 0.0484, 52 "n": 53 53 } 54 }, 55 "spread": 0.2051 56 }, 57 "renderer": { 58 "values": { 59 "canvas": { 60 "mean": 0.9071, 61 "effect": -0.0293, 62 "n": 7 63 }, 64 "dom": { 65 "mean": 1.0, 66 "effect": 0.0635, 67 "n": 5 68 }, 69 "none": { 70 "mean": 0.9368, 71 "effect": 0.0003, 72 "n": 487 73 }, 74 "svg": { 75 "mean": 0.9, 76 "effect": -0.0365, 77 "n": 7 78 }, 79 "webgl": { 80 "mean": 0.9375, 81 "effect": 0.001, 82 "n": 4 83 } 84 }, 85 "spread": 0.1 86 }, 87 "context_noise": { 88 "values": { 89 "clean": { 90 "mean": 0.9345, 91 "effect": -0.002, 92 "n": 477 93 }, 94 "lorem_100k": { 95 "mean": 0.9167, 96 "effect": -0.0198, 97 "n": 6 98 }, 99 "lorem_10k": { 100 "mean": 0.9333, 101 "effect": -0.0031, 102 "n": 6 103 }, 104 "lorem_1k": { 105 "mean": 1.0, 106 "effect": 0.0635, 107 "n": 3 108 }, 109 "lorem_50k": { 110 "mean": 0.9583, 111 "effect": 0.0219, 112 "n": 6 113 }, 114 "wikipedia_100k": { 115 "mean": 1.0, 116 "effect": 0.0635, 117 "n": 3 118 }, 119 "wikipedia_10k": { 120 "mean": 1.0, 121 "effect": 0.0635, 122 "n": 3 123 }, 124 "wikipedia_1k": { 125 "mean": 1.0, 126 "effect": 0.0635, 127 "n": 3 128 }, 129 "wikipedia_50k": { 130 "mean": 1.0, 131 "effect": 0.0635, 132 "n": 3 133 } 134 }, 135 "spread": 0.0833 136 }, 137 "strategy": { 138 "values": { 139 "creative_validate": { 140 "mean": 0.9688, 141 "effect": 0.0323, 142 "n": 8 143 }, 144 "delegate": { 145 "mean": 0.9286, 146 "effect": -0.0079, 147 "n": 7 148 }, 149 "iterate": { 150 "mean": 0.9455, 151 "effect": 0.009, 152 "n": 11 153 }, 154 "none": { 155 "mean": 0.9448, 156 "effect": 0.0084, 157 "n": 300 158 }, 159 "plan_first": { 160 "mean": 0.94, 161 "effect": 0.0035, 162 "n": 10 163 }, 164 "review": { 165 "mean": 0.92, 166 "effect": -0.0165, 167 "n": 5 168 }, 169 "split_work": { 170 "mean": 1.0, 171 "effect": 0.0635, 172 "n": 5 173 }, 174 "use_subagents": { 175 "mean": 0.9177, 176 "effect": -0.0188, 177 "n": 164 178 } 179 }, 180 "spread": 0.0823 181 }, 182 "provider": { 183 "values": { 184 "anthropic": { 185 "mean": 0.9018, 186 "effect": -0.0347, 187 "n": 194 188 }, 189 "openrouter": { 190 "mean": 0.9817, 191 "effect": 0.0452, 192 "n": 71 193 }, 194 "zai": { 195 "mean": 0.9508, 196 "effect": 0.0143, 197 "n": 245 198 } 199 }, 200 "spread": 0.0799 201 }, 202 "tool_write": { 203 "values": { 204 "off": { 205 "mean": 0.8621, 206 "effect": -0.0743, 207 "n": 33 208 }, 209 "on": { 210 "mean": 0.9416, 211 "effect": 0.0051, 212 "n": 477 213 } 214 }, 215 "spread": 0.0795 216 }, 217 "tool_glob": { 218 "values": { 219 "off": { 220 "mean": 0.8733, 221 "effect": -0.0631, 222 "n": 30 223 }, 224 "on": { 225 "mean": 0.9404, 226 "effect": 0.0039, 227 "n": 480 228 } 229 }, 230 "spread": 0.0671 231 }, 232 "context_file": { 233 "values": { 234 "none": { 235 "mean": 0.9404, 236 "effect": 0.0039, 237 "n": 479 238 }, 239 "provided": { 240 "mean": 0.8758, 241 "effect": -0.0607, 242 "n": 31 243 } 244 }, 245 "spread": 0.0646 246 }, 247 "tool_read": { 248 "values": { 249 "off": { 250 "mean": 0.8774, 251 "effect": -0.0591, 252 "n": 31 253 }, 254 "on": { 255 "mean": 0.9403, 256 "effect": 0.0038, 257 "n": 479 258 } 259 }, 260 "spread": 0.0629 261 }, 262 "human_language": { 263 "values": { 264 "en": { 265 "mean": 0.94, 266 "effect": 0.0036, 267 "n": 481 268 }, 269 "es": { 270 "mean": 0.8776, 271 "effect": -0.0589, 272 "n": 29 273 } 274 }, 275 "spread": 0.0624 276 }, 277 "tool_edit": { 278 "values": { 279 "off": { 280 "mean": 0.88, 281 "effect": -0.0565, 282 "n": 35 283 }, 284 "on": { 285 "mean": 0.9406, 286 "effect": 0.0042, 287 "n": 475 288 } 289 }, 290 "spread": 0.0606 291 }, 292 "linter": { 293 "values": { 294 "off": { 295 "mean": 0.8821, 296 "effect": -0.0544, 297 "n": 39 298 }, 299 "on": { 300 "mean": 0.941, 301 "effect": 0.0045, 302 "n": 471 303 } 304 }, 305 "spread": 0.0589 306 }, 307 "language": { 308 "values": { 309 "javascript": { 310 "mean": 0.9905, 311 "effect": 0.054, 312 "n": 21 313 }, 314 "typescript": { 315 "mean": 0.932, 316 "effect": -0.0045, 317 "n": 469 318 }, 319 "unspecified": { 320 "mean": 0.985, 321 "effect": 0.0485, 322 "n": 20 323 } 324 }, 325 "spread": 0.0585 326 }, 327 "tool_grep": { 328 "values": { 329 "off": { 330 "mean": 0.8823, 331 "effect": -0.0542, 332 "n": 31 333 }, 334 "on": { 335 "mean": 0.94, 336 "effect": 0.0035, 337 "n": 479 338 } 339 }, 340 "spread": 0.0577 341 }, 342 "playwright": { 343 "values": { 344 "available": { 345 "mean": 0.9155, 346 "effect": -0.021, 347 "n": 165 348 }, 349 "instructed": { 350 "mean": 0.9727, 351 "effect": 0.0363, 352 "n": 11 353 }, 354 "off": { 355 "mean": 0.9457, 356 "effect": 0.0092, 357 "n": 334 358 } 359 }, 360 "spread": 0.0572 361 }, 362 "web_search": { 363 "values": { 364 "off": { 365 "mean": 0.8833, 366 "effect": -0.0531, 367 "n": 36 368 }, 369 "on": { 370 "mean": 0.9405, 371 "effect": 0.004, 372 "n": 474 373 } 374 }, 375 "spread": 0.0572 376 }, 377 "architecture": { 378 "values": { 379 "best_practices": { 380 "mean": 0.975, 381 "effect": 0.0385, 382 "n": 4 383 }, 384 "none": { 385 "mean": 0.9356, 386 "effect": -0.0008, 387 "n": 501 388 }, 389 "separation": { 390 "mean": 0.99, 391 "effect": 0.0535, 392 "n": 5 393 } 394 }, 395 "spread": 0.0544 396 }, 397 "prompt_style": { 398 "values": { 399 "detailed": { 400 "mean": 0.89, 401 "effect": -0.0465, 402 "n": 30 403 }, 404 "simple": { 405 "mean": 0.9394, 406 "effect": 0.0029, 407 "n": 480 408 } 409 }, 410 "spread": 0.0494 411 }, 412 "max_budget": { 413 "values": { 414 "high": { 415 "mean": 0.8917, 416 "effect": -0.0448, 417 "n": 24 418 }, 419 "low": { 420 "mean": 0.9387, 421 "effect": 0.0022, 422 "n": 486 423 } 424 }, 425 "spread": 0.047 426 }, 427 "design_guidance": { 428 "values": { 429 "none": { 430 "mean": 0.936, 431 "effect": -0.0005, 432 "n": 500 433 }, 434 "specific": { 435 "mean": 0.95, 436 "effect": 0.0135, 437 "n": 5 438 }, 439 "vague": { 440 "mean": 0.97, 441 "effect": 0.0335, 442 "n": 5 443 } 444 }, 445 "spread": 0.034 446 }, 447 "error_checking": { 448 "values": { 449 "none": { 450 "mean": 0.9363, 451 "effect": -0.0002, 452 "n": 506 453 }, 454 "self_verify": { 455 "mean": 0.9625, 456 "effect": 0.026, 457 "n": 4 458 } 459 }, 460 "spread": 0.0262 461 }, 462 "effort": { 463 "values": { 464 "high": { 465 "mean": 0.9363, 466 "effect": -0.0002, 467 "n": 491 468 }, 469 "max": { 470 "mean": 0.9421, 471 "effect": 0.0056, 472 "n": 19 473 } 474 }, 475 "spread": 0.0058 476 } 477 }