main_effects_score.json (8316B)
1 { 2 "model": { 3 "values": { 4 "gemma-4-26b": { 5 "mean": 0.4963, 6 "effect": -0.2352, 7 "n": 43 8 }, 9 "glm-4.5-air": { 10 "mean": 0.67, 11 "effect": -0.0615, 12 "n": 41 13 }, 14 "glm-4.7": { 15 "mean": 0.7107, 16 "effect": -0.0207, 17 "n": 81 18 }, 19 "glm-5.1": { 20 "mean": 0.754, 21 "effect": 0.0225, 22 "n": 123 23 }, 24 "haiku-4.5": { 25 "mean": 0.7558, 26 "effect": 0.0243, 27 "n": 89 28 }, 29 "kimi-k2.5": { 30 "mean": 0.5667, 31 "effect": -0.1648, 32 "n": 3 33 }, 34 "minimax-m2.7": { 35 "mean": 0.725, 36 "effect": -0.0065, 37 "n": 3 38 }, 39 "opus-4.6": { 40 "mean": 0.8256, 41 "effect": 0.0941, 42 "n": 52 43 }, 44 "qwen-3.6-plus": { 45 "mean": 0.7148, 46 "effect": -0.0167, 47 "n": 22 48 }, 49 "sonnet-4.6": { 50 "mean": 0.8327, 51 "effect": 0.1013, 52 "n": 53 53 } 54 }, 55 "spread": 0.3364 56 }, 57 "strategy": { 58 "values": { 59 "creative_validate": { 60 "mean": 0.5981, 61 "effect": -0.1333, 62 "n": 8 63 }, 64 "delegate": { 65 "mean": 0.7086, 66 "effect": -0.0229, 67 "n": 7 68 }, 69 "iterate": { 70 "mean": 0.7318, 71 "effect": 0.0003, 72 "n": 11 73 }, 74 "none": { 75 "mean": 0.7085, 76 "effect": -0.023, 77 "n": 300 78 }, 79 "plan_first": { 80 "mean": 0.7115, 81 "effect": -0.02, 82 "n": 10 83 }, 84 "review": { 85 "mean": 0.705, 86 "effect": -0.0265, 87 "n": 5 88 }, 89 "split_work": { 90 "mean": 0.536, 91 "effect": -0.1955, 92 "n": 5 93 }, 94 "use_subagents": { 95 "mean": 0.7889, 96 "effect": 0.0574, 97 "n": 164 98 } 99 }, 100 "spread": 0.2529 101 }, 102 "provider": { 103 "values": { 104 "anthropic": { 105 "mean": 0.7955, 106 "effect": 0.064, 107 "n": 194 108 }, 109 "openrouter": { 110 "mean": 0.5766, 111 "effect": -0.1549, 112 "n": 71 113 }, 114 "zai": { 115 "mean": 0.7256, 116 "effect": -0.0058, 117 "n": 245 118 } 119 }, 120 "spread": 0.2189 121 }, 122 "playwright": { 123 "values": { 124 "available": { 125 "mean": 0.7907, 126 "effect": 0.0592, 127 "n": 165 128 }, 129 "instructed": { 130 "mean": 0.5918, 131 "effect": -0.1397, 132 "n": 11 133 }, 134 "off": { 135 "mean": 0.7068, 136 "effect": -0.0246, 137 "n": 334 138 } 139 }, 140 "spread": 0.1989 141 }, 142 "context_noise": { 143 "values": { 144 "clean": { 145 "mean": 0.731, 146 "effect": -0.0004, 147 "n": 477 148 }, 149 "lorem_100k": { 150 "mean": 0.6842, 151 "effect": -0.0473, 152 "n": 6 153 }, 154 "lorem_10k": { 155 "mean": 0.7492, 156 "effect": 0.0177, 157 "n": 6 158 }, 159 "lorem_1k": { 160 "mean": 0.7783, 161 "effect": 0.0469, 162 "n": 3 163 }, 164 "lorem_50k": { 165 "mean": 0.6742, 166 "effect": -0.0573, 167 "n": 6 168 }, 169 "wikipedia_100k": { 170 "mean": 0.69, 171 "effect": -0.0415, 172 "n": 3 173 }, 174 "wikipedia_10k": { 175 "mean": 0.7483, 176 "effect": 0.0169, 177 "n": 3 178 }, 179 "wikipedia_1k": { 180 "mean": 0.85, 181 "effect": 0.1185, 182 "n": 3 183 }, 184 "wikipedia_50k": { 185 "mean": 0.835, 186 "effect": 0.1035, 187 "n": 3 188 } 189 }, 190 "spread": 0.1758 191 }, 192 "architecture": { 193 "values": { 194 "best_practices": { 195 "mean": 0.795, 196 "effect": 0.0635, 197 "n": 4 198 }, 199 "none": { 200 "mean": 0.7319, 201 "effect": 0.0004, 202 "n": 501 203 }, 204 "separation": { 205 "mean": 0.638, 206 "effect": -0.0935, 207 "n": 5 208 } 209 }, 210 "spread": 0.157 211 }, 212 "design_guidance": { 213 "values": { 214 "none": { 215 "mean": 0.7323, 216 "effect": 0.0008, 217 "n": 500 218 }, 219 "specific": { 220 "mean": 0.62, 221 "effect": -0.1115, 222 "n": 5 223 }, 224 "vague": { 225 "mean": 0.762, 226 "effect": 0.0305, 227 "n": 5 228 } 229 }, 230 "spread": 0.142 231 }, 232 "renderer": { 233 "values": { 234 "canvas": { 235 "mean": 0.6971, 236 "effect": -0.0343, 237 "n": 7 238 }, 239 "dom": { 240 "mean": 0.749, 241 "effect": 0.0175, 242 "n": 5 243 }, 244 "none": { 245 "mean": 0.7333, 246 "effect": 0.0018, 247 "n": 487 248 }, 249 "svg": { 250 "mean": 0.6364, 251 "effect": -0.095, 252 "n": 7 253 }, 254 "webgl": { 255 "mean": 0.7137, 256 "effect": -0.0177, 257 "n": 4 258 } 259 }, 260 "spread": 0.1126 261 }, 262 "prompt_style": { 263 "values": { 264 "detailed": { 265 "mean": 0.8182, 266 "effect": 0.0867, 267 "n": 30 268 }, 269 "simple": { 270 "mean": 0.7261, 271 "effect": -0.0054, 272 "n": 480 273 } 274 }, 275 "spread": 0.0921 276 }, 277 "language": { 278 "values": { 279 "javascript": { 280 "mean": 0.8033, 281 "effect": 0.0719, 282 "n": 21 283 }, 284 "typescript": { 285 "mean": 0.7257, 286 "effect": -0.0058, 287 "n": 469 288 }, 289 "unspecified": { 290 "mean": 0.7923, 291 "effect": 0.0608, 292 "n": 20 293 } 294 }, 295 "spread": 0.0776 296 }, 297 "human_language": { 298 "values": { 299 "en": { 300 "mean": 0.7282, 301 "effect": -0.0032, 302 "n": 481 303 }, 304 "es": { 305 "mean": 0.7853, 306 "effect": 0.0539, 307 "n": 29 308 } 309 }, 310 "spread": 0.0571 311 }, 312 "context_file": { 313 "values": { 314 "none": { 315 "mean": 0.7284, 316 "effect": -0.0031, 317 "n": 479 318 }, 319 "provided": { 320 "mean": 0.7792, 321 "effect": 0.0477, 322 "n": 31 323 } 324 }, 325 "spread": 0.0508 326 }, 327 "web_search": { 328 "values": { 329 "off": { 330 "mean": 0.7747, 331 "effect": 0.0433, 332 "n": 36 333 }, 334 "on": { 335 "mean": 0.7282, 336 "effect": -0.0033, 337 "n": 474 338 } 339 }, 340 "spread": 0.0465 341 }, 342 "tool_edit": { 343 "values": { 344 "off": { 345 "mean": 0.7679, 346 "effect": 0.0364, 347 "n": 35 348 }, 349 "on": { 350 "mean": 0.7288, 351 "effect": -0.0027, 352 "n": 475 353 } 354 }, 355 "spread": 0.0391 356 }, 357 "tool_grep": { 358 "values": { 359 "off": { 360 "mean": 0.7668, 361 "effect": 0.0353, 362 "n": 31 363 }, 364 "on": { 365 "mean": 0.7292, 366 "effect": -0.0023, 367 "n": 479 368 } 369 }, 370 "spread": 0.0376 371 }, 372 "max_budget": { 373 "values": { 374 "high": { 375 "mean": 0.7583, 376 "effect": 0.0269, 377 "n": 24 378 }, 379 "low": { 380 "mean": 0.7301, 381 "effect": -0.0013, 382 "n": 486 383 } 384 }, 385 "spread": 0.0282 386 }, 387 "tool_read": { 388 "values": { 389 "off": { 390 "mean": 0.7573, 391 "effect": 0.0258, 392 "n": 31 393 }, 394 "on": { 395 "mean": 0.7298, 396 "effect": -0.0017, 397 "n": 479 398 } 399 }, 400 "spread": 0.0275 401 }, 402 "error_checking": { 403 "values": { 404 "none": { 405 "mean": 0.7316, 406 "effect": 0.0002, 407 "n": 506 408 }, 409 "self_verify": { 410 "mean": 0.71, 411 "effect": -0.0215, 412 "n": 4 413 } 414 }, 415 "spread": 0.0216 416 }, 417 "effort": { 418 "values": { 419 "high": { 420 "mean": 0.7323, 421 "effect": 0.0008, 422 "n": 491 423 }, 424 "max": { 425 "mean": 0.7111, 426 "effect": -0.0204, 427 "n": 19 428 } 429 }, 430 "spread": 0.0212 431 }, 432 "linter": { 433 "values": { 434 "off": { 435 "mean": 0.7396, 436 "effect": 0.0081, 437 "n": 39 438 }, 439 "on": { 440 "mean": 0.7308, 441 "effect": -0.0007, 442 "n": 471 443 } 444 }, 445 "spread": 0.0088 446 }, 447 "tool_glob": { 448 "values": { 449 "off": { 450 "mean": 0.7267, 451 "effect": -0.0048, 452 "n": 30 453 }, 454 "on": { 455 "mean": 0.7318, 456 "effect": 0.0003, 457 "n": 480 458 } 459 }, 460 "spread": 0.0051 461 }, 462 "tool_write": { 463 "values": { 464 "off": { 465 "mean": 0.7341, 466 "effect": 0.0026, 467 "n": 33 468 }, 469 "on": { 470 "mean": 0.7313, 471 "effect": -0.0002, 472 "n": 477 473 } 474 }, 475 "spread": 0.0028 476 } 477 }