loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

main_effects_wall_time.json (8556B)


      1 {
      2   "renderer": {
      3     "values": {
      4       "canvas": {
      5         "mean": 435.7143,
      6         "effect": -82.3289,
      7         "n": 7
      8       },
      9       "dom": {
     10         "mean": 410.4,
     11         "effect": -107.6431,
     12         "n": 5
     13       },
     14       "none": {
     15         "mean": 515.0205,
     16         "effect": -3.0226,
     17         "n": 487
     18       },
     19       "svg": {
     20         "mean": 549.0,
     21         "effect": 30.9569,
     22         "n": 7
     23       },
     24       "webgl": {
     25         "mean": 1110.5,
     26         "effect": 592.4569,
     27         "n": 4
     28       }
     29     },
     30     "spread": 700.1
     31   },
     32   "model": {
     33     "values": {
     34       "gemma-4-26b": {
     35         "mean": 838.3953,
     36         "effect": 320.3522,
     37         "n": 43
     38       },
     39       "glm-4.5-air": {
     40         "mean": 629.7073,
     41         "effect": 111.6642,
     42         "n": 41
     43       },
     44       "glm-4.7": {
     45         "mean": 521.0,
     46         "effect": 2.9569,
     47         "n": 81
     48       },
     49       "glm-5.1": {
     50         "mean": 533.8455,
     51         "effect": 15.8024,
     52         "n": 123
     53       },
     54       "haiku-4.5": {
     55         "mean": 228.5281,
     56         "effect": -289.515,
     57         "n": 89
     58       },
     59       "kimi-k2.5": {
     60         "mean": 674.6667,
     61         "effect": 156.6235,
     62         "n": 3
     63       },
     64       "minimax-m2.7": {
     65         "mean": 499.0,
     66         "effect": -19.0431,
     67         "n": 3
     68       },
     69       "opus-4.6": {
     70         "mean": 228.2308,
     71         "effect": -289.8124,
     72         "n": 52
     73       },
     74       "qwen-3.6-plus": {
     75         "mean": 743.7273,
     76         "effect": 225.6841,
     77         "n": 22
     78       },
     79       "sonnet-4.6": {
     80         "mean": 799.6038,
     81         "effect": 281.5606,
     82         "n": 53
     83       }
     84     },
     85     "spread": 610.1645
     86   },
     87   "architecture": {
     88     "values": {
     89       "best_practices": {
     90         "mean": 941.25,
     91         "effect": 423.2069,
     92         "n": 4
     93       },
     94       "none": {
     95         "mean": 516.3952,
     96         "effect": -1.6479,
     97         "n": 501
     98       },
     99       "separation": {
    100         "mean": 344.6,
    101         "effect": -173.4431,
    102         "n": 5
    103       }
    104     },
    105     "spread": 596.65
    106   },
    107   "design_guidance": {
    108     "values": {
    109       "none": {
    110         "mean": 511.946,
    111         "effect": -6.0971,
    112         "n": 500
    113       },
    114       "specific": {
    115         "mean": 687.4,
    116         "effect": 169.3569,
    117         "n": 5
    118       },
    119       "vague": {
    120         "mean": 958.4,
    121         "effect": 440.3569,
    122         "n": 5
    123       }
    124     },
    125     "spread": 446.454
    126   },
    127   "provider": {
    128     "values": {
    129       "anthropic": {
    130         "mean": 384.4639,
    131         "effect": -133.5792,
    132         "n": 194
    133       },
    134       "openrouter": {
    135         "mean": 787.8028,
    136         "effect": 269.7597,
    137         "n": 71
    138       },
    139       "zai": {
    140         "mean": 545.6408,
    141         "effect": 27.5977,
    142         "n": 245
    143       }
    144     },
    145     "spread": 403.3389
    146   },
    147   "error_checking": {
    148     "values": {
    149       "none": {
    150         "mean": 514.8913,
    151         "effect": -3.1518,
    152         "n": 506
    153       },
    154       "self_verify": {
    155         "mean": 916.75,
    156         "effect": 398.7069,
    157         "n": 4
    158       }
    159     },
    160     "spread": 401.8587
    161   },
    162   "context_noise": {
    163     "values": {
    164       "clean": {
    165         "mean": 531.4549,
    166         "effect": 13.4118,
    167         "n": 477
    168       },
    169       "lorem_100k": {
    170         "mean": 232.1667,
    171         "effect": -285.8765,
    172         "n": 6
    173       },
    174       "lorem_10k": {
    175         "mean": 245.6667,
    176         "effect": -272.3765,
    177         "n": 6
    178       },
    179       "lorem_1k": {
    180         "mean": 346.6667,
    181         "effect": -171.3765,
    182         "n": 3
    183       },
    184       "lorem_50k": {
    185         "mean": 618.8333,
    186         "effect": 100.7902,
    187         "n": 6
    188       },
    189       "wikipedia_100k": {
    190         "mean": 270.0,
    191         "effect": -248.0431,
    192         "n": 3
    193       },
    194       "wikipedia_10k": {
    195         "mean": 255.3333,
    196         "effect": -262.7098,
    197         "n": 3
    198       },
    199       "wikipedia_1k": {
    200         "mean": 238.3333,
    201         "effect": -279.7098,
    202         "n": 3
    203       },
    204       "wikipedia_50k": {
    205         "mean": 262.3333,
    206         "effect": -255.7098,
    207         "n": 3
    208       }
    209     },
    210     "spread": 386.6666
    211   },
    212   "strategy": {
    213     "values": {
    214       "creative_validate": {
    215         "mean": 782.0,
    216         "effect": 263.9569,
    217         "n": 8
    218       },
    219       "delegate": {
    220         "mean": 759.4286,
    221         "effect": 241.3854,
    222         "n": 7
    223       },
    224       "iterate": {
    225         "mean": 760.3636,
    226         "effect": 242.3205,
    227         "n": 11
    228       },
    229       "none": {
    230         "mean": 528.6133,
    231         "effect": 10.5702,
    232         "n": 300
    233       },
    234       "plan_first": {
    235         "mean": 801.4,
    236         "effect": 283.3569,
    237         "n": 10
    238       },
    239       "review": {
    240         "mean": 746.6,
    241         "effect": 228.5569,
    242         "n": 5
    243       },
    244       "split_work": {
    245         "mean": 780.4,
    246         "effect": 262.3569,
    247         "n": 5
    248       },
    249       "use_subagents": {
    250         "mean": 427.0305,
    251         "effect": -91.0126,
    252         "n": 164
    253       }
    254     },
    255     "spread": 374.3695
    256   },
    257   "playwright": {
    258     "values": {
    259       "available": {
    260         "mean": 419.0909,
    261         "effect": -98.9522,
    262         "n": 165
    263       },
    264       "instructed": {
    265         "mean": 775.6364,
    266         "effect": 257.5932,
    267         "n": 11
    268       },
    269       "off": {
    270         "mean": 558.4431,
    271         "effect": 40.4,
    272         "n": 334
    273       }
    274     },
    275     "spread": 356.5455
    276   },
    277   "context_file": {
    278     "values": {
    279       "none": {
    280         "mean": 531.2881,
    281         "effect": 13.245,
    282         "n": 479
    283       },
    284       "provided": {
    285         "mean": 313.3871,
    286         "effect": -204.656,
    287         "n": 31
    288       }
    289     },
    290     "spread": 217.901
    291   },
    292   "linter": {
    293     "values": {
    294       "off": {
    295         "mean": 348.1538,
    296         "effect": -169.8893,
    297         "n": 39
    298       },
    299       "on": {
    300         "mean": 532.1104,
    301         "effect": 14.0673,
    302         "n": 471
    303       }
    304     },
    305     "spread": 183.9566
    306   },
    307   "max_budget": {
    308     "values": {
    309       "high": {
    310         "mean": 360.875,
    311         "effect": -157.1681,
    312         "n": 24
    313       },
    314       "low": {
    315         "mean": 525.8045,
    316         "effect": 7.7614,
    317         "n": 486
    318       }
    319     },
    320     "spread": 164.9295
    321   },
    322   "effort": {
    323     "values": {
    324       "high": {
    325         "mean": 512.7475,
    326         "effect": -5.2957,
    327         "n": 491
    328       },
    329       "max": {
    330         "mean": 654.8947,
    331         "effect": 136.8516,
    332         "n": 19
    333       }
    334     },
    335     "spread": 142.1472
    336   },
    337   "prompt_style": {
    338     "values": {
    339       "detailed": {
    340         "mean": 386.0,
    341         "effect": -132.0431,
    342         "n": 30
    343       },
    344       "simple": {
    345         "mean": 526.2958,
    346         "effect": 8.2527,
    347         "n": 480
    348       }
    349     },
    350     "spread": 140.2958
    351   },
    352   "web_search": {
    353     "values": {
    354       "off": {
    355         "mean": 392.5,
    356         "effect": -125.5431,
    357         "n": 36
    358       },
    359       "on": {
    360         "mean": 527.5781,
    361         "effect": 9.5349,
    362         "n": 474
    363       }
    364     },
    365     "spread": 135.0781
    366   },
    367   "language": {
    368     "values": {
    369       "javascript": {
    370         "mean": 427.2857,
    371         "effect": -90.7574,
    372         "n": 21
    373       },
    374       "typescript": {
    375         "mean": 527.2921,
    376         "effect": 9.249,
    377         "n": 469
    378       },
    379       "unspecified": {
    380         "mean": 396.45,
    381         "effect": -121.5931,
    382         "n": 20
    383       }
    384     },
    385     "spread": 130.8421
    386   },
    387   "tool_grep": {
    388     "values": {
    389       "off": {
    390         "mean": 403.7419,
    391         "effect": -114.3012,
    392         "n": 31
    393       },
    394       "on": {
    395         "mean": 525.4405,
    396         "effect": 7.3974,
    397         "n": 479
    398       }
    399     },
    400     "spread": 121.6986
    401   },
    402   "tool_edit": {
    403     "values": {
    404       "off": {
    405         "mean": 413.1714,
    406         "effect": -104.8717,
    407         "n": 35
    408       },
    409       "on": {
    410         "mean": 525.7705,
    411         "effect": 7.7274,
    412         "n": 475
    413       }
    414     },
    415     "spread": 112.5991
    416   },
    417   "human_language": {
    418     "values": {
    419       "en": {
    420         "mean": 524.2599,
    421         "effect": 6.2167,
    422         "n": 481
    423       },
    424       "es": {
    425         "mean": 414.931,
    426         "effect": -103.1121,
    427         "n": 29
    428       }
    429     },
    430     "spread": 109.3289
    431   },
    432   "tool_read": {
    433     "values": {
    434       "off": {
    435         "mean": 420.9355,
    436         "effect": -97.1077,
    437         "n": 31
    438       },
    439       "on": {
    440         "mean": 524.3278,
    441         "effect": 6.2846,
    442         "n": 479
    443       }
    444     },
    445     "spread": 103.3923
    446   },
    447   "tool_write": {
    448     "values": {
    449       "off": {
    450         "mean": 422.7879,
    451         "effect": -95.2553,
    452         "n": 33
    453       },
    454       "on": {
    455         "mean": 524.6331,
    456         "effect": 6.59,
    457         "n": 477
    458       }
    459     },
    460     "spread": 101.8452
    461   },
    462   "tool_glob": {
    463     "values": {
    464       "off": {
    465         "mean": 429.1667,
    466         "effect": -88.8765,
    467         "n": 30
    468       },
    469       "on": {
    470         "mean": 523.5979,
    471         "effect": 5.5548,
    472         "n": 480
    473       }
    474     },
    475     "spread": 94.4312
    476   }
    477 }

Impressum · Datenschutz