loop-benchmarking

Controlled experiments across agentic coding configurations: same task, one variable changed at a time, to show what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

settings.local.json (3793B)


      1 {
      2   "permissions": {
      3     "allow": [
      4       "Bash(apt-cache search:*)",
      5       "Bash(yq --version)",
      6       "Bash(pnpm --version)",
      7       "Bash(claude:*)",
      8       "Bash(yq --help)",
      9       "Bash(yq -r '.items[]')",
     10       "Bash(/root/loop-benchmarking/harness/lib/get-oauth-token.sh)",
     11       "Bash(pkill -f \"astro preview\")",
     12       "Bash(xargs jq:*)",
     13       "Bash(mkdir -p /root/loop-benchmarking/.git/hooks)",
     14       "Bash(kill 1351559 1378243)",
     15       "Bash([ ! -f /tmp/claude-0/-root-loop-benchmarking/1bb44510-b024-4c2b-8a3e-c5676349c380/tasks/af3768549ec6dfefd.completed ])",
     16       "Bash(ps aux:*)",
     17       "Bash(pkill -f \"serve.*--no-clipboard\")",
     18       "Bash(pkill -9 -f \"serve.*--no-clipboard\")",
     19       "Bash(pkill -9 -f \"sonar-scanner\")",
     20       "Bash(pkill -9 -f \"node.*reeval\")",
     21       "Bash(kill -9 1988554 1988687 1988688)",
     22       "Bash(nohup litellm:*)",
     23       "Bash(kill %1)",
     24       "Bash(pkill -f \"debug-bot\")",
     25       "Bash(pkill -f \"serve -l 46197\")",
     26       "Bash(pkill -f \"http.server\")",
     27       "Bash(pkill -f debug-bot)",
     28       "Bash(pkill -f \"http.server 19123\")",
     29       "Bash(pkill -f \"http.server 39483\")",
     30       "Bash(pkill -f \"http.server 38533\")",
     31       "Bash(pkill -f \"http.server 46197\")",
     32       "Bash(kill 2848476 2855159)",
     33       "Bash(kill 2616228 2616253)",
     34       "Bash(ln -s /root/loop-benchmarking/artifacts /root/loop-benchmarking/dashboard/public/artifacts)",
     35       "Bash(GAME_URL=\"http://127.0.0.1:4321/artifacts/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=en_lang=ts_lint=on_budget=high_model=opus46_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1/index.html\" REPORT_OUTPUT_PATH=\"/tmp/bot-test-opus2.json\" npx playwright:*)",
     36       "Bash(xargs -r kill -9)",
     37       "Bash(pkill -9 -f chromium)",
     38       "Bash(pkill -9 -f \"test-canvas\")",
     39       "Bash(DISPLAY=:99 node:*)",
     40       "Bash(lspci)",
     41       "Bash(dpkg -l)",
     42       "Bash(apt list:*)",
     43       "Read(//dev/dri/**)",
     44       "Read(//dev/**)",
     45       "Bash(chown root:root /dev/dri/card0 /dev/dri/renderD128)",
     46       "Bash(usermod -aG render,video root)",
     47       "Bash(setfacl -m u:root:rw /dev/dri/renderD128)",
     48       "Bash(pkill -f \"run-dom-tests\")",
     49       "Bash(pkill -f \"playwright.*gameplay-bot\")",
     50       "Bash(pkill -f playwright)",
     51       "Bash(WORKSPACE_PATH=\"artifacts/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1\" REPORT_OUTPUT_PATH=\"/tmp/bot-interactive-test2.json\" npx playwright test --config tasks/tetris/eval/playwright.config.ts)",
     52       "Bash(WORKSPACE_PATH=\"artifacts/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1\" REPORT_OUTPUT_PATH=\"/tmp/bot-interactive-test3.json\" npx playwright test --config tasks/tetris/eval/playwright.config.ts)",
     53       "Bash(WORKSPACE_PATH=\"artifacts/tetris_arch=none_ctx=none_noise=clean_dsgn=none_eff=high_echk=none_hlang=es_lang=ts_lint=on_budget=low_model=haiku45_pw=avail_prompt=simple_rndr=none_strat=usub_tst=none_tedit=on_tglob=on_tgrep=on_tread=on_twrite=on_web=on_run1\" REPORT_OUTPUT_PATH=\"/tmp/v2-e2e04e75.json\" timeout 420 npx playwright test --config tasks/tetris/eval/gameplay-bot-v2/playwright.config.ts)",
     54       "Bash([ -f /tmp/v2-e2e04e75.json ])",
     55       "Bash(pkill -f \"reeval\\\\|clean-and-reeval\\\\|analyze-and-push\" pkill -f \"serve.*--no-clipboard\" pkill -f \"playwright\" sleep 2 ps aux)",
     56       "Bash([ -f /tmp/v2-canvas-test.json ])"
     57     ]
     58   }
     59 }

Impressum · Datenschutz