loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

commit 724f4222b877c7ff2bd457c2bac60ec69eb48ab9
parent 4393657e0353aa1bdd0955de6ec9090238db8747
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Mon,  6 Apr 2026 17:11:12 +0200

Add short_id, short_cell_id, claude_version to analysis skip keys

These metadata fields should not be treated as experiment axes.
Also skip sub_agents in dashboard analysis (replaced by strategy).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M dashboard/src/lib/analysis.ts | 4 ++++
M harness/lib/experiment_design.py | 1 +
2 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/dashboard/src/lib/analysis.ts b/dashboard/src/lib/analysis.ts
@@ -58,6 +58,10 @@ const SKIP_KEYS = new Set([
   "completed_at",
   "wall_time_seconds",
   "exit_code",
+  "short_id",
+  "short_cell_id",
+  "claude_version",
+  "sub_agents",
 ]);
 
 type MetricExtractor = (run: Run) => number | null;
diff --git a/harness/lib/experiment_design.py b/harness/lib/experiment_design.py
@@ -310,6 +310,7 @@ def analyze_main_effects(results_dir, metric="score"):
         "task", "cell_id", "run_id", "run_number", "runs_per_cell",
         "max_budget_usd", "timeout_seconds", "base_tools",
         "started_at", "completed_at", "wall_time_seconds", "exit_code",
+        "short_id", "short_cell_id", "claude_version",
     }
     axis_names = sorted(meta_keys - skip_keys)

Impressum · Datenschutz