loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

commit 402389afc4dfd249919a7689faa430b7ce0bb9ac
parent 09fa08a840c08c51e66600238b66261f2e1e2422
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Fri,  3 Apr 2026 20:27:09 +0200

Record full run config in transcript

Future runs inject a harness/config event as the first transcript entry
with model, effort, tool list, budget, timeout, task, and language.
Transcript viewer renders it as a compact config summary line.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M dashboard/src/components/TranscriptViewer.tsx | 16 ++++++++++++++++
M harness/run.py | 17 ++++++++++++++++-
2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/dashboard/src/components/TranscriptViewer.tsx b/dashboard/src/components/TranscriptViewer.tsx @@ -441,6 +441,22 @@ function renderEvent(event: TranscriptEvent, index: number): ReactNode { ); } + // Harness config event + if (type === "harness" && event.subtype === "config") { + const tools = Array.isArray(event.tools) ? (event.tools as string[]).join(", ") : ""; + return ( + <EventCard key={index} borderColor="#4b5563" bgTint="rgba(75, 85, 99, 0.05)" compact> + <div style={{ fontSize: "0.7rem", fontFamily: theme.fontMono, color: theme.textMuted, display: "flex", flexWrap: "wrap", gap: "12px" }}> + <span>model: <span style={{ color: theme.text }}>{event.model as string}</span></span> + <span>effort: <span style={{ color: theme.text }}>{event.effort as string}</span></span> + <span>lang: <span style={{ color: theme.text }}>{event.language as string}</span></span> + <span>budget: <span style={{ color: theme.text }}>${event.max_budget_usd as number}</span></span> + <span>tools: <span style={{ color: theme.text }}>{tools}</span></span> + </div> + </EventCard> + ); + } + // Assistant message - contains content blocks if (type === "assistant" && event.message?.content) { const blocks = event.message.content; diff --git a/harness/run.py b/harness/run.py @@ -159,8 +159,23 @@ def invoke_claude(cell: dict, workspace: Path, run_dir: Path, project_dir: Path) transcript_path = run_dir / "transcript.jsonl" stderr_path = run_dir / "claude_stderr.log" - # Inject the prompt and context as the first transcript entries + # Inject harness metadata, prompt, and context as the first transcript entries with open(transcript_path, "w") as transcript_f: + # Run configuration (everything the harness set up) + config_event = { + "type": "harness", + "subtype": "config", + "model": model, + "effort": effort, + "tools": tools.split(","), + "max_budget_usd": budget, + "timeout_seconds": timeout, + "task": cell["task"], + "language": cell.get("language"), + "prompt_style": 
cell.get("prompt_style"), + } + transcript_f.write(json.dumps(config_event) + "\n") + # The user's prompt prompt_event = { "type": "user",

Impressum · Datenschutz