Record full run config in transcript - loop-benchmarking - Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.

commit 402389afc4dfd249919a7689faa430b7ce0bb9ac
parent 09fa08a840c08c51e66600238b66261f2e1e2422
Author: Brian Graham <brian@buildingbetterteams.de>
Date:   Fri,  3 Apr 2026 20:27:09 +0200

Record full run config in transcript

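Future runs write a harness config event (type "harness", subtype "config")
as the first transcript entry, recording model, effort, tool list, budget,
timeout, task, language, and prompt style. The transcript viewer renders it
as a compact summary line showing model, effort, language, budget, and tools.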

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M dashboard/src/components/TranscriptViewer.tsx  | 16 ++++++++++++++++
M harness/run.py  | 17 ++++++++++++++++-

2 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/dashboard/src/components/TranscriptViewer.tsx b/dashboard/src/components/TranscriptViewer.tsx
@@ -441,6 +441,22 @@ function renderEvent(event: TranscriptEvent, index: number): ReactNode {
     );
   }
 
+  // Harness config event
+  if (type === "harness" && event.subtype === "config") {
+    const tools = Array.isArray(event.tools) ? (event.tools as string[]).join(", ") : "";
+    return (
+      <EventCard key={index} borderColor="#4b5563" bgTint="rgba(75, 85, 99, 0.05)" compact>
+        <div style={{ fontSize: "0.7rem", fontFamily: theme.fontMono, color: theme.textMuted, display: "flex", flexWrap: "wrap", gap: "12px" }}>
+          <span>model: <span style={{ color: theme.text }}>{event.model as string}</span></span>
+          <span>effort: <span style={{ color: theme.text }}>{event.effort as string}</span></span>
+          <span>lang: <span style={{ color: theme.text }}>{event.language as string}</span></span>
+          <span>budget: <span style={{ color: theme.text }}>${event.max_budget_usd as number}</span></span>
+          <span>tools: <span style={{ color: theme.text }}>{tools}</span></span>
+        </div>
+      </EventCard>
+    );
+  }
+
   // Assistant message - contains content blocks
   if (type === "assistant" && event.message?.content) {
     const blocks = event.message.content;
diff --git a/harness/run.py b/harness/run.py
@@ -159,8 +159,23 @@ def invoke_claude(cell: dict, workspace: Path, run_dir: Path, project_dir: Path)
     transcript_path = run_dir / "transcript.jsonl"
     stderr_path = run_dir / "claude_stderr.log"
 
-    # Inject the prompt and context as the first transcript entries
+    # Inject harness metadata, prompt, and context as the first transcript entries
     with open(transcript_path, "w") as transcript_f:
+        # Run configuration (everything the harness set up)
+        config_event = {
+            "type": "harness",
+            "subtype": "config",
+            "model": model,
+            "effort": effort,
+            "tools": tools.split(","),
+            "max_budget_usd": budget,
+            "timeout_seconds": timeout,
+            "task": cell["task"],
+            "language": cell.get("language"),
+            "prompt_style": cell.get("prompt_style"),
+        }
+        transcript_f.write(json.dumps(config_event) + "\n")
+
         # The user's prompt
         prompt_event = {
             "type": "user",

	loop-benchmarking Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
	git clone https://git.shiptheloop.com/loop-benchmarking.git
	Log \| Files \| Refs \| README

M	dashboard/src/components/TranscriptViewer.tsx	\|	16	++++++++++++++++
M	harness/run.py	\|	17	++++++++++++++++-