commit 4c1ca47d2ad560fc2037407b1b702e3ec33d9497
parent afa692e0d3097240748b70e991a55fe00a7fb034
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Mon, 6 Apr 2026 10:44:17 +0200
Add sonarqube metric to analysis pipeline, fix metric labels
- experiment_design.py: added sonarqube and build_quality extractors
- clean-and-reeval.py: added sonarqube and build_quality to analysis metrics
- Insights.tsx: metric labels updated (Overall -> Outcome, Code Quality = SonarQube,
Build Quality = lint/typecheck, Code Analysis = our homebrew analyzer)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
3 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/dashboard/src/components/Insights.tsx b/dashboard/src/components/Insights.tsx
@@ -9,11 +9,12 @@ interface InsightsProps {
}
const METRICS = [
- { key: "score", label: "Overall" },
+ { key: "score", label: "Outcome" },
{ key: "gameplay", label: "Gameplay" },
- { key: "code_quality", label: "Code" },
+ { key: "sonarqube", label: "Code Quality" },
+ { key: "quality", label: "Build Quality" },
{ key: "structural", label: "Structural" },
- { key: "quality", label: "Lint/Types" },
+ { key: "code_quality", label: "Code Analysis" },
{ key: "transcript", label: "Agent Eff." },
{ key: "cost", label: "Cost" },
{ key: "turns", label: "Turns" },
diff --git a/harness/clean-and-reeval.py b/harness/clean-and-reeval.py
@@ -117,7 +117,8 @@ def run_analysis():
metrics = [
"score", "cost", "turns", "wall_time",
- "gameplay", "code_quality", "structural", "transcript",
+ "gameplay", "sonarqube", "code_quality",
+ "structural", "transcript", "build_quality",
]
for metric in metrics:
effects = analyze_main_effects(str(RESULTS_DIR), metric)
diff --git a/harness/lib/experiment_design.py b/harness/lib/experiment_design.py
@@ -535,6 +535,18 @@ def _extract_metric(run, metric):
val = t.get("score")
return val if isinstance(val, (int, float)) else None
return None
+ elif metric == "sonarqube":
+ sq = run["eval"].get("sonarqube", {})
+ if isinstance(sq, dict):
+ val = sq.get("score")
+ return val if isinstance(val, (int, float)) else None
+ return None
+ elif metric == "build_quality":
+ q = run["eval"].get("quality", {})
+ if isinstance(q, dict):
+ val = q.get("score")
+ return val if isinstance(val, (int, float)) else None
+ return None
return None