commit b087659035807b7db06e48ad8dd9b9bd6d911aaa
parent 1862a787fcf22188e9681812d77b7276db281f7b
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Mon, 6 Apr 2026 09:25:16 +0200
Add SonarQube integration for code quality analysis
sonarqube-scan.py runs sonar-scanner against game workspaces and pulls
metrics via API. Requires SonarQube running at localhost:9000.
Metrics captured:
- Bugs, vulnerabilities, code smells (count)
- Cognitive complexity (a better measure of how hard code is to understand than cyclomatic complexity)
- Duplication percentage
- Technical debt (minutes)
- Maintainability/Reliability/Security ratings (A-E)
- Composite 0-1 score
Tested: haiku JS game scored 0.77 (1 bug, complexity 90, A maintainability)
Not yet wired into the harness - needs SonarQube running during eval.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
1 file changed, 185 insertions(+), 0 deletions(-)
diff --git a/tasks/tetris/eval/sonarqube-scan.py b/tasks/tetris/eval/sonarqube-scan.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python3
+"""SonarQube code analysis for generated Tetris implementations.
+
+Runs sonar-scanner against the workspace and pulls metrics via API.
+Requires SonarQube running at localhost:9000 and an API token stored in
+~/.sonarqube-token.
+
+Usage: python3 sonarqube-scan.py [workspace_path] [project_key]
+       (defaults: current directory, "tetris-eval")
+Output: JSON to stdout
+"""
+
+import json
+import subprocess
+import sys
+import time
+import urllib.request
+import urllib.error
+from pathlib import Path
+
+
+# Base URL of the SonarQube server; passed to sonar-scanner as the host URL
+# and used as the prefix for every Web API request in this script.
+SONAR_URL = "http://localhost:9000"
+# File in the user's home directory that holds the SonarQube API token.
+SONAR_TOKEN_FILE = Path.home() / ".sonarqube-token"
+
+
+def get_token() -> str:
+    """Return the SonarQube API token stored in ``SONAR_TOKEN_FILE``.
+
+    Returns:
+        The file's contents with surrounding whitespace stripped, or an
+        empty string when the file is missing or cannot be read.
+    """
+    # Read directly instead of checking exists() first: this avoids a race
+    # between the check and the read, and also covers paths that exist but
+    # cannot be read (permission denied, path is a directory).
+    try:
+        return SONAR_TOKEN_FILE.read_text().strip()
+    except OSError:
+        return ""
+
+
+def scan(workspace: Path, project_key: str, token: str) -> bool:
+    """Run sonar-scanner against ``workspace``.
+
+    Args:
+        workspace: Directory to analyse; used as the scanner's working
+            directory, with ``sonar.sources`` set to ``.``.
+        project_key: SonarQube project key the analysis is published under.
+        token: SonarQube API token handed to the scanner.
+
+    Returns:
+        True when the scanner's stdout contains "EXECUTION SUCCESS". False
+        otherwise, including when the scanner cannot be started or does not
+        finish within 60 seconds.
+    """
+    # NOTE(review): the token is passed on the command line, so it is visible
+    # in the process list while the scanner runs. sonar-scanner may also be
+    # able to read it from the SONAR_TOKEN environment variable - TODO confirm.
+    cmd = [
+        "sonar-scanner",
+        f"-Dsonar.projectKey={project_key}",
+        "-Dsonar.sources=.",
+        f"-Dsonar.host.url={SONAR_URL}",
+        f"-Dsonar.token={token}",
+        "-Dsonar.exclusions=**/node_modules/**,**/package-lock.json,**/report/**",
+        "-Dsonar.scm.disabled=true",
+    ]
+    try:
+        result = subprocess.run(
+            cmd, cwd=workspace, capture_output=True, text=True, timeout=60
+        )
+    except subprocess.TimeoutExpired:
+        # Do not print the exception itself: its message contains the full
+        # command line, which includes the token.
+        print("sonar-scanner timed out after 60 seconds", file=sys.stderr)
+        return False
+    except OSError as exc:
+        # Scanner binary not installed, or the workspace directory is missing.
+        print(f"sonar-scanner could not be started: {exc}", file=sys.stderr)
+        return False
+    return "EXECUTION SUCCESS" in result.stdout
+
+
+def wait_for_analysis(project_key: str, token: str, timeout: int = 30) -> bool:
+    """Poll SonarQube until no background task is pending for the project.
+
+    Queries ``/api/ce/component`` about once per second and inspects the
+    ``queue`` and ``current`` tasks in the response.
+
+    Args:
+        project_key: SonarQube project key to check.
+        token: SonarQube API token, sent as the user name of HTTP Basic auth.
+        timeout: Maximum time to wait, in seconds.
+
+    Returns:
+        True as soon as no task is PENDING or IN_PROGRESS. False when the
+        timeout elapses first, including when every request failed.
+    """
+    import base64
+    import urllib.parse
+
+    auth = base64.b64encode(f"{token}:".encode()).decode()
+    headers = {"Authorization": f"Basic {auth}"}
+    # Encode the key so characters such as '&' or spaces cannot alter the query.
+    query = urllib.parse.urlencode({"component": project_key})
+    url = f"{SONAR_URL}/api/ce/component?{query}"
+
+    # Track wall-clock time rather than counting iterations: a single request
+    # may block for several seconds, which would stretch an iteration-based
+    # budget far beyond ``timeout``.
+    deadline = time.monotonic() + timeout
+    while True:
+        remaining = deadline - time.monotonic()
+        if remaining <= 0:
+            return False
+        try:
+            req = urllib.request.Request(url, headers=headers)
+            with urllib.request.urlopen(req, timeout=min(5, remaining)) as resp:
+                data = json.loads(resp.read())
+        except (OSError, ValueError):
+            # Network/HTTP errors and malformed JSON are treated as "not ready
+            # yet": polling is best effort, so retry until the deadline.
+            data = None
+        if isinstance(data, dict):
+            tasks = (data.get("queue") or []) + [data.get("current")]
+            pending = any(
+                t.get("status") in ("PENDING", "IN_PROGRESS")
+                for t in tasks
+                if isinstance(t, dict)
+            )
+            if not pending:
+                return True
+        time.sleep(1)
+
+
+def get_metrics(project_key: str, token: str) -> dict:
+    """Fetch the project's measures from the SonarQube Web API.
+
+    Args:
+        project_key: SonarQube project key to query.
+        token: SonarQube API token, sent as the user name of HTTP Basic auth.
+
+    Returns:
+        A dict mapping metric key to its value as a float. On any failure,
+        or when SonarQube returns no measures at all, a dict with the single
+        key ``"error"`` describing the problem.
+    """
+    import base64
+    import urllib.parse
+
+    auth = base64.b64encode(f"{token}:".encode()).decode()
+    headers = {"Authorization": f"Basic {auth}"}
+
+    metrics = [
+        "bugs", "vulnerabilities", "code_smells",
+        "cognitive_complexity", "duplicated_lines_density",
+        "ncloc", "sqale_rating", "reliability_rating",
+        "security_rating", "sqale_index",
+    ]
+    # Encode the parameters so an unusual project key cannot alter the query.
+    query = urllib.parse.urlencode(
+        {"component": project_key, "metricKeys": ",".join(metrics)}
+    )
+
+    try:
+        req = urllib.request.Request(
+            f"{SONAR_URL}/api/measures/component?{query}",
+            headers=headers,
+        )
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            data = json.loads(resp.read())
+        measures = data.get("component", {}).get("measures", [])
+        # Skip measures without a "value" so one odd entry does not discard
+        # the rest of the response.
+        values = {
+            m["metric"]: float(m["value"]) for m in measures if "value" in m
+        }
+    except Exception as e:
+        # Boundary of the API call: every failure is reported to the caller
+        # as an error payload instead of an exception.
+        return {"error": str(e)}
+
+    if not values:
+        # An empty result would otherwise read as "zero issues" downstream
+        # and be scored as perfect, although nothing was measured.
+        return {"error": "SonarQube returned no measures for the project"}
+    return values
+
+
+def compute_score(metrics: dict) -> float:
+    """Turn SonarQube metrics into a quality score between 0 and 1.
+
+    Scoring starts at 100 points, subtracts the penalties below, clamps the
+    result to the 0-100 range and divides by 100. Missing metrics count as 0.
+    A ``metrics`` dict containing an ``"error"`` key always scores 0.0.
+
+    Penalties:
+        - bugs: 15 points each, at most 30
+        - code smells: 3 points each, at most 20
+        - vulnerabilities: 20 points each, at most 40
+        - cognitive complexity: 15 points above 100, else 5 points above 50
+        - duplicated lines: 10 points above 10%, else 5 points above 5%
+    """
+    if "error" in metrics:
+        return 0.0
+
+    points = 100.0
+
+    # Per-issue penalties with a ceiling: (metric key, points per issue, cap).
+    capped_penalties = (
+        ("bugs", 15, 30),
+        ("code_smells", 3, 20),
+        ("vulnerabilities", 20, 40),
+    )
+    for key, per_issue, cap in capped_penalties:
+        points -= min(metrics.get(key, 0) * per_issue, cap)
+
+    # Tiered penalty for overall cognitive complexity.
+    cognitive = metrics.get("cognitive_complexity", 0)
+    points -= 15 if cognitive > 100 else 5 if cognitive > 50 else 0
+
+    # Tiered penalty for the percentage of duplicated lines.
+    duplicated_pct = metrics.get("duplicated_lines_density", 0)
+    points -= 10 if duplicated_pct > 10 else 5 if duplicated_pct > 5 else 0
+
+    clamped = max(0.0, min(100.0, points))
+    return clamped / 100.0
+
+
+def main():
+    """Scan a workspace with SonarQube and print the result as JSON to stdout.
+
+    Command-line arguments (both optional):
+        1. workspace path, defaulting to the current directory
+        2. SonarQube project key, defaulting to "tetris-eval"
+
+    On success the output holds the issue counts, complexity, size,
+    duplication, technical debt, the three letter ratings and the 0-1 score.
+    On any failure the output is ``{"error": <message>, "score": 0}``.
+    Warnings go to stderr so stdout always stays a single JSON document.
+    """
+    workspace = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(".")
+    project_key = sys.argv[2] if len(sys.argv) > 2 else "tetris-eval"
+
+    if not workspace.is_dir():
+        print(json.dumps({"error": f"workspace not found: {workspace}", "score": 0}))
+        return
+
+    token = get_token()
+    if not token:
+        print(json.dumps({"error": "no SonarQube token found", "score": 0}))
+        return
+
+    # Check if SonarQube is running; the response body is not needed, but the
+    # connection is closed explicitly instead of being leaked.
+    try:
+        with urllib.request.urlopen(f"{SONAR_URL}/api/system/status", timeout=3):
+            pass
+    except Exception:
+        print(json.dumps({"error": "SonarQube not running at localhost:9000", "score": 0}))
+        return
+
+    # Run scan
+    if not scan(workspace, project_key, token):
+        print(json.dumps({"error": "sonar-scanner failed", "score": 0}))
+        return
+
+    # Wait for processing. A timeout is not fatal because the measures may
+    # still be readable, but it is reported rather than silently ignored.
+    if not wait_for_analysis(project_key, token):
+        print(
+            "warning: SonarQube analysis still pending after timeout; "
+            "metrics may be incomplete",
+            file=sys.stderr,
+        )
+
+    # Get metrics. Surface a fetch failure as an error instead of printing an
+    # all-zero report that looks like a real (and very bad) analysis.
+    metrics = get_metrics(project_key, token)
+    if "error" in metrics:
+        print(json.dumps({"error": metrics["error"], "score": 0}))
+        return
+    score = compute_score(metrics)
+
+    # Rating labels (SonarQube uses 1-5 where 1=A, 5=E)
+    rating_labels = {1.0: "A", 2.0: "B", 3.0: "C", 4.0: "D", 5.0: "E"}
+
+    result = {
+        "bugs": int(metrics.get("bugs", 0)),
+        "vulnerabilities": int(metrics.get("vulnerabilities", 0)),
+        "code_smells": int(metrics.get("code_smells", 0)),
+        "cognitive_complexity": int(metrics.get("cognitive_complexity", 0)),
+        "lines_of_code": int(metrics.get("ncloc", 0)),
+        "duplication_pct": metrics.get("duplicated_lines_density", 0),
+        "tech_debt_minutes": int(metrics.get("sqale_index", 0)),
+        "maintainability": rating_labels.get(metrics.get("sqale_rating", 0), "?"),
+        "reliability": rating_labels.get(metrics.get("reliability_rating", 0), "?"),
+        "security": rating_labels.get(metrics.get("security_rating", 0), "?"),
+        "score": round(score, 2),
+    }
+
+    print(json.dumps(result, indent=2))
+
+
+# Run the scan only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()