calibrate.astro (1750B)
---
// Bot calibration page.
//
// At build time this page reads every `*.json` file in the calibration
// directory, looks up the run with the same `run_id` among the runs returned
// by `loadAllRuns()`, and passes the paired human/bot data to the
// `Calibrate` component.
import Base from "../layouts/Base.astro";
import Calibrate from "../components/Calibrate";
import fs from "node:fs";
import path from "node:path";
import { loadAllRuns } from "../lib/data";

// Load calibration data
const calibrationDir = path.resolve(process.cwd(), "../tasks/tetris/eval/gameplay-bot/calibration");

/** Shape this page expects each calibration JSON file to have. */
interface CalibrationEntry {
  run_id: string;
  short_id: string;
  label: string;
  notes: string;
  human_tested_at: string;
  human_tests: Record<string, boolean | null>;
}

/** One bot test result as read from a run's gameplay-bot report. */
interface BotTest {
  name: string;
  pass: boolean;
  detail: string;
}

/**
 * Minimal runtime check for parsed calibration JSON.
 *
 * Only verifies what the code below dereferences directly: the value must be
 * a non-null object with a string `run_id`. The remaining fields are not
 * checked here.
 */
function isCalibrationEntry(value: unknown): value is CalibrationEntry {
  return (
    typeof value === "object" &&
    value !== null &&
    typeof (value as { run_id?: unknown }).run_id === "string"
  );
}

const entries: CalibrationEntry[] = [];
if (fs.existsSync(calibrationDir)) {
  // Sorted so the entries appear in a stable, filename-based order.
  for (const file of fs.readdirSync(calibrationDir).sort()) {
    if (!file.endsWith(".json")) continue;
    const filePath = path.join(calibrationDir, file);
    try {
      const data: unknown = JSON.parse(fs.readFileSync(filePath, "utf-8"));
      if (isCalibrationEntry(data)) {
        entries.push(data);
      } else {
        console.warn(`[calibrate] Skipping ${filePath}: not an object with a string "run_id"`);
      }
    } catch (err) {
      // Best effort: one unreadable or malformed file must not break the page,
      // but it should not disappear without a trace either.
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`[calibrate] Skipping ${filePath}: ${reason}`);
    }
  }
}

// Load bot results for these runs
const allRuns = loadAllRuns();
const runsByRunId = new Map(allRuns.map((r) => [r.meta.run_id, r]));

// Build comparison data for the React component
const comparisons = entries.map((entry) => {
  const run = runsByRunId.get(entry.run_id);
  // `gameplay_bot` is reached through a single `any` cast, exactly as the
  // original did twice; the shape of `eval_results` is declared outside this file.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const gameplayBot = (run?.eval_results as any)?.gameplay_bot;
  // `null` / `[]` are the fallbacks when the run or its bot results are missing.
  const botScore = gameplayBot?.score ?? null;
  const botTests = (gameplayBot?.report?.tests ?? []) as BotTest[];
  const artifactUrl = `/artifacts/${entry.run_id}/index.html`;
  return { entry, botScore, botTests, artifactUrl };
});
---

<Base title="Bot Calibration">
  <h1 style="margin-bottom: 8px;">Bot Calibration</h1>
  <p style="color: var(--text-muted); margin-bottom: 24px; font-size: 0.875rem;">
    Hand-picked games with human test results compared to bot results.
  </p>

  <Calibrate client:load comparisons={comparisons} />
</Base>