loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

Calibrate.tsx (12450B)


      1 import { useState } from "react";
      2 
/**
 * Result of a single automated (bot) test for one run.
 */
interface BotTest {
  /** Test identifier; looked up by the names listed in ALL_TEST_NAMES. */
  name: string;
  /** Bot verdict that is compared against the human verdict for the same test. */
  pass: boolean;
  /**
   * Free-text detail shown in the "Bot Detail" column.
   * A value starting with "skipped:" marks the test as skipped, which
   * excludes it from agree/disagree counting.
   */
  detail: string;
}
      8 
/**
 * Human calibration record for one run. The same field set is produced by
 * the per-card "Copy ... JSON" export.
 */
interface CalibrationEntry {
  run_id: string;
  /** Short code used as the card's React key, link text and `/r/<short_id>` link. */
  short_id: string;
  /** Text rendered next to the short id in the card heading. */
  label: string;
  /** Free-form tester notes; editable while the editor is open. */
  notes: string;
  /** NOTE(review): the export writes a YYYY-MM-DD date here; incoming format is presumably the same — confirm. */
  human_tested_at: string;
  /**
   * Human verdict per test name: `true` renders as "yes", `false` as "no",
   * and `null` (or a missing key) as "-" and is left out of agreement counts.
   */
  human_tests: Record<string, boolean | null>;
}
     17 
/**
 * Everything one calibration card needs: the human record plus the bot's
 * results for the same run.
 */
interface ComparisonData {
  entry: CalibrationEntry;
  /** Rendered as `Math.round(botScore * 100)%`, or "-" when null — presumably a 0..1 fraction; confirm with the producer. */
  botScore: number | null;
  botTests: BotTest[];
  /** Target of the heading link and the "Play game" link. */
  artifactUrl: string;
}
     24 
// Test names, in display order. Each name becomes one row in every
// calibration card's table, and only these names are considered when
// counting human/bot agreement.
const ALL_TEST_NAMES = [
  "game_loads", "game_starts", "auto_drop",
  "move_left", "move_right", "move_down", "rotate", "hard_drop", "all_pieces_rotate",
  "piece_locks", "new_piece_spawns", "multiple_pieces",
  "line_clear", "score_changes",
  "game_over", "playable_30s",
  "multi_line_clear", "score_scaling", "level_progression", "speed_progression",
  "next_piece_preview", "game_over_display", "counter_clockwise_rotation", "soft_drop_distinct",
];
     34 
     35 function TriState({ value, onChange }: { value: boolean | null; onChange: (v: boolean | null) => void }) {
     36   const next = value === null ? true : value === true ? false : null;
     37   const label = value === true ? "yes" : value === false ? "no" : "-";
     38   const color = value === true ? "var(--green)" : value === false ? "var(--red)" : "var(--text-muted)";
     39   return (
     40     <button
     41       onClick={() => onChange(next)}
     42       style={{
     43         background: "none",
     44         border: "1px solid hsl(var(--border) / 0.5)",
     45         color,
     46         fontWeight: 700,
     47         fontSize: "0.7rem",
     48         padding: "2px 10px",
     49         cursor: "pointer",
     50         fontFamily: "var(--font-mono)",
     51         minWidth: "40px",
     52       }}
     53     >
     54       {label}
     55     </button>
     56   );
     57 }
     58 
     59 function CopyButton({ text, label }: { text: string; label: string }) {
     60   const [copied, setCopied] = useState(false);
     61   return (
     62     <button
     63       onClick={() => { navigator.clipboard.writeText(text); setCopied(true); setTimeout(() => setCopied(false), 1500); }}
     64       style={{
     65         padding: "4px 12px",
     66         fontSize: "0.7rem",
     67         background: copied ? "var(--green)" : "transparent",
     68         color: copied ? "#fff" : "var(--text-muted)",
     69         border: `1px solid ${copied ? "var(--green)" : "var(--border)"}`,
     70         cursor: "pointer",
     71         fontFamily: "var(--font-mono)",
     72         transition: "all 0.15s",
     73       }}
     74     >
     75       {copied ? "Copied!" : label}
     76     </button>
     77   );
     78 }
     79 
     80 function CalibrationCard({ data, editing, onUpdate }: { data: ComparisonData; editing: boolean; onUpdate: (tests: Record<string, boolean | null>, notes: string) => void }) {
     81   const { entry, botScore, botTests, artifactUrl } = data;
     82   const [humanTests, setHumanTests] = useState<Record<string, boolean | null>>({ ...entry.human_tests });
     83   const [notes, setNotes] = useState(entry.notes);
     84   const showEditor = editing;
     85 
     86   const botByName = new Map(botTests.map(t => [t.name, t]));
     87 
     88   const humanPass = Object.values(humanTests).filter(v => v === true).length;
     89   const humanFail = Object.values(humanTests).filter(v => v === false).length;
     90 
     91   let agree = 0, disagree = 0;
     92   for (const name of ALL_TEST_NAMES) {
     93     const human = humanTests[name];
     94     const bot = botByName.get(name);
     95     const botSkip = bot?.detail?.startsWith("skipped:");
     96     if (human !== null && human !== undefined && bot && !botSkip) {
     97       if (human === bot.pass) agree++;
     98       else disagree++;
     99     }
    100   }
    101 
    102   function handleTestChange(name: string, value: boolean | null) {
    103     const updated = { ...humanTests, [name]: value };
    104     setHumanTests(updated);
    105     onUpdate(updated, notes);
    106   }
    107 
    108   function handleNotesChange(value: string) {
    109     setNotes(value);
    110     onUpdate(humanTests, value);
    111   }
    112 
    113   // Build export JSON
    114   const exportData = {
    115     run_id: entry.run_id,
    116     short_id: entry.short_id,
    117     label: entry.label,
    118     notes,
    119     human_tested_at: new Date().toISOString().slice(0, 10),
    120     human_tests: humanTests,
    121   };
    122 
    123   return (
    124     <div className="card" style={{ padding: "20px", marginBottom: "20px" }}>
    125       <div style={{ display: "flex", justifyContent: "space-between", alignItems: "flex-start", marginBottom: "12px" }}>
    126         <div>
    127           <h3 style={{ margin: "0 0 4px 0", fontSize: "1rem" }}>
    128             <a href={artifactUrl} target="_blank" rel="noopener noreferrer" style={{ color: "var(--accent)", textDecoration: "none" }}>
    129               {entry.short_id}
    130             </a>
    131             <span style={{ color: "var(--text)", marginLeft: "8px" }}>{entry.label}</span>
    132           </h3>
    133           <div style={{ fontSize: "0.7rem", color: "var(--text-muted)" }}>
    134             <a href={artifactUrl} target="_blank" style={{ color: "var(--accent)" }}>Play game</a>
    135             {" | "}
    136             <a href={`/r/${entry.short_id}`} style={{ color: "var(--accent)" }}>Run detail</a>
    137           </div>
    138         </div>
    139         <div style={{ display: "flex", gap: "16px", fontSize: "0.75rem", fontFamily: "var(--font-mono)" }}>
    140           <div style={{ textAlign: "center" }}>
    141             <div style={{ fontWeight: 700, fontSize: "1.1rem" }}>{humanPass}/{humanPass + humanFail || 0}</div>
    142             <div style={{ color: "var(--text-muted)", fontSize: "0.6rem" }}>HUMAN</div>
    143           </div>
    144           <div style={{ textAlign: "center" }}>
    145             <div style={{ fontWeight: 700, fontSize: "1.1rem" }}>{botScore != null ? `${Math.round(botScore * 100)}%` : "-"}</div>
    146             <div style={{ color: "var(--text-muted)", fontSize: "0.6rem" }}>BOT</div>
    147           </div>
    148           <div style={{ textAlign: "center" }}>
    149             <div style={{ fontWeight: 700, fontSize: "1.1rem", color: disagree > 0 ? "var(--red)" : "var(--green)" }}>{agree}/{agree + disagree || 0}</div>
    150             <div style={{ color: "var(--text-muted)", fontSize: "0.6rem" }}>AGREE</div>
    151           </div>
    152         </div>
    153       </div>
    154 
    155       <div style={{ fontSize: "0.75rem", color: "var(--text-muted)", marginBottom: "12px", padding: "8px", background: "hsl(var(--bg-secondary))" }}>
    156         {showEditor ? (
    157           <textarea
    158             value={notes}
    159             onChange={e => handleNotesChange(e.target.value)}
    160             style={{ width: "100%", minHeight: "40px", background: "transparent", border: "1px solid var(--border)", color: "var(--text)", fontSize: "0.75rem", padding: "4px", fontFamily: "inherit", resize: "vertical" }}
    161           />
    162         ) : (
    163           <span>{notes || "(no notes)"}</span>
    164         )}
    165       </div>
    166 
    167       <table style={{ width: "100%", fontSize: "0.7rem", borderCollapse: "collapse" }}>
    168         <thead>
    169           <tr style={{ borderBottom: "1px solid var(--border)" }}>
    170             <th style={{ textAlign: "left", padding: "4px 8px", fontWeight: 600 }}>Test</th>
    171             <th style={{ textAlign: "center", padding: "4px 8px", fontWeight: 600, width: "80px" }}>Human</th>
    172             <th style={{ textAlign: "center", padding: "4px 8px", fontWeight: 600, width: "80px" }}>Bot</th>
    173             <th style={{ textAlign: "center", padding: "4px 8px", fontWeight: 600, width: "30px" }}></th>
    174             <th style={{ textAlign: "left", padding: "4px 8px", fontWeight: 600 }}>Bot Detail</th>
    175           </tr>
    176         </thead>
    177         <tbody>
    178           {ALL_TEST_NAMES.map(name => {
    179             const human = humanTests[name];
    180             const bot = botByName.get(name);
    181             const botSkip = bot?.detail?.startsWith("skipped:");
    182             const botStr = bot ? (botSkip ? "skip" : bot.pass ? "yes" : "no") : "-";
    183             const botColor = bot ? (botSkip ? "var(--text-muted)" : bot.pass ? "var(--green)" : "var(--red)") : "var(--text-muted)";
    184 
    185             let matchIcon = "";
    186             let matchColor = "var(--text-muted)";
    187             if (human !== null && human !== undefined && bot && !botSkip) {
    188               if (human === bot.pass) { matchIcon = "="; matchColor = "var(--green)"; }
    189               else { matchIcon = "!"; matchColor = "var(--red)"; }
    190             }
    191 
    192             return (
    193               <tr key={name} style={{ borderBottom: "1px solid hsl(var(--border) / 0.3)" }}>
    194                 <td style={{ padding: "3px 8px", fontFamily: "var(--font-mono)" }}>{name}</td>
    195                 <td style={{ textAlign: "center", padding: "3px 8px" }}>
    196                   {showEditor ? (
    197                     <TriState value={human ?? null} onChange={v => handleTestChange(name, v)} />
    198                   ) : (
    199                     <span style={{ color: human === true ? "var(--green)" : human === false ? "var(--red)" : "var(--text-muted)", fontWeight: 600 }}>
    200                       {human === true ? "yes" : human === false ? "no" : "-"}
    201                     </span>
    202                   )}
    203                 </td>
    204                 <td style={{ textAlign: "center", padding: "3px 8px", color: botColor, fontWeight: 600 }}>{botStr}</td>
    205                 <td style={{ textAlign: "center", padding: "3px 8px", color: matchColor, fontWeight: 700 }}>{matchIcon}</td>
    206                 <td style={{ padding: "3px 8px", color: "var(--text-muted)", overflow: "hidden", textOverflow: "ellipsis", whiteSpace: "nowrap", maxWidth: "300px" }}>{bot?.detail || ""}</td>
    207               </tr>
    208             );
    209           })}
    210         </tbody>
    211       </table>
    212 
    213       {showEditor && (
    214         <div style={{ marginTop: "12px", display: "flex", justifyContent: "flex-end" }}>
    215           <CopyButton text={JSON.stringify(exportData, null, 2)} label={`Copy ${entry.short_id} JSON`} />
    216         </div>
    217       )}
    218     </div>
    219   );
    220 }
    221 
    222 export default function Calibrate({ comparisons }: { comparisons: ComparisonData[] }) {
    223   const [showEditor, setShowEditor] = useState(false);
    224   const [updates, setUpdates] = useState<Map<string, { tests: Record<string, boolean | null>; notes: string }>>(new Map());
    225 
    226   function handleUpdate(shortId: string, tests: Record<string, boolean | null>, notes: string) {
    227     const next = new Map(updates);
    228     next.set(shortId, { tests, notes });
    229     setUpdates(next);
    230   }
    231 
    232   // Aggregate stats
    233   const totalEntries = comparisons.length;
    234   let totalAgree = 0, totalDisagree = 0;
    235   for (const { entry, botTests } of comparisons) {
    236     const botByName = new Map(botTests.map(t => [t.name, t]));
    237     for (const name of ALL_TEST_NAMES) {
    238       const human = entry.human_tests[name];
    239       const bot = botByName.get(name);
    240       const botSkip = bot?.detail?.startsWith("skipped:");
    241       if (human !== null && human !== undefined && bot && !botSkip) {
    242         if (human === bot.pass) totalAgree++;
    243         else totalDisagree++;
    244       }
    245     }
    246   }
    247 
    248   return (
    249     <div>
    250       <div style={{ display: "flex", justifyContent: "space-between", alignItems: "center", marginBottom: "24px" }}>
    251         <div style={{ display: "flex", gap: "24px", fontFamily: "var(--font-mono)", fontSize: "0.8rem" }}>
    252           <span>{totalEntries} games</span>
    253           <span style={{ color: "var(--green)" }}>{totalAgree} agree</span>
    254           <span style={{ color: totalDisagree > 0 ? "var(--red)" : "var(--text-muted)" }}>{totalDisagree} disagree</span>
    255         </div>
    256         <button
    257           onClick={() => setShowEditor(!showEditor)}
    258           style={{
    259             padding: "6px 16px",
    260             fontSize: "0.75rem",
    261             background: showEditor ? "var(--accent)" : "transparent",
    262             color: showEditor ? "#fff" : "var(--text-muted)",
    263             border: `1px solid ${showEditor ? "var(--accent)" : "var(--border)"}`,
    264             cursor: "pointer",
    265             fontFamily: "var(--font-mono)",
    266           }}
    267         >
    268           {showEditor ? "Done Testing" : "Human Testing"}
    269         </button>
    270       </div>
    271 
    272       {showEditor && (
    273         <div className="card" style={{ padding: "16px", marginBottom: "20px", fontSize: "0.75rem" }}>
    274           <div style={{ fontWeight: 600, marginBottom: "8px" }}>Paste results here</div>
    275           <p style={{ color: "var(--text-muted)", fontSize: "0.7rem", margin: "0 0 8px 0" }}>
    276             After clicking test states below, copy the JSON from each card and paste it to me in the chat with the short code. Format: <code style={{ background: "hsl(var(--bg-secondary))", padding: "1px 4px" }}>SHORT_ID: paste json</code>
    277           </p>
    278         </div>
    279       )}
    280 
    281       {comparisons.map(data => (
    282         <CalibrationCard
    283           key={data.entry.short_id}
    284           editing={showEditor}
    285           data={{ ...data, entry: { ...data.entry, notes: updates.get(data.entry.short_id)?.notes ?? data.entry.notes, human_tests: updates.get(data.entry.short_id)?.tests ?? data.entry.human_tests } }}
    286           onUpdate={(tests, notes) => handleUpdate(data.entry.short_id, tests, notes)}
    287         />
    288       ))}
    289     </div>
    290   );
    291 }

Impressum · Datenschutz