catchup-v3.py (9619B)
#!/usr/bin/env python3
"""
Catch-up script: add v3 engagement factors to existing v2 scans.

Reads each v2 scan.json, sends the existing scan context (title, key
findings, claims, red flags) plus the full paper text to Opus for
engagement factor classification, and merges the result into a v3 scan.json.

Usage:
  python3 scripts/catchup-v3.py                     # All v2 scans without v3
  python3 scripts/catchup-v3.py --limit 10          # First N
  python3 scripts/catchup-v3.py --parallel 4        # Concurrent
  python3 scripts/catchup-v3.py --id metr-rct-2025  # Specific paper
"""

import argparse
import json
import subprocess
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

ROOT = Path(__file__).resolve().parent.parent
PAPERS_DIR = ROOT / "papers"

# The six dimensions the model must score; also the order used when the
# scores are summarised in the per-paper status line.
REQUIRED_FACTORS = (
    "practical_relevance",
    "surprise_contrarian",
    "fear_safety",
    "drama_conflict",
    "demo_ability",
    "brand_recognition",
)

# Inclusive score range requested by ENGAGEMENT_PROMPT.
MIN_SCORE = 0
MAX_SCORE = 3

# How many failures the final summary lists before truncating.
MAX_FAILURES_SHOWN = 20

ENGAGEMENT_PROMPT = """You are classifying a research paper on 6 dimensions that predict social media engagement (Hacker News, Reddit, tech newsletters). Rate each dimension 0-3 based on the paper's content, framing, and likely audience appeal.

## Dimensions

1. **practical_relevance** (0-3): Can a developer/practitioner use this at work? Tools, techniques, frameworks they can apply immediately.
   - 0: Pure theory, no practical application
   - 1: Potentially useful but requires significant adaptation
   - 2: Contains actionable techniques or findings practitioners can apply
   - 3: Directly usable tool, library, or technique with immediate workflow impact

2. **surprise_contrarian** (0-3): Does this challenge conventional wisdom or reveal something unexpected?
   - 0: Confirms what everyone already believes
   - 1: Minor unexpected finding buried in expected results
   - 2: Main finding is somewhat surprising or counterintuitive
   - 3: Directly contradicts a widely-held belief with evidence

3. **fear_safety** (0-3): Does this raise concerns about AI risks, security vulnerabilities, or misuse potential?
   - 0: No safety/risk angle
   - 1: Mentions risks as a secondary concern
   - 2: Safety/risk is a major theme with concrete demonstrations
   - 3: Demonstrates a novel attack, vulnerability, or existential concern

4. **drama_conflict** (0-3): Is there a controversy, company rivalry, or "the emperor has no clothes" angle?
   - 0: No controversy or conflict
   - 1: Mild tension (e.g., questions a popular approach)
   - 2: Directly challenges a specific company's claims or a popular benchmark
   - 3: Major controversy — "benchmarks are fake", "company X is lying", replication failure

5. **demo_ability** (0-3): Can someone try this themselves right now?
   - 0: No code, no demo, no way to interact
   - 1: Code exists but requires significant setup
   - 2: Reproducible with moderate effort, clear instructions
   - 3: Live demo, web app, or pip-installable tool you can try in minutes

6. **brand_recognition** (0-3): Is this from a famous lab or about a famous product?
   - 0: Unknown lab/authors, obscure topic
   - 1: Recognized institution but not a household name in tech
   - 2: Major tech company or famous research lab (Google Brain, FAIR, etc.)
   - 3: About a product millions use (ChatGPT, Copilot, Cursor) or from OpenAI/Anthropic/DeepMind

## Paper Information

Title: {title}
Authors: {authors}
Year: {year}
Venue: {venue}
Tags: {tags}

Key Findings:
{key_findings}

Claims:
{claims}

Red Flags:
{red_flags}

## Output

Respond with ONLY a JSON object, no other text:
{{
  "engagement_factors": {{
    "practical_relevance": {{"score": <0-3>, "justification": "<1 sentence>"}},
    "surprise_contrarian": {{"score": <0-3>, "justification": "<1 sentence>"}},
    "fear_safety": {{"score": <0-3>, "justification": "<1 sentence>"}},
    "drama_conflict": {{"score": <0-3>, "justification": "<1 sentence>"}},
    "demo_ability": {{"score": <0-3>, "justification": "<1 sentence>"}},
    "brand_recognition": {{"score": <0-3>, "justification": "<1 sentence>"}}
  }}
}}"""


def _load_scan(scan_path):
    """Parse *scan_path* as UTF-8 JSON and return the decoded value.

    Raises OSError if the file cannot be read and ValueError (which covers
    both json.JSONDecodeError and UnicodeDecodeError) if it cannot be decoded.
    """
    with open(scan_path, encoding="utf-8") as f:
        return json.load(f)


def _write_scan(scan_path, scan):
    """Serialise *scan* to *scan_path* without ever leaving a partial file.

    The JSON is rendered in memory first, written to a sibling temp file and
    then renamed over the target, so a serialisation or encoding failure
    cannot truncate the existing scan.json.
    """
    payload = json.dumps(scan, ensure_ascii=False, indent=2)
    tmp_path = scan_path.with_name(scan_path.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    tmp_path.replace(scan_path)


def _format_key_findings(value):
    """Render the scan's key_findings field as prompt text.

    The scan schema is not visible from this script, so both a plain string
    and a list of findings are accepted; a list becomes one bullet per item
    instead of a Python repr.
    """
    if isinstance(value, (list, tuple)):
        return "\n".join(f"- {item}" for item in value)
    return "" if value is None else str(value)


def _build_prompt(scan, paper_text):
    """Fill ENGAGEMENT_PROMPT from *scan* and append the full paper text."""
    paper = scan.get("paper") or {}
    claims_text = "\n".join(
        f"- [{c.get('supported', '?')}] {c.get('claim', '')}"
        for c in scan.get("claims") or []
    )
    red_flags_text = "\n".join(
        f"- {r.get('flag', '')}: {r.get('detail', '')}"
        for r in scan.get("red_flags") or []
    )
    header = ENGAGEMENT_PROMPT.format(
        title=paper.get("title", ""),
        authors=", ".join(str(a) for a in (paper.get("authors") or [])[:5]),
        year=paper.get("year", ""),
        venue=paper.get("venue", ""),
        tags=", ".join(str(t) for t in scan.get("methodology_tags") or []),
        key_findings=_format_key_findings(scan.get("key_findings", "")),
        claims=claims_text or "(none)",
        red_flags=red_flags_text or "(none)",
    )
    # The paper text is appended after formatting so that literal braces in
    # the paper are never interpreted as format fields.
    return f"{header}\n\n## Full Paper Text\n{paper_text}"


def _extract_json_object(text):
    """Return the first JSON object embedded in *text*, or None.

    Scans forward from each "{" and lets the JSON decoder find the matching
    end, so prose (even prose containing braces) before or after the object
    does not break parsing. Returns None when *text* contains no "{" at all;
    raises the first json.JSONDecodeError when braces exist but nothing
    decodes.
    """
    decoder = json.JSONDecoder()
    first_error = None
    start = text.find("{")
    while start != -1:
        try:
            value, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError as exc:
            if first_error is None:
                first_error = exc
        else:
            return value
        start = text.find("{", start + 1)
    if first_error is not None:
        raise first_error
    return None


def _validate_factors(factors):
    """Return a description of the first problem in *factors*, or None if valid.

    Every name in REQUIRED_FACTORS must map to an object holding an integer
    "score" within MIN_SCORE..MAX_SCORE.
    """
    if not isinstance(factors, dict):
        return "engagement_factors is not an object"
    for key in REQUIRED_FACTORS:
        if key not in factors:
            return f"missing factor: {key}"
        entry = factors[key]
        if not isinstance(entry, dict) or "score" not in entry:
            return f"missing score in {key}"
        score = entry["score"]
        # bool is a subclass of int, so it has to be rejected explicitly.
        if (
            isinstance(score, bool)
            or not isinstance(score, int)
            or not MIN_SCORE <= score <= MAX_SCORE
        ):
            return f"invalid score in {key}: {score!r}"
    return None


def classify_one(paper_id):
    """Run engagement classification on one paper.

    Returns a ``(paper_id, ok, reason)`` tuple. Problems with the paper's
    files, the ``claude`` subprocess or its output are reported through
    ``ok=False`` and *reason* rather than raised, so a single bad paper
    cannot abort a batch (in particular a thread-pool run).

    On success the paper's scan.json is rewritten with ``scan_version`` 3 and
    an ``engagement_factors`` entry. Scans already at version 3 are left
    untouched and reported as ``ok=True`` with reason "already v3".
    """
    paper_dir = PAPERS_DIR / paper_id
    scan_path = paper_dir / "scan.json"
    if not scan_path.exists():
        return paper_id, False, "no scan.json"

    # Handled separately from the model-output parsing below so the reason
    # says which JSON document was bad.
    try:
        scan = _load_scan(scan_path)
    except (OSError, ValueError) as e:
        return paper_id, False, f"unreadable scan.json: {e}"
    if not isinstance(scan, dict):
        return paper_id, False, "scan.json is not a JSON object"

    try:
        if scan.get("scan_version", 1) < 2:
            return paper_id, False, "v1 scan, skip"

        if scan.get("scan_version") == 3:
            return paper_id, True, "already v3"

        # Read the full paper text (NUL bytes are stripped before sending).
        txt_path = paper_dir / "paper.txt"
        if not txt_path.exists():
            return paper_id, False, "no paper.txt"
        paper_text = txt_path.read_text(encoding="utf-8").replace("\x00", "")

        prompt = _build_prompt(scan, paper_text)

        # The prompt is passed on stdin; an explicit encoding keeps the
        # non-ASCII characters in the prompt independent of the locale.
        result = subprocess.run(
            ["claude", "-p", "-", "--model", "opus", "--max-turns", "1"],
            input=prompt,
            capture_output=True, text=True, encoding="utf-8", timeout=300,
            cwd=str(ROOT),
        )

        if result.returncode != 0:
            return paper_id, False, f"claude exit {result.returncode}"

        parsed = _extract_json_object(result.stdout.strip())
        if parsed is None:
            return paper_id, False, "no JSON in output"

        # Accept both {"engagement_factors": {...}} and the bare factor map.
        factors = parsed.get("engagement_factors", parsed)
        problem = _validate_factors(factors)
        if problem is not None:
            return paper_id, False, problem

        # Merge into v3
        scan["scan_version"] = 3
        scan["engagement_factors"] = factors
        _write_scan(scan_path, scan)

        scores = ",".join(str(factors[k]["score"]) for k in REQUIRED_FACTORS)
        return paper_id, True, f"v3 [{scores}]"

    except json.JSONDecodeError as e:
        return paper_id, False, f"JSON parse error: {e}"
    except subprocess.TimeoutExpired:
        return paper_id, False, "timeout"
    except Exception as e:  # worker boundary: report, never propagate
        return paper_id, False, f"error: {e}"


def _parse_args(argv):
    """Parse command-line options; exits with a usage error on bad input."""
    parser = argparse.ArgumentParser(
        description="Add v3 engagement factors to existing v2 scans.",
    )
    parser.add_argument(
        "--limit", type=int, default=None, metavar="N",
        help="only process the first N candidates (0 means no limit)",
    )
    parser.add_argument(
        "--id", dest="paper_id", default=None, metavar="PAPER_ID",
        help="only process the paper with this directory name",
    )
    parser.add_argument(
        "--parallel", type=int, default=1, metavar="N",
        help="number of concurrent workers (values <= 1 run sequentially)",
    )
    args = parser.parse_args(argv)
    if args.limit is not None and args.limit < 0:
        parser.error("--limit must not be negative")
    return args


def _needs_classification(scan_path, forced):
    """Decide whether the scan at *scan_path* should be handed to classify_one.

    v1 scans are skipped; v3 scans are skipped unless *forced* (a specific
    paper was requested). A scan that cannot be read or inspected is kept as
    a candidate so classify_one reports it as a failure instead of the whole
    run crashing during collection.
    """
    try:
        scan = _load_scan(scan_path)
        version = scan.get("scan_version", 1)
        if version < 2:
            return False
        return forced or scan.get("scan_version") != 3
    except (OSError, ValueError, AttributeError, TypeError):
        return True


def main(argv=None):
    """Collect candidate papers, classify them and print a summary.

    *argv* is the list of command-line arguments without the program name;
    when omitted, ``sys.argv[1:]`` is used.
    """
    args = _parse_args(sys.argv[1:] if argv is None else argv)
    specific_id = args.paper_id
    parallel = args.parallel

    # Collect candidates
    candidates = []
    for scan_path in sorted(PAPERS_DIR.glob("*/scan.json")):
        pid = scan_path.parent.name
        if specific_id and pid != specific_id:
            continue
        if _needs_classification(scan_path, forced=bool(specific_id)):
            candidates.append(pid)

    if args.limit:
        candidates = candidates[:args.limit]

    if not candidates:
        print("No papers to classify.")
        return

    parallel_note = f" (parallel={parallel})" if parallel > 1 else ""
    print(f"Classifying engagement factors for {len(candidates)} papers"
          f"{parallel_note}:\n")

    results = {"ok": 0, "fail": 0}
    failures = []

    if parallel > 1:
        with ThreadPoolExecutor(max_workers=parallel) as executor:
            futures = [executor.submit(classify_one, pid) for pid in candidates]
            for future in as_completed(futures):
                pid, ok, reason = future.result()
                if ok:
                    results["ok"] += 1
                    if reason != "already v3":
                        print(f"  OK: {pid} — {reason}")
                else:
                    results["fail"] += 1
                    failures.append((pid, reason))
                    print(f"  FAIL: {pid} — {reason}")
    else:
        for i, pid in enumerate(candidates):
            print(f"[{i+1}/{len(candidates)}] {pid}")
            _, ok, reason = classify_one(pid)
            if ok:
                results["ok"] += 1
                print(f"  {reason}")
            else:
                results["fail"] += 1
                failures.append((pid, reason))
                print(f"  FAIL: {reason}")

    print(f"\nDone. OK: {results['ok']}, Failed: {results['fail']}")
    if failures:
        print("Failures:")
        for pid, reason in failures[:MAX_FAILURES_SHOWN]:
            print(f"  {pid}: {reason}")
        hidden = len(failures) - MAX_FAILURES_SHOWN
        if hidden > 0:
            print(f"  ... and {hidden} more")


if __name__ == "__main__":
    main()