commit dd1e239a2cd9d62a8dd1f144070ba1d9b604c983
parent fab02ffcee6cc4b837e996171068e5654295171d
Author: Brian Graham <brian@buildingbetterteams.de>
Date: Sun, 22 Mar 2026 07:45:01 +0100
Fix scan agent max-turns: 3 → 8, accept --max-turns CLI arg
The previous limit of 3 turns was the exact minimum (read agent prompt,
read schema, write scan.json) with zero margin, so 129/200 papers
silently failed when the agent needed an extra turn. Bumping the default
to 8 resolved all but the known persistent failures (bad PDFs,
survey_methodology truncation). The limit can now also be overridden per
run with --max-turns.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/scripts/run-scan.py b/scripts/run-scan.py
@@ -89,7 +89,7 @@ def ensure_text(entry):
return False, f"pymupdf error: {e}"
-def run_scan_agent(entry):
+def run_scan_agent(entry, max_turns=8):
"""Run the scan agent on a single paper. Returns (ok, reason)."""
txt_path = PAPERS_DIR / entry["id"] / "paper.txt"
scan_path = PAPERS_DIR / entry["id"] / "scan.json"
@@ -122,7 +122,7 @@ Scan this paper and write the result to papers/{entry['id']}/scan.json.
"claude", "-p", "-",
"--model", "opus",
"--allowedTools", "Read,Write,Edit",
- "--max-turns", "3",
+ "--max-turns", str(max_turns),
],
input=prompt,
capture_output=True, text=True, timeout=600,
@@ -170,7 +170,7 @@ Scan this paper and write the result to papers/{entry['id']}/scan.json.
return False, f"error: {e}"
-def scan_one(entry):
+def scan_one(entry, max_turns=8):
"""Full pipeline for one paper: extract text → scan → return result."""
paper_id = entry["id"]
@@ -180,7 +180,7 @@ def scan_one(entry):
return paper_id, False, f"text extraction failed: {reason}"
# Step 2: run scan
- ok, reason = run_scan_agent(entry)
+ ok, reason = run_scan_agent(entry, max_turns=max_turns)
return paper_id, ok, reason
@@ -190,6 +190,7 @@ def main():
limit = None
specific_id = None
parallel = 1
+ max_turns = 8
for i, arg in enumerate(args):
if arg == "--limit" and i + 1 < len(args):
@@ -198,6 +199,8 @@ def main():
specific_id = args[i + 1]
if arg == "--parallel" and i + 1 < len(args):
parallel = int(args[i + 1])
+ if arg == "--max-turns" and i + 1 < len(args):
+ max_turns = int(args[i + 1])
entries = load_registry()
@@ -233,7 +236,7 @@ def main():
if parallel > 1:
with ThreadPoolExecutor(max_workers=parallel) as executor:
- futures = {executor.submit(scan_one, e): e for e in candidates}
+ futures = {executor.submit(scan_one, e, max_turns): e for e in candidates}
for future in as_completed(futures):
paper_id, ok, reason = future.result()
if ok:
@@ -246,7 +249,7 @@ def main():
else:
for i, entry in enumerate(candidates):
print(f"[{i+1}/{len(candidates)}] {entry['id']}")
- paper_id, ok, reason = scan_one(entry)
+ paper_id, ok, reason = scan_one(entry, max_turns)
if ok:
results["scanned"] += 1
print(f" OK: {reason}")