transcript-analysis.py (3853B)
#!/usr/bin/env python3
"""Transcript analysis - measures agent efficiency from the conversation log."""

import json
import re
import sys
from pathlib import Path

# A Bash command matching any of these three patterns counts as a wasted turn.
_DOC_PATTERN = re.compile(
    r"cat >.*?(README|IMPLEMENTATION|FEATURES|QUICK_START|CHANGELOG|TODO|\.txt)", re.I
)
_ASCII_PATTERN = re.compile(r"(cat <<|echo).*[═╔╗╚╝║▓░█✓✅🎮]")
_SERVER_PATTERN = re.compile(r"node server|npm start|npx serve|http-server|python.*http")
_WASTE_PATTERNS = (_DOC_PATTERN, _ASCII_PATTERN, _SERVER_PATTERN)

# A Bash command matching this pattern counts as the agent testing its own work.
_TEST_PATTERN = re.compile(r"npm test|npx.*test|node.*test|tsc --noEmit|eslint")


def _load_events(transcript):
    """Return the JSON objects found in a JSONL transcript, one per line.

    Blank lines, lines that are not valid JSON, and lines whose JSON value is
    not an object are skipped, so a partially corrupted log is still analysed.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the platform
    # default encoding; undecodable bytes are replaced instead of aborting.
    text = transcript.read_text(encoding="utf-8", errors="replace")
    events = []
    for line in text.strip().split("\n"):
        if not line.strip():
            continue
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Everything downstream calls .get() on the event, so only keep dicts.
        if isinstance(event, dict):
            events.append(event)
    return events


def _assistant_blocks(events):
    """Return every dict content block from the assistant events, in order.

    Messages whose ``content`` is not a list (for example a plain string) and
    list entries that are not dicts are ignored rather than raising.
    """
    blocks = []
    for event in events:
        if event.get("type") != "assistant":
            continue
        message = event.get("message")
        content = message.get("content") if isinstance(message, dict) else None
        if not isinstance(content, list):
            continue
        blocks.extend(block for block in content if isinstance(block, dict))
    return blocks


def _bash_commands(tool_calls):
    """Return the command string of each Bash tool call ("" when absent)."""
    commands = []
    for call in tool_calls:
        if call.get("name") != "Bash":
            continue
        tool_input = call.get("input")
        command = tool_input.get("command", "") if isinstance(tool_input, dict) else ""
        # The regexes below need a str; treat anything else as an empty command.
        commands.append(command if isinstance(command, str) else "")
    return commands


def _analyze(events):
    """Build the efficiency report dict for a list of transcript events."""
    blocks = _assistant_blocks(events)
    tool_calls = [block for block in blocks if block.get("type") == "tool_use"]
    tool_names = [call.get("name", "") for call in tool_calls]
    bash_commands = _bash_commands(tool_calls)

    # Per-category counts of wasted Bash commands.
    wasted_docs = sum(1 for cmd in bash_commands if _DOC_PATTERN.search(cmd))
    wasted_ascii = sum(1 for cmd in bash_commands if _ASCII_PATTERN.search(cmd))
    wasted_server = sum(1 for cmd in bash_commands if _SERVER_PATTERN.search(cmd))
    self_tested = sum(1 for cmd in bash_commands if _TEST_PATTERN.search(cmd))

    # Count each wasted command once even when it matches several categories.
    # Summing the category counts could exceed the number of tool calls and
    # drive the productivity ratio negative.
    total_wasted = sum(
        1
        for cmd in bash_commands
        if any(pattern.search(cmd) for pattern in _WASTE_PATTERNS)
    )

    # A user event whose tool result carries non-empty stderr counts as an error.
    errors = 0
    for event in events:
        if event.get("type") != "user":
            continue
        tool_result = event.get("tool_use_result")
        if isinstance(tool_result, dict) and tool_result.get("stderr"):
            errors += 1

    thinking_blocks = sum(1 for block in blocks if block.get("type") == "thinking")
    text_blocks = sum(1 for block in blocks if block.get("type") == "text")

    # Productivity ratio: share of tool calls that were not wasted.
    total_tools = len(tool_calls)
    productive = total_tools - total_wasted
    productivity_ratio = round(productive / total_tools, 2) if total_tools > 0 else 0

    # Score: start at 100, lose 5 per wasted turn (at most 25), and gain 10
    # (capped at 100) when the agent ran any test/lint command.
    score = 100 - min(total_wasted * 5, 25)
    if self_tested > 0:
        score = min(score + 10, 100)

    return {
        "total_events": len(events),
        "tool_calls": {
            "total": total_tools,
            "bash": tool_names.count("Bash"),
            "write": tool_names.count("Write"),
            "edit": tool_names.count("Edit"),
            "read": tool_names.count("Read"),
        },
        "wasted_turns": {
            "total": total_wasted,
            "docs": wasted_docs,
            "ascii_art": wasted_ascii,
            "server_starts": wasted_server,
        },
        "errors_encountered": errors,
        "thinking_blocks": thinking_blocks,
        "text_blocks": text_blocks,
        "productivity_ratio": productivity_ratio,
        "self_tested": self_tested > 0,
        "score": round(score / 100, 2),
    }


def main():
    """Analyse ``transcript.jsonl`` in the run directory and print a JSON report.

    The run directory is the first command-line argument and defaults to the
    current directory. When the transcript file is missing, a small error
    object with a score of 0 is printed instead of the report.
    """
    run_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(".")
    transcript = run_dir / "transcript.jsonl"

    if not transcript.exists():
        print(json.dumps({"error": "no transcript found", "score": 0}))
        return

    print(json.dumps(_analyze(_load_events(transcript)), indent=2))


if __name__ == "__main__":
    main()