invoke.sh (4466B)
#!/usr/bin/env bash
# Claude CLI invocation for benchmark runs.
# Maps grid cell configuration to CLI flags and captures output.

#######################################
# Run the Claude CLI once for a single benchmark grid cell.
#
# Reads the cell configuration (a JSON object) with jq, selects the prompt
# file for the task, builds the CLI argument list, runs it under `timeout`
# from inside the workspace directory, and records the outcome.
#
# Arguments:
#   $1 - cell_json:   JSON object describing the grid cell. Keys read here:
#                     model, effort, task, prompt_style, human_language,
#                     context_file, sub_agents, web_search, max_budget_usd,
#                     timeout_seconds (default 600), language,
#                     tool_read / tool_write / tool_edit / tool_glob /
#                     tool_grep (each defaults to "on").
#   $2 - workspace:   directory the CLI is run from.
#   $3 - run_dir:     directory receiving transcript.jsonl,
#                     claude_stderr.log and claude_output.json; an existing
#                     meta.json in it is updated with timing info.
#   $4 - project_dir: project root containing tasks/<task>/prompts/ and,
#                     optionally, tasks/<task>/context.md.
# Outputs:
#   Diagnostics on stderr. The CLI's stdout/stderr go to files in run_dir.
# Side effects:
#   Changes the current directory of the calling shell to $workspace.
#   Because the cd happens before any output is written, a relative
#   run_dir is resolved against the workspace.
# Returns:
#   1 if the prompt file is missing, the settings JSON cannot be built, or
#   the workspace cannot be entered; otherwise the exit status of the
#   `timeout ... claude` command (GNU timeout reports 124 when the time
#   limit is hit).
#######################################
invoke_claude() {
  local cell_json="$1"
  local workspace="$2"
  local run_dir="$3"
  local project_dir="$4"

  local model effort task prompt_style human_language context_file
  local sub_agents web_search budget timeout_seconds

  model=$(jq -r '.model' <<<"$cell_json")
  effort=$(jq -r '.effort' <<<"$cell_json")
  task=$(jq -r '.task' <<<"$cell_json")
  prompt_style=$(jq -r '.prompt_style' <<<"$cell_json")
  human_language=$(jq -r '.human_language' <<<"$cell_json")
  context_file=$(jq -r '.context_file' <<<"$cell_json")
  sub_agents=$(jq -r '.sub_agents' <<<"$cell_json")
  web_search=$(jq -r '.web_search' <<<"$cell_json")
  budget=$(jq -r '.max_budget_usd' <<<"$cell_json")
  timeout_seconds=$(jq -r '.timeout_seconds // 600' <<<"$cell_json")

  # Select prompt file
  local prompt_file="$project_dir/tasks/$task/prompts/${prompt_style}.${human_language}.md"
  if [[ ! -f "$prompt_file" ]]; then
    echo "ERROR: Prompt file not found: $prompt_file" >&2
    return 1
  fi
  local prompt
  prompt=$(<"$prompt_file")

  # Append language instruction (separated from the prompt by a blank line)
  local language
  language=$(jq -r '.language' <<<"$cell_json")
  if [[ "$language" == "typescript" ]]; then
    prompt+=$'\n\nUse TypeScript.'
  elif [[ "$language" == "javascript" ]]; then
    prompt+=$'\n\nUse JavaScript (no TypeScript).'
  fi

  # Build tool list from individual axes (Bash always on).
  # Each entry is "<cell key>:<CLI tool name>"; a key that is absent from
  # the cell counts as "on".
  local tools="Bash"
  local tool_spec tool_state
  for tool_spec in \
    tool_read:Read tool_write:Write tool_edit:Edit \
    tool_glob:Glob tool_grep:Grep; do
    tool_state=$(jq -r --arg key "${tool_spec%%:*}" '.[$key] // "on"' <<<"$cell_json")
    if [[ "$tool_state" == "on" ]]; then
      tools+=",${tool_spec#*:}"
    fi
  done
  if [[ "$sub_agents" == "on" ]]; then
    tools+=",Agent"
  fi
  if [[ "$web_search" == "on" ]]; then
    tools+=",WebSearch,WebFetch"
  fi

  # Build the claude command
  # --bare for full isolation (no CLAUDE.md, hooks, MCP, memory).
  # Auth via apiKeyHelper that reads OAuth token from ~/.claude/.credentials.json.
  local auth_helper
  auth_helper="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/get-oauth-token.sh"

  # Let jq do the JSON escaping so a helper path containing quotes or
  # backslashes still yields valid settings JSON.
  local settings_json
  if ! settings_json=$(jq -cn --arg helper "$auth_helper" '{apiKeyHelper: $helper}'); then
    echo "ERROR: Failed to build settings JSON for: $auth_helper" >&2
    return 1
  fi

  local cmd=(
    claude
    --bare
    -p "$prompt"
    --model "$model"
    --output-format stream-json
    --verbose
    --permission-mode dontAsk
    --max-budget-usd "$budget"
    --allowedTools "$tools"
    --settings "$settings_json"
  )

  # Add effort level (jq -r prints the string "null" for a missing key)
  if [[ -n "$effort" && "$effort" != "null" ]]; then
    cmd+=(--effort "$effort")
  fi

  # Add context file as system prompt if provided
  if [[ "$context_file" == "provided" ]]; then
    local ctx_file="$project_dir/tasks/$task/context.md"
    if [[ -f "$ctx_file" ]]; then
      cmd+=(--append-system-prompt "$(<"$ctx_file")")
    fi
  fi

  # Record start time
  local start_time
  start_time=$(date +%s)

  # Run claude in the workspace directory.
  # Use `return`, not `exit`: this function runs in the caller's shell, so
  # `exit` would terminate the whole benchmark run on one bad workspace.
  if ! cd "$workspace"; then
    echo "ERROR: Cannot enter workspace: $workspace" >&2
    return 1
  fi

  local exit_code=0
  timeout "${timeout_seconds}s" "${cmd[@]}" \
    >"$run_dir/transcript.jsonl" 2>"$run_dir/claude_stderr.log" \
    || exit_code=$?

  local end_time
  end_time=$(date +%s)
  local wall_time=$((end_time - start_time))

  # Save the last line of the stream as the run's output summary.
  # NOTE(review): this is expected to be the final "result" message, but the
  # line's type is not checked here; after a timeout it may be something else.
  # Best effort on purpose: a failure here must not mask claude's exit code.
  if [[ -f "$run_dir/transcript.jsonl" ]]; then
    tail -n 1 "$run_dir/transcript.jsonl" >"$run_dir/claude_output.json" 2>/dev/null || true
  fi

  # Update meta.json with timing info
  local meta_file="$run_dir/meta.json"
  if [[ -f "$meta_file" ]]; then
    local updated_meta
    # Only overwrite meta.json when jq succeeded; otherwise the existing
    # metadata would be replaced by an empty line.
    if updated_meta=$(jq \
      --argjson wall "$wall_time" \
      --argjson exit_code "$exit_code" \
      --arg completed "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      '. + {wall_time_seconds: $wall, exit_code: $exit_code, completed_at: $completed}' \
      "$meta_file"); then
      printf '%s\n' "$updated_meta" >"$meta_file"
    else
      echo "WARNING: Failed to update $meta_file; leaving it unchanged" >&2
    fi
  fi

  return "$exit_code"
}