loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

structural.sh (2900B)


      1 #!/usr/bin/env bash
      2 # Structural evaluation for Tetris task.
      3 # Checks that the agent produced a runnable game.
      4 #
      5 # Usage: structural.sh <workspace_path> <language>
      6 # Output: JSON to stdout
      7 
      # Positional arguments: the workspace directory to inspect and the language
      # the task was attempted in.
      WORKSPACE=$1
      LANGUAGE=$2

      # Accumulators updated by add_check: one JSON object per recorded check,
      # plus running totals of passed and recorded checks.
      declare -a checks=()
      pass_count=0 total_count=0
     14 
     #######################################
     # Escape a string so it can be embedded inside a JSON string literal.
     # Arguments:
     #   $1 - raw text
     # Outputs:
     #   The escaped text on stdout, without a trailing newline.
     #######################################
     _json_escape() {
       local text="$1"
       # Backslashes are doubled first so the escapes added below are not
       # escaped a second time.
       text=${text//\\/\\\\}
       text=${text//\"/\\\"}
       text=${text//$'\n'/\\n}
       text=${text//$'\r'/\\r}
       text=${text//$'\t'/\\t}
       printf '%s' "$text"
     }

     #######################################
     # Record the outcome of one structural check.
     # Globals:
     #   checks      - appended with a JSON object describing the check
     #   total_count - incremented on every call
     #   pass_count  - incremented when the check passed
     # Arguments:
     #   $1 - check name
     #   $2 - "true" if the check passed; any other value is recorded as false
     #   $3 - human-readable detail message
     #######################################
     add_check() {
       local name detail
       local passed="false"
       name=$(_json_escape "$1")
       detail=$(_json_escape "$3")

       # Only the literal "true" counts as a pass; normalising here keeps the
       # emitted JSON boolean valid whatever the caller handed in.
       if [[ "$2" == "true" ]]; then
         passed="true"
       fi

       checks+=("{\"name\": \"$name\", \"pass\": $passed, \"detail\": \"$detail\"}")
       total_count=$((total_count + 1))
       if [[ "$passed" == "true" ]]; then
         pass_count=$((pass_count + 1))
       fi
     }
     25 
     # Check for entry point: the first index.html found in the workspace root,
     # dist/, or public/ (probed in that order) satisfies the check.
     entry_point=""
     for candidate in "index.html" "dist/index.html" "public/index.html"; do
       if [[ -f "$WORKSPACE/$candidate" ]]; then
         entry_point="$candidate"
         break
       fi
     done

     if [[ -n "$entry_point" ]]; then
       add_check "entry_point_exists" "true" "$entry_point found"
     else
       add_check "entry_point_exists" "false" "no index.html found in workspace root, dist/, or public/"
     fi
     36 
     # Check for package.json: assume it is missing, then upgrade the result if
     # the file is present in the workspace root.
     package_json_state="false"
     package_json_detail="no package.json found"
     if [[ -f "$WORKSPACE/package.json" ]]; then
       package_json_state="true"
       package_json_detail="package.json found"
     fi
     add_check "package_json_exists" "$package_json_state" "$package_json_detail"
     43 
     # Check build succeeds (if there's a build script).
     # No check is recorded at all when the workspace has no package.json.
     if [[ -f "$WORKSPACE/package.json" ]]; then
       # Ask node whether package.json declares scripts.build. If node cannot be
       # run or package.json cannot be loaded, has_build stays empty and the
       # project is treated as having no build script.
       has_build=$(cd "$WORKSPACE" && node -e "const p=require('./package.json'); console.log(p.scripts && p.scripts.build ? 'yes' : 'no')" 2>/dev/null)
       if [[ "$has_build" == "yes" ]]; then
         # Run inside a subshell: the directory change stays local, so later
         # checks still resolve a relative $WORKSPACE correctly, and a failed cd
         # fails the check instead of running npm in the wrong directory.
         if (
           cd "$WORKSPACE" \
             && npm install --ignore-scripts > /dev/null 2>&1 \
             && npm run build > /dev/null 2>&1
         ); then
           add_check "build_succeeds" "true" "npm run build completed successfully"
         else
           add_check "build_succeeds" "false" "npm run build failed"
         fi
       else
         add_check "build_succeeds" "true" "no build script defined (static project)"
       fi
     fi
     58 
     # Check TypeScript compiles (if typescript).
     # Only runs when the requested language is "typescript". With a tsconfig.json
     # the project must type-check; without one, the check fails only if .ts files
     # exist, and nothing is recorded otherwise.
     if [[ "$LANGUAGE" == "typescript" ]]; then
       if [[ -f "$WORKSPACE/tsconfig.json" ]]; then
         # Subshell keeps the directory change local and turns a failed cd into a
         # failed check rather than running tsc in the wrong directory.
         if ( cd "$WORKSPACE" && npx tsc --noEmit > /dev/null 2>&1 ); then
           add_check "typescript_compiles" "true" "tsc --noEmit passed"
         else
           add_check "typescript_compiles" "false" "tsc --noEmit failed"
         fi
       else
         # Check if there are .ts files without a tsconfig; one match outside
         # node_modules is enough.
         ts_files=$(find "$WORKSPACE" -name "*.ts" -not -path "*/node_modules/*" | head -n 1)
         if [[ -n "$ts_files" ]]; then
           add_check "typescript_compiles" "false" "TypeScript files found but no tsconfig.json"
         fi
       fi
     fi
     76 
     #######################################
     # Print the final evaluation report as a single JSON object.
     # Globals:
     #   checks      - read; JSON objects, one per recorded check
     #   pass_count  - read; number of checks that passed
     #   total_count - read; number of checks recorded
     # Outputs:
     #   {"pass": <bool>, "checks": [...], "score": <number>} on stdout.
     #   "pass" is false only when at least one recorded check failed; "score"
     #   is pass_count / total_count with two decimals, or 0 with no checks.
     #######################################
     emit_report() {
       local checks_json score overall_pass

       # Build checks array JSON: join the elements with commas. An empty array
       # yields "[]".
       local IFS=,
       checks_json="[${checks[*]}]"

       # Compute score. Values reach awk through -v rather than being spliced
       # into the program text, and LC_ALL=C pins the decimal point to "." so
       # the result is always a valid JSON number.
       if (( total_count > 0 )); then
         score=$(LC_ALL=C awk -v p="$pass_count" -v t="$total_count" 'BEGIN { printf "%.2f", p / t }')
       else
         score="0"
       fi

       overall_pass="true"
       if (( pass_count < total_count )); then
         overall_pass="false"
       fi

       printf '{"pass": %s, "checks": %s, "score": %s}\n' "$overall_pass" "$checks_json" "$score"
     }

     emit_report

Impressum · Datenschutz