code-analysis.sh - loop-benchmarking - Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.

code-analysis.sh (5439B)
      1 #!/usr/bin/env bash
      2 # Code analysis for generated Tetris implementations.
      3 # Measures code quality attributes that don't require running the game.
      4 #
      5 # Usage: code-analysis.sh <workspace_path> <language>
      6 # Output: JSON to stdout
      7 
      8 WORKSPACE="$1"
      9 LANGUAGE="$2"
     10 
     11 cd "$WORKSPACE" || exit 1
     12 
     13 results='{}'
     14 
     15 # --- File inventory ---
     16 total_files=$(find . -not -path './node_modules/*' -not -path './.git/*' -type f | wc -l)
     17 code_files=$(find . -not -path './node_modules/*' -not -path './.git/*' \( -name "*.ts" -o -name "*.js" -o -name "*.html" -o -name "*.css" \) -type f | wc -l)
     18 doc_files=$(find . -not -path './node_modules/*' -not -path './.git/*' \( -name "*.md" -o -name "*.txt" \) -type f | wc -l)
     19 
     20 # Count specific unnecessary files
     21 unnecessary=0
     22 unnecessary_list=""
     23 for pattern in README.md IMPLEMENTATION.md FEATURES.md QUICK_START.txt CHANGELOG.md TODO.md server.js; do
     24   if [ -f "$pattern" ]; then
     25     unnecessary=$((unnecessary + 1))
     26     unnecessary_list="$unnecessary_list $pattern"
     27   fi
     28 done
     29 
     30 results=$(echo "$results" | jq \
     31   --argjson total "$total_files" \
     32   --argjson code "$code_files" \
     33   --argjson docs "$doc_files" \
     34   --argjson unnecessary "$unnecessary" \
     35   --arg unnecessary_list "${unnecessary_list# }" \
     36   '. + {files: {total: $total, code: $code, docs: $docs, unnecessary: $unnecessary, unnecessary_list: $unnecessary_list}}')
     37 
     38 # --- Lines of code ---
     39 total_loc=0
     40 for ext in ts js html css; do
     41   count=$(find . -not -path './node_modules/*' -not -path './.git/*' -name "*.$ext" -exec cat {} + 2>/dev/null | wc -l)
     42   total_loc=$((total_loc + count))
     43 done
     44 
     45 results=$(echo "$results" | jq --argjson loc "$total_loc" '. + {lines_of_code: $loc}')
     46 
     47 # --- Dependency count ---
     48 dep_count=0
     49 dev_dep_count=0
     50 if [ -f "package.json" ]; then
     51   dep_count=$(jq '.dependencies // {} | length' package.json 2>/dev/null || echo 0)
     52   dev_dep_count=$(jq '.devDependencies // {} | length' package.json 2>/dev/null || echo 0)
     53 fi
     54 
     55 results=$(echo "$results" | jq \
     56   --argjson deps "$dep_count" \
     57   --argjson devDeps "$dev_dep_count" \
     58   '. + {dependencies: {production: $deps, dev: $devDeps, total: ($deps + $devDeps)}}')
     59 
     60 # --- Single file vs multi file ---
     61 # For Tetris, a single HTML file with inline JS is perfectly valid
     62 # Over-engineering signal: more than 5 code files for a Tetris game
     63 if [ "$code_files" -le 2 ]; then
     64   complexity="minimal"
     65 elif [ "$code_files" -le 5 ]; then
     66   complexity="moderate"
     67 else
     68   complexity="over-engineered"
     69 fi
     70 
     71 results=$(echo "$results" | jq --arg c "$complexity" '. + {complexity: $c}')
     72 
     73 # --- Console.log count (debug noise) ---
     74 console_logs=$(grep -r "console\.log" --include="*.ts" --include="*.js" . 2>/dev/null | grep -v node_modules | wc -l)
     75 results=$(echo "$results" | jq --argjson cl "$console_logs" '. + {console_logs: $cl}')
     76 
     77 # --- HTML validation ---
     78 html_valid="unknown"
     79 html_errors=0
     80 if [ -f "index.html" ] || find . -name "*.html" -not -path "*/node_modules/*" | head -1 | grep -q .; then
     81   npm install --save-dev html-validate > /dev/null 2>&1
     82   html_file=$(find . -name "index.html" -not -path "*/node_modules/*" | head -1)
     83   if [ -n "$html_file" ]; then
     84     html_output=$(npx html-validate --formatter json "$html_file" 2>/dev/null) || true
     85     if echo "$html_output" | jq . > /dev/null 2>&1; then
     86       html_errors=$(echo "$html_output" | jq '[.[].errorCount] | add // 0')
     87       html_errors=${html_errors:-0}
     88       if [ "$html_errors" -eq 0 ]; then
     89         html_valid="true"
     90       else
     91         html_valid="false"
     92       fi
     93     fi
     94   fi
     95 fi
     96 results=$(echo "$results" | jq --arg v "$html_valid" --argjson e "$html_errors" \
     97   '. + {html_validation: {valid: ($v == "true"), errors: $e}}')
     98 
     99 # --- Code duplication ---
    100 duplication_pct=0
    101 npm install --save-dev jscpd > /dev/null 2>&1
    102 dupl_output=$(npx jscpd --min-lines 5 --min-tokens 50 --reporters json \
    103   --ignore "node_modules,package-lock.json" . 2>/dev/null) || true
    104 if echo "$dupl_output" | jq . > /dev/null 2>&1; then
    105   duplication_pct=$(echo "$dupl_output" | jq '.statistics.total.percentage // 0')
    106   duplication_pct=${duplication_pct:-0}
    107 fi
    108 results=$(echo "$results" | jq --argjson d "$duplication_pct" \
    109   '. + {duplication_percentage: $d}')
    110 
    111 # --- Compute score ---
    112 # Scoring: fewer unnecessary files, fewer deps, moderate LOC, no debug noise
    113 score=100
    114 
    115 # Penalty for unnecessary files (10 points each, max 30)
    116 penalty=$((unnecessary * 10))
    117 [ "$penalty" -gt 30 ] && penalty=30
    118 score=$((score - penalty))
    119 
    120 # Penalty for too many dependencies (5 points each over 2)
    121 if [ "$dep_count" -gt 2 ]; then
    122   dep_penalty=$(( (dep_count - 2) * 5 ))
    123   [ "$dep_penalty" -gt 20 ] && dep_penalty=20
    124   score=$((score - dep_penalty))
    125 fi
    126 
    127 # Penalty for excessive console.logs (2 points each, max 10)
    128 log_penalty=$((console_logs * 2))
    129 [ "$log_penalty" -gt 10 ] && log_penalty=10
    130 score=$((score - log_penalty))
    131 
    132 # Penalty for over-engineering
    133 if [ "$complexity" = "over-engineered" ]; then
    134   score=$((score - 10))
    135 fi
    136 
    137 # Penalty for invalid HTML (5 points)
    138 if [ "$html_valid" = "false" ]; then
    139   score=$((score - 5))
    140 fi
    141 
    142 # Penalty for high duplication (over 10%)
    143 dup_int=$(awk "BEGIN {printf \"%d\", $duplication_pct}")
    144 if [ "$dup_int" -gt 10 ]; then
    145   score=$((score - 10))
    146 elif [ "$dup_int" -gt 5 ]; then
    147   score=$((score - 5))
    148 fi
    149 
    150 # Normalize to 0-1
    151 score_normalized=$(awk "BEGIN {s = $score / 100; if (s < 0) s = 0; printf \"%.2f\", s}")
    152 
    153 results=$(echo "$results" | jq --argjson s "$score_normalized" '. + {score: $s}')
    154 
    155 echo "$results" | jq '.'
	loop-benchmarking Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
	git clone https://git.shiptheloop.com/loop-benchmarking.git
	Log \| Files \| Refs \| README