loop-benchmarking

Controlled experiments across agentic coding configurations. Same task, one variable, what actually works.
git clone https://git.shiptheloop.com/loop-benchmarking.git
Log | Files | Refs | README

quality.sh (3569B)


      1 #!/usr/bin/env bash
      2 # Quality evaluation for Tetris task.
      3 # Runs lint, accessibility, and performance checks.
      4 #
      5 # Usage: quality.sh <workspace_path> <language>
      6 # Output: JSON to stdout
      7 
      8 WORKSPACE="$1"
      9 LANGUAGE="$2"
     10 
     11 results='{}'
     12 
     13 # --- Lint check ---
     14 cd "$WORKSPACE"
     15 if command -v npx > /dev/null 2>&1; then
     16   npm install --save-dev eslint @eslint/js > /dev/null 2>&1
     17 
     18   if [[ "$LANGUAGE" == "typescript" ]]; then
     19     extensions="ts,tsx"
     20   elif [[ "$LANGUAGE" == "unspecified" ]]; then
     21     extensions="ts,tsx,js,jsx"
     22   else
     23     extensions="js,jsx"
     24   fi
     25 
     26   lint_output=$(npx eslint --no-eslintrc --rule '{"no-unused-vars":"warn","no-undef":"warn","no-console":"off"}' \
     27     --ext ".$extensions" --format json . 2>/dev/null) || true
     28 
     29   if echo "$lint_output" | jq . > /dev/null 2>&1; then
     30     errors=$(echo "$lint_output" | jq '[.[].errorCount] | add // 0')
     31     warnings=$(echo "$lint_output" | jq '[.[].warningCount] | add // 0')
     32     errors=${errors:-0}
     33     warnings=${warnings:-0}
     34     if [[ "$errors" -gt 0 ]]; then
     35       results=$(echo "$results" | jq --argjson e "$errors" --argjson w "$warnings" \
     36         '. + {lint: {pass: false, errors: $e, warnings: $w}}')
     37     else
     38       results=$(echo "$results" | jq --argjson e "$errors" --argjson w "$warnings" \
     39         '. + {lint: {pass: true, errors: $e, warnings: $w}}')
     40     fi
     41   else
     42     results=$(echo "$results" | jq '. + {lint: {pass: false, errors: -1, warnings: 0, error: "eslint failed to run"}}')
     43   fi
     44 else
     45   results=$(echo "$results" | jq '. + {lint: {pass: false, error: "eslint not available"}}')
     46 fi
     47 
     48 # --- TypeScript type check ---
     49 # For "unspecified" language, check if there are .ts files and a tsconfig
     50 if [[ "$LANGUAGE" == "typescript" ]] || [[ "$LANGUAGE" == "unspecified" ]]; then
     51   cd "$WORKSPACE"
     52   if [[ -f "tsconfig.json" ]]; then
     53     if npx tsc --noEmit > /dev/null 2>&1; then
     54       results=$(echo "$results" | jq '. + {typecheck: {pass: true}}')
     55     else
     56       type_errors=$(npx tsc --noEmit 2>&1 | grep -c "error TS" || true)
     57       type_errors=${type_errors:-0}
     58       results=$(echo "$results" | jq --argjson e "$type_errors" '. + {typecheck: {pass: false, errors: $e}}')
     59     fi
     60   else
     61     results=$(echo "$results" | jq '. + {typecheck: {pass: false, error: "no tsconfig.json"}}')
     62   fi
     63 else
     64   results=$(echo "$results" | jq '. + {typecheck: {pass: true, note: "not applicable for javascript"}}')
     65 fi
     66 
     67 # --- File size check ---
     68 total_size=0
     69 if [[ -d "$WORKSPACE/dist" ]]; then
     70   total_size=$(du -sb "$WORKSPACE/dist" 2>/dev/null | awk '{print $1}')
     71 elif [[ -f "$WORKSPACE/index.html" ]]; then
     72   total_size=$(du -sb "$WORKSPACE" --exclude=node_modules --exclude=.git 2>/dev/null | awk '{print $1}')
     73 fi
     74 total_size=${total_size:-0}
     75 
     76 if [[ "$total_size" -gt 524288 ]]; then
     77   results=$(echo "$results" | jq --argjson s "$total_size" \
     78     '. + {performance: {pass: false, bundle_size_bytes: $s, size_under_512kb: false}}')
     79 else
     80   results=$(echo "$results" | jq --argjson s "$total_size" \
     81     '. + {performance: {pass: true, bundle_size_bytes: $s, size_under_512kb: true}}')
     82 fi
     83 
     84 # --- Compute aggregate quality score ---
     85 score_sum=0
     86 score_count=0
     87 
     88 for key in lint typecheck performance; do
     89   val=$(echo "$results" | jq --arg k "$key" '.[$k].pass // false')
     90   if [[ "$val" == "true" ]]; then
     91     score_sum=$((score_sum + 100))
     92   fi
     93   score_count=$((score_count + 1))
     94 done
     95 
     96 if [[ $score_count -gt 0 ]]; then
     97   score=$(awk "BEGIN {printf \"%.2f\", $score_sum / ($score_count * 100)}")
     98 else
     99   score="0"
    100 fi
    101 
    102 results=$(echo "$results" | jq --argjson s "$score" '. + {score: $s}')
    103 
    104 echo "$results" | jq '.'

Impressum · Datenschutz