| 1 | #!/usr/bin/env bash |
| 2 | # Performance regression gate for armfortas. |
| 3 | # |
| 4 | # Compiles a set of representative test programs and measures: |
| 5 | # - Compile time (wall clock) |
| 6 | # - Binary size |
| 7 | # |
| 8 | # Compares against a baseline file (.benchmarks/baseline.txt). |
| 9 | # If no baseline exists, creates one. |
| 10 | # |
| 11 | # Usage: |
| 12 | # ./scripts/benchmark_gate.sh # compare against baseline |
| 13 | # ./scripts/benchmark_gate.sh --update # update the baseline |
| 14 | # |
| 15 | # Thresholds: |
| 16 | # Compile time: fail if >30% slower than baseline |
| 17 | # Binary size: fail if >15% larger than baseline |
| 18 | |
| 19 | set -euo pipefail |
| 20 | cd "$(git rev-parse --show-toplevel)" |
| 21 | |
| 22 | COMPILER="./target/release/armfortas" |
| 23 | BASELINE=".benchmarks/baseline.txt" |
| 24 | PROGRAMS=( |
| 25 | test_programs/array_bulk_kernels.f90 |
| 26 | test_programs/module_init.f90 |
| 27 | test_programs/two_loops.f90 |
| 28 | test_programs/derived_type_nested.f90 |
| 29 | test_programs/allocatable.f90 |
| 30 | ) |
| 31 | OPT="-O2" |
| 32 | |
| 33 | if [ ! -x "$COMPILER" ]; then |
| 34 | echo "Build the compiler first: cargo build --release" |
| 35 | exit 1 |
| 36 | fi |
| 37 | |
| 38 | mkdir -p .benchmarks |
| 39 | TMPDIR=$(mktemp -d) |
| 40 | trap "rm -rf $TMPDIR" EXIT |
| 41 | |
| 42 | compile_and_measure() { |
| 43 | local src="$1" |
| 44 | local stem |
| 45 | stem=$(basename "$src" .f90) |
| 46 | local binary="$TMPDIR/$stem" |
| 47 | |
| 48 | # Compile and time it |
| 49 | local start end elapsed |
| 50 | start=$(python3 -c 'import time; print(time.monotonic())') |
| 51 | "$COMPILER" "$src" $OPT -o "$binary" 2>/dev/null |
| 52 | end=$(python3 -c 'import time; print(time.monotonic())') |
| 53 | elapsed=$(python3 -c "print(f'{$end - $start:.4f}')") |
| 54 | |
| 55 | # Binary size |
| 56 | local size |
| 57 | if [ -f "$binary" ]; then |
| 58 | size=$(stat -f%z "$binary" 2>/dev/null || stat -c%s "$binary" 2>/dev/null || echo 0) |
| 59 | else |
| 60 | size=0 |
| 61 | fi |
| 62 | |
| 63 | echo "$stem $elapsed $size" |
| 64 | } |
| 65 | |
| 66 | echo "Benchmarking ${#PROGRAMS[@]} programs at $OPT..." |
| 67 | RESULTS="" |
| 68 | for prog in "${PROGRAMS[@]}"; do |
| 69 | if [ ! -f "$prog" ]; then |
| 70 | echo " SKIP: $prog (not found)" |
| 71 | continue |
| 72 | fi |
| 73 | result=$(compile_and_measure "$prog") |
| 74 | echo " $result" |
| 75 | RESULTS="$RESULTS$result"$'\n' |
| 76 | done |
| 77 | |
| 78 | if [ "${1:-}" = "--update" ]; then |
| 79 | echo "$RESULTS" > "$BASELINE" |
| 80 | echo "Baseline updated: $BASELINE" |
| 81 | exit 0 |
| 82 | fi |
| 83 | |
| 84 | if [ ! -f "$BASELINE" ]; then |
| 85 | echo "$RESULTS" > "$BASELINE" |
| 86 | echo "No baseline found — created: $BASELINE" |
| 87 | echo "Run again to compare." |
| 88 | exit 0 |
| 89 | fi |
| 90 | |
| 91 | # Compare against baseline |
| 92 | echo "" |
| 93 | echo "Comparing against baseline..." |
| 94 | FAIL=0 |
| 95 | while IFS=' ' read -r name time size; do |
| 96 | [ -z "$name" ] && continue |
| 97 | baseline_line=$(grep "^$name " "$BASELINE" 2>/dev/null || true) |
| 98 | if [ -z "$baseline_line" ]; then |
| 99 | echo " $name: NEW (no baseline)" |
| 100 | continue |
| 101 | fi |
| 102 | base_time=$(echo "$baseline_line" | awk '{print $2}') |
| 103 | base_size=$(echo "$baseline_line" | awk '{print $3}') |
| 104 | |
| 105 | # Time regression check (30% threshold) |
| 106 | time_ratio=$(python3 -c " |
| 107 | bt, ct = $base_time, $time |
| 108 | if bt > 0: |
| 109 | ratio = ct / bt |
| 110 | print(f'{ratio:.2f}') |
| 111 | else: |
| 112 | print('1.00') |
| 113 | ") |
| 114 | time_pct=$(python3 -c "print(f'{($time / max($base_time, 0.001) - 1) * 100:.1f}')") |
| 115 | |
| 116 | # Size regression check (15% threshold) |
| 117 | size_pct=$(python3 -c " |
| 118 | bs, cs = $base_size, $size |
| 119 | if bs > 0: |
| 120 | print(f'{(cs / bs - 1) * 100:.1f}') |
| 121 | else: |
| 122 | print('0.0') |
| 123 | ") |
| 124 | |
| 125 | status="OK" |
| 126 | if python3 -c "exit(0 if $time / max($base_time, 0.001) > 1.30 else 1)" 2>/dev/null; then |
| 127 | status="SLOW" |
| 128 | FAIL=1 |
| 129 | fi |
| 130 | if python3 -c "exit(0 if $size / max($base_size, 1) > 1.15 else 1)" 2>/dev/null; then |
| 131 | status="BLOAT" |
| 132 | FAIL=1 |
| 133 | fi |
| 134 | |
| 135 | echo " $name: time ${time_pct}% size ${size_pct}% [$status]" |
| 136 | done <<< "$RESULTS" |
| 137 | |
| 138 | if [ $FAIL -ne 0 ]; then |
| 139 | echo "" |
| 140 | echo "FAIL: performance regression detected" |
| 141 | exit 1 |
| 142 | else |
| 143 | echo "" |
| 144 | echo "PASS: no regressions" |
| 145 | fi |