@@ -0,0 +1,145 @@ |
| 1 | +#!/usr/bin/env bash |
| 2 | +# Performance regression gate for armfortas. |
| 3 | +# |
| 4 | +# Compiles a set of representative test programs and measures: |
| 5 | +# - Compile time (wall clock) |
| 6 | +# - Binary size |
| 7 | +# |
| 8 | +# Compares against a baseline file (.benchmarks/baseline.txt). |
| 9 | +# If no baseline exists, creates one. |
| 10 | +# |
| 11 | +# Usage: |
| 12 | +# ./scripts/benchmark_gate.sh # compare against baseline |
| 13 | +# ./scripts/benchmark_gate.sh --update # update the baseline |
| 14 | +# |
| 15 | +# Thresholds: |
| 16 | +# Compile time: fail if >30% slower than baseline |
| 17 | +# Binary size: fail if >15% larger than baseline |
| 18 | + |
set -euo pipefail

# Run from the repository root so the relative paths below resolve.
# The root is resolved in its own assignment: when a command substitution
# is used directly as an argument of `cd`, its failure does not stop the
# script, and `cd ""` would silently leave us in the caller's directory.
repo_root=$(git rev-parse --show-toplevel)
cd "$repo_root"

# Compiler under test, the stored baseline, and the programs to measure.
COMPILER="./target/release/armfortas"
BASELINE=".benchmarks/baseline.txt"
PROGRAMS=(
  test_programs/array_bulk_kernels.f90
  test_programs/module_init.f90
  test_programs/two_loops.f90
  test_programs/derived_type_nested.f90
  test_programs/allocatable.f90
)
# Optimisation flag passed to every compile.
OPT="-O2"

if [ ! -x "$COMPILER" ]; then
  echo "Build the compiler first: cargo build --release" >&2
  exit 1
fi

mkdir -p .benchmarks

# Scratch directory for the compiled binaries.
# NOTE(review): this reuses the conventional TMPDIR variable name; if TMPDIR
# is already exported, child processes will see the new value — confirm that
# is acceptable.
TMPDIR=$(mktemp -d)

# Single quotes defer expansion until the trap fires, and the inner double
# quotes keep a path containing spaces or glob characters as one argument.
trap 'rm -rf -- "$TMPDIR"' EXIT
| 41 | + |
#######################################
# Compile one test program and report how long the compile took and how
# large the resulting binary is.
# Globals:   COMPILER (read), OPT (read), TMPDIR (read; output directory)
# Arguments: $1 - path to a .f90 source file
# Outputs:   "<stem> <seconds> <bytes>" on stdout; diagnostics on stderr
# Returns:   0 on success; non-zero if the compiler fails or produces no
#            binary, so a broken build is never recorded as a measurement
#######################################
compile_and_measure() {
  local src="$1"
  local stem
  stem=$(basename "$src" .f90)
  local binary="$TMPDIR/$stem"
  local log="$binary.stderr"

  # Compile and time it. The compiler's exit status is captured explicitly:
  # this function is normally called inside $(...), where bash clears -e,
  # so a failing compile would otherwise go unnoticed.
  local start end elapsed
  local rc=0
  start=$(python3 -c 'import time; print(time.monotonic())') || return
  # $OPT is deliberately unquoted so that it may carry several flags.
  # shellcheck disable=SC2086
  "$COMPILER" "$src" $OPT -o "$binary" 2>"$log" || rc=$?
  end=$(python3 -c 'import time; print(time.monotonic())') || return

  if [ "$rc" -ne 0 ]; then
    echo "ERROR: $COMPILER failed on $src (exit status $rc)" >&2
    cat -- "$log" >&2 || true
    return "$rc"
  fi
  if [ ! -f "$binary" ]; then
    echo "ERROR: $COMPILER produced no binary for $src" >&2
    return 1
  fi

  # Timestamps are passed as arguments rather than spliced into the code.
  elapsed=$(python3 -c \
    'import sys; print(f"{float(sys.argv[2]) - float(sys.argv[1]):.4f}")' \
    "$start" "$end") || return

  # Binary size in bytes. `wc -c` behaves the same on BSD and GNU systems;
  # some implementations pad the count with blanks, so strip whitespace.
  local size
  size=$(wc -c < "$binary") || return
  size=${size//[[:space:]]/}

  echo "$stem $elapsed $size"
}
| 65 | + |
# Measure every program that exists and collect one
# "<stem> <seconds> <bytes>" record per line in RESULTS.
echo "Benchmarking ${#PROGRAMS[@]} programs at $OPT..."
RESULTS=""
for prog in "${PROGRAMS[@]}"; do
  if [ ! -f "$prog" ]; then
    echo " SKIP: $prog (not found)"
    continue
  fi
  # Report a failed measurement explicitly instead of relying on -e alone.
  if ! result=$(compile_and_measure "$prog"); then
    echo "FAIL: could not benchmark $prog" >&2
    exit 1
  fi
  echo " $result"
  RESULTS+="$result"$'\n'
done

# With nothing measured, the rest of the script would either store an empty
# baseline or report success without having compared anything.
if [ -z "$RESULTS" ]; then
  echo "FAIL: no test programs were benchmarked" >&2
  exit 1
fi
| 77 | + |
# Decide whether this run should (re)write the baseline: either the caller
# asked for it with --update, or no baseline file exists yet. In both cases
# the fresh measurements are stored and the run ends successfully without
# comparing anything.
baseline_note=""
if [[ "${1:-}" == "--update" ]]; then
  baseline_note="Baseline updated: $BASELINE"
elif [[ ! -f "$BASELINE" ]]; then
  baseline_note="No baseline found — created: $BASELINE"$'\n'"Run again to compare."
fi

if [[ -n "$baseline_note" ]]; then
  printf '%s\n' "$RESULTS" > "$BASELINE"
  printf '%s\n' "$baseline_note"
  exit 0
fi
| 90 | + |
# Compare against baseline

#######################################
# Compare fresh measurements with a stored baseline and print one report
# line per program.
# Arguments:
#   $1 - measurements, one "<name> <seconds> <bytes>" record per line
#   $2 - path to the baseline file (same record format)
# Outputs:   report lines on stdout; diagnostics on stderr
# Returns:   0 if nothing regressed; 1 if any program is more than 30%
#            slower, more than 15% larger, produced no binary, or has an
#            unreadable/malformed record
#######################################
compare_with_baseline() {
  local results="$1"
  local baseline="$2"
  local time_limit="1.30"
  local size_limit="1.15"
  local number_re='^[0-9]+([.][0-9]+)?$'
  local failed=0
  local name time size
  local base_line ignored base_time base_size
  local verdict time_pct size_pct slow bloat missing status

  while IFS=' ' read -r name time size; do
    if [[ -z "$name" ]]; then
      continue
    fi

    # Exact string match on the first field; the first hit wins. A regex
    # lookup would let a name such as "a.b" match "axb" and could return
    # several lines at once.
    if ! base_line=$(awk -v want="$name" \
        '($1 "") == (want "") { print; exit }' "$baseline"); then
      echo " $name: cannot read baseline $baseline" >&2
      failed=1
      continue
    fi
    if [[ -z "$base_line" ]]; then
      echo " $name: NEW (no baseline)"
      continue
    fi
    read -r ignored base_time base_size ignored <<< "$base_line"

    # Refuse to do arithmetic on anything that is not a plain number.
    if ! [[ "$time" =~ $number_re && "$size" =~ $number_re \
        && "$base_time" =~ $number_re && "$base_size" =~ $number_re ]]; then
      echo " $name: malformed measurement or baseline entry" >&2
      failed=1
      continue
    fi

    # One awk run yields both percentages and all threshold verdicts. The
    # baseline values are floored (0.001 s, 1 byte) to avoid dividing by 0.
    if ! verdict=$(awk -v t="$time" -v bt="$base_time" \
        -v s="$size" -v bs="$base_size" \
        -v tmax="$time_limit" -v smax="$size_limit" 'BEGIN {
          t += 0; bt += 0; s += 0; bs += 0
          bt_floor = (bt > 0.001) ? bt : 0.001
          bs_floor = (bs > 1) ? bs : 1
          time_pct = (t / bt_floor - 1) * 100
          size_pct = (bs > 0) ? (s / bs - 1) * 100 : 0
          slow = (t / bt_floor > tmax + 0) ? 1 : 0
          bloat = (s / bs_floor > smax + 0) ? 1 : 0
          missing = (s == 0) ? 1 : 0
          printf "%.1f %.1f %d %d %d\n", time_pct, size_pct, slow, bloat, missing
        }'); then
      echo " $name: could not evaluate measurements" >&2
      failed=1
      continue
    fi
    read -r time_pct size_pct slow bloat missing <<< "$verdict"

    # Collect every problem so that one does not hide another.
    status=""
    if [[ "$slow" == "1" ]]; then
      status="SLOW"
    fi
    if [[ "$bloat" == "1" ]]; then
      status="${status:+$status+}BLOAT"
    fi
    # A size of 0 means the compile produced no binary; never report it OK.
    if [[ "$missing" == "1" ]]; then
      status="${status:+$status+}NO-BINARY"
    fi
    if [[ -n "$status" ]]; then
      failed=1
    else
      status="OK"
    fi

    echo " $name: time ${time_pct}% size ${size_pct}% [$status]"
  done <<< "$results"

  return "$failed"
}

echo ""
echo "Comparing against baseline..."
if compare_with_baseline "${RESULTS:-}" "${BASELINE:-}"; then
  echo ""
  echo "PASS: no regressions"
else
  echo ""
  echo "FAIL: performance regression detected"
  exit 1
fi