@@ -0,0 +1,145 @@ |
| | 1 | +#!/usr/bin/env bash |
| | 2 | +# Performance regression gate for armfortas. |
| | 3 | +# |
| | 4 | +# Compiles a set of representative test programs and measures: |
| | 5 | +# - Compile time (wall clock) |
| | 6 | +# - Binary size |
| | 7 | +# |
| | 8 | +# Compares against a baseline file (.benchmarks/baseline.txt). |
| | 9 | +# If no baseline exists, creates one. |
| | 10 | +# |
| | 11 | +# Usage: |
| | 12 | +# ./scripts/benchmark_gate.sh # compare against baseline |
| | 13 | +# ./scripts/benchmark_gate.sh --update # update the baseline |
| | 14 | +# |
| | 15 | +# Thresholds: |
| | 16 | +# Compile time: fail if >30% slower than baseline |
| | 17 | +# Binary size: fail if >15% larger than baseline |
| | 18 | + |
# Strict mode: stop on command failure, unset variables, and failed
# pipeline stages.
set -euo pipefail

# Every path below is relative to the repository root, so resolve it first.
# The lookup is checked explicitly: a failing command substitution used
# directly as an argument to cd would not stop the script.
repo_root=$(git rev-parse --show-toplevel) || {
  echo "error: not inside a git work tree; cannot locate the repository root" >&2
  exit 1
}
cd "$repo_root"

COMPILER="./target/release/armfortas"  # compiler binary being measured
BASELINE=".benchmarks/baseline.txt"    # stored reference measurements
PROGRAMS=(
  test_programs/array_bulk_kernels.f90
  test_programs/module_init.f90
  test_programs/two_loops.f90
  test_programs/derived_type_nested.f90
  test_programs/allocatable.f90
)
OPT="-O2"                              # flags passed to every compile

if [ ! -x "$COMPILER" ]; then
  echo "Build the compiler first: cargo build --release" >&2
  exit 1
fi

# python3 is used for the timing measurements later in the script.
if ! command -v python3 >/dev/null 2>&1; then
  echo "python3 is required for timing measurements" >&2
  exit 1
fi

# Create the directory that holds the baseline file (derived from BASELINE
# so the two cannot drift apart).
mkdir -p "$(dirname "$BASELINE")"

# Scratch directory for compiled binaries; removed on any exit path.
# NOTE: this reuses the conventional TMPDIR variable name, so if TMPDIR is
# already exported, child processes will also place their temp files here.
TMPDIR=$(mktemp -d)
# Single quotes defer expansion until the trap fires and keep the path
# quoted, so a directory name containing spaces is still removed correctly.
trap 'rm -rf -- "$TMPDIR"' EXIT
| | 41 | + |
#######################################
# Compile one source file and report how long it took and how big the
# resulting binary is.
# Globals:   COMPILER (read), OPT (read), TMPDIR (read; output directory)
# Arguments: $1 - path to a .f90 source file
# Outputs:   "<stem> <seconds> <bytes>" on stdout; on failure, a diagnostic
#            plus the compiler's own output on stderr
# Returns:   0 on success, 1 if the compiler could not be run or exited
#            with a non-zero status
#######################################
compile_and_measure() {
  local src="$1"
  local stem
  stem=$(basename "$src" .f90)
  local binary="$TMPDIR/$stem"
  local errlog="$TMPDIR/$stem.stderr"

  # Time the compiler from inside a single interpreter so that only the
  # compiler process is measured. Reading the clock from two separate
  # python3 launches would add interpreter startup/shutdown to every sample.
  # The compiler's stdout is sent to stderr so it cannot corrupt the result
  # line printed by this function.
  local timer_py='
import subprocess, sys, time
t0 = time.monotonic()
rc = subprocess.call(sys.argv[1:], stdout=sys.stderr)
dt = time.monotonic() - t0
print(f"{dt:.4f}")
sys.exit(0 if rc == 0 else 1)
'

  # The status is checked explicitly: this function normally runs inside a
  # command substitution, where bash clears errexit, so a failing compile
  # would otherwise be recorded as a zero-byte "success".
  local elapsed
  # shellcheck disable=SC2086  # OPT may hold several flags; splitting is intended
  if ! elapsed=$(python3 -c "$timer_py" "$COMPILER" "$src" $OPT -o "$binary" 2>"$errlog"); then
    echo "ERROR: compilation failed: $src" >&2
    cat "$errlog" >&2
    return 1
  fi

  # Binary size in bytes. wc -c is POSIX, so no probing of BSD vs GNU stat
  # dialects is needed; BSD wc pads its output, hence the whitespace strip.
  local size=0
  if [ -f "$binary" ]; then
    size=$(wc -c < "$binary")
    size=${size//[[:space:]]/}
  fi

  echo "$stem $elapsed $size"
}
| | 65 | + |
# Measure every listed program that exists on disk and collect one
# "<name> <seconds> <bytes>" line per program in RESULTS.
echo "Benchmarking ${#PROGRAMS[@]} programs at $OPT..."
RESULTS=""
measured=0
for prog in "${PROGRAMS[@]}"; do
  if [ ! -f "$prog" ]; then
    echo "  SKIP: $prog (not found)"
    continue
  fi

  # Check the status explicitly so a failed measurement stops the gate with
  # a message instead of a bare non-zero exit.
  if ! result=$(compile_and_measure "$prog"); then
    echo "ERROR: could not compile and measure $prog" >&2
    exit 1
  fi

  # The last field is the binary size; 0 means no binary was produced.
  # Recording that would let a broken compile pass the size check (or be
  # stored as a baseline), so treat it as a hard failure.
  if [ "${result##* }" = "0" ]; then
    echo "ERROR: no binary was produced for $prog" >&2
    exit 1
  fi

  echo "  $result"
  RESULTS="$RESULTS$result"$'\n'
  measured=$((measured + 1))
done

# With nothing measured, there is nothing to compare or to store as a
# baseline; continuing would report a vacuous pass.
if [ "$measured" -eq 0 ]; then
  echo "ERROR: none of the benchmark programs were found; nothing to measure" >&2
  exit 1
fi
| | 77 | + |
#######################################
# Store the current measurements as the new baseline.
# Globals:   RESULTS (read), BASELINE (read; file is overwritten)
# Outputs:   diagnostic on stderr when there is nothing to store
# Returns:   exits the script with status 1 if RESULTS is empty
#######################################
write_baseline() {
  # An empty baseline would make every later comparison report "NEW" and
  # pass, so refuse to create one.
  if [ -z "$RESULTS" ]; then
    echo "ERROR: no measurements collected; refusing to write an empty baseline" >&2
    exit 1
  fi
  mkdir -p "$(dirname "$BASELINE")"
  # RESULTS already ends with a newline; printf writes it verbatim without
  # appending a second one.
  printf '%s' "$RESULTS" > "$BASELINE"
}

# --update: unconditionally replace the baseline with the current numbers.
if [ "${1:-}" = "--update" ]; then
  write_baseline
  echo "Baseline updated: $BASELINE"
  exit 0
fi

# First run: nothing to compare against yet, so seed the baseline.
if [ ! -f "$BASELINE" ]; then
  write_baseline
  echo "No baseline found — created: $BASELINE"
  echo "Run again to compare."
  exit 0
fi
| | 90 | + |
#######################################
# Judge one measurement against its baseline values.
# Thresholds: time ratio > 1.30 is SLOW, size ratio > 1.15 is BLOAT.
# The baseline time is clamped to at least 0.001 s and the baseline size to
# at least 1 byte so the ratios are always defined; the printed percentages
# use the same denominators as the verdict.
# Arguments: $1 - current time (seconds)   $2 - current size (bytes)
#            $3 - baseline time (seconds)  $4 - baseline size (bytes)
# Outputs:   "<status> <time_pct> <size_pct>" on stdout, where status is
#            OK, SLOW, BLOAT or SLOW+BLOAT
# Returns:   0 on success, non-zero if any argument is not a plain
#            non-negative decimal number
#######################################
evaluate_regression() {
  # Values are passed with -v rather than pasted into program text, and are
  # validated before use, so a damaged baseline cannot inject code.
  LC_ALL=C awk -v ct="$1" -v cs="$2" -v bt="$3" -v bs="$4" '
    BEGIN {
      num = "^[0-9]+(\\.[0-9]+)?$"
      if (ct !~ num || cs !~ num || bt !~ num || bs !~ num) exit 2
      ct += 0; cs += 0; bt += 0; bs += 0
      if (bt < 0.001) bt = 0.001
      if (bs < 1) bs = 1
      time_ratio = ct / bt
      size_ratio = cs / bs
      status = ""
      if (time_ratio > 1.30) status = "SLOW"
      if (size_ratio > 1.15) status = (status == "" ? "BLOAT" : status "+BLOAT")
      if (status == "") status = "OK"
      printf "%s %.1f %.1f\n", status, (time_ratio - 1) * 100, (size_ratio - 1) * 100
    }'
}

#######################################
# Compare every line of RESULTS with the matching line in the baseline file
# and print one report line per program.
# Globals:   RESULTS (read), BASELINE (read), FAIL (set to 1 on regression
#            or on a malformed entry)
# Outputs:   report lines on stdout; malformed-entry errors on stderr
#######################################
compare_against_baseline() {
  local name cur_time cur_size baseline_line base_time base_size
  local verdict status time_pct size_pct

  while IFS=' ' read -r name cur_time cur_size; do
    if [ -z "$name" ]; then
      continue
    fi

    # Literal match on the first field (not a regex); the first matching
    # line wins. Appending "" forces a string comparison in awk.
    baseline_line=$(awk -v n="$name" \
      '($1 "") == (n "") { print $2, $3; exit }' "$BASELINE")
    if [ -z "$baseline_line" ]; then
      echo "  $name: NEW (no baseline)"
      continue
    fi
    read -r base_time base_size <<< "$baseline_line"

    if ! verdict=$(evaluate_regression \
      "$cur_time" "$cur_size" "$base_time" "$base_size"); then
      echo "  $name: ERROR (malformed measurement or baseline entry)" >&2
      FAIL=1
      continue
    fi
    read -r status time_pct size_pct <<< "$verdict"

    if [ "$status" != "OK" ]; then
      FAIL=1
    fi
    echo "  $name: time ${time_pct}%  size ${size_pct}%  [$status]"
  done <<< "$RESULTS"
}

# Compare against baseline
echo ""
echo "Comparing against baseline..."
FAIL=0
compare_against_baseline

if [ "$FAIL" -ne 0 ]; then
  echo ""
  echo "FAIL: performance regression detected"
  exit 1
else
  echo ""
  echo "PASS: no regressions"
fi