Bash · 3699 bytes Raw Blame History
1 #!/usr/bin/env bash
2 # Performance regression gate for armfortas.
3 #
4 # Compiles a set of representative test programs and measures:
5 # - Compile time (wall clock)
6 # - Binary size
7 #
8 # Compares against a baseline file (.benchmarks/baseline.txt).
9 # If no baseline exists, creates one.
10 #
11 # Usage:
12 # ./scripts/benchmark_gate.sh # compare against baseline
13 # ./scripts/benchmark_gate.sh --update # update the baseline
14 #
15 # Thresholds:
16 # Compile time: fail if >30% slower than baseline
17 # Binary size: fail if >15% larger than baseline
18
19 set -euo pipefail
20 cd "$(git rev-parse --show-toplevel)"
21
22 COMPILER="./target/release/armfortas"
23 BASELINE=".benchmarks/baseline.txt"
24 PROGRAMS=(
25 test_programs/array_bulk_kernels.f90
26 test_programs/module_init.f90
27 test_programs/two_loops.f90
28 test_programs/derived_type_nested.f90
29 test_programs/allocatable.f90
30 )
31 OPT="-O2"
32
33 if [ ! -x "$COMPILER" ]; then
34 echo "Build the compiler first: cargo build --release"
35 exit 1
36 fi
37
38 mkdir -p .benchmarks
39 TMPDIR=$(mktemp -d)
40 trap "rm -rf $TMPDIR" EXIT
41
42 compile_and_measure() {
43 local src="$1"
44 local stem
45 stem=$(basename "$src" .f90)
46 local binary="$TMPDIR/$stem"
47
48 # Compile and time it
49 local start end elapsed
50 start=$(python3 -c 'import time; print(time.monotonic())')
51 "$COMPILER" "$src" $OPT -o "$binary" 2>/dev/null
52 end=$(python3 -c 'import time; print(time.monotonic())')
53 elapsed=$(python3 -c "print(f'{$end - $start:.4f}')")
54
55 # Binary size
56 local size
57 if [ -f "$binary" ]; then
58 size=$(stat -f%z "$binary" 2>/dev/null || stat -c%s "$binary" 2>/dev/null || echo 0)
59 else
60 size=0
61 fi
62
63 echo "$stem $elapsed $size"
64 }
65
66 echo "Benchmarking ${#PROGRAMS[@]} programs at $OPT..."
67 RESULTS=""
68 for prog in "${PROGRAMS[@]}"; do
69 if [ ! -f "$prog" ]; then
70 echo " SKIP: $prog (not found)"
71 continue
72 fi
73 result=$(compile_and_measure "$prog")
74 echo " $result"
75 RESULTS="$RESULTS$result"$'\n'
76 done
77
78 if [ "${1:-}" = "--update" ]; then
79 echo "$RESULTS" > "$BASELINE"
80 echo "Baseline updated: $BASELINE"
81 exit 0
82 fi
83
84 if [ ! -f "$BASELINE" ]; then
85 echo "$RESULTS" > "$BASELINE"
86 echo "No baseline found — created: $BASELINE"
87 echo "Run again to compare."
88 exit 0
89 fi
90
91 # Compare against baseline
92 echo ""
93 echo "Comparing against baseline..."
94 FAIL=0
95 while IFS=' ' read -r name time size; do
96 [ -z "$name" ] && continue
97 baseline_line=$(grep "^$name " "$BASELINE" 2>/dev/null || true)
98 if [ -z "$baseline_line" ]; then
99 echo " $name: NEW (no baseline)"
100 continue
101 fi
102 base_time=$(echo "$baseline_line" | awk '{print $2}')
103 base_size=$(echo "$baseline_line" | awk '{print $3}')
104
105 # Time regression check (30% threshold)
106 time_ratio=$(python3 -c "
107 bt, ct = $base_time, $time
108 if bt > 0:
109 ratio = ct / bt
110 print(f'{ratio:.2f}')
111 else:
112 print('1.00')
113 ")
114 time_pct=$(python3 -c "print(f'{($time / max($base_time, 0.001) - 1) * 100:.1f}')")
115
116 # Size regression check (15% threshold)
117 size_pct=$(python3 -c "
118 bs, cs = $base_size, $size
119 if bs > 0:
120 print(f'{(cs / bs - 1) * 100:.1f}')
121 else:
122 print('0.0')
123 ")
124
125 status="OK"
126 if python3 -c "exit(0 if $time / max($base_time, 0.001) > 1.30 else 1)" 2>/dev/null; then
127 status="SLOW"
128 FAIL=1
129 fi
130 if python3 -c "exit(0 if $size / max($base_size, 1) > 1.15 else 1)" 2>/dev/null; then
131 status="BLOAT"
132 FAIL=1
133 fi
134
135 echo " $name: time ${time_pct}% size ${size_pct}% [$status]"
136 done <<< "$RESULTS"
137
138 if [ $FAIL -ne 0 ]; then
139 echo ""
140 echo "FAIL: performance regression detected"
141 exit 1
142 else
143 echo ""
144 echo "PASS: no regressions"
145 fi