fortrangoingonforty/armfortas / 261c316

Browse files

Add performance regression gate with baseline

Compiles 5 representative programs at O2, measures compile time
and binary size. Fails on >30% time or >15% size regression.
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
261c31671e3353e194aaa0cd3d495919ecedc6f4
Parents
d9be79c
Tree
bf4c699

2 changed files

Status | File | + | -
A .benchmarks/baseline.txt 6 0
A scripts/benchmark_gate.sh 145 0
.benchmarks/baseline.txt (added)
@@ -0,0 +1,6 @@
1
+array_bulk_kernels 0.0812 2193072
2
+module_init 0.0752 2129288
3
+two_loops 0.0840 2129160
4
+derived_type_nested 0.0771 2129216
5
+allocatable 0.0823 2193048
6
+
scripts/benchmark_gate.sh (added)
@@ -0,0 +1,145 @@
1
+#!/usr/bin/env bash
2
+# Performance regression gate for armfortas.
3
+#
4
+# Compiles a set of representative test programs and measures:
5
+#   - Compile time (wall clock)
6
+#   - Binary size
7
+#
8
+# Compares against a baseline file (.benchmarks/baseline.txt).
9
+# If no baseline exists, creates one.
10
+#
11
+# Usage:
12
+#   ./scripts/benchmark_gate.sh           # compare against baseline
13
+#   ./scripts/benchmark_gate.sh --update  # update the baseline
14
+#
15
+# Thresholds:
16
+#   Compile time: fail if >30% slower than baseline
17
+#   Binary size:  fail if >15% larger than baseline
18
+
19
set -euo pipefail

# Run from the repository root so the relative paths below resolve no matter
# where the script is invoked from. The lookup is a separate assignment
# because a failing command substitution used directly as an argument to `cd`
# does not trip `set -e`; as an assignment it does.
repo_root=$(git rev-parse --show-toplevel)
cd "$repo_root"

# Compiler under test and the file holding the reference measurements.
COMPILER="./target/release/armfortas"
BASELINE=".benchmarks/baseline.txt"

# Programs compiled by the gate; each yields one "<stem> <seconds> <bytes>"
# record keyed by the file name without its .f90 suffix.
PROGRAMS=(
    test_programs/array_bulk_kernels.f90
    test_programs/module_init.f90
    test_programs/two_loops.f90
    test_programs/derived_type_nested.f90
    test_programs/allocatable.f90
)
OPT="-O2"

if [ ! -x "$COMPILER" ]; then
    # Diagnostics belong on stderr so they are not mistaken for results.
    echo "Build the compiler first: cargo build --release" >&2
    exit 1
fi

# Directory that holds the baseline file.
mkdir -p -- "${BASELINE%/*}"

# Scratch directory for the compiled binaries, removed on every exit path.
# NOTE(review): TMPDIR is also the conventional temp-location environment
# variable; if the caller exported it, child processes will see this value.
TMPDIR=$(mktemp -d)
# Single quotes defer expansion until the trap fires, and the inner double
# quotes keep a path containing whitespace intact.
trap 'rm -rf -- "$TMPDIR"' EXIT
42
#######################################
# Compile one program and report how long it took and how big the result is.
# Globals:   COMPILER, OPT, TMPDIR (read)
# Arguments: $1 - path to a .f90 source file
# Outputs:   "<stem> <seconds> <bytes>" on stdout (seconds with 4 decimals);
#            on failure, a message plus the captured compiler output on stderr
# Returns:   0 on success; 1 if timing is unavailable, the compiler exits
#            non-zero, or no binary is produced
#######################################
compile_and_measure() {
    local src="$1"
    local stem
    stem=$(basename "$src" .f90)
    local binary="$TMPDIR/$stem"
    local log="$TMPDIR/$stem.log"

    # Compile and time it. The interval is bracketed by two separate python3
    # processes, so it also includes part of the interpreter start-up cost.
    # NOTE(review): this relies on time.monotonic() sharing a reference point
    # across processes on the host platform - confirm for new platforms.
    local start end elapsed status=0
    start=$(python3 -c 'import time; print(time.monotonic())') || return 1
    # Compiler output goes to a log: stdout must not leak into the record
    # printed below, and stderr is kept so a failure can be explained.
    # shellcheck disable=SC2086 # OPT is intentionally word-split into flags
    "$COMPILER" "$src" $OPT -o "$binary" >"$log" 2>&1 || status=$?
    end=$(python3 -c 'import time; print(time.monotonic())') || return 1

    # This function is normally called inside $(...), where bash clears -e,
    # so a failed compile has to be detected explicitly. Otherwise it would
    # be reported as a 0-byte binary and read as a size improvement.
    if [ "$status" -ne 0 ] || [ ! -f "$binary" ]; then
        echo "ERROR: $COMPILER failed on $src (exit status $status)" >&2
        cat "$log" >&2
        return 1
    fi

    # Values are passed as arguments rather than spliced into Python source.
    elapsed=$(python3 -c \
        'import sys; print(f"{float(sys.argv[2]) - float(sys.argv[1]):.4f}")' \
        "$start" "$end") || return 1

    # Binary size in bytes. wc -c is portable across GNU and BSD userlands;
    # the arithmetic expansion strips the padding some wc builds emit.
    local size
    size=$(( $(wc -c < "$binary") ))

    echo "$stem $elapsed $size"
}
65
+
66
# Measure every listed program that exists on disk. Each record is echoed as
# soon as it is produced and appended, newline-terminated, to RESULTS.
printf 'Benchmarking %s programs at %s...\n' "${#PROGRAMS[@]}" "$OPT"
RESULTS=""
for prog in "${PROGRAMS[@]}"; do
    if [ -f "$prog" ]; then
        result=$(compile_and_measure "$prog")
        printf '  %s\n' "$result"
        RESULTS+="${result}"$'\n'
    else
        # Missing sources are reported and left out of RESULTS.
        printf '  SKIP: %s (not found)\n' "$prog"
    fi
done
77
+
78
# Write the baseline when explicitly requested (--update) or when none exists
# yet, then stop: there is nothing to compare against in either case.
if [ "${1:-}" = "--update" ] || [ ! -f "$BASELINE" ]; then
    # An empty baseline would make every later run report "NEW" and pass,
    # so refuse to record one.
    if [ -z "$RESULTS" ]; then
        echo "ERROR: nothing was benchmarked; not writing an empty baseline" >&2
        exit 1
    fi

    # RESULTS already ends with a newline; printf writes it verbatim, whereas
    # echo would append a blank line to the baseline file.
    printf '%s' "$RESULTS" > "$BASELINE"

    if [ "${1:-}" = "--update" ]; then
        echo "Baseline updated: $BASELINE"
    else
        echo "No baseline found — created: $BASELINE"
        echo "Run again to compare."
    fi
    exit 0
fi
90
+
91
# Compare against baseline

#######################################
# Compare measurement records against a baseline file and print one verdict
# line per record.
# Arguments: $1 - records, one "<name> <seconds> <bytes>" per line
#            $2 - path to the baseline file (same record format)
# Outputs:   "  <name>: time <pct>% size <pct>% [<status>]" per record, or
#            "  <name>: NEW (no baseline)" when the baseline has no entry.
#            <status> is OK, or a "+"-joined list of:
#              SLOW   - time more than 30% above baseline
#              BLOAT  - size more than 15% above baseline
#              BROKEN - measured size is 0, i.e. no binary was produced
# Returns:   0 if every record is OK or NEW, 1 otherwise
#######################################
compare_against_baseline() {
    local results="$1" baseline="$2"
    local failed=0
    local name time size base_time base_size verdict time_pct size_pct status

    while IFS=' ' read -r name time size; do
        [ -z "$name" ] && continue

        # Exact match on the first field; the name is data, not a regex.
        # read fails when awk prints nothing, which leaves both fields empty.
        read -r base_time base_size < <(
            awk -v n="$name" '$1 == n { print $2, $3; exit }' "$baseline"
        ) || true
        if [ -z "$base_time" ]; then
            echo "  $name: NEW (no baseline)"
            continue
        fi

        # One awk call does all the arithmetic. Values are passed with -v so
        # they are never interpreted as program text. The divisor floors
        # (0.001 s, 1 byte) guard against a zero baseline.
        verdict=$(LC_ALL=C awk \
            -v ct="$time" -v cs="$size" -v bt="$base_time" -v bs="$base_size" '
            function flag(s) { status = (status == "" ? s : status "+" s) }
            BEGIN {
                bt_floor = (bt > 0.001) ? bt : 0.001
                bs_floor = (bs > 1) ? bs : 1
                time_pct = (ct / bt_floor - 1) * 100
                size_pct = (bs > 0) ? (cs / bs - 1) * 100 : 0
                status = ""
                if (ct / bt_floor > 1.30) flag("SLOW")
                if (cs / bs_floor > 1.15) flag("BLOAT")
                # A zero-byte result is a failed build, not a size win.
                if (cs + 0 <= 0) flag("BROKEN")
                if (status == "") status = "OK"
                printf "%.1f %.1f %s", time_pct, size_pct, status
            }') || { echo "ERROR: could not evaluate $name" >&2; return 1; }
        read -r time_pct size_pct status <<< "$verdict"

        [ "$status" = "OK" ] || failed=1
        echo "  $name: time ${time_pct}% size ${size_pct}% [$status]"
    done <<< "$results"

    return "$failed"
}

echo ""
echo "Comparing against baseline..."
FAIL=0
compare_against_baseline "$RESULTS" "$BASELINE" || FAIL=1

if [ "$FAIL" -ne 0 ]; then
    echo ""
    echo "FAIL: performance regression detected"
    exit 1
else
    echo ""
    echo "PASS: no regressions"
fi