`261c316`

Add performance regression gate with baseline

Compiles 5 representative programs at O2, measures compile time
and binary size. Fails on >30% time or >15% size regression.

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 1 month ago

SHA: 261c31671e3353e194aaa0cd3d495919ecedc6f4
Parents: d9be79c
Tree: bf4c699

2 changed files

Status	File	+	-
A	`.benchmarks/baseline.txt`	6	0
A	`scripts/benchmark_gate.sh`	145	0

.benchmarks/baseline.txt (added)

 +array_bulk_kernels 0.0812 2193072
 +module_init 0.0752 2129288
 +two_loops 0.0840 2129160
 +derived_type_nested 0.0771 2129216
 +allocatable 0.0823 2193048
++

scripts/benchmark_gate.sh (added)

 +#!/usr/bin/env bash
 +# Performance regression gate for armfortas.
 +#
 +# Compiles a set of representative test programs and measures:
 +#   - Compile time (wall clock)
 +#   - Binary size
 +#
 +# Compares against a baseline file (.benchmarks/baseline.txt).
 +# If no baseline exists, creates one.
 +#
 +# Usage:
 +#   ./scripts/benchmark_gate.sh           # compare against baseline
 +#   ./scripts/benchmark_gate.sh --update  # update the baseline
 +#
 +# Thresholds:
 +#   Compile time: fail if >30% slower than baseline
 +#   Binary size:  fail if >15% larger than baseline
++
 +# Strict mode, then work from the repository root so the relative
 +# paths below resolve regardless of where the script was invoked.
 +set -euo pipefail
 +cd "$(git rev-parse --show-toplevel)"
++
 +# Compiler under test, baseline location, and optimisation flag.
 +COMPILER="./target/release/armfortas"
 +BASELINE=".benchmarks/baseline.txt"
 +OPT="-O2"
++
 +# Programs that get compiled and measured.
 +PROGRAMS=(
 +    test_programs/array_bulk_kernels.f90
 +    test_programs/module_init.f90
 +    test_programs/two_loops.f90
 +    test_programs/derived_type_nested.f90
 +    test_programs/allocatable.f90
 +)
++
 +# Bail out early when there is no executable compiler to measure.
 +[[ -x "$COMPILER" ]] || {
 +    echo "Build the compiler first: cargo build --release"
 +    exit 1
 +}
++
 +mkdir -p .benchmarks
 +# Scratch directory that receives the compiled binaries.
 +TMPDIR=$(mktemp -d)
 +# Single quotes defer the expansion until the trap fires, and the inner
 +# double quotes keep the path a single word even if it contains spaces.
 +trap 'rm -rf -- "$TMPDIR"' EXIT
++
 +compile_and_measure() {
 +    local src="$1"
 +    local stem
 +    stem=$(basename "$src" .f90)
 +    local binary="$TMPDIR/$stem"
++
 +    # Compile and time it
 +    local start end elapsed
 +    start=$(python3 -c 'import time; print(time.monotonic())')
 +    "$COMPILER" "$src" $OPT -o "$binary" 2>/dev/null
 +    end=$(python3 -c 'import time; print(time.monotonic())')
 +    elapsed=$(python3 -c "print(f'{$end - $start:.4f}')")
++
 +    # Binary size
 +    local size
 +    if [ -f "$binary" ]; then
 +        size=$(stat -f%z "$binary" 2>/dev/null || stat -c%s "$binary" 2>/dev/null || echo 0)
 +    else
 +        size=0
 +    fi
++
 +    echo "$stem $elapsed $size"
 +}
++
 +# Measure every program that exists; RESULTS collects one
 +# newline-terminated "<stem> <seconds> <bytes>" record per program.
 +echo "Benchmarking ${#PROGRAMS[@]} programs at $OPT..."
 +RESULTS=""
 +for source_file in "${PROGRAMS[@]}"; do
 +    if [ -f "$source_file" ]; then
 +        row=$(compile_and_measure "$source_file")
 +        echo "  $row"
 +        RESULTS+="$row"$'\n'
 +    else
 +        echo "  SKIP: $source_file (not found)"
 +    fi
 +done
++
 +# Every record in RESULTS already ends with a newline, so the baseline is
 +# written with printf '%s'; echo would append one more newline and leave a
 +# blank line at the end of the file.
 +
 +# Explicit refresh requested on the command line.
 +if [ "${1:-}" = "--update" ]; then
 +    printf '%s' "$RESULTS" > "$BASELINE"
 +    echo "Baseline updated: $BASELINE"
 +    exit 0
 +fi
++
 +# First run: nothing to compare against yet, so seed the baseline.
 +if [ ! -f "$BASELINE" ]; then
 +    printf '%s' "$RESULTS" > "$BASELINE"
 +    echo "No baseline found — created: $BASELINE"
 +    echo "Run again to compare."
 +    exit 0
 +fi
++
 +# Compare against baseline.
 +# For each measured program, look up its baseline record and report the
 +# change in compile time and binary size. FAIL is set to 1 when either
 +# threshold is exceeded.
 +echo ""
 +echo "Comparing against baseline..."
 +FAIL=0
 +while IFS=' ' read -r name time size; do
 +    [ -z "$name" ] && continue
 +    # Exact match on the first field (no regex interpretation of the name);
 +    # only the first matching record is used.
 +    baseline_line=$(awk -v n="$name" '$1 == n { print; exit }' "$BASELINE")
 +    if [ -z "$baseline_line" ]; then
 +        echo "  $name: NEW (no baseline)"
 +        continue
 +    fi
 +    read -r _ base_time base_size _ <<< "$baseline_line"
++
 +    # One python3 call computes both percentages and the verdict. The values
 +    # are passed as arguments instead of being spliced into the Python source.
 +    verdict=$(python3 -c '
 +import sys
 +bt, ct, bs, cs = (float(v) for v in sys.argv[1:5])
 +time_ratio = ct / max(bt, 0.001)
 +time_pct = (time_ratio - 1) * 100
 +size_pct = (cs / bs - 1) * 100 if bs > 0 else 0.0
 +flags = []
 +if time_ratio > 1.30:        # time regression check (30% threshold)
 +    flags.append("SLOW")
 +if cs / max(bs, 1) > 1.15:   # size regression check (15% threshold)
 +    flags.append("BLOAT")
 +status = "+".join(flags) or "OK"
 +print(f"{time_pct:.1f} {size_pct:.1f} {status}")
 +' "$base_time" "$time" "$base_size" "$size")
 +    read -r time_pct size_pct status <<< "$verdict"
++
 +    # Any status other than OK is a regression; both flags are shown when
 +    # both thresholds are exceeded.
 +    [ "$status" = "OK" ] || FAIL=1
++
 +    echo "  $name: time ${time_pct}% size ${size_pct}% [$status]"
 +done <<< "$RESULTS"
++
 +# Final verdict: a blank separator line, then PASS, or FAIL with exit 1.
 +echo ""
 +if [ $FAIL -eq 0 ]; then
 +    echo "PASS: no regressions"
 +else
 +    echo "FAIL: performance regression detected"
 +    exit 1
 +fi