`e1c16d6`

S36: bench harness — HTTP scenario probe + JSON output

Authored by

espadonne 4 days ago

SHA: e1c16d6e4bcdadd4e18beeee0e49d3b72438f4a7
Parents: 4f16faa
Tree: 22c3f5c

2 changed files

Status	File	+	-
A	`bench/fixtures/README.md`	40	0
A	`bench/run.go`	134	0

bench/fixtures/README.mdadded

 +# Bench fixtures
++
 +S36 calls for four big-repo fixtures used by the nightly perf run:
++
 +- `big_repo_1m_commits/` — repository with 1M commits across a sensible branch graph
 +- `repo_100k_files/`   — repository with 100k files in a reasonable directory structure
 +- `issues_100k/`       — repo with 100k issues + 1M comments
 +- `users_org_5k_members/` — org with 5k members and 200 teams (one level)
++
 +These fixtures aren't generated yet — the seed cost is non-trivial
 +(tens of minutes per fixture) and the current repo shape doesn't
 +yet warrant the disk-and-CI-time spend. The S36 baseline uses the
 +small dev seed fixtures via `bench/run.go`.
++
 +## Generation plan (deferred)
++
 +Each fixture will have a generator under `bench/fixtures/<name>/seed.go`
 +that takes a fixed RNG seed and produces a deterministic on-disk
 +shape. The CI integrity check will re-run the generator and assert
 +that the resulting tree hash matches the committed manifest, so a
 +generator regression can't silently change what the bench measures.
++
 +Run order (when generators land):
++
 +```
 +go run ./bench/fixtures/big_repo_1m_commits -out=./bench/fixtures/big_repo_1m_commits
 +go run ./bench/fixtures/repo_100k_files     -out=./bench/fixtures/repo_100k_files
 +…
 +```
++
 +The generated fixtures will be gitignored (they are regenerable);
 +only the generators will be committed to the repository.
++
 +## Dev fixture (today)
++
 +The `make seed` flow in the repo root produces the small dev
 +fixtures used by `make bench-small`. That's enough to catch
 +regressions in the harness itself plus the small-scale handler
 +latency floor; the big-fixture targets in S36's "Definition of
 +done" land with the generators above.

bench/run.goadded

 +// SPDX-License-Identifier: AGPL-3.0-or-later
++
 +// Command bench runs HTTP latency scenarios against a target shithub
 +// instance and emits one JSON record per scenario to stdout. The
 +// output is structured for ingestion into a perf-baseline file.
 +//
 +// Usage:
 +//
 +//	go run ./bench -target=http://localhost:8080 -iters=20 > out.json
 +//
 +// `make bench-small` invokes this against the dev instance with a
 +// short scenario list; `make bench-full` is the placeholder hook for
 +// the nightly run that exercises big-fixture scenarios (1M-commit
 +// repo, 100k-issue repo) — those fixtures aren't generated by this
 +// harness yet (the seed cost is non-trivial; see fixtures/README.md
 +// for the planned generators).
 +//
 +// The "harness in the repo, not external" choice is per the S36
 +// design notes: keeps perf-as-a-feature visible in PR review.
 +package main
++
 +import (
 +	"context"
 +	"encoding/json"
 +	"flag"
 +	"fmt"
 +	"io"
 +	"net/http"
 +	"os"
 +	"sort"
 +	"time"
 +)
++
 +// Scenario is one named HTTP probe. Status is checked against an
 +// expected code; latency is measured wall-clock at the client.
 +type Scenario struct {
 +	Name           string
 +	Method         string
 +	Path           string
 +	ExpectedStatus int
 +}
++
 +// Result is the per-scenario emission. Latencies in microseconds so
 +// the JSON stays integer-friendly for downstream tools.
 +type Result struct {
 +	Scenario string  `json:"scenario"`
 +	Iters    int     `json:"iters"`
 +	OkCount  int     `json:"ok_count"`
 +	P50us    int64   `json:"p50_us"`
 +	P95us    int64   `json:"p95_us"`
 +	P99us    int64   `json:"p99_us"`
 +	MaxUs    int64   `json:"max_us"`
 +	MeanUs   float64 `json:"mean_us"`
 +}
++
 +func main() {
 +	target := flag.String("target", "http://localhost:8080", "base URL to probe")
 +	iters := flag.Int("iters", 20, "iterations per scenario")
 +	timeout := flag.Duration("timeout", 30*time.Second, "per-request timeout")
 +	flag.Parse()
++
 +	scenarios := []Scenario{
 +		{"home", "GET", "/", 200},
 +		{"explore", "GET", "/explore", 200},
 +		// The repo paths assume the dev seed's `sarah/demo-pub` exists;
 +		// if not, the scenario records 0 OkCount and we move on.
 +		{"repo-tree", "GET", "/sarah/demo-pub/tree/trunk", 200},
 +		{"repo-branches", "GET", "/sarah/demo-pub/branches", 200},
 +		{"repo-issues", "GET", "/sarah/demo-pub/issues", 200},
 +		{"login-form", "GET", "/login", 200},
 +	}
++
 +	client := &http.Client{Timeout: *timeout}
 +	enc := json.NewEncoder(os.Stdout)
 +	for _, s := range scenarios {
 +		res := runScenario(context.Background(), client, *target, s, *iters)
 +		_ = enc.Encode(res)
 +	}
 +}
++
 +func runScenario(ctx context.Context, client *http.Client, base string, s Scenario, n int) Result {
 +	lat := make([]int64, 0, n)
 +	ok := 0
 +	for i := 0; i < n; i++ {
 +		req, err := http.NewRequestWithContext(ctx, s.Method, base+s.Path, nil)
 +		if err != nil {
 +			continue
 +		}
 +		t0 := time.Now()
 +		resp, err := client.Do(req)
 +		took := time.Since(t0).Microseconds()
 +		if err != nil {
 +			continue
 +		}
 +		_, _ = io.Copy(io.Discard, resp.Body)
 +		_ = resp.Body.Close()
 +		if resp.StatusCode == s.ExpectedStatus {
 +			ok++
 +			lat = append(lat, took)
 +		}
 +	}
 +	return summarize(s.Name, n, ok, lat)
 +}
++
 +func summarize(name string, n, ok int, lat []int64) Result {
 +	r := Result{Scenario: name, Iters: n, OkCount: ok}
 +	if len(lat) == 0 {
 +		return r
 +	}
 +	sort.Slice(lat, func(i, j int) bool { return lat[i] < lat[j] })
 +	r.P50us = lat[len(lat)*50/100]
 +	r.P95us = lat[min(len(lat)*95/100, len(lat)-1)]
 +	r.P99us = lat[min(len(lat)*99/100, len(lat)-1)]
 +	r.MaxUs = lat[len(lat)-1]
 +	var sum int64
 +	for _, v := range lat {
 +		sum += v
 +	}
 +	r.MeanUs = float64(sum) / float64(len(lat))
 +	return r
 +}
++
 +// min is the tiny helper kept here so the cmd/bench package doesn't
 +// import a shared math/utility just for a 3-liner.
 +func min(a, b int) int {
 +	if a < b {
 +		return a
 +	}
 +	return b
 +}
++
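 +// Blank reference that keeps the fmt import in use. It is not an
 +// assertion of any kind and does not stop fmt.Print calls from being
 +// added; emitting only JSON on stdout is a convention to uphold in
 +// review, not something this line enforces.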
 +var _ = fmt.Sprintln