tenseleyflow/shithub / e1c16d6

Browse files

S36: bench harness — HTTP scenario probe + JSON output

Authored by espadonne
SHA
e1c16d6e4bcdadd4e18beeee0e49d3b72438f4a7
Parents
4f16faa
Tree
22c3f5c

2 changed files

StatusFile+-
A bench/fixtures/README.md 40 0
A bench/run.go 134 0
bench/fixtures/README.mdadded
@@ -0,0 +1,40 @@
1
+# Bench fixtures
2
+
3
+S36 calls for four big-repo fixtures used by the nightly perf run:
4
+
5
+- `big_repo_1m_commits/` — repository with 1M commits across a sensible branch graph
6
+- `repo_100k_files/`   — repository with 100k files in a reasonable directory structure
7
+- `issues_100k/`       — repo with 100k issues + 1M comments
8
+- `users_org_5k_members/` — org with 5k members and 200 teams (one level)
9
+
10
+These fixtures aren't generated yet — the seed cost is non-trivial
11
+(tens of minutes per fixture) and the current repo shape doesn't
12
+yet warrant the disk-and-CI-time spend. The S36 baseline uses the
13
+small dev seed fixtures via `bench/run.go`.
14
+
15
+## Generation plan (deferred)
16
+
17
+Each fixture will have a generator under `bench/fixtures/<name>/seed.go`
17
+that takes a fixed RNG seed and produces a deterministic on-disk
18
+shape. The CI integrity check will re-run the generator and assert
20
+the resulting tree-hash matches the committed manifest, so a
21
+generator regression doesn't silently change what the bench measures.
22
+
23
+Run order (when generators land):
24
+
25
+```
26
+go run ./bench/fixtures/big_repo_1m_commits -out=./bench/fixtures/big_repo_1m_commits
27
+go run ./bench/fixtures/repo_100k_files     -out=./bench/fixtures/repo_100k_files
28
+…
29
+```
30
+
31
+The fixtures themselves are gitignored (regenerable). The generators
32
+live in source.
33
+
34
+## Dev fixture (today)
35
+
36
+The `make seed` flow in the repo root produces the small dev
37
+fixtures used by `make bench-small`. That's enough to catch
38
+regressions in the harness itself plus the small-scale handler
39
+latency floor; the big-fixture targets in S36's "Definition of
40
+done" land with the generators above.
bench/run.goadded
@@ -0,0 +1,134 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+// Command bench runs HTTP latency scenarios against a target shithub
4
+// instance and emits one JSON record per scenario to stdout. The
5
+// output is structured for ingestion into a perf-baseline file.
6
+//
7
+// Usage:
8
+//
9
+//	go run ./bench -target=http://localhost:8080 -iters=20 > out.json
10
+//
11
+// `make bench-small` invokes this against the dev instance with a
12
+// short scenario list; `make bench-full` is the placeholder hook for
13
+// the nightly run that exercises big-fixture scenarios (1M-commit
14
+// repo, 100k-issue repo) — those fixtures aren't generated by this
15
+// harness yet (the seed cost is non-trivial; see fixtures/README.md
16
+// for the planned generators).
17
+//
18
+// The "harness in the repo, not external" choice is per the S36
19
+// design notes: keeps perf-as-a-feature visible in PR review.
20
+package main
21
+
22
+import (
23
+	"context"
24
+	"encoding/json"
25
+	"flag"
26
+	"fmt"
27
+	"io"
28
+	"net/http"
29
+	"os"
30
+	"sort"
31
+	"time"
32
+)
33
+
34
// Scenario describes a single named HTTP probe: which request to
// issue and which response status counts as a success. Latency is
// taken on the client side using the wall clock.
type Scenario struct {
	// Name labels the scenario in the emitted Result.
	Name string
	// Method is the HTTP method used to build the request.
	Method string
	// Path is appended to the base URL to form the request URL.
	Path string
	// ExpectedStatus is the response code that marks a request as OK.
	ExpectedStatus int
}
42
+
43
// Result is the record emitted for one scenario. All latency fields
// are expressed in microseconds, which keeps the percentile values
// plain integers for downstream JSON consumers. The latency fields
// are left at zero when no request succeeded.
type Result struct {
	Scenario string `json:"scenario"` // scenario name
	Iters    int    `json:"iters"`    // iterations requested
	OkCount  int    `json:"ok_count"` // responses with the expected status

	P50us int64 `json:"p50_us"` // 50th-percentile latency
	P95us int64 `json:"p95_us"` // 95th-percentile latency
	P99us int64 `json:"p99_us"` // 99th-percentile latency
	MaxUs int64 `json:"max_us"` // slowest recorded latency

	MeanUs float64 `json:"mean_us"` // arithmetic mean of recorded latencies
}
55
+
56
+func main() {
57
+	target := flag.String("target", "http://localhost:8080", "base URL to probe")
58
+	iters := flag.Int("iters", 20, "iterations per scenario")
59
+	timeout := flag.Duration("timeout", 30*time.Second, "per-request timeout")
60
+	flag.Parse()
61
+
62
+	scenarios := []Scenario{
63
+		{"home", "GET", "/", 200},
64
+		{"explore", "GET", "/explore", 200},
65
+		// The repo paths assume the dev seed's `sarah/demo-pub` exists;
66
+		// if not, the scenario records 0 OkCount and we move on.
67
+		{"repo-tree", "GET", "/sarah/demo-pub/tree/trunk", 200},
68
+		{"repo-branches", "GET", "/sarah/demo-pub/branches", 200},
69
+		{"repo-issues", "GET", "/sarah/demo-pub/issues", 200},
70
+		{"login-form", "GET", "/login", 200},
71
+	}
72
+
73
+	client := &http.Client{Timeout: *timeout}
74
+	enc := json.NewEncoder(os.Stdout)
75
+	for _, s := range scenarios {
76
+		res := runScenario(context.Background(), client, *target, s, *iters)
77
+		_ = enc.Encode(res)
78
+	}
79
+}
80
+
81
+func runScenario(ctx context.Context, client *http.Client, base string, s Scenario, n int) Result {
82
+	lat := make([]int64, 0, n)
83
+	ok := 0
84
+	for i := 0; i < n; i++ {
85
+		req, err := http.NewRequestWithContext(ctx, s.Method, base+s.Path, nil)
86
+		if err != nil {
87
+			continue
88
+		}
89
+		t0 := time.Now()
90
+		resp, err := client.Do(req)
91
+		took := time.Since(t0).Microseconds()
92
+		if err != nil {
93
+			continue
94
+		}
95
+		_, _ = io.Copy(io.Discard, resp.Body)
96
+		_ = resp.Body.Close()
97
+		if resp.StatusCode == s.ExpectedStatus {
98
+			ok++
99
+			lat = append(lat, took)
100
+		}
101
+	}
102
+	return summarize(s.Name, n, ok, lat)
103
+}
104
+
105
+func summarize(name string, n, ok int, lat []int64) Result {
106
+	r := Result{Scenario: name, Iters: n, OkCount: ok}
107
+	if len(lat) == 0 {
108
+		return r
109
+	}
110
+	sort.Slice(lat, func(i, j int) bool { return lat[i] < lat[j] })
111
+	r.P50us = lat[len(lat)*50/100]
112
+	r.P95us = lat[min(len(lat)*95/100, len(lat)-1)]
113
+	r.P99us = lat[min(len(lat)*99/100, len(lat)-1)]
114
+	r.MaxUs = lat[len(lat)-1]
115
+	var sum int64
116
+	for _, v := range lat {
117
+		sum += v
118
+	}
119
+	r.MeanUs = float64(sum) / float64(len(lat))
120
+	return r
121
+}
122
+
123
// min returns the smaller of a and b. It is defined locally so the
// bench command does not have to import a shared utility package for
// a three-line helper.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
131
+
132
// Blank reference that keeps the fmt import in use so the file
// compiles even when no other code calls into fmt. It does not
// detect or prevent fmt.Print calls: "JSON only on stdout" is a
// convention for this harness, not something this line enforces.
var _ = fmt.Sprintln