Go · 6913 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 // Package metrics owns the Prometheus registry. Standard metrics are
4 // instantiated up front; per-package metrics register against the same
5 // shared registry.
6 package metrics
7
import (
	"crypto/sha256"
	"crypto/subtle"
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/collectors"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
16
17 // Registry is the project-wide Prometheus registry. Subpackages register
18 // their collectors against this so /metrics has a single source.
19 var Registry = prometheus.NewRegistry()
20
21 // Standard process / Go runtime metrics.
22 func init() {
23 Registry.MustRegister(
24 collectors.NewGoCollector(),
25 collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
26 )
27 }
28
29 // HTTP request metrics. Wired by the HTTP middleware.
30 var (
31 HTTPRequestsTotal = prometheus.NewCounterVec(
32 prometheus.CounterOpts{
33 Name: "shithub_http_requests_total",
34 Help: "Total HTTP requests by route, method, and status.",
35 },
36 []string{"route", "method", "status"},
37 )
38 HTTPRequestDuration = prometheus.NewHistogramVec(
39 prometheus.HistogramOpts{
40 Name: "shithub_http_request_duration_seconds",
41 Help: "HTTP request duration distribution by route and method.",
42 Buckets: prometheus.ExponentialBuckets(0.001, 2.5, 12),
43 },
44 []string{"route", "method"},
45 )
46 HTTPInFlight = prometheus.NewGauge(
47 prometheus.GaugeOpts{
48 Name: "shithub_http_in_flight",
49 Help: "Number of HTTP requests currently in flight.",
50 },
51 )
52 PanicsTotal = prometheus.NewCounter(
53 prometheus.CounterOpts{
54 Name: "shithub_panics_total",
55 Help: "Total panics caught by the recover middleware.",
56 },
57 )
58 )
59
60 // DB pool metrics. Updated periodically by an observer goroutine that the
61 // caller starts via Observe(pool, interval).
62 var (
63 DBConnsAcquired = prometheus.NewGauge(
64 prometheus.GaugeOpts{
65 Name: "shithub_db_pool_acquired",
66 Help: "Postgres connections currently checked out of the pool.",
67 },
68 )
69 DBConnsIdle = prometheus.NewGauge(
70 prometheus.GaugeOpts{
71 Name: "shithub_db_pool_idle",
72 Help: "Postgres connections currently idle in the pool.",
73 },
74 )
75 DBConnsTotal = prometheus.NewGauge(
76 prometheus.GaugeOpts{
77 Name: "shithub_db_pool_total",
78 Help: "Postgres connections currently held by the pool.",
79 },
80 )
81 DBAcquireWaitDurationTotal = prometheus.NewCounter(
82 prometheus.CounterOpts{
83 Name: "shithub_db_pool_acquire_wait_seconds_total",
84 Help: "Cumulative time clients spent waiting to acquire a Postgres connection.",
85 },
86 )
87 )
88
89 // Worker metrics. The pool updates these on every dispatch.
90 var (
91 WorkerJobsProcessedTotal = prometheus.NewCounterVec(
92 prometheus.CounterOpts{
93 Name: "shithub_worker_jobs_processed_total",
94 Help: "Worker jobs processed by kind and outcome (ok, retry, failed, poison).",
95 },
96 []string{"kind", "outcome"},
97 )
98 WorkerJobDurationSeconds = prometheus.NewHistogramVec(
99 prometheus.HistogramOpts{
100 Name: "shithub_worker_job_duration_seconds",
101 Help: "Worker handler latency by kind.",
102 Buckets: prometheus.ExponentialBuckets(0.005, 2.5, 12),
103 },
104 []string{"kind"},
105 )
106 WorkerInFlight = prometheus.NewGaugeVec(
107 prometheus.GaugeOpts{
108 Name: "shithub_worker_in_flight",
109 Help: "Worker handler invocations currently in flight by kind.",
110 },
111 []string{"kind"},
112 )
113 )
114
115 // Actions trigger pipeline metrics (S41b). Incremented from
116 // internal/actions/trigger.
117 var (
118 ActionsRunsEnqueuedTotal = prometheus.NewCounterVec(
119 prometheus.CounterOpts{
120 Name: "shithub_actions_runs_enqueued_total",
121 Help: "Total workflow runs enqueued by triggering event kind. Result is 'fresh' for new runs or 'already_exists' when ON CONFLICT noop'd.",
122 },
123 []string{"event", "result"},
124 )
125 ActionsTriggerMatchDurationSeconds = prometheus.NewHistogram(
126 prometheus.HistogramOpts{
127 Name: "shithub_actions_trigger_match_duration_seconds",
128 Help: "Wall-clock time spent in the trigger handler discovering + parsing + matching workflows for one triggering event.",
129 Buckets: prometheus.ExponentialBuckets(0.005, 2.0, 12),
130 },
131 )
132 ActionsRunnerRegistrationsTotal = prometheus.NewCounter(
133 prometheus.CounterOpts{
134 Name: "shithub_actions_runner_registrations_total",
135 Help: "Total Actions runners registered through operator tooling.",
136 },
137 )
138 ActionsRunnerHeartbeatsTotal = prometheus.NewCounterVec(
139 prometheus.CounterOpts{
140 Name: "shithub_actions_runner_heartbeats_total",
141 Help: "Total runner heartbeats by result (claimed, no_job).",
142 },
143 []string{"result"},
144 )
145 ActionsRunnerJWTTotal = prometheus.NewCounterVec(
146 prometheus.CounterOpts{
147 Name: "shithub_actions_runner_jwt_total",
148 Help: "Total runner job JWT outcomes by result (issued, rejected, replay).",
149 },
150 []string{"result"},
151 )
152 ActionsLogScrubReplacementsTotal = prometheus.NewCounterVec(
153 prometheus.CounterOpts{
154 Name: "shithub_actions_log_scrub_replacements_total",
155 Help: "Total exact secret-value replacements performed on Actions log chunks.",
156 },
157 []string{"location"},
158 )
159 )
160
161 func init() {
162 Registry.MustRegister(
163 HTTPRequestsTotal,
164 HTTPRequestDuration,
165 HTTPInFlight,
166 PanicsTotal,
167 DBConnsAcquired,
168 DBConnsIdle,
169 DBConnsTotal,
170 DBAcquireWaitDurationTotal,
171 WorkerJobsProcessedTotal,
172 WorkerJobDurationSeconds,
173 WorkerInFlight,
174 ActionsRunsEnqueuedTotal,
175 ActionsTriggerMatchDurationSeconds,
176 ActionsRunnerRegistrationsTotal,
177 ActionsRunnerHeartbeatsTotal,
178 ActionsRunnerJWTTotal,
179 ActionsLogScrubReplacementsTotal,
180 )
181 }
182
183 // Handler returns the /metrics HTTP handler. When user/pass is set, the
184 // handler enforces HTTP Basic auth; otherwise it serves unauthenticated
185 // (S35 will tighten the policy).
186 //
187 // DisableCompression: promhttp gzips responses when the scraper sends
188 // Accept-Encoding: gzip. Alloy 1.16's Prometheus scraper advertises gzip
189 // but mishandles the Content-Encoding: gzip response (parses raw 0x1f
190 // magic byte as text, scrape fails with up=0). Bypass at the source —
191 // /metrics payload is small enough that wire savings are irrelevant.
192 // Skipping the chi Compress middleware on this route (handlers.go) is
193 // also necessary but not sufficient; promhttp does its own gzip layer.
194 func Handler(user, pass string) http.Handler {
195 h := promhttp.HandlerFor(Registry, promhttp.HandlerOpts{
196 Registry: Registry,
197 DisableCompression: true,
198 })
199 if user == "" && pass == "" {
200 return h
201 }
202 expectedUser := []byte(user)
203 expectedPass := []byte(pass)
204 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
205 gotUser, gotPass, ok := r.BasicAuth()
206 if !ok ||
207 subtle.ConstantTimeCompare([]byte(gotUser), expectedUser) != 1 ||
208 subtle.ConstantTimeCompare([]byte(gotPass), expectedPass) != 1 {
209 w.Header().Set("WWW-Authenticate", `Basic realm="metrics"`)
210 http.Error(w, "unauthorized", http.StatusUnauthorized)
211 return
212 }
213 h.ServeHTTP(w, r)
214 })
215 }
216