| 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | |
| 3 | // Package metrics owns the Prometheus registry. Standard metrics are |
| 4 | // instantiated up front; per-package metrics register against the same |
| 5 | // shared registry. |
| 6 | package metrics |
| 7 | |
import (
	"crypto/sha256"
	"crypto/subtle"
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/collectors"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
| 16 | |
| 17 | // Registry is the project-wide Prometheus registry. Subpackages register |
| 18 | // their collectors against this so /metrics has a single source. |
| 19 | var Registry = prometheus.NewRegistry() |
| 20 | |
| 21 | // Standard process / Go runtime metrics. |
| 22 | func init() { |
| 23 | Registry.MustRegister( |
| 24 | collectors.NewGoCollector(), |
| 25 | collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), |
| 26 | ) |
| 27 | } |
| 28 | |
| 29 | // HTTP request metrics. Wired by the HTTP middleware. |
| 30 | var ( |
| 31 | HTTPRequestsTotal = prometheus.NewCounterVec( |
| 32 | prometheus.CounterOpts{ |
| 33 | Name: "shithub_http_requests_total", |
| 34 | Help: "Total HTTP requests by route, method, and status.", |
| 35 | }, |
| 36 | []string{"route", "method", "status"}, |
| 37 | ) |
| 38 | HTTPRequestDuration = prometheus.NewHistogramVec( |
| 39 | prometheus.HistogramOpts{ |
| 40 | Name: "shithub_http_request_duration_seconds", |
| 41 | Help: "HTTP request duration distribution by route and method.", |
| 42 | Buckets: prometheus.ExponentialBuckets(0.001, 2.5, 12), |
| 43 | }, |
| 44 | []string{"route", "method"}, |
| 45 | ) |
| 46 | HTTPInFlight = prometheus.NewGauge( |
| 47 | prometheus.GaugeOpts{ |
| 48 | Name: "shithub_http_in_flight", |
| 49 | Help: "Number of HTTP requests currently in flight.", |
| 50 | }, |
| 51 | ) |
| 52 | PanicsTotal = prometheus.NewCounter( |
| 53 | prometheus.CounterOpts{ |
| 54 | Name: "shithub_panics_total", |
| 55 | Help: "Total panics caught by the recover middleware.", |
| 56 | }, |
| 57 | ) |
| 58 | ) |
| 59 | |
| 60 | // DB pool metrics. Updated periodically by an observer goroutine that the |
| 61 | // caller starts via Observe(pool, interval). |
| 62 | var ( |
| 63 | DBConnsAcquired = prometheus.NewGauge( |
| 64 | prometheus.GaugeOpts{ |
| 65 | Name: "shithub_db_pool_acquired", |
| 66 | Help: "Postgres connections currently checked out of the pool.", |
| 67 | }, |
| 68 | ) |
| 69 | DBConnsIdle = prometheus.NewGauge( |
| 70 | prometheus.GaugeOpts{ |
| 71 | Name: "shithub_db_pool_idle", |
| 72 | Help: "Postgres connections currently idle in the pool.", |
| 73 | }, |
| 74 | ) |
| 75 | DBConnsTotal = prometheus.NewGauge( |
| 76 | prometheus.GaugeOpts{ |
| 77 | Name: "shithub_db_pool_total", |
| 78 | Help: "Postgres connections currently held by the pool.", |
| 79 | }, |
| 80 | ) |
| 81 | DBAcquireWaitDurationTotal = prometheus.NewCounter( |
| 82 | prometheus.CounterOpts{ |
| 83 | Name: "shithub_db_pool_acquire_wait_seconds_total", |
| 84 | Help: "Cumulative time clients spent waiting to acquire a Postgres connection.", |
| 85 | }, |
| 86 | ) |
| 87 | ) |
| 88 | |
| 89 | // Worker metrics. The pool updates these on every dispatch. |
| 90 | var ( |
| 91 | WorkerJobsProcessedTotal = prometheus.NewCounterVec( |
| 92 | prometheus.CounterOpts{ |
| 93 | Name: "shithub_worker_jobs_processed_total", |
| 94 | Help: "Worker jobs processed by kind and outcome (ok, retry, failed, poison).", |
| 95 | }, |
| 96 | []string{"kind", "outcome"}, |
| 97 | ) |
| 98 | WorkerJobDurationSeconds = prometheus.NewHistogramVec( |
| 99 | prometheus.HistogramOpts{ |
| 100 | Name: "shithub_worker_job_duration_seconds", |
| 101 | Help: "Worker handler latency by kind.", |
| 102 | Buckets: prometheus.ExponentialBuckets(0.005, 2.5, 12), |
| 103 | }, |
| 104 | []string{"kind"}, |
| 105 | ) |
| 106 | WorkerInFlight = prometheus.NewGaugeVec( |
| 107 | prometheus.GaugeOpts{ |
| 108 | Name: "shithub_worker_in_flight", |
| 109 | Help: "Worker handler invocations currently in flight by kind.", |
| 110 | }, |
| 111 | []string{"kind"}, |
| 112 | ) |
| 113 | ) |
| 114 | |
| 115 | func init() { |
| 116 | Registry.MustRegister( |
| 117 | HTTPRequestsTotal, |
| 118 | HTTPRequestDuration, |
| 119 | HTTPInFlight, |
| 120 | PanicsTotal, |
| 121 | DBConnsAcquired, |
| 122 | DBConnsIdle, |
| 123 | DBConnsTotal, |
| 124 | DBAcquireWaitDurationTotal, |
| 125 | WorkerJobsProcessedTotal, |
| 126 | WorkerJobDurationSeconds, |
| 127 | WorkerInFlight, |
| 128 | ) |
| 129 | } |
| 130 | |
| 131 | // Handler returns the /metrics HTTP handler. When user/pass is set, the |
| 132 | // handler enforces HTTP Basic auth; otherwise it serves unauthenticated |
| 133 | // (S35 will tighten the policy). |
| 134 | func Handler(user, pass string) http.Handler { |
| 135 | h := promhttp.HandlerFor(Registry, promhttp.HandlerOpts{ |
| 136 | Registry: Registry, |
| 137 | }) |
| 138 | if user == "" && pass == "" { |
| 139 | return h |
| 140 | } |
| 141 | expectedUser := []byte(user) |
| 142 | expectedPass := []byte(pass) |
| 143 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { |
| 144 | gotUser, gotPass, ok := r.BasicAuth() |
| 145 | if !ok || |
| 146 | subtle.ConstantTimeCompare([]byte(gotUser), expectedUser) != 1 || |
| 147 | subtle.ConstantTimeCompare([]byte(gotPass), expectedPass) != 1 { |
| 148 | w.Header().Set("WWW-Authenticate", `Basic realm="metrics"`) |
| 149 | http.Error(w, "unauthorized", http.StatusUnauthorized) |
| 150 | return |
| 151 | } |
| 152 | h.ServeHTTP(w, r) |
| 153 | }) |
| 154 | } |
| 155 |