JSON · 7569 bytes Raw Blame History
1 {
2 "uid": "shithubd-actions",
3 "title": "shithubd - Actions",
4 "tags": ["shithubd", "actions"],
5 "timezone": "browser",
6 "schemaVersion": 39,
7 "version": 1,
8 "refresh": "30s",
9 "time": {"from": "now-6h", "to": "now"},
10 "templating": {
11 "list": [
12 {
13 "name": "instance",
14 "type": "query",
15 "datasource": "Prometheus",
16 "query": "label_values(up{job=\"shithubd-web\"}, instance)",
17 "includeAll": true,
18 "multi": true
19 }
20 ]
21 },
22 "panels": [
23 {
24 "id": 1,
25 "type": "stat",
26 "title": "Queued jobs",
27 "gridPos": {"x": 0, "y": 0, "w": 4, "h": 4},
28 "targets": [{"expr": "sum(shithub_actions_queue_depth{resource=\"jobs\",instance=~\"$instance\"})", "refId": "A"}],
29 "fieldConfig": {
30 "defaults": {
31 "thresholds": {
32 "mode": "absolute",
33 "steps": [
34 {"color": "green", "value": null},
35 {"color": "yellow", "value": 50},
36 {"color": "red", "value": 100}
37 ]
38 }
39 }
40 }
41 },
42 {
43 "id": 2,
44 "type": "stat",
45 "title": "Running jobs",
46 "gridPos": {"x": 4, "y": 0, "w": 4, "h": 4},
47 "targets": [{"expr": "sum(shithub_actions_active{resource=\"jobs\",instance=~\"$instance\"})", "refId": "A"}]
48 },
49 {
50 "id": 3,
51 "type": "stat",
52 "title": "Stale runners",
53 "gridPos": {"x": 8, "y": 0, "w": 4, "h": 4},
54 "targets": [{"expr": "count(shithub_actions_runner_heartbeat_age_seconds{status!=\"offline\",instance=~\"$instance\"} > 60) or vector(0)", "refId": "A"}],
55 "fieldConfig": {
56 "defaults": {
57 "thresholds": {
58 "mode": "absolute",
59 "steps": [
60 {"color": "green", "value": null},
61 {"color": "red", "value": 1}
62 ]
63 }
64 }
65 }
66 },
67 {
68 "id": 4,
69 "type": "stat",
70 "title": "Log MiB/day",
71 "gridPos": {"x": 12, "y": 0, "w": 4, "h": 4},
72 "targets": [{"expr": "sum(increase(shithub_actions_log_chunk_bytes_total{instance=~\"$instance\"}[24h])) / 1024 / 1024", "refId": "A"}]
73 },
74 {
75 "id": 5,
76 "type": "stat",
77 "title": "Run p99",
78 "gridPos": {"x": 16, "y": 0, "w": 4, "h": 4},
79 "targets": [{"expr": "histogram_quantile(0.99, sum(rate(shithub_actions_run_duration_seconds_bucket{instance=~\"$instance\"}[30m])) by (le))", "refId": "A"}],
80 "fieldConfig": {"defaults": {"unit": "s"}}
81 },
82 {
83 "id": 6,
84 "type": "stat",
85 "title": "Storage MiB",
86 "gridPos": {"x": 20, "y": 0, "w": 4, "h": 4},
87 "targets": [{"expr": "sum(shithub_actions_storage_bytes{instance=~\"$instance\"}) / 1024 / 1024", "refId": "A"}]
88 },
89 {
90 "id": 7,
91 "type": "timeseries",
92 "title": "Queue depth",
93 "gridPos": {"x": 0, "y": 4, "w": 12, "h": 8},
94 "targets": [
95 {
96 "expr": "sum(shithub_actions_queue_depth{instance=~\"$instance\"}) by (resource)",
97 "legendFormat": "{{resource}} queued",
98 "refId": "A"
99 }
100 ]
101 },
102 {
103 "id": 8,
104 "type": "timeseries",
105 "title": "Active runs and jobs",
106 "gridPos": {"x": 12, "y": 4, "w": 12, "h": 8},
107 "targets": [
108 {
109 "expr": "sum(shithub_actions_active{instance=~\"$instance\"}) by (resource)",
110 "legendFormat": "{{resource}} active",
111 "refId": "A"
112 }
113 ]
114 },
115 {
116 "id": 9,
117 "type": "timeseries",
118 "title": "Run duration p95 and p99",
119 "gridPos": {"x": 0, "y": 12, "w": 12, "h": 8},
120 "targets": [
121 {
122 "expr": "histogram_quantile(0.95, sum(rate(shithub_actions_run_duration_seconds_bucket{instance=~\"$instance\"}[15m])) by (le, event))",
123 "legendFormat": "p95 {{event}}",
124 "refId": "A"
125 },
126 {
127 "expr": "histogram_quantile(0.99, sum(rate(shithub_actions_run_duration_seconds_bucket{instance=~\"$instance\"}[15m])) by (le, event))",
128 "legendFormat": "p99 {{event}}",
129 "refId": "B"
130 }
131 ],
132 "fieldConfig": {"defaults": {"unit": "s"}}
133 },
134 {
135 "id": 10,
136 "type": "timeseries",
137 "title": "Runner heartbeat age",
138 "gridPos": {"x": 12, "y": 12, "w": 12, "h": 8},
139 "targets": [
140 {
141 "expr": "shithub_actions_runner_heartbeat_age_seconds{instance=~\"$instance\"}",
142 "legendFormat": "{{runner}} {{status}}",
143 "refId": "A"
144 }
145 ],
146 "fieldConfig": {"defaults": {"unit": "s"}}
147 },
148 {
149 "id": 11,
150 "type": "timeseries",
151 "title": "Runs per minute",
152 "gridPos": {"x": 0, "y": 20, "w": 12, "h": 8},
153 "targets": [
154 {
155 "expr": "sum(rate(shithub_actions_runs_enqueued_total{result=\"fresh\",instance=~\"$instance\"}[5m])) * 60",
156 "legendFormat": "enqueued",
157 "refId": "A"
158 },
159 {
160 "expr": "sum(rate(shithub_actions_runs_completed_total{instance=~\"$instance\"}[5m])) * 60",
161 "legendFormat": "completed",
162 "refId": "B"
163 }
164 ]
165 },
166 {
167 "id": 12,
168 "type": "timeseries",
169 "title": "Run conclusions",
170 "gridPos": {"x": 12, "y": 20, "w": 12, "h": 8},
171 "targets": [
172 {
173 "expr": "sum(rate(shithub_actions_runs_completed_total{instance=~\"$instance\"}[15m])) by (conclusion)",
174 "legendFormat": "{{conclusion}}",
175 "refId": "A"
176 }
177 ]
178 },
179 {
180 "id": 13,
181 "type": "timeseries",
182 "title": "Step outcomes",
183 "gridPos": {"x": 0, "y": 28, "w": 12, "h": 8},
184 "targets": [
185 {
186 "expr": "sum(rate(shithub_actions_steps_completed_total{instance=~\"$instance\"}[15m])) by (step_type, conclusion)",
187 "legendFormat": "{{step_type}} {{conclusion}}",
188 "refId": "A"
189 }
190 ]
191 },
192 {
193 "id": 14,
194 "type": "timeseries",
195 "title": "Log throughput",
196 "gridPos": {"x": 12, "y": 28, "w": 12, "h": 8},
197 "targets": [
198 {
199 "expr": "sum(rate(shithub_actions_log_chunk_bytes_total{instance=~\"$instance\"}[5m])) by (location)",
200 "legendFormat": "{{location}} bytes/sec",
201 "refId": "A"
202 },
203 {
204 "expr": "sum(rate(shithub_actions_log_chunks_total{instance=~\"$instance\"}[5m])) by (location)",
205 "legendFormat": "{{location}} chunks/sec",
206 "refId": "B"
207 }
208 ]
209 },
210 {
211 "id": 15,
212 "type": "timeseries",
213 "title": "Actions storage",
214 "gridPos": {"x": 0, "y": 36, "w": 12, "h": 8},
215 "targets": [
216 {
217 "expr": "sum(shithub_actions_storage_bytes{instance=~\"$instance\"}) by (kind)",
218 "legendFormat": "{{kind}} bytes",
219 "refId": "A"
220 },
221 {
222 "expr": "sum(shithub_actions_storage_objects{instance=~\"$instance\"}) by (kind)",
223 "legendFormat": "{{kind}} objects",
224 "refId": "B"
225 }
226 ]
227 },
228 {
229 "id": 16,
230 "type": "timeseries",
231 "title": "Cancellations and retention",
232 "gridPos": {"x": 12, "y": 36, "w": 12, "h": 8},
233 "targets": [
234 {
235 "expr": "sum(rate(shithub_actions_jobs_cancelled_total{instance=~\"$instance\"}[15m])) by (reason)",
236 "legendFormat": "cancel {{reason}}",
237 "refId": "A"
238 },
239 {
240 "expr": "sum(rate(shithub_actions_runs_pruned_total{instance=~\"$instance\"}[1h])) by (kind)",
241 "legendFormat": "pruned {{kind}}",
242 "refId": "B"
243 }
244 ]
245 }
246 ]
247 }