tenseleyflow/shithub / eb28d6f

Browse files

deploy/runner: enforce actions egress allowlist

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
eb28d6fe54b25ef31fe7168879a77254f5cbd858
Parents
2b63ffd
Tree
f8ffa23

11 changed files

StatusFile+-
M deploy/ansible/inventory/production.example 3 1
M deploy/ansible/roles/shithubd-runner/defaults/main.yml 9 3
M deploy/ansible/roles/shithubd-runner/handlers/main.yml 6 0
M deploy/ansible/roles/shithubd-runner/tasks/main.yml 91 2
M deploy/runner-config/README.md 10 10
M deploy/runner-config/dnsmasq.conf.j2 14 6
A deploy/runner-config/firewall.sh.j2 26 0
A deploy/systemd/shithub-runner-firewall.service 14 0
M deploy/systemd/shithubd-runner.service 2 1
M docs/internal/runbooks/actions-runner.md 12 4
M docs/internal/runbooks/runner-deploy.md 22 15
deploy/ansible/inventory/production.examplemodified
@@ -52,4 +52,6 @@ grafana_cloud_prom_token=REPLACE_ME # access-policy token
5252
 # shithub_runner_labels=self-hosted,linux,ubuntu-latest
5353
 # shithub_runner_capacity=1
5454
 # shithub_runner_default_image=ghcr.io/shithub/runner-nix:1.0
55
-# shithub_runner_dns_servers=172.30.0.1
55
+# The role creates the shithub-actions Docker network on bridge shact0
56
+# (172.30.0.1/24), runs dnsmasq on that bridge, and enforces direct-IP
57
+# egress denial with shithub-runner-firewall.service.
deploy/ansible/roles/shithubd-runner/defaults/main.ymlmodified
@@ -23,10 +23,16 @@ shithub_runner_network_allowlist:
2323
   - "*.githubusercontent.com"
2424
 shithub_runner_engine: docker
2525
 shithub_runner_default_image: ghcr.io/shithub/runner-nix:1.0
26
-shithub_runner_network: bridge
27
-shithub_runner_dns_servers: []
28
-shithub_runner_dnsmasq_config: /etc/shithubd-runner/dnsmasq.conf
26
+shithub_runner_network: shithub-actions
27
+shithub_runner_network_bridge: shact0
28
+shithub_runner_network_subnet: 172.30.0.0/24
29
+shithub_runner_network_gateway: 172.30.0.1
30
+shithub_runner_dns_servers:
31
+  - "{{ shithub_runner_network_gateway }}"
32
+shithub_runner_dnsmasq_config: /etc/dnsmasq.d/shithubd-runner.conf
2933
 shithub_runner_dnsmasq_upstream: 1.1.1.1
34
+shithub_runner_ipset_name: shithub_actions_allowed
35
+shithub_runner_firewall_script: /usr/local/sbin/shithub-runner-firewall
3036
 shithub_runner_memory: 2g
3137
 shithub_runner_cpus: "2"
3238
 shithub_runner_seccomp_profile: /etc/shithubd-runner/seccomp.json
deploy/ansible/roles/shithubd-runner/handlers/main.ymlmodified
@@ -4,5 +4,11 @@
44
 - name: daemon-reload
55
   systemd: { daemon_reload: yes }
66
 
7
+- name: restart dnsmasq
8
+  systemd: { name: dnsmasq, state: restarted, enabled: yes }
9
+
10
+- name: restart shithub-runner-firewall
11
+  systemd: { name: shithub-runner-firewall, state: restarted, enabled: yes, daemon_reload: yes }
12
+
713
 - name: restart shithubd-runner
814
   systemd: { name: shithubd-runner, state: restarted, enabled: yes }
deploy/ansible/roles/shithubd-runner/tasks/main.ymlmodified
@@ -25,12 +25,30 @@
2525
       unless the shithubd-runner systemd unit's ReadWritePaths= hardening is
2626
       updated with the matching path.
2727
 
28
+- name: Runner Docker bridge name fits Linux interface limit
29
+  assert:
30
+    that:
31
+      - (shithub_runner_network_bridge | string | length) <= 15
32
+    fail_msg: >-
33
+      shithub_runner_network_bridge must be 15 characters or fewer because
34
+      Linux interface names are capped by IFNAMSIZ.
35
+
2836
 - name: Docker group exists
2937
   getent:
3038
     database: group
3139
     key: docker
3240
   when: shithub_runner_engine == "docker"
3341
 
42
+- name: Runner network firewall packages
43
+  apt:
44
+    name:
45
+      - dnsmasq
46
+      - ipset
47
+      - iptables
48
+    state: present
49
+    update_cache: yes
50
+  when: shithub_runner_engine == "docker"
51
+
3452
 - name: Runner group
3553
   group:
3654
     name: shithub-runner
@@ -60,6 +78,50 @@
6078
     - { path: "{{ shithub_runner_workspace_root }}", owner: shithub-runner, group: shithub-runner, mode: "0750" }
6179
     - { path: /var/lib/shithubd-runner/binaries, owner: shithub-runner, group: shithub-runner, mode: "0750" }
6280
 
81
+- name: Inspect Actions Docker network
82
+  command: "{{ shithub_runner_engine }} network inspect {{ shithub_runner_network }}"
83
+  register: shithub_runner_network_inspect
84
+  failed_when: shithub_runner_network_inspect.rc not in [0, 1]
85
+  changed_when: false
86
+  when: shithub_runner_engine == "docker" and not ansible_check_mode
87
+
88
+- name: Create Actions Docker network
89
+  command: >-
90
+    {{ shithub_runner_engine }} network create
91
+    --driver bridge
92
+    --subnet {{ shithub_runner_network_subnet }}
93
+    --gateway {{ shithub_runner_network_gateway }}
94
+    --opt com.docker.network.bridge.name={{ shithub_runner_network_bridge }}
95
+    {{ shithub_runner_network }}
96
+  when:
97
+    - shithub_runner_engine == "docker"
98
+    - not ansible_check_mode
99
+    - shithub_runner_network_inspect.rc == 1
100
+
101
+- name: Inspect Actions Docker network after converge
102
+  command: "{{ shithub_runner_engine }} network inspect {{ shithub_runner_network }}"
103
+  register: shithub_runner_network_final
104
+  changed_when: false
105
+  when: shithub_runner_engine == "docker" and not ansible_check_mode
106
+
107
+- name: Record Actions Docker network facts
108
+  set_fact:
109
+    shithub_runner_network_info: "{{ (shithub_runner_network_final.stdout | from_json)[0] }}"
110
+  when: shithub_runner_engine == "docker" and not ansible_check_mode
111
+
112
+- name: Actions Docker network matches runner firewall config
113
+  assert:
114
+    that:
115
+      - shithub_runner_network_info.Driver == "bridge"
116
+      - shithub_runner_network_info.Options["com.docker.network.bridge.name"] == shithub_runner_network_bridge
117
+      - shithub_runner_network_info.IPAM.Config[0].Subnet == shithub_runner_network_subnet
118
+      - shithub_runner_network_info.IPAM.Config[0].Gateway == shithub_runner_network_gateway
119
+    fail_msg: >-
120
+      Existing Docker network {{ shithub_runner_network }} does not match the
121
+      configured Actions subnet/gateway/bridge. Remove or rename the network
122
+      before re-running the role so firewall rules target the correct bridge.
123
+  when: shithub_runner_engine == "docker" and not ansible_check_mode
124
+
63125
 - name: Upload shithubd-runner binary (built by `make build` locally)
64126
   copy:
65127
     src: "{{ playbook_dir }}/../../bin/shithubd-runner"
@@ -107,8 +169,35 @@
107169
     src: "{{ playbook_dir }}/../runner-config/dnsmasq.conf.j2"
108170
     dest: "{{ shithub_runner_dnsmasq_config }}"
109171
     owner: root
110
-    group: shithub-runner
111
-    mode: "0640"
172
+    group: root
173
+    mode: "0644"
174
+  notify: restart dnsmasq
175
+
176
+- name: Runner firewall script
177
+  template:
178
+    src: "{{ playbook_dir }}/../runner-config/firewall.sh.j2"
179
+    dest: "{{ shithub_runner_firewall_script }}"
180
+    owner: root
181
+    group: root
182
+    mode: "0755"
183
+  notify: restart shithub-runner-firewall
184
+
185
+- name: Runner firewall systemd unit
186
+  copy:
187
+    src: "{{ playbook_dir }}/../systemd/shithub-runner-firewall.service"
188
+    dest: /etc/systemd/system/shithub-runner-firewall.service
189
+    mode: "0644"
190
+  notify: [daemon-reload, restart shithub-runner-firewall]
191
+
192
+- name: Enable + start runner firewall
193
+  systemd:
194
+    name: shithub-runner-firewall
195
+    state: started
196
+    enabled: yes
197
+    daemon_reload: yes
198
+
199
+- name: Enable + start runner dnsmasq
200
+  systemd: { name: dnsmasq, state: started, enabled: yes }
112201
 
113202
 - name: Runner systemd unit
114203
   copy:
deploy/runner-config/README.mdmodified
@@ -15,14 +15,14 @@ Source: `moby/moby` commit
1515
 Update this file deliberately when changing Docker daemon versions or
1616
 runner syscall posture.
1717
 
18
-`dnsmasq.conf.j2` is the optional runner DNS allowlist template. The
19
-Ansible role renders it to `/etc/shithubd-runner/dnsmasq.conf` from
20
-`shithub_runner_network_allowlist`; operators can run dnsmasq bound to
21
-their Actions Docker bridge and point step containers at it with
22
-`engine.dns_servers`.
18
+`dnsmasq.conf.j2` is the runner DNS allowlist template. The Ansible
19
+role renders it to `/etc/dnsmasq.d/shithubd-runner.conf` from
20
+`shithub_runner_network_allowlist`, binds dnsmasq to the dedicated
21
+Actions Docker bridge, and points step containers at that resolver
22
+with `engine.dns_servers`.
2323
 
24
-The dnsmasq template intentionally has no default upstream resolver, so
25
-names outside the allowlist fail resolution. DNS allowlisting alone does
26
-not block direct-IP egress or a workflow that brings its own resolver;
27
-pair it with host firewall rules on the runner bridge for a deny-by-
28
-default network boundary.
24
+`firewall.sh.j2` is installed as `/usr/local/sbin/shithub-runner-firewall`
25
+and run by `shithub-runner-firewall.service`. It creates the ipset used
26
+by dnsmasq and rejects direct-IP egress from the Actions bridge unless
27
+the destination IP was added to that ipset by an allowlisted DNS response. DNS to
28
+the bridge resolver is the only DNS path allowed from step containers.
deploy/runner-config/dnsmasq.conf.j2modified
@@ -1,15 +1,23 @@
1
-# Managed by Ansible. Optional DNS allowlist resolver for Actions runners.
2
-#
3
-# Pair this with a Docker bridge/network that uses this resolver as its only
4
-# DNS server. This controls name resolution, not direct-IP egress; enforce
5
-# direct-IP denial with host firewall rules on the runner bridge.
1
+# Managed by Ansible. DNS allowlist resolver for Actions runners.
2
+# Bound only to the dedicated Actions Docker bridge; dnsmasq inserts
3
+# successful allowlisted resolutions into the ipset enforced by
4
+# shithub-runner-firewall.service.
65
 
6
+interface={{ shithub_runner_network_bridge }}
7
+listen-address={{ shithub_runner_network_gateway }}
8
+bind-interfaces
79
 domain-needed
810
 bogus-priv
911
 no-resolv
1012
 no-hosts
1113
 
12
-{% for pattern in shithub_runner_network_allowlist %}
14
+{% if shithub_runner_network_allowlist is string %}
15
+{% set allowlist = shithub_runner_network_allowlist.split(",") | map("trim") | list %}
16
+{% else %}
17
+{% set allowlist = shithub_runner_network_allowlist %}
18
+{% endif %}
19
+{% for pattern in allowlist %}
1320
 {% set host = (pattern[2:] if pattern.startswith("*.") else pattern) %}
1421
 server=/{{ host }}/{{ shithub_runner_dnsmasq_upstream }}
22
+ipset=/{{ host }}/{{ shithub_runner_ipset_name }}
1523
 {% endfor %}
deploy/runner-config/firewall.sh.j2added
@@ -0,0 +1,26 @@
1
+#!/bin/sh
2
+# Managed by Ansible. Enforces deny-by-default egress for the Actions bridge.
3
+set -eu
4
+
5
+IPSET="{{ shithub_runner_ipset_name }}"
6
+CHAIN="SHITHUB_ACTIONS_EGRESS"
7
+SUBNET="{{ shithub_runner_network_subnet }}"
8
+DNS="{{ shithub_runner_network_gateway }}"
9
+
10
+IPSET_BIN="${IPSET_BIN:-ipset}"
11
+IPTABLES="${IPTABLES:-iptables}"
12
+
13
+"$IPSET_BIN" create "$IPSET" hash:ip family inet timeout 86400 -exist
14
+
15
+"$IPTABLES" -w -N "$CHAIN" 2>/dev/null || true
16
+"$IPTABLES" -w -F "$CHAIN"
17
+"$IPTABLES" -w -A "$CHAIN" -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
18
+"$IPTABLES" -w -A "$CHAIN" -d "$DNS" -p udp --dport 53 -j ACCEPT
19
+"$IPTABLES" -w -A "$CHAIN" -d "$DNS" -p tcp --dport 53 -j ACCEPT
20
+"$IPTABLES" -w -A "$CHAIN" -m set --match-set "$IPSET" dst -j ACCEPT
21
+"$IPTABLES" -w -A "$CHAIN" -j REJECT
22
+
23
+while "$IPTABLES" -w -D FORWARD -s "$SUBNET" -j "$CHAIN" 2>/dev/null; do
24
+    :
25
+done
26
+"$IPTABLES" -w -I FORWARD 1 -s "$SUBNET" -j "$CHAIN"
deploy/systemd/shithub-runner-firewall.serviceadded
@@ -0,0 +1,14 @@
1
+[Unit]
2
+Description=shithub Actions runner firewall
3
+After=network-online.target docker.service
4
+Wants=network-online.target
5
+Requires=docker.service
6
+Before=shithubd-runner.service
7
+
8
+[Service]
9
+Type=oneshot
10
+ExecStart=/usr/local/sbin/shithub-runner-firewall
11
+RemainAfterExit=yes
12
+
13
+[Install]
14
+WantedBy=multi-user.target
deploy/systemd/shithubd-runner.servicemodified
@@ -1,7 +1,8 @@
11
 [Unit]
22
 Description=shithub Actions runner
3
-After=network-online.target docker.service
3
+After=network-online.target docker.service dnsmasq.service shithub-runner-firewall.service
44
 Wants=network-online.target docker.service
5
+Requires=dnsmasq.service shithub-runner-firewall.service
56
 
67
 [Service]
78
 Type=simple
docs/internal/runbooks/actions-runner.mdmodified
@@ -9,7 +9,7 @@ For host provisioning and the systemd/Ansible path, see
99
 
1010
 Prereqs:
1111
 
12
-- Database migrations are current through `0053_runner_jwt_used.sql`.
12
+- Database migrations are current through `0055_workflow_job_secret_masks.sql`.
1313
 - `SHITHUB_TOTP_KEY` or `auth.totp_key_b64` is set on the web process.
1414
 - Object storage is configured if testing artifact upload.
1515
 - Docker or Podman is installed on the runner host.
@@ -48,7 +48,9 @@ shithubd-runner run \
4848
   --server-url "$BASE" \
4949
   --token "$RUNNER_TOKEN" \
5050
   --labels self-hosted,linux,ubuntu-latest \
51
-  --workspace-root /var/lib/shithubd-runner/workspaces
51
+  --workspace-root /var/lib/shithubd-runner/workspaces \
52
+  --network shithub-actions \
53
+  --dns-servers 172.30.0.1
5254
 ```
5355
 
5456
 Equivalent config file:
@@ -78,19 +80,25 @@ network_allowlist = [
7880
 [engine]
7981
 kind = "docker"
8082
 default_image = "ghcr.io/shithub/runner-nix:1.0"
81
-network = "bridge"
83
+network = "shithub-actions"
8284
 memory = "2g"
8385
 cpus = "2"
8486
 seccomp_profile = "/etc/shithubd-runner/seccomp.json"
8587
 user = "65534:65534"
8688
 pids_limit = 512
87
-dns_servers = []
89
+dns_servers = ["172.30.0.1"]
8890
 ```
8991
 
9092
 The config path defaults to `/etc/shithubd-runner/config.toml`.
9193
 Environment variables use the `SHITHUB_RUNNER_` prefix, for example
9294
 `SHITHUB_RUNNER_TOKEN` or `SHITHUB_RUNNER_SERVER__BASE_URL`.
9395
 
96
+The Ansible runner role creates the `shithub-actions` bridge, runs the
97
+allowlist resolver at `172.30.0.1`, and installs firewall rules that
98
+reject direct-IP egress from step containers. If you run the binary
99
+without the role, provision equivalent network controls before pointing
100
+workflows at the runner.
101
+
94102
 ## Curl token smoke
95103
 
96104
 Claim a job:
docs/internal/runbooks/runner-deploy.mdmodified
@@ -59,7 +59,6 @@ shithub_runner_default_image=ghcr.io/shithub/runner-nix:1.0
5959
 shithub_runner_seccomp_profile=/etc/shithubd-runner/seccomp.json
6060
 shithub_runner_container_user=65534:65534
6161
 shithub_runner_pids_limit=512
62
-shithub_runner_dns_servers=172.30.0.1
6362
 ```
6463
 
6564
 The role writes non-secret config to
@@ -70,9 +69,10 @@ the systemd unit grants runner writes only to that subtree.
7069
 
7170
 `shithub_runner_network_allowlist` defaults to GitHub source/archive
7271
 hosts plus Docker Hub registry hosts. Override it when a runner must
73
-fetch from an internal package registry. `shithub_runner_dns_servers`
74
-is empty by default; set it only after a DNS allowlist resolver exists
75
-on the runner network.
72
+fetch from an internal package registry. The role creates the
73
+`shithub-actions` Docker bridge at `172.30.0.1/24`, runs dnsmasq on
74
+that bridge, and sets `engine.dns_servers` to the bridge resolver by
75
+default.
7676
 
7777
 ## Deploy
7878
 
@@ -89,8 +89,12 @@ The role:
8989
 - creates the `shithub-runner` system user and joins it to `docker`
9090
 - uploads `/usr/local/bin/shithubd-runner`
9191
 - renders `/etc/shithubd-runner/config.toml` and `runner.env`
92
-- renders `/etc/shithubd-runner/dnsmasq.conf` from the network
93
-  allowlist for operators who run a local DNS allowlist resolver
92
+- creates the dedicated Actions Docker network and bridge
93
+- renders `/etc/dnsmasq.d/shithubd-runner.conf` from the network
94
+  allowlist and starts dnsmasq bound to the Actions bridge
95
+- installs `shithub-runner-firewall.service`, which rejects direct-IP
96
+  egress from step containers unless dnsmasq has added the destination
97
+  to the allowlist ipset
9498
 - installs the pinned seccomp profile at
9599
   `/etc/shithubd-runner/seccomp.json`
96100
 - installs `deploy/systemd/shithubd-runner.service`
@@ -147,6 +151,9 @@ jobs:
147151
 Expected state:
148152
 
149153
 - the UID check prints `65534`
154
+- a workflow-level request for `permissions: {shithub-runner-root: write}`
155
+  still runs as `65534`; root opt-in is disabled in the shipped runner
156
+  config until a trusted-workflow policy exists
150157
 - writing under `/etc` fails because the root filesystem is read-only
151158
 - `mount` fails because the container does not have `CAP_SYS_ADMIN`
152159
 - step logs and systemd journal include the configured image, network,
@@ -162,16 +169,16 @@ The runner config carries two separate network controls:
162169
   Docker `--dns`.
163170
 
164171
 For a single-host deployment, create a dedicated Docker bridge for
165
-Actions jobs, run dnsmasq bound to that bridge, render
166
-`/etc/shithubd-runner/dnsmasq.conf`, and set
172
+Actions jobs, run dnsmasq bound to that bridge, and set
167173
 `shithub_runner_dns_servers` to the bridge address of that resolver.
168
-The rendered dnsmasq config has no default upstream resolver; names not
169
-matching the allowlist fail DNS resolution.
170
-
171
-DNS filtering is not a complete egress boundary by itself. Block
172
-direct-IP egress from the Actions bridge with host firewall rules, and
173
-allow only DNS to the resolver plus established outbound connections
174
-opened by that resolver. Keep the runner on a separate host from web
174
+The Ansible role now does this by default. The rendered dnsmasq config
175
+has no default upstream resolver; names not matching the allowlist fail
176
+DNS resolution.
177
+
178
+The firewall service closes the direct-IP bypass: containers on the
179
+Actions subnet may send DNS only to the bridge resolver, and other
180
+egress is allowed only when the destination IP is present in the
181
+dnsmasq-populated ipset. Keep the runner on a separate host from web
175182
 and database services.
176183
 
177184
 ## Rollback