tenseleyflow/shithub / 713648d

Browse files

runner/engine: harden Docker step containers

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
713648dffea0a0077d06aabb3f2909a19e8b6325
Parents
a19cb1d
Tree
10c1321

2 changed files

StatusFile+-
M internal/runner/engine/docker.go 115 9
M internal/runner/engine/docker_test.go 58 2
internal/runner/engine/docker.gomodified
@@ -8,11 +8,13 @@ import (
88
 	"errors"
99
 	"fmt"
1010
 	"io"
11
+	"log/slog"
1112
 	"os"
1213
 	"os/exec"
1314
 	"path"
1415
 	"regexp"
1516
 	"sort"
17
+	"strconv"
1618
 	"strings"
1719
 	"sync"
1820
 	"time"
@@ -27,6 +29,19 @@ var (
2729
 	ErrUnsupported     = errors.New("runner engine: unsupported operation")
2830
 )
2931
 
32
+const (
33
+	defaultSeccompProfile = "/etc/shithubd-runner/seccomp.json"
34
+	defaultContainerUser  = "65534:65534"
35
+	defaultPidsLimit      = 512
36
+	defaultNofileLimit    = "4096:4096"
37
+	defaultNprocLimit     = "512:512"
38
+
39
+	// rootPermissionKey is an intentionally shithub-specific escape hatch.
40
+	// It requires an explicit per-job permissions entry rather than treating
41
+	// broad write-all permissions as permission to run the container as root.
42
+	rootPermissionKey = "shithub-runner-root"
43
+)
44
+
3045
 type CommandRunner interface {
3146
 	Run(ctx context.Context, name string, args []string, stdout, stderr io.Writer) error
3247
 }
@@ -46,6 +61,9 @@ type DockerConfig struct {
4661
 	Network          string
4762
 	Memory           string
4863
 	CPUs             string
64
+	SeccompProfile   string
65
+	User             string
66
+	PidsLimit        int
4967
 	LogChunkBytes    int
5068
 	LogFlushInterval time.Duration
5169
 	StepLogLimit     int64
@@ -53,6 +71,7 @@ type DockerConfig struct {
5371
 	Stderr           io.Writer
5472
 	Runner           CommandRunner
5573
 	MaskValues       []string
74
+	Logger           *slog.Logger
5675
 }
5776
 
5877
 type Docker struct {
@@ -84,6 +103,18 @@ func NewDocker(cfg DockerConfig) *Docker {
84103
 	if cfg.Runner == nil {
85104
 		cfg.Runner = ExecRunner{}
86105
 	}
106
+	if cfg.SeccompProfile == "" {
107
+		cfg.SeccompProfile = defaultSeccompProfile
108
+	}
109
+	if cfg.User == "" {
110
+		cfg.User = defaultContainerUser
111
+	}
112
+	if cfg.PidsLimit <= 0 {
113
+		cfg.PidsLimit = defaultPidsLimit
114
+	}
115
+	if cfg.Logger == nil {
116
+		cfg.Logger = slog.New(slog.NewTextHandler(io.Discard, nil))
117
+	}
87118
 	return &Docker{cfg: cfg, streams: make(map[int64]chan LogChunk), eventSubs: make(map[int64]chan Event)}
88119
 }
89120
 
@@ -151,36 +182,50 @@ func (d *Docker) executeStep(ctx context.Context, job Job, step Step) error {
151182
 	if strings.TrimSpace(step.Run) == "" {
152183
 		return nil
153184
 	}
154
-	args, err := d.dockerArgs(job, step)
185
+	invocation, err := d.dockerInvocation(job, step)
155186
 	if err != nil {
156187
 		return err
157188
 	}
189
+	d.logStep(ctx, "runner step starting", job, step, invocation, "")
158190
 	writer := d.newStepLogWriter(ctx, job.ID, step.ID, job.MaskValues)
159191
 	out := io.MultiWriter(d.cfg.Stdout, writer)
160192
 	errOut := io.MultiWriter(d.cfg.Stderr, writer)
161
-	if err := d.cfg.Runner.Run(ctx, d.cfg.Binary, args, out, errOut); err != nil {
193
+	if err := d.cfg.Runner.Run(ctx, d.cfg.Binary, invocation.args, out, errOut); err != nil {
194
+		d.logStep(ctx, "runner step completed", job, step, invocation, conclusionForError(err))
162195
 		if closeErr := writer.Close(); closeErr != nil {
163196
 			return fmt.Errorf("runner engine: step %q failed: %w", stepLabel(step), errors.Join(err, closeErr))
164197
 		}
165198
 		return fmt.Errorf("runner engine: step %q failed: %w", stepLabel(step), err)
166199
 	}
200
+	d.logStep(ctx, "runner step completed", job, step, invocation, ConclusionSuccess)
167201
 	if err := writer.Close(); err != nil {
168202
 		return fmt.Errorf("runner engine: flush step %q logs: %w", stepLabel(step), err)
169203
 	}
170204
 	return nil
171205
 }
172206
 
173
-func (d *Docker) dockerArgs(job Job, step Step) ([]string, error) {
207
+type dockerInvocation struct {
208
+	args           []string
209
+	image          string
210
+	network        string
211
+	memory         string
212
+	cpus           string
213
+	user           string
214
+	seccompProfile string
215
+	pidsLimit      int
216
+}
217
+
218
+func (d *Docker) dockerInvocation(job Job, step Step) (dockerInvocation, error) {
174219
 	workdir, err := containerWorkdir(step.WorkingDirectory)
175220
 	if err != nil {
176
-		return nil, err
221
+		return dockerInvocation{}, err
177222
 	}
178223
 	image := strings.TrimSpace(job.Image)
179224
 	if image == "" {
180225
 		image = d.cfg.DefaultImage
181226
 	}
182227
 	if image == "" {
183
-		return nil, errors.New("runner engine: image is required")
228
+		return dockerInvocation{}, errors.New("runner engine: image is required")
184229
 	}
185230
 	rendered, err := runnerexec.RenderStep(runnerexec.StepInput{
186231
 		Run:     step.Run,
@@ -189,7 +234,11 @@ func (d *Docker) dockerArgs(job Job, step Step) ([]string, error) {
189234
 		Context: expressionContext(job),
190235
 	})
191236
 	if err != nil {
192
-		return nil, fmt.Errorf("runner engine: render step %q: %w", stepLabel(step), err)
237
+		return dockerInvocation{}, fmt.Errorf("runner engine: render step %q: %w", stepLabel(step), err)
238
+	}
239
+	user := d.cfg.User
240
+	if permissionsRequestRoot(job.Permissions) {
241
+		user = "0:0"
193242
 	}
194243
 	args := []string{
195244
 		"run",
@@ -197,18 +246,58 @@ func (d *Docker) dockerArgs(job Job, step Step) ([]string, error) {
197246
 		"--network=" + d.cfg.Network,
198247
 		"--memory=" + d.cfg.Memory,
199248
 		"--cpus=" + d.cfg.CPUs,
249
+		"--pids-limit=" + strconv.Itoa(d.cfg.PidsLimit),
250
+		"--read-only",
251
+		"--tmpfs", "/tmp:rw,exec,nosuid,nodev,size=1g",
252
+		"--cap-drop=ALL",
253
+		"--cap-add=DAC_OVERRIDE",
254
+		"--cap-add=SETGID",
255
+		"--cap-add=SETUID",
256
+		"--security-opt=no-new-privileges",
257
+		"--security-opt=seccomp=" + d.cfg.SeccompProfile,
258
+		"--ulimit", "nofile=" + defaultNofileLimit,
259
+		"--ulimit", "nproc=" + defaultNprocLimit,
260
+		"--user", user,
200261
 		"--workdir=" + workdir,
201
-		"-v", job.WorkspaceDir + ":/workspace",
262
+		"--mount", "type=bind,src=" + job.WorkspaceDir + ",dst=/workspace,rw",
202263
 	}
203264
 	env, err := validateEnv(rendered.Env)
204265
 	if err != nil {
205
-		return nil, err
266
+		return dockerInvocation{}, err
206267
 	}
207268
 	for _, key := range sortedKeys(env) {
208269
 		args = append(args, "-e", key+"="+env[key])
209270
 	}
210271
 	args = append(args, image, "bash", "-c", rendered.Run)
211
-	return args, nil
272
+	return dockerInvocation{
273
+		args:           args,
274
+		image:          image,
275
+		network:        d.cfg.Network,
276
+		memory:         d.cfg.Memory,
277
+		cpus:           d.cfg.CPUs,
278
+		user:           user,
279
+		seccompProfile: d.cfg.SeccompProfile,
280
+		pidsLimit:      d.cfg.PidsLimit,
281
+	}, nil
282
+}
283
+
284
+func (d *Docker) logStep(ctx context.Context, msg string, job Job, step Step, invocation dockerInvocation, conclusion string) {
285
+	attrs := []any{
286
+		"run_id", job.RunID,
287
+		"job_id", job.ID,
288
+		"step_id", step.ID,
289
+		"image", invocation.image,
290
+		"network", invocation.network,
291
+		"cpu_limit", invocation.cpus,
292
+		"memory_limit", invocation.memory,
293
+		"pids_limit", invocation.pidsLimit,
294
+		"container_user", invocation.user,
295
+		"seccomp_profile", invocation.seccompProfile,
296
+	}
297
+	if conclusion != "" {
298
+		attrs = append(attrs, "conclusion", conclusion)
299
+	}
300
+	d.cfg.Logger.InfoContext(ctx, msg, attrs...)
212301
 }
213302
 
214303
 func expressionContext(job Job) expr.Context {
@@ -227,6 +316,23 @@ func expressionContext(job Job) expr.Context {
227316
 	}
228317
 }
229318
 
319
+func permissionsRequestRoot(raw json.RawMessage) bool {
320
+	if len(raw) == 0 || !json.Valid(raw) {
321
+		return false
322
+	}
323
+	var shaped struct {
324
+		Per map[string]string `json:"per"`
325
+	}
326
+	if err := json.Unmarshal(raw, &shaped); err == nil && strings.EqualFold(shaped.Per[rootPermissionKey], "write") {
327
+		return true
328
+	}
329
+	var flat map[string]string
330
+	if err := json.Unmarshal(raw, &flat); err != nil {
331
+		return false
332
+	}
333
+	return strings.EqualFold(flat[rootPermissionKey], "write")
334
+}
335
+
230336
 func (d *Docker) StreamLogs(_ context.Context, jobID int64) (<-chan LogChunk, error) {
231337
 	return d.ensureStream(jobID), nil
232338
 }
internal/runner/engine/docker_test.gomodified
@@ -7,6 +7,7 @@ import (
77
 	"errors"
88
 	"io"
99
 	"reflect"
10
+	"strings"
1011
 	"testing"
1112
 	"time"
1213
 )
@@ -70,8 +71,15 @@ func TestDockerExecute_BuildsResourceCappedRunCommand(t *testing.T) {
7071
 		t.Fatalf("Conclusion: %q", out.Conclusion)
7172
 	}
7273
 	want := []string{
73
-		"run", "--rm", "--network=none", "--memory=2g", "--cpus=2", "--workdir=/workspace/subdir",
74
-		"-v", rec.args[7],
74
+		"run", "--rm", "--network=none", "--memory=2g", "--cpus=2",
75
+		"--pids-limit=512", "--read-only",
76
+		"--tmpfs", "/tmp:rw,exec,nosuid,nodev,size=1g",
77
+		"--cap-drop=ALL", "--cap-add=DAC_OVERRIDE", "--cap-add=SETGID", "--cap-add=SETUID",
78
+		"--security-opt=no-new-privileges", "--security-opt=seccomp=/etc/shithubd-runner/seccomp.json",
79
+		"--ulimit", "nofile=4096:4096", "--ulimit", "nproc=512:512",
80
+		"--user", "65534:65534",
81
+		"--workdir=/workspace/subdir",
82
+		"--mount", rec.args[23],
7583
 		"-e", "A=job", "-e", "B=step",
7684
 		"runner-image", "bash", "-c", "echo hi",
7785
 	}
@@ -81,6 +89,9 @@ func TestDockerExecute_BuildsResourceCappedRunCommand(t *testing.T) {
8189
 	if !reflect.DeepEqual(rec.args, want) {
8290
 		t.Fatalf("args:\ngot  %#v\nwant %#v", rec.args, want)
8391
 	}
92
+	if !strings.HasPrefix(rec.args[23], "type=bind,src=") || !strings.HasSuffix(rec.args[23], ",dst=/workspace,rw") {
93
+		t.Fatalf("workspace mount arg: %q", rec.args[23])
94
+	}
8495
 }
8596
 
8697
 func TestDockerExecute_RendersTaintedExpressionsThroughInputEnv(t *testing.T) {
@@ -115,6 +126,42 @@ func TestDockerExecute_RendersTaintedExpressionsThroughInputEnv(t *testing.T) {
115126
 	}
116127
 }
117128
 
129
+func TestDockerExecute_RootRequiresExplicitPermission(t *testing.T) {
130
+	t.Parallel()
131
+	for _, tc := range []struct {
132
+		name        string
133
+		permissions string
134
+		wantUser    string
135
+	}{
136
+		{name: "default", permissions: `{}`, wantUser: "65534:65534"},
137
+		{name: "write-all-does-not-root", permissions: `{"mode":"write-all"}`, wantUser: "65534:65534"},
138
+		{name: "explicit-root", permissions: `{"per":{"shithub-runner-root":"write"}}`, wantUser: "0:0"},
139
+	} {
140
+		t.Run(tc.name, func(t *testing.T) {
141
+			t.Parallel()
142
+			rec := &recordingRunner{}
143
+			d := NewDocker(DockerConfig{
144
+				DefaultImage: "runner-image",
145
+				Network:      "bridge",
146
+				Memory:       "2g",
147
+				CPUs:         "2",
148
+				Runner:       rec,
149
+			})
150
+			if _, err := d.Execute(t.Context(), Job{
151
+				ID:           1,
152
+				Permissions:  []byte(tc.permissions),
153
+				WorkspaceDir: t.TempDir(),
154
+				Steps:        []Step{{Run: "id -u"}},
155
+			}); err != nil {
156
+				t.Fatalf("Execute: %v", err)
157
+			}
158
+			if got := argAfter(rec.args, "--user"); got != tc.wantUser {
159
+				t.Fatalf("--user: got %q want %q in %#v", got, tc.wantUser, rec.args)
160
+			}
161
+		})
162
+	}
163
+}
164
+
118165
 func TestDockerExecute_StreamsStepLogs(t *testing.T) {
119166
 	t.Parallel()
120167
 	d := NewDocker(DockerConfig{
@@ -294,3 +341,12 @@ func containsArg(args []string, want string) bool {
294341
 	}
295342
 	return false
296343
 }
344
+
345
+func argAfter(args []string, flag string) string {
346
+	for i, arg := range args {
347
+		if arg == flag && i+1 < len(args) {
348
+			return args[i+1]
349
+		}
350
+	}
351
+	return ""
352
+}