tenseleyflow/shithub / 7c43086

Browse files

actions/workflow: parser + types + dialect + yaml/v3 direct dep

Adds the typed Workflow tree, the strict YAML parser, the magic-uses allowlist, and the github→shithub namespace alias normalizer. (S41a)
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
7c43086e03fba44848d7fb5679f3eb42ca95b29b
Parents
3c7723c
Tree
45be124

5 changed files

StatusFile+-
M go.mod 2 2
A internal/actions/workflow/aliases.go 44 0
A internal/actions/workflow/dialect.go 76 0
A internal/actions/workflow/parse.go 622 0
A internal/actions/workflow/types.go 268 0
go.modmodified
@@ -23,8 +23,10 @@ require (
2323
 	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0
2424
 	go.opentelemetry.io/otel/sdk v1.43.0
2525
 	go.opentelemetry.io/otel/trace v1.43.0
26
+	go.yaml.in/yaml/v3 v3.0.4
2627
 	golang.org/x/crypto v0.50.0
2728
 	golang.org/x/image v0.39.0
29
+	golang.org/x/net v0.53.0
2830
 	golang.org/x/sync v0.20.0
2931
 )
3032
 
@@ -66,8 +68,6 @@ require (
6668
 	go.opentelemetry.io/proto/otlp v1.10.0 // indirect
6769
 	go.uber.org/multierr v1.11.0 // indirect
6870
 	go.yaml.in/yaml/v2 v2.4.2 // indirect
69
-	go.yaml.in/yaml/v3 v3.0.4 // indirect
70
-	golang.org/x/net v0.53.0 // indirect
7171
 	golang.org/x/sys v0.43.0 // indirect
7272
 	golang.org/x/text v0.36.0 // indirect
7373
 	google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect
internal/actions/workflow/aliases.goadded
@@ -0,0 +1,44 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package workflow
4
+
5
+// AllowedUsesAliases is the closed allowlist of step `uses:` references
6
+// the v1 parser accepts. Anything else is a parser error.
7
+//
8
+// The decision (campaign §"What we're cribbing" → "implicit checkout"):
9
+// v1 ships with exactly three magic aliases and refuses everything
10
+// else. Community/Docker `uses:` is parked for v2 because it requires
11
+// a marketplace + sandbox-per-action contract we don't have. Limiting
12
+// the allowlist to three keeps the precedent honest.
13
+//
14
+//	actions/checkout@v4         — shallow-clones the repo into the
15
+//	                              workspace before steps run. The
16
+//	                              runner does this with its own git
17
+//	                              binary (S41d), not via a containerized
18
+//	                              external action.
19
+//
20
+//	shithub/upload-artifact@v1  — calls the runner's artifact-upload
21
+//	                              path which signs an S3 PUT URL via
22
+//	                              shithubd's API.
23
+//
24
+//	shithub/download-artifact@v1 — fetches an artifact uploaded earlier
25
+//	                               in the same run (or, post-S41d, from
26
+//	                               a parent run via parent_run_id).
27
+//
28
+// Adding a fourth alias requires:
29
+//  1. Reviewer-required note in the commit message explaining what
30
+//     the alias does and why it can't be a `run:` step.
31
+//  2. Coverage in tests/fixtures/workflows/.
32
+//  3. Update to the migration CHECK constraint
33
+//     (workflow_steps_uses_alias_known) AND a corresponding migration.
34
var AllowedUsesAliases = map[string]struct{}{
	"shithub/download-artifact@v1": {},
	"shithub/upload-artifact@v1":   {},
	"actions/checkout@v4":          {},
}

// IsAllowedUses reports whether ref is a recognized `uses:` alias.
func IsAllowedUses(ref string) bool {
	if _, known := AllowedUsesAliases[ref]; known {
		return true
	}
	return false
}
internal/actions/workflow/dialect.goadded
@@ -0,0 +1,76 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package workflow
4
+
5
+import "strings"
6
+
7
+// Dialect controls the workflow file's accepted expression namespace.
8
+//
9
+// Default ("shithub"): the canonical namespace is `${{ shithub.* }}`.
10
+// `${{ github.* }}` is accepted as an alias and the parser emits a
11
+// Severity=Warning diagnostic so workflow authors are nudged to update.
12
+//
13
+// Strict: only `${{ shithub.* }}` is accepted; `${{ github.* }}`
14
+// produces an error. Operators flip via cfg.Actions.DialectStrict
15
+// when they want to forbid the alias outright (post-migration).
16
type Dialect string

const (
	// DialectDefault accepts `${{ github.* }}` as a deprecated alias
	// for `${{ shithub.* }}` (surfaced as a Warning diagnostic).
	DialectDefault Dialect = "shithub"

	// DialectStrict rejects the `github.*` alias outright with an Error.
	DialectStrict Dialect = "strict"
)
22
+
23
// NormalizeNamespace rewrites a `${{ … }}` body so that every
// `github.` namespace reference becomes `shithub.`. It returns the
// rewritten body and deprecated=true iff at least one rewrite
// happened. The caller (the expression evaluator in S41a expr/eval.go)
// turns deprecated=true into a Warning under DialectDefault and an
// Error under DialectStrict.
//
// Only occurrences at a namespace boundary are rewritten: `github.`
// must start the string or be preceded by a non-identifier byte, so
// identifiers such as `mygithub.foo` are left alone. This is the
// simple, non-tokenizing form used by parse.go; the expression
// evaluator performs the token-aware equivalent.
func NormalizeNamespace(body string) (rewritten string, deprecated bool) {
	const alias = "github."
	// Fast path: most bodies never mention the alias at all, so avoid
	// the builder allocation entirely.
	if !strings.Contains(body, alias) {
		return body, false
	}
	var out strings.Builder
	pos := 0
	for pos < len(body) {
		rel := strings.Index(body[pos:], alias)
		if rel < 0 {
			// No further occurrences — flush the tail and stop.
			out.WriteString(body[pos:])
			break
		}
		hit := pos + rel
		atBoundary := hit == 0
		if !atBoundary {
			prev := body[hit-1]
			isIdent := (prev >= 'a' && prev <= 'z') ||
				(prev >= 'A' && prev <= 'Z') ||
				(prev >= '0' && prev <= '9') ||
				prev == '_'
			atBoundary = !isIdent
		}
		if !atBoundary {
			// Inside a longer identifier (e.g. `mygithub.`): copy it
			// through untouched and keep scanning.
			out.WriteString(body[pos : hit+len(alias)])
			pos = hit + len(alias)
			continue
		}
		out.WriteString(body[pos:hit])
		out.WriteString("shithub.")
		pos = hit + len(alias)
		deprecated = true
	}
	return out.String(), deprecated
}
70
+
71
// isIdentChar reports whether c can appear inside an identifier:
// ASCII letters, digits, or underscore.
func isIdentChar(c byte) bool {
	switch {
	case c >= 'a' && c <= 'z':
		return true
	case c >= 'A' && c <= 'Z':
		return true
	case c >= '0' && c <= '9':
		return true
	default:
		return c == '_'
	}
}
internal/actions/workflow/parse.goadded
@@ -0,0 +1,622 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package workflow
4
+
5
+import (
6
+	"errors"
7
+	"fmt"
8
+	"strconv"
9
+	"strings"
10
+
11
+	"go.yaml.in/yaml/v3"
12
+)
13
+
14
var (
	// ErrTooLarge is returned when the workflow file exceeds
	// MaxWorkflowFileBytes. The cap is enforced before YAML decode so a
	// malicious file can't blow the parser's memory budget.
	ErrTooLarge = errors.New("workflow file exceeds size limit")

	// ErrTooManyAliases is returned when a YAML document expands more
	// than MaxYAMLAliases anchor references — the billion-laughs guard.
	ErrTooManyAliases = errors.New("workflow YAML has too many aliases (anchor-bomb guard)")
)
22
+
23
// Parse decodes a workflow file. It returns the parsed document, the
// list of diagnostics encountered (warnings non-fatal, errors fatal),
// and an error iff the file was unparseable.
//
// The parser is strict: unknown top-level keys, unknown step keys,
// and `uses:` references outside the AllowedUsesAliases set all
// produce diagnostics with Severity=Error and the function returns
// (nil, diagnostics, nil). Callers (S41b trigger pipeline,
// `shithubd admin actions parse` CLI) decide what to do with that.
//
// On a YAML-level error (malformed syntax, anchor bomb, oversized
// file), Parse returns (nil, diagnostics, err).
func Parse(src []byte) (*Workflow, []Diagnostic, error) {
	// Size cap first: reject before any YAML work so a huge file costs
	// nothing beyond a length check.
	if len(src) > MaxWorkflowFileBytes {
		return nil, []Diagnostic{{
			Message:  fmt.Sprintf("workflow file is %d bytes; limit is %d", len(src), MaxWorkflowFileBytes),
			Severity: Error,
		}}, ErrTooLarge
	}

	// We decode first to a yaml.Node so we can preserve doc order and
	// catch anchor abuse. Then we hand-walk into the typed Workflow.
	// NOTE(review): string(src) copies the whole file; bytes.NewReader
	// would avoid the copy — confirm before touching imports.
	var root yaml.Node
	dec := yaml.NewDecoder(strings.NewReader(string(src)))
	if err := dec.Decode(&root); err != nil {
		return nil, []Diagnostic{{
			Message:  "YAML decode: " + err.Error(),
			Severity: Error,
		}}, err
	}
	// A zero Kind means Decode yielded no node.
	// NOTE(review): a fully empty input may instead surface as io.EOF
	// from Decode above and never reach this branch — confirm which
	// path empty files actually take.
	if root.Kind == 0 {
		return nil, []Diagnostic{{
			Message:  "workflow file is empty",
			Severity: Error,
		}}, errors.New("empty workflow")
	}
	if root.Kind != yaml.DocumentNode {
		return nil, []Diagnostic{{
			Message:  "expected YAML document at root",
			Severity: Error,
		}}, errors.New("non-document root")
	}
	// Exactly one top-level mapping: multi-document files and scalar/
	// sequence roots are rejected outright.
	if len(root.Content) != 1 || root.Content[0].Kind != yaml.MappingNode {
		return nil, []Diagnostic{{
			Message:  "workflow must be a YAML mapping at the top level",
			Severity: Error,
		}}, errors.New("non-mapping root")
	}
	// Anchor-bomb guard: count alias dereferences in the decoded tree
	// before walking it.
	if aliases := countAliases(root.Content[0], 0); aliases > MaxYAMLAliases {
		return nil, []Diagnostic{{
			Message:  fmt.Sprintf("workflow has %d alias references; limit is %d", aliases, MaxYAMLAliases),
			Severity: Error,
		}}, ErrTooManyAliases
	}

	w := &Workflow{
		Env:  map[string]Value{},
		Jobs: nil,
	}
	var diags []Diagnostic
	mapping := root.Content[0]

	// Top-level keys are walked deterministically. Unknown keys produce
	// diagnostics so workflow authors catch typos at parse time.
	// mapping.Content alternates key node, value node, key, value, …
	for i := 0; i < len(mapping.Content); i += 2 {
		k := mapping.Content[i]
		v := mapping.Content[i+1]
		switch k.Value {
		case "name":
			if v.Kind != yaml.ScalarNode {
				diags = append(diags, errAt("name", "must be a scalar string"))
				continue
			}
			w.Name = v.Value
		case "on":
			ts, ds := parseOn(v)
			w.On = ts
			diags = append(diags, ds...)
		case "permissions":
			perms, ds := parsePermissions(v, "permissions")
			w.Permissions = perms
			diags = append(diags, ds...)
		case "env":
			env, ds := parseEnv(v, "env")
			w.Env = env
			diags = append(diags, ds...)
		case "concurrency":
			c, ds := parseConcurrency(v, "concurrency")
			w.Concurrency = c
			diags = append(diags, ds...)
		case "jobs":
			jobs, ds := parseJobs(v)
			w.Jobs = jobs
			diags = append(diags, ds...)
		default:
			diags = append(diags, errAt(k.Value, "unknown top-level key (allowed: name, on, permissions, env, concurrency, jobs)"))
		}
	}

	// The "must declare" checks are suppressed once any error exists so
	// a broken file isn't additionally blamed for missing sections.
	// (Note the ordering: if the jobs check fires first, the `on` check
	// is suppressed by it.)
	if len(w.Jobs) == 0 && !hasError(diags) {
		diags = append(diags, errAt("jobs", "workflow must declare at least one job"))
	}
	if !triggerSetIsNonEmpty(w.On) && !hasError(diags) {
		diags = append(diags, errAt("on", "workflow must declare at least one trigger"))
	}

	if hasError(diags) {
		return nil, diags, nil
	}
	return w, diags, nil
}
134
+
135
+// countAliases walks the YAML node graph and returns the number of
136
+// alias dereferences. Used only as the anchor-bomb guard; we don't
137
+// resolve aliases ourselves (yaml.v3 does that during Decode).
138
+func countAliases(n *yaml.Node, depth int) int {
139
+	if n == nil || depth > 100 {
140
+		return 0
141
+	}
142
+	count := 0
143
+	if n.Kind == yaml.AliasNode {
144
+		count++
145
+	}
146
+	for _, c := range n.Content {
147
+		count += countAliases(c, depth+1)
148
+		if count > MaxYAMLAliases {
149
+			return count
150
+		}
151
+	}
152
+	return count
153
+}
154
+
155
+// parseOn handles the `on:` block in its three documented shapes:
156
+//   - shorthand string: `on: push`
157
+//   - shorthand list: `on: [push, pull_request]`
158
+//   - mapping: `on: { push: { branches: [main] }, schedule: [...] }`
159
+func parseOn(n *yaml.Node) (TriggerSet, []Diagnostic) {
160
+	var ts TriggerSet
161
+	var diags []Diagnostic
162
+	switch n.Kind {
163
+	case yaml.ScalarNode:
164
+		applyEventName(&ts, n.Value, &diags, "on")
165
+	case yaml.SequenceNode:
166
+		for _, item := range n.Content {
167
+			if item.Kind != yaml.ScalarNode {
168
+				diags = append(diags, errAt("on", "list items must be event names"))
169
+				continue
170
+			}
171
+			applyEventName(&ts, item.Value, &diags, "on")
172
+		}
173
+	case yaml.MappingNode:
174
+		for i := 0; i < len(n.Content); i += 2 {
175
+			k := n.Content[i]
176
+			v := n.Content[i+1]
177
+			switch k.Value {
178
+			case "push":
179
+				ts.Push = parsePushTrigger(v, &diags)
180
+			case "pull_request":
181
+				ts.PullRequest = parsePullRequestTrigger(v, &diags)
182
+			case "schedule":
183
+				ts.Schedule = parseScheduleTriggers(v, &diags)
184
+			case "workflow_dispatch":
185
+				ts.WorkflowDispatch = parseDispatchTrigger(v, &diags)
186
+			default:
187
+				diags = append(diags, errAt("on."+k.Value, "unknown event type (allowed: push, pull_request, schedule, workflow_dispatch)"))
188
+			}
189
+		}
190
+	default:
191
+		diags = append(diags, errAt("on", "must be a string, sequence, or mapping"))
192
+	}
193
+	return ts, diags
194
+}
195
+
196
+func applyEventName(ts *TriggerSet, name string, diags *[]Diagnostic, path string) {
197
+	switch name {
198
+	case "push":
199
+		if ts.Push == nil {
200
+			ts.Push = &PushTrigger{}
201
+		}
202
+	case "pull_request":
203
+		if ts.PullRequest == nil {
204
+			ts.PullRequest = &PullRequestTrigger{}
205
+		}
206
+	case "workflow_dispatch":
207
+		if ts.WorkflowDispatch == nil {
208
+			ts.WorkflowDispatch = &WorkflowDispatchTrigger{}
209
+		}
210
+	default:
211
+		*diags = append(*diags, errAt(path, "unknown event "+strconv.Quote(name)+" (allowed: push, pull_request, workflow_dispatch — schedule requires the mapping form)"))
212
+	}
213
+}
214
+
215
+func parsePushTrigger(n *yaml.Node, diags *[]Diagnostic) *PushTrigger {
216
+	pt := &PushTrigger{}
217
+	if n.Kind == yaml.ScalarNode && n.Value == "" {
218
+		return pt
219
+	}
220
+	if n.Kind != yaml.MappingNode {
221
+		*diags = append(*diags, errAt("on.push", "must be a mapping"))
222
+		return pt
223
+	}
224
+	for i := 0; i < len(n.Content); i += 2 {
225
+		k := n.Content[i]
226
+		v := n.Content[i+1]
227
+		switch k.Value {
228
+		case "branches":
229
+			pt.Branches = scalarList(v, "on.push.branches", diags)
230
+		case "tags":
231
+			pt.Tags = scalarList(v, "on.push.tags", diags)
232
+		case "paths":
233
+			pt.Paths = scalarList(v, "on.push.paths", diags)
234
+		default:
235
+			*diags = append(*diags, errAt("on.push."+k.Value, "unknown push filter (allowed: branches, tags, paths)"))
236
+		}
237
+	}
238
+	return pt
239
+}
240
+
241
+func parsePullRequestTrigger(n *yaml.Node, diags *[]Diagnostic) *PullRequestTrigger {
242
+	prt := &PullRequestTrigger{}
243
+	if n.Kind == yaml.ScalarNode && n.Value == "" {
244
+		return prt
245
+	}
246
+	if n.Kind != yaml.MappingNode {
247
+		*diags = append(*diags, errAt("on.pull_request", "must be a mapping"))
248
+		return prt
249
+	}
250
+	for i := 0; i < len(n.Content); i += 2 {
251
+		k := n.Content[i]
252
+		v := n.Content[i+1]
253
+		switch k.Value {
254
+		case "types":
255
+			prt.Types = scalarList(v, "on.pull_request.types", diags)
256
+		case "branches":
257
+			prt.Branches = scalarList(v, "on.pull_request.branches", diags)
258
+		case "paths":
259
+			prt.Paths = scalarList(v, "on.pull_request.paths", diags)
260
+		default:
261
+			*diags = append(*diags, errAt("on.pull_request."+k.Value, "unknown filter (allowed: types, branches, paths)"))
262
+		}
263
+	}
264
+	return prt
265
+}
266
+
267
+func parseScheduleTriggers(n *yaml.Node, diags *[]Diagnostic) []ScheduleTrigger {
268
+	if n.Kind != yaml.SequenceNode {
269
+		*diags = append(*diags, errAt("on.schedule", "must be a sequence of cron entries"))
270
+		return nil
271
+	}
272
+	out := make([]ScheduleTrigger, 0, len(n.Content))
273
+	for i, entry := range n.Content {
274
+		if entry.Kind != yaml.MappingNode {
275
+			*diags = append(*diags, errAt(fmt.Sprintf("on.schedule[%d]", i), "must be a mapping with a `cron:` key"))
276
+			continue
277
+		}
278
+		var s ScheduleTrigger
279
+		for j := 0; j < len(entry.Content); j += 2 {
280
+			k := entry.Content[j]
281
+			v := entry.Content[j+1]
282
+			switch k.Value {
283
+			case "cron":
284
+				if v.Kind != yaml.ScalarNode {
285
+					*diags = append(*diags, errAt(fmt.Sprintf("on.schedule[%d].cron", i), "must be a scalar cron expression"))
286
+					continue
287
+				}
288
+				s.Cron = v.Value
289
+			default:
290
+				*diags = append(*diags, errAt(fmt.Sprintf("on.schedule[%d].%s", i, k.Value), "unknown schedule key (allowed: cron)"))
291
+			}
292
+		}
293
+		if s.Cron == "" {
294
+			*diags = append(*diags, errAt(fmt.Sprintf("on.schedule[%d]", i), "missing required cron expression"))
295
+			continue
296
+		}
297
+		out = append(out, s)
298
+	}
299
+	return out
300
+}
301
+
302
+func parseDispatchTrigger(n *yaml.Node, diags *[]Diagnostic) *WorkflowDispatchTrigger {
303
+	wdt := &WorkflowDispatchTrigger{}
304
+	if n.Kind == yaml.ScalarNode && n.Value == "" {
305
+		return wdt
306
+	}
307
+	if n.Kind != yaml.MappingNode {
308
+		*diags = append(*diags, errAt("on.workflow_dispatch", "must be a mapping"))
309
+		return wdt
310
+	}
311
+	for i := 0; i < len(n.Content); i += 2 {
312
+		k := n.Content[i]
313
+		v := n.Content[i+1]
314
+		switch k.Value {
315
+		case "inputs":
316
+			wdt.Inputs = parseDispatchInputs(v, diags)
317
+		default:
318
+			*diags = append(*diags, errAt("on.workflow_dispatch."+k.Value, "unknown dispatch key (allowed: inputs)"))
319
+		}
320
+	}
321
+	return wdt
322
+}
323
+
324
// parseDispatchInputs parses the `on.workflow_dispatch.inputs`
// mapping of input-name → spec into a DispatchInput slice in document
// order. Malformed specs are diagnosed and skipped; `type` defaults
// to "string" when absent.
func parseDispatchInputs(n *yaml.Node, diags *[]Diagnostic) []DispatchInput {
	if n.Kind != yaml.MappingNode {
		*diags = append(*diags, errAt("on.workflow_dispatch.inputs", "must be a mapping of input-name → spec"))
		return nil
	}
	out := make([]DispatchInput, 0, len(n.Content)/2)
	for i := 0; i < len(n.Content); i += 2 {
		nameNode := n.Content[i]
		specNode := n.Content[i+1]
		input := DispatchInput{Name: nameNode.Value}
		if specNode.Kind != yaml.MappingNode {
			*diags = append(*diags, errAt("on.workflow_dispatch.inputs."+nameNode.Value, "must be a mapping"))
			continue
		}
		for j := 0; j < len(specNode.Content); j += 2 {
			k := specNode.Content[j]
			v := specNode.Content[j+1]
			switch k.Value {
			case "description":
				input.Description = v.Value
			case "type":
				// NOTE(review): `type` is not validated against a
				// closed set here — confirm whether unknown types
				// should be diagnosed at parse time or downstream.
				input.Type = v.Value
			case "default":
				input.Default = v.Value
			case "required":
				// NOTE(review): compares against the literal "true",
				// so YAML 1.1 spellings like `yes`/`True` parse as
				// false — confirm this is intended strictness.
				input.Required = v.Value == "true"
			case "options":
				input.Options = scalarList(v, "on.workflow_dispatch.inputs."+nameNode.Value+".options", diags)
			default:
				*diags = append(*diags, errAt("on.workflow_dispatch.inputs."+nameNode.Value+"."+k.Value, "unknown input key"))
			}
		}
		// Absent `type:` defaults to a free-form string input.
		if input.Type == "" {
			input.Type = "string"
		}
		out = append(out, input)
	}
	return out
}
363
+
364
+func parsePermissions(n *yaml.Node, path string) (Permissions, []Diagnostic) {
365
+	var diags []Diagnostic
366
+	p := Permissions{Per: map[string]PermissionLevel{}}
367
+	switch n.Kind {
368
+	case yaml.ScalarNode:
369
+		switch n.Value {
370
+		case "read-all", "write-all", "none":
371
+			p.Mode = n.Value
372
+		default:
373
+			diags = append(diags, errAt(path, "unknown shorthand (allowed: read-all, write-all, none)"))
374
+		}
375
+	case yaml.MappingNode:
376
+		for i := 0; i < len(n.Content); i += 2 {
377
+			k := n.Content[i]
378
+			v := n.Content[i+1]
379
+			lvl := PermissionLevel(v.Value)
380
+			if lvl != PermissionNone && lvl != PermissionRead && lvl != PermissionWrite {
381
+				diags = append(diags, errAt(path+"."+k.Value, "permission level must be none, read, or write"))
382
+				continue
383
+			}
384
+			p.Per[k.Value] = lvl
385
+		}
386
+	default:
387
+		diags = append(diags, errAt(path, "must be a shorthand string or a mapping"))
388
+	}
389
+	return p, diags
390
+}
391
+
392
+func parseEnv(n *yaml.Node, path string) (map[string]Value, []Diagnostic) {
393
+	var diags []Diagnostic
394
+	if n.Kind != yaml.MappingNode {
395
+		diags = append(diags, errAt(path, "must be a mapping"))
396
+		return nil, diags
397
+	}
398
+	out := map[string]Value{}
399
+	for i := 0; i < len(n.Content); i += 2 {
400
+		k := n.Content[i]
401
+		v := n.Content[i+1]
402
+		if v.Kind != yaml.ScalarNode {
403
+			diags = append(diags, errAt(path+"."+k.Value, "env values must be scalars"))
404
+			continue
405
+		}
406
+		// We tag env values literal-trusted here. The expression
407
+		// evaluator (S41a expr/eval.go) walks the Raw at dispatch
408
+		// time and propagates taint when the value contains
409
+		// `${{ shithub.event.X }}` references.
410
+		out[k.Value] = V(v.Value)
411
+	}
412
+	return out, diags
413
+}
414
+
415
+func parseConcurrency(n *yaml.Node, path string) (Concurrency, []Diagnostic) {
416
+	var diags []Diagnostic
417
+	c := Concurrency{}
418
+	switch n.Kind {
419
+	case yaml.ScalarNode:
420
+		c.Group = V(n.Value)
421
+	case yaml.MappingNode:
422
+		for i := 0; i < len(n.Content); i += 2 {
423
+			k := n.Content[i]
424
+			v := n.Content[i+1]
425
+			switch k.Value {
426
+			case "group":
427
+				c.Group = V(v.Value)
428
+			case "cancel-in-progress":
429
+				c.CancelInProgress = v.Value == "true"
430
+			default:
431
+				diags = append(diags, errAt(path+"."+k.Value, "unknown concurrency key (allowed: group, cancel-in-progress)"))
432
+			}
433
+		}
434
+	default:
435
+		diags = append(diags, errAt(path, "must be a string or mapping"))
436
+	}
437
+	return c, diags
438
+}
439
+
440
+func parseJobs(n *yaml.Node) ([]Job, []Diagnostic) {
441
+	var diags []Diagnostic
442
+	if n.Kind != yaml.MappingNode {
443
+		diags = append(diags, errAt("jobs", "must be a mapping of job-key → job-spec"))
444
+		return nil, diags
445
+	}
446
+	jobs := make([]Job, 0, len(n.Content)/2)
447
+	for i := 0; i < len(n.Content); i += 2 {
448
+		k := n.Content[i]
449
+		v := n.Content[i+1]
450
+		j, ds := parseJob(k.Value, v)
451
+		diags = append(diags, ds...)
452
+		jobs = append(jobs, j)
453
+	}
454
+	return jobs, diags
455
+}
456
+
457
+func parseJob(key string, n *yaml.Node) (Job, []Diagnostic) {
458
+	var diags []Diagnostic
459
+	j := Job{Key: key, TimeoutMinutes: 360}
460
+	if n.Kind != yaml.MappingNode {
461
+		diags = append(diags, errAt("jobs."+key, "job spec must be a mapping"))
462
+		return j, diags
463
+	}
464
+	for i := 0; i < len(n.Content); i += 2 {
465
+		k := n.Content[i]
466
+		v := n.Content[i+1]
467
+		path := "jobs." + key + "." + k.Value
468
+		switch k.Value {
469
+		case "name":
470
+			j.Name = v.Value
471
+		case "runs-on":
472
+			j.RunsOn = v.Value
473
+		case "needs":
474
+			if v.Kind == yaml.ScalarNode {
475
+				j.Needs = []string{v.Value}
476
+			} else {
477
+				j.Needs = scalarList(v, path, &diags)
478
+			}
479
+		case "if":
480
+			j.If = v.Value
481
+		case "timeout-minutes":
482
+			n, err := strconv.Atoi(v.Value)
483
+			if err != nil || n < 1 || n > 4320 {
484
+				diags = append(diags, errAt(path, "timeout-minutes must be an integer 1-4320"))
485
+				continue
486
+			}
487
+			j.TimeoutMinutes = n
488
+		case "permissions":
489
+			p, ds := parsePermissions(v, path)
490
+			j.Permissions = p
491
+			diags = append(diags, ds...)
492
+		case "env":
493
+			env, ds := parseEnv(v, path)
494
+			j.Env = env
495
+			diags = append(diags, ds...)
496
+		case "steps":
497
+			steps, ds := parseSteps(v, "jobs."+key)
498
+			j.Steps = steps
499
+			diags = append(diags, ds...)
500
+		default:
501
+			diags = append(diags, errAt(path, "unknown job key (allowed: name, runs-on, needs, if, timeout-minutes, permissions, env, steps)"))
502
+		}
503
+	}
504
+	if j.RunsOn == "" {
505
+		diags = append(diags, errAt("jobs."+key, "job missing required `runs-on:`"))
506
+	}
507
+	if len(j.Steps) == 0 {
508
+		diags = append(diags, errAt("jobs."+key, "job has no steps"))
509
+	}
510
+	return j, diags
511
+}
512
+
513
+func parseSteps(n *yaml.Node, jobPath string) ([]Step, []Diagnostic) {
514
+	var diags []Diagnostic
515
+	if n.Kind != yaml.SequenceNode {
516
+		diags = append(diags, errAt(jobPath+".steps", "must be a sequence"))
517
+		return nil, diags
518
+	}
519
+	steps := make([]Step, 0, len(n.Content))
520
+	for idx, item := range n.Content {
521
+		s, ds := parseStep(idx, item, jobPath)
522
+		diags = append(diags, ds...)
523
+		steps = append(steps, s)
524
+	}
525
+	return steps, diags
526
+}
527
+
528
// parseStep parses one entry of a job's `steps:` sequence. idx is the
// zero-based position, used only to build diagnostic paths. Every
// step must carry exactly one of `run:` or `uses:`, and `uses:` must
// be on the v1 allowlist (see aliases.go).
func parseStep(idx int, n *yaml.Node, jobPath string) (Step, []Diagnostic) {
	var diags []Diagnostic
	s := Step{}
	stepPath := fmt.Sprintf("%s.steps[%d]", jobPath, idx)
	if n.Kind != yaml.MappingNode {
		diags = append(diags, errAt(stepPath, "step must be a mapping"))
		return s, diags
	}
	for i := 0; i < len(n.Content); i += 2 {
		k := n.Content[i]
		v := n.Content[i+1]
		path := stepPath + "." + k.Value
		switch k.Value {
		case "id":
			s.ID = v.Value
		case "name":
			s.Name = v.Value
		case "if":
			s.If = v.Value
		case "run":
			s.Run = v.Value
		case "uses":
			s.Uses = v.Value
		case "with":
			// `with:` reuses parseEnv, so its diagnostics read
			// "env values must be scalars"; the path still points at
			// this step's `with` key.
			env, ds := parseEnv(v, path)
			s.With = env
			diags = append(diags, ds...)
		case "working-directory":
			s.WorkingDirectory = v.Value
		case "env":
			env, ds := parseEnv(v, path)
			s.Env = env
			diags = append(diags, ds...)
		case "continue-on-error":
			// Only the exact literal "true" enables it; any other
			// scalar (including "True"/"yes") yields false.
			s.ContinueOnError = v.Value == "true"
		default:
			diags = append(diags, errAt(path, "unknown step key (allowed: id, name, if, run, uses, with, working-directory, env, continue-on-error)"))
		}
	}
	// Exactly one of run/uses: neither and both are each errors.
	if s.Run == "" && s.Uses == "" {
		diags = append(diags, errAt(stepPath, "step must have either `run:` or `uses:`"))
	}
	if s.Run != "" && s.Uses != "" {
		diags = append(diags, errAt(stepPath, "step cannot have both `run:` and `uses:`"))
	}
	if s.Uses != "" && !IsAllowedUses(s.Uses) {
		diags = append(diags, errAt(stepPath+".uses",
			"unsupported `uses:` reference; v1 supports only "+
				"actions/checkout@v4, shithub/upload-artifact@v1, shithub/download-artifact@v1"))
	}
	return s, diags
}
580
+
581
+// scalarList parses either a single scalar or a sequence of scalars
582
+// into a []string. Used for branches/tags/paths/types-style lists.
583
+func scalarList(n *yaml.Node, path string, diags *[]Diagnostic) []string {
584
+	switch n.Kind {
585
+	case yaml.ScalarNode:
586
+		return []string{n.Value}
587
+	case yaml.SequenceNode:
588
+		out := make([]string, 0, len(n.Content))
589
+		for _, item := range n.Content {
590
+			if item.Kind != yaml.ScalarNode {
591
+				*diags = append(*diags, errAt(path, "list items must be scalars"))
592
+				continue
593
+			}
594
+			out = append(out, item.Value)
595
+		}
596
+		return out
597
+	default:
598
+		*diags = append(*diags, errAt(path, "must be a string or sequence of strings"))
599
+		return nil
600
+	}
601
+}
602
+
603
+func errAt(path, msg string) Diagnostic {
604
+	return Diagnostic{Path: path, Message: msg, Severity: Error}
605
+}
606
+
607
+// triggerSetIsNonEmpty reports whether at least one trigger is declared.
608
+// TriggerSet contains slices, so it isn't comparable; this helper avoids
609
+// per-call boilerplate at the parse-validate site.
610
+func triggerSetIsNonEmpty(ts TriggerSet) bool {
611
+	return ts.Push != nil || ts.PullRequest != nil ||
612
+		len(ts.Schedule) > 0 || ts.WorkflowDispatch != nil
613
+}
614
+
615
+func hasError(diags []Diagnostic) bool {
616
+	for _, d := range diags {
617
+		if d.Severity == Error {
618
+			return true
619
+		}
620
+	}
621
+	return false
622
+}
internal/actions/workflow/types.goadded
@@ -0,0 +1,268 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+// Package workflow parses .shithub/workflows/*.yml files into the typed
+// Workflow tree that S41b's trigger pipeline, S41c's secret resolver, and
+// S41d's runner all consume.
6
+//
7
+// The parser is intentionally strict: unknown top-level keys, unknown
8
+// step keys, unknown `uses:` references, and `${{ … }}` expressions
9
+// outside the allowlist all produce diagnostics. Workflow authors
10
+// catch their mistakes immediately instead of silently shipping a
11
+// workflow that does nothing.
12
+//
13
+// Every value that can carry user-controlled text (event payload
14
+// fields like PR title, branch name, etc.) is tagged with
15
+// Tainted=true. The runner's exec layer (S41d) refuses to interpolate
16
+// tainted values into shell strings — they compile to ${SHITHUB_INPUT_*}
17
+// envvar references set safely by the runner. This is the load-bearing
18
+// contract for expression-injection prevention.
19
+package workflow
20
+
21
// Workflow is the parsed top-level document.
type Workflow struct {
	// Name is the optional human-readable name (`name:` at root). When
	// blank, the UI defaults to the workflow file's basename.
	Name string

	// On lists trigger predicates. The trigger pipeline (S41b) matches
	// domain_events against these.
	On TriggerSet

	// Permissions is the workflow-level permissions block; jobs may
	// further narrow but cannot widen.
	Permissions Permissions

	// Env is the workflow-level env map. Values may carry expressions;
	// resolution + taint propagation happen at dispatch time.
	Env map[string]Value

	// Concurrency is the workflow-level concurrency control. Honored
	// from S41g; carried in v1 so the schema doesn't churn.
	Concurrency Concurrency

	// Jobs holds one entry per `jobs.<key>:` in the YAML document.
	// It is a slice rather than a map so document order is preserved
	// and re-rendering matches the author's layout; look a job up by
	// its Job.Key.
	Jobs []Job
}
47
+
48
// TriggerSet is the parsed `on:` block. We support: push, pull_request,
// schedule (cron), workflow_dispatch.
//
// Pointer fields distinguish "trigger not declared" (nil) from
// "declared with no filters" (non-nil zero value); see
// triggerSetIsNonEmpty in parse.go.
type TriggerSet struct {
	Push             *PushTrigger
	PullRequest      *PullRequestTrigger
	Schedule         []ScheduleTrigger
	WorkflowDispatch *WorkflowDispatchTrigger
}
56
+
57
// PushTrigger filters which pushes match. An empty (nil) Branches,
// Tags, or Paths slice means "all"; non-empty applies the standard
// GHA glob semantics. Both include and exclude (negative-glob with
// leading !) are accepted and are consulted in declaration order.
type PushTrigger struct {
	Branches []string
	Tags     []string
	Paths    []string
}
66
+
67
// PullRequestTrigger filters which PR events match. Types is the
// list of activity types (opened, synchronize, reopened, …); empty
// slices mean "no filter", matching PushTrigger's semantics.
type PullRequestTrigger struct {
	Types    []string
	Branches []string
	Paths    []string
}
74
+
75
// ScheduleTrigger declares a single cron entry (`on.schedule[].cron`).
// Multiple entries are allowed; each fires independently.
type ScheduleTrigger struct {
	// Cron is the raw cron expression as written in the workflow file.
	Cron string
}
80
+
81
// WorkflowDispatchTrigger declares the manual-trigger surface. Inputs
// are typed parameters the dispatcher prompts for; it may be empty
// for a workflow dispatched with no parameters.
type WorkflowDispatchTrigger struct {
	Inputs []DispatchInput
}
86
+
87
// DispatchInput is a single typed input for workflow_dispatch. v1
// accepts string, boolean, choice, environment.
type DispatchInput struct {
	Name        string
	Description string
	Type        string // "string" | "boolean" | "choice" | "environment"
	Default     string // raw string form regardless of Type
	Required    bool
	Options     []string // populated when Type=="choice"
}
97
+
98
// Permissions is the GitHub-Actions-equivalent permissions block.
// The zero value (Mode == "" with nil Per) means "default", which is
// read access to contents. Per keys mirror GHA permission names:
// contents, pull-requests, issues, actions, deployments, packages,
// statuses, security-events, etc.
//
// A workflow may set Mode to "none" to deny everything, or to
// "read-all"/"write-all" to grant everything at that level (subject
// to the actor having repo write).
type Permissions struct {
	Mode string                     // "" | "read-all" | "write-all" | "none"
	Per  map[string]PermissionLevel // keyed by GHA permission name
}
109
+
110
// PermissionLevel mirrors GHA's per-permission grants. It is
// string-typed so the YAML spelling is carried as-is.
type PermissionLevel string

const (
	PermissionNone  PermissionLevel = "none"
	PermissionRead  PermissionLevel = "read"
	PermissionWrite PermissionLevel = "write"
)
118
+
119
// Concurrency is the workflow-level concurrency control. Group is an
// expression evaluated against the trigger context to produce the
// group key. CancelInProgress=true cancels older runs for the same
// group when a new one is enqueued. The zero value (empty Group)
// means no concurrency control is declared.
type Concurrency struct {
	Group            Value
	CancelInProgress bool
}
127
+
128
// Job is one entry under `jobs:`.
type Job struct {
	// Key is the YAML map key: `jobs.<key>:`. Identifier-shape, used
	// for `needs:` references and as the URL slug.
	Key string

	// Name is the optional human-readable name (`jobs.<key>.name:`).
	// Falls back to Key when blank.
	Name string

	// RunsOn is the runner-selector string ("ubuntu-latest", "self-hosted",
	// "nix-flake", etc.). Mapped at runner-claim time to the actual
	// container image / engine.
	RunsOn string

	// Needs lists job keys this job depends on. The trigger pipeline
	// resolves these to job IDs at insert time and the runner respects
	// them at dispatch time (S41b/d).
	Needs []string

	// If is the job-level conditional, carried as the raw expression
	// source. Evaluated against the trigger context just before
	// dispatch; false → skipped.
	If string

	// TimeoutMinutes bounds total job runtime. Default 360 (6h),
	// matches GHA. Range 1-4320 enforced by the parser.
	TimeoutMinutes int

	// Permissions narrows the workflow-level permissions for this job.
	// Cannot widen.
	Permissions Permissions

	// Env is per-job env overlay. Merged on top of workflow Env.
	Env map[string]Value

	// Steps run serially. Order is YAML document order.
	Steps []Step
}
166
+
167
// Step is one entry under a job's `steps:`. Exactly one of Run or
// Uses is non-empty; the parser diagnoses both-set and neither-set.
type Step struct {
	// ID is the optional `id:` for cross-step references via
	// ${{ steps.<id>.outputs.X }}. Outputs themselves are v2; we carry
	// the id field now so the schema doesn't churn.
	ID string

	// Name is the human-readable label. Falls back to a synthesized
	// "Run <first-line-of-run-command>" when blank.
	Name string

	// If is the step-level conditional, carried as the raw expression
	// source. Evaluated mid-job.
	If string

	// Run is the shell command. Empty when this step uses an alias.
	// May contain `${{ … }}` expressions; the parser resolves them
	// into Value{Tainted: …} nodes.
	Run string

	// Uses is the magic-alias slug, restricted to the closed allowlist
	// in aliases.go. Exactly one of Run or Uses is non-empty per the
	// migration's CHECK constraint.
	Uses string

	// With is the input map for `uses:` aliases. Forwarded to the
	// alias-specific runner step (e.g., upload-artifact's `name:`).
	With map[string]Value

	// WorkingDirectory overrides the step's cwd.
	WorkingDirectory string

	// Env is per-step env overlay. Merged on top of job Env.
	Env map[string]Value

	// ContinueOnError lets the job proceed past this step's failure.
	ContinueOnError bool
}
203
+
204
// Value is a parsed value that may have come from a literal string,
// a `${{ … }}` expression, or a mix. Tainted=true when the value
// transitively depends on an untrusted source (event payload fields
// the workflow author doesn't control). The runner refuses to
// interpolate Tainted values into shell strings.
//
// Raw is the original source string — useful for diagnostics and for
// the runner's input-binding logic. The zero Value is an empty,
// untainted literal.
type Value struct {
	Raw     string
	Tainted bool
}
216
+
217
+// V is a tiny constructor for trusted (literal) values, used by the
218
+// parser when it knows the source is the workflow file itself.
219
+func V(raw string) Value { return Value{Raw: raw, Tainted: false} }
220
+
221
+// Tainted is the constructor for untrusted-source values. The
222
+// expression evaluator (internal/actions/expr) calls this when it
223
+// resolves a reference into the `shithub.event.*` namespace.
224
+func Tainted(raw string) Value { return Value{Raw: raw, Tainted: true} }
225
+
226
// Diagnostic is a parser finding. Severity controls whether parsing
// continues; Path is dot-notated for UI display ("jobs.test.steps[2].run").
// Note the zero Severity is Error, so a Diagnostic built without an
// explicit Severity is treated as fatal.
type Diagnostic struct {
	Path     string
	Message  string
	Severity Severity
}
233
+
234
// Severity classifies a diagnostic. The zero value is Error
// (Error == iota == 0) — the safe default for a strict parser.
type Severity int

const (
	// Error stops parsing — the workflow is unusable.
	Error Severity = iota

	// Warning is non-fatal but flagged in the UI. Used for the
	// `${{ github.* }}` deprecation alias.
	Warning
)
245
+
246
+// String renders the diagnostic for the admin parse subcommand and
247
+// future UI surfaces.
248
+func (d Diagnostic) String() string {
249
+	prefix := "error"
250
+	if d.Severity == Warning {
251
+		prefix = "warning"
252
+	}
253
+	if d.Path == "" {
254
+		return prefix + ": " + d.Message
255
+	}
256
+	return prefix + " at " + d.Path + ": " + d.Message
257
+}
258
+
259
// MaxWorkflowFileBytes is the parser-side size cap. Files larger than
// this are rejected before YAML decode begins, bounding decode cost
// up front. 64 KB is described as the GHA limit minus a small margin —
// NOTE(review): confirm against GHA's current documented limit.
const MaxWorkflowFileBytes = 64 * 1024
263
+
264
// MaxYAMLAliases bounds anchor expansions per document — the
// billion-laughs guard. yaml.v3 doesn't expose a direct knob; we
// track aliases at decode time via a custom Unmarshaler in parse.go
// and cap at this value.
const MaxYAMLAliases = 100