Go · 18697 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 package workflow
4
5 import (
6 "errors"
7 "fmt"
8 "strconv"
9 "strings"
10
11 "go.yaml.in/yaml/v3"
12 )
13
// Sentinel errors for the pre-decode resource guards in Parse.
var (
	// ErrTooLarge is returned when the workflow file exceeds
	// MaxWorkflowFileBytes. The cap is enforced before YAML decode so a
	// malicious file can't blow the parser's memory budget.
	ErrTooLarge = errors.New("workflow file exceeds size limit")

	// ErrTooManyAliases is returned when a YAML document expands more
	// than MaxYAMLAliases anchor references — the billion-laughs guard.
	ErrTooManyAliases = errors.New("workflow YAML has too many aliases (anchor-bomb guard)")
)
22
// Parse decodes a workflow file. It returns the parsed document, the
// list of diagnostics encountered (warnings non-fatal, errors fatal),
// and an error iff the file was unparseable.
//
// The parser is strict: unknown top-level keys, unknown step keys,
// and `uses:` references outside the AllowedUsesAliases set all
// produce diagnostics with Severity=Error and the function returns
// (nil, diagnostics, nil). Callers (S41b trigger pipeline,
// `shithubd admin actions parse` CLI) decide what to do with that.
//
// On a YAML-level error (malformed syntax, anchor bomb, oversized
// file), Parse returns (nil, diagnostics, err).
func Parse(src []byte) (*Workflow, []Diagnostic, error) {
	// Size cap first, before the decoder ever sees the bytes.
	if len(src) > MaxWorkflowFileBytes {
		return nil, []Diagnostic{{
			Message: fmt.Sprintf("workflow file is %d bytes; limit is %d", len(src), MaxWorkflowFileBytes),
			Severity: Error,
		}}, ErrTooLarge
	}

	// We decode first to a yaml.Node so we can preserve doc order and
	// catch anchor abuse. Then we hand-walk into the typed Workflow.
	var root yaml.Node
	dec := yaml.NewDecoder(strings.NewReader(string(src)))
	if err := dec.Decode(&root); err != nil {
		return nil, []Diagnostic{{
			Message: "YAML decode: " + err.Error(),
			Severity: Error,
		}}, err
	}
	// Kind zero: Decode returned without populating the node —
	// presumably input with no YAML content (TODO confirm which inputs
	// reach this branch vs. erroring out above).
	if root.Kind == 0 {
		return nil, []Diagnostic{{
			Message: "workflow file is empty",
			Severity: Error,
		}}, errors.New("empty workflow")
	}
	if root.Kind != yaml.DocumentNode {
		return nil, []Diagnostic{{
			Message: "expected YAML document at root",
			Severity: Error,
		}}, errors.New("non-document root")
	}
	// A document wraps exactly one content node; the workflow grammar
	// requires that node to be a mapping.
	if len(root.Content) != 1 || root.Content[0].Kind != yaml.MappingNode {
		return nil, []Diagnostic{{
			Message: "workflow must be a YAML mapping at the top level",
			Severity: Error,
		}}, errors.New("non-mapping root")
	}
	// Anchor-bomb guard: count alias dereferences before walking.
	if aliases := countAliases(root.Content[0], 0); aliases > MaxYAMLAliases {
		return nil, []Diagnostic{{
			Message: fmt.Sprintf("workflow has %d alias references; limit is %d", aliases, MaxYAMLAliases),
			Severity: Error,
		}}, ErrTooManyAliases
	}

	w := &Workflow{
		Env: map[string]Value{},
		Jobs: nil,
	}
	var diags []Diagnostic
	mapping := root.Content[0]

	// Top-level keys are walked deterministically. Unknown keys produce
	// diagnostics so workflow authors catch typos at parse time.
	// (yaml.v3 flattens mapping pairs into Content: even indices are
	// keys, the following odd index is the matching value.)
	for i := 0; i < len(mapping.Content); i += 2 {
		k := mapping.Content[i]
		v := mapping.Content[i+1]
		switch k.Value {
		case "name":
			if v.Kind != yaml.ScalarNode {
				diags = append(diags, errAt("name", "must be a scalar string"))
				continue
			}
			w.Name = v.Value
		case "on":
			ts, ds := parseOn(v)
			w.On = ts
			diags = append(diags, ds...)
		case "permissions":
			perms, ds := parsePermissions(v, "permissions")
			w.Permissions = perms
			diags = append(diags, ds...)
		case "env":
			env, ds := parseEnv(v, "env")
			w.Env = env
			diags = append(diags, ds...)
		case "concurrency":
			c, ds := parseConcurrency(v, "concurrency")
			w.Concurrency = c
			diags = append(diags, ds...)
		case "jobs":
			jobs, ds := parseJobs(v)
			w.Jobs = jobs
			diags = append(diags, ds...)
		default:
			diags = append(diags, errAt(k.Value, "unknown top-level key (allowed: name, on, permissions, env, concurrency, jobs)"))
		}
	}

	// Structural requirements. Each is reported only when no error has
	// been recorded yet, so authors aren't flooded with follow-on noise
	// when the walk above already failed. Note the ordering: appending
	// the jobs error suppresses the trigger error on the next line.
	if len(w.Jobs) == 0 && !hasError(diags) {
		diags = append(diags, errAt("jobs", "workflow must declare at least one job"))
	}
	if !triggerSetIsNonEmpty(w.On) && !hasError(diags) {
		diags = append(diags, errAt("on", "workflow must declare at least one trigger"))
	}

	// Any Error-severity diagnostic makes the document unusable, but
	// the YAML itself was valid, so err stays nil (see doc comment).
	if hasError(diags) {
		return nil, diags, nil
	}
	return w, diags, nil
}
134
135 // countAliases walks the YAML node graph and returns the number of
136 // alias dereferences. Used only as the anchor-bomb guard; we don't
137 // resolve aliases ourselves (yaml.v3 does that during Decode).
138 func countAliases(n *yaml.Node, depth int) int {
139 if n == nil || depth > 100 {
140 return 0
141 }
142 count := 0
143 if n.Kind == yaml.AliasNode {
144 count++
145 }
146 for _, c := range n.Content {
147 count += countAliases(c, depth+1)
148 if count > MaxYAMLAliases {
149 return count
150 }
151 }
152 return count
153 }
154
155 // parseOn handles the `on:` block in its three documented shapes:
156 // - shorthand string: `on: push`
157 // - shorthand list: `on: [push, pull_request]`
158 // - mapping: `on: { push: { branches: [main] }, schedule: [...] }`
159 func parseOn(n *yaml.Node) (TriggerSet, []Diagnostic) {
160 var ts TriggerSet
161 var diags []Diagnostic
162 switch n.Kind {
163 case yaml.ScalarNode:
164 applyEventName(&ts, n.Value, &diags, "on")
165 case yaml.SequenceNode:
166 for _, item := range n.Content {
167 if item.Kind != yaml.ScalarNode {
168 diags = append(diags, errAt("on", "list items must be event names"))
169 continue
170 }
171 applyEventName(&ts, item.Value, &diags, "on")
172 }
173 case yaml.MappingNode:
174 for i := 0; i < len(n.Content); i += 2 {
175 k := n.Content[i]
176 v := n.Content[i+1]
177 switch k.Value {
178 case "push":
179 ts.Push = parsePushTrigger(v, &diags)
180 case "pull_request":
181 ts.PullRequest = parsePullRequestTrigger(v, &diags)
182 case "schedule":
183 ts.Schedule = parseScheduleTriggers(v, &diags)
184 case "workflow_dispatch":
185 ts.WorkflowDispatch = parseDispatchTrigger(v, &diags)
186 default:
187 diags = append(diags, errAt("on."+k.Value, "unknown event type (allowed: push, pull_request, schedule, workflow_dispatch)"))
188 }
189 }
190 default:
191 diags = append(diags, errAt("on", "must be a string, sequence, or mapping"))
192 }
193 return ts, diags
194 }
195
196 func applyEventName(ts *TriggerSet, name string, diags *[]Diagnostic, path string) {
197 switch name {
198 case "push":
199 if ts.Push == nil {
200 ts.Push = &PushTrigger{}
201 }
202 case "pull_request":
203 if ts.PullRequest == nil {
204 ts.PullRequest = &PullRequestTrigger{}
205 }
206 case "workflow_dispatch":
207 if ts.WorkflowDispatch == nil {
208 ts.WorkflowDispatch = &WorkflowDispatchTrigger{}
209 }
210 default:
211 *diags = append(*diags, errAt(path, "unknown event "+strconv.Quote(name)+" (allowed: push, pull_request, workflow_dispatch — schedule requires the mapping form)"))
212 }
213 }
214
215 func parsePushTrigger(n *yaml.Node, diags *[]Diagnostic) *PushTrigger {
216 pt := &PushTrigger{}
217 if n.Kind == yaml.ScalarNode && n.Value == "" {
218 return pt
219 }
220 if n.Kind != yaml.MappingNode {
221 *diags = append(*diags, errAt("on.push", "must be a mapping"))
222 return pt
223 }
224 for i := 0; i < len(n.Content); i += 2 {
225 k := n.Content[i]
226 v := n.Content[i+1]
227 switch k.Value {
228 case "branches":
229 pt.Branches = scalarList(v, "on.push.branches", diags)
230 case "tags":
231 pt.Tags = scalarList(v, "on.push.tags", diags)
232 case "paths":
233 pt.Paths = scalarList(v, "on.push.paths", diags)
234 default:
235 *diags = append(*diags, errAt("on.push."+k.Value, "unknown push filter (allowed: branches, tags, paths)"))
236 }
237 }
238 return pt
239 }
240
241 func parsePullRequestTrigger(n *yaml.Node, diags *[]Diagnostic) *PullRequestTrigger {
242 prt := &PullRequestTrigger{}
243 if n.Kind == yaml.ScalarNode && n.Value == "" {
244 return prt
245 }
246 if n.Kind != yaml.MappingNode {
247 *diags = append(*diags, errAt("on.pull_request", "must be a mapping"))
248 return prt
249 }
250 for i := 0; i < len(n.Content); i += 2 {
251 k := n.Content[i]
252 v := n.Content[i+1]
253 switch k.Value {
254 case "types":
255 prt.Types = scalarList(v, "on.pull_request.types", diags)
256 case "branches":
257 prt.Branches = scalarList(v, "on.pull_request.branches", diags)
258 case "paths":
259 prt.Paths = scalarList(v, "on.pull_request.paths", diags)
260 default:
261 *diags = append(*diags, errAt("on.pull_request."+k.Value, "unknown filter (allowed: types, branches, paths)"))
262 }
263 }
264 return prt
265 }
266
267 func parseScheduleTriggers(n *yaml.Node, diags *[]Diagnostic) []ScheduleTrigger {
268 if n.Kind != yaml.SequenceNode {
269 *diags = append(*diags, errAt("on.schedule", "must be a sequence of cron entries"))
270 return nil
271 }
272 out := make([]ScheduleTrigger, 0, len(n.Content))
273 for i, entry := range n.Content {
274 if entry.Kind != yaml.MappingNode {
275 *diags = append(*diags, errAt(fmt.Sprintf("on.schedule[%d]", i), "must be a mapping with a `cron:` key"))
276 continue
277 }
278 var s ScheduleTrigger
279 for j := 0; j < len(entry.Content); j += 2 {
280 k := entry.Content[j]
281 v := entry.Content[j+1]
282 switch k.Value {
283 case "cron":
284 if v.Kind != yaml.ScalarNode {
285 *diags = append(*diags, errAt(fmt.Sprintf("on.schedule[%d].cron", i), "must be a scalar cron expression"))
286 continue
287 }
288 s.Cron = v.Value
289 default:
290 *diags = append(*diags, errAt(fmt.Sprintf("on.schedule[%d].%s", i, k.Value), "unknown schedule key (allowed: cron)"))
291 }
292 }
293 if s.Cron == "" {
294 *diags = append(*diags, errAt(fmt.Sprintf("on.schedule[%d]", i), "missing required cron expression"))
295 continue
296 }
297 out = append(out, s)
298 }
299 return out
300 }
301
302 func parseDispatchTrigger(n *yaml.Node, diags *[]Diagnostic) *WorkflowDispatchTrigger {
303 wdt := &WorkflowDispatchTrigger{}
304 if n.Kind == yaml.ScalarNode && n.Value == "" {
305 return wdt
306 }
307 if n.Kind != yaml.MappingNode {
308 *diags = append(*diags, errAt("on.workflow_dispatch", "must be a mapping"))
309 return wdt
310 }
311 for i := 0; i < len(n.Content); i += 2 {
312 k := n.Content[i]
313 v := n.Content[i+1]
314 switch k.Value {
315 case "inputs":
316 wdt.Inputs = parseDispatchInputs(v, diags)
317 default:
318 *diags = append(*diags, errAt("on.workflow_dispatch."+k.Value, "unknown dispatch key (allowed: inputs)"))
319 }
320 }
321 return wdt
322 }
323
324 func parseDispatchInputs(n *yaml.Node, diags *[]Diagnostic) []DispatchInput {
325 if n.Kind != yaml.MappingNode {
326 *diags = append(*diags, errAt("on.workflow_dispatch.inputs", "must be a mapping of input-name → spec"))
327 return nil
328 }
329 out := make([]DispatchInput, 0, len(n.Content)/2)
330 for i := 0; i < len(n.Content); i += 2 {
331 nameNode := n.Content[i]
332 specNode := n.Content[i+1]
333 input := DispatchInput{Name: nameNode.Value}
334 if specNode.Kind != yaml.MappingNode {
335 *diags = append(*diags, errAt("on.workflow_dispatch.inputs."+nameNode.Value, "must be a mapping"))
336 continue
337 }
338 for j := 0; j < len(specNode.Content); j += 2 {
339 k := specNode.Content[j]
340 v := specNode.Content[j+1]
341 switch k.Value {
342 case "description":
343 input.Description = v.Value
344 case "type":
345 input.Type = v.Value
346 case "default":
347 input.Default = v.Value
348 case "required":
349 input.Required = v.Value == "true"
350 case "options":
351 input.Options = scalarList(v, "on.workflow_dispatch.inputs."+nameNode.Value+".options", diags)
352 default:
353 *diags = append(*diags, errAt("on.workflow_dispatch.inputs."+nameNode.Value+"."+k.Value, "unknown input key"))
354 }
355 }
356 if input.Type == "" {
357 input.Type = "string"
358 }
359 out = append(out, input)
360 }
361 return out
362 }
363
364 func parsePermissions(n *yaml.Node, path string) (Permissions, []Diagnostic) {
365 var diags []Diagnostic
366 p := Permissions{Per: map[string]PermissionLevel{}}
367 switch n.Kind {
368 case yaml.ScalarNode:
369 switch n.Value {
370 case "read-all", "write-all", "none":
371 p.Mode = n.Value
372 default:
373 diags = append(diags, errAt(path, "unknown shorthand (allowed: read-all, write-all, none)"))
374 }
375 case yaml.MappingNode:
376 for i := 0; i < len(n.Content); i += 2 {
377 k := n.Content[i]
378 v := n.Content[i+1]
379 lvl := PermissionLevel(v.Value)
380 if lvl != PermissionNone && lvl != PermissionRead && lvl != PermissionWrite {
381 diags = append(diags, errAt(path+"."+k.Value, "permission level must be none, read, or write"))
382 continue
383 }
384 p.Per[k.Value] = lvl
385 }
386 default:
387 diags = append(diags, errAt(path, "must be a shorthand string or a mapping"))
388 }
389 return p, diags
390 }
391
392 func parseEnv(n *yaml.Node, path string) (map[string]Value, []Diagnostic) {
393 var diags []Diagnostic
394 if n.Kind != yaml.MappingNode {
395 diags = append(diags, errAt(path, "must be a mapping"))
396 return nil, diags
397 }
398 out := map[string]Value{}
399 for i := 0; i < len(n.Content); i += 2 {
400 k := n.Content[i]
401 v := n.Content[i+1]
402 if v.Kind != yaml.ScalarNode {
403 diags = append(diags, errAt(path+"."+k.Value, "env values must be scalars"))
404 continue
405 }
406 // We tag env values literal-trusted here. The expression
407 // evaluator (S41a expr/eval.go) walks the Raw at dispatch
408 // time and propagates taint when the value contains
409 // `${{ shithub.event.X }}` references.
410 out[k.Value] = V(v.Value)
411 }
412 return out, diags
413 }
414
415 func parseConcurrency(n *yaml.Node, path string) (Concurrency, []Diagnostic) {
416 var diags []Diagnostic
417 c := Concurrency{}
418 switch n.Kind {
419 case yaml.ScalarNode:
420 c.Group = V(n.Value)
421 case yaml.MappingNode:
422 for i := 0; i < len(n.Content); i += 2 {
423 k := n.Content[i]
424 v := n.Content[i+1]
425 switch k.Value {
426 case "group":
427 c.Group = V(v.Value)
428 case "cancel-in-progress":
429 c.CancelInProgress = v.Value == "true"
430 default:
431 diags = append(diags, errAt(path+"."+k.Value, "unknown concurrency key (allowed: group, cancel-in-progress)"))
432 }
433 }
434 default:
435 diags = append(diags, errAt(path, "must be a string or mapping"))
436 }
437 return c, diags
438 }
439
440 func parseJobs(n *yaml.Node) ([]Job, []Diagnostic) {
441 var diags []Diagnostic
442 if n.Kind != yaml.MappingNode {
443 diags = append(diags, errAt("jobs", "must be a mapping of job-key → job-spec"))
444 return nil, diags
445 }
446 jobs := make([]Job, 0, len(n.Content)/2)
447 for i := 0; i < len(n.Content); i += 2 {
448 k := n.Content[i]
449 v := n.Content[i+1]
450 j, ds := parseJob(k.Value, v)
451 diags = append(diags, ds...)
452 jobs = append(jobs, j)
453 }
454 return jobs, diags
455 }
456
457 func parseJob(key string, n *yaml.Node) (Job, []Diagnostic) {
458 var diags []Diagnostic
459 j := Job{Key: key, TimeoutMinutes: 360}
460 if n.Kind != yaml.MappingNode {
461 diags = append(diags, errAt("jobs."+key, "job spec must be a mapping"))
462 return j, diags
463 }
464 for i := 0; i < len(n.Content); i += 2 {
465 k := n.Content[i]
466 v := n.Content[i+1]
467 path := "jobs." + key + "." + k.Value
468 switch k.Value {
469 case "name":
470 j.Name = v.Value
471 case "runs-on":
472 j.RunsOn = v.Value
473 case "needs":
474 if v.Kind == yaml.ScalarNode {
475 j.Needs = []string{v.Value}
476 } else {
477 j.Needs = scalarList(v, path, &diags)
478 }
479 case "if":
480 j.If = v.Value
481 case "timeout-minutes":
482 n, err := strconv.Atoi(v.Value)
483 if err != nil || n < 1 || n > 4320 {
484 diags = append(diags, errAt(path, "timeout-minutes must be an integer 1-4320"))
485 continue
486 }
487 j.TimeoutMinutes = n
488 case "permissions":
489 p, ds := parsePermissions(v, path)
490 j.Permissions = p
491 diags = append(diags, ds...)
492 case "env":
493 env, ds := parseEnv(v, path)
494 j.Env = env
495 diags = append(diags, ds...)
496 case "steps":
497 steps, ds := parseSteps(v, "jobs."+key)
498 j.Steps = steps
499 diags = append(diags, ds...)
500 default:
501 diags = append(diags, errAt(path, "unknown job key (allowed: name, runs-on, needs, if, timeout-minutes, permissions, env, steps)"))
502 }
503 }
504 if j.RunsOn == "" {
505 diags = append(diags, errAt("jobs."+key, "job missing required `runs-on:`"))
506 }
507 if len(j.Steps) == 0 {
508 diags = append(diags, errAt("jobs."+key, "job has no steps"))
509 }
510 return j, diags
511 }
512
513 func parseSteps(n *yaml.Node, jobPath string) ([]Step, []Diagnostic) {
514 var diags []Diagnostic
515 if n.Kind != yaml.SequenceNode {
516 diags = append(diags, errAt(jobPath+".steps", "must be a sequence"))
517 return nil, diags
518 }
519 steps := make([]Step, 0, len(n.Content))
520 for idx, item := range n.Content {
521 s, ds := parseStep(idx, item, jobPath)
522 diags = append(diags, ds...)
523 steps = append(steps, s)
524 }
525 return steps, diags
526 }
527
528 func parseStep(idx int, n *yaml.Node, jobPath string) (Step, []Diagnostic) {
529 var diags []Diagnostic
530 s := Step{}
531 stepPath := fmt.Sprintf("%s.steps[%d]", jobPath, idx)
532 if n.Kind != yaml.MappingNode {
533 diags = append(diags, errAt(stepPath, "step must be a mapping"))
534 return s, diags
535 }
536 for i := 0; i < len(n.Content); i += 2 {
537 k := n.Content[i]
538 v := n.Content[i+1]
539 path := stepPath + "." + k.Value
540 switch k.Value {
541 case "id":
542 s.ID = v.Value
543 case "name":
544 s.Name = v.Value
545 case "if":
546 s.If = v.Value
547 case "run":
548 s.Run = v.Value
549 case "uses":
550 s.Uses = v.Value
551 case "with":
552 env, ds := parseEnv(v, path)
553 s.With = env
554 diags = append(diags, ds...)
555 case "working-directory":
556 s.WorkingDirectory = v.Value
557 case "env":
558 env, ds := parseEnv(v, path)
559 s.Env = env
560 diags = append(diags, ds...)
561 case "continue-on-error":
562 s.ContinueOnError = v.Value == "true"
563 default:
564 diags = append(diags, errAt(path, "unknown step key (allowed: id, name, if, run, uses, with, working-directory, env, continue-on-error)"))
565 }
566 }
567 if s.Run == "" && s.Uses == "" {
568 diags = append(diags, errAt(stepPath, "step must have either `run:` or `uses:`"))
569 }
570 if s.Run != "" && s.Uses != "" {
571 diags = append(diags, errAt(stepPath, "step cannot have both `run:` and `uses:`"))
572 }
573 if s.Uses != "" && !IsAllowedUses(s.Uses) {
574 diags = append(diags, errAt(stepPath+".uses",
575 "unsupported `uses:` reference; v1 supports only "+
576 "actions/checkout@v4, shithub/upload-artifact@v1, shithub/download-artifact@v1"))
577 }
578 return s, diags
579 }
580
581 // scalarList parses either a single scalar or a sequence of scalars
582 // into a []string. Used for branches/tags/paths/types-style lists.
583 func scalarList(n *yaml.Node, path string, diags *[]Diagnostic) []string {
584 switch n.Kind {
585 case yaml.ScalarNode:
586 return []string{n.Value}
587 case yaml.SequenceNode:
588 out := make([]string, 0, len(n.Content))
589 for _, item := range n.Content {
590 if item.Kind != yaml.ScalarNode {
591 *diags = append(*diags, errAt(path, "list items must be scalars"))
592 continue
593 }
594 out = append(out, item.Value)
595 }
596 return out
597 default:
598 *diags = append(*diags, errAt(path, "must be a string or sequence of strings"))
599 return nil
600 }
601 }
602
603 func errAt(path, msg string) Diagnostic {
604 return Diagnostic{Path: path, Message: msg, Severity: Error}
605 }
606
607 // triggerSetIsNonEmpty reports whether at least one trigger is declared.
608 // TriggerSet contains slices, so it isn't comparable; this helper avoids
609 // per-call boilerplate at the parse-validate site.
610 func triggerSetIsNonEmpty(ts TriggerSet) bool {
611 return ts.Push != nil || ts.PullRequest != nil ||
612 len(ts.Schedule) > 0 || ts.WorkflowDispatch != nil
613 }
614
615 func hasError(diags []Diagnostic) bool {
616 for _, d := range diags {
617 if d.Severity == Error {
618 return true
619 }
620 }
621 return false
622 }
623