tenseleyflow/shithub / 7c43086

Browse files

actions/workflow: parser + types + dialect + yaml/v3 direct dep

Adds the typed Workflow tree, the strict YAML parser, the magic-uses allowlist, and the github→shithub namespace alias normalizer. (S41a)
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
7c43086e03fba44848d7fb5679f3eb42ca95b29b
Parents
3c7723c
Tree
45be124

5 changed files

StatusFile+-
M go.mod 2 2
A internal/actions/workflow/aliases.go 44 0
A internal/actions/workflow/dialect.go 76 0
A internal/actions/workflow/parse.go 622 0
A internal/actions/workflow/types.go 268 0
go.modmodified
@@ -23,8 +23,10 @@ require (
2323
 	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0
2424
 	go.opentelemetry.io/otel/sdk v1.43.0
2525
 	go.opentelemetry.io/otel/trace v1.43.0
26
+	go.yaml.in/yaml/v3 v3.0.4
2627
 	golang.org/x/crypto v0.50.0
2728
 	golang.org/x/image v0.39.0
29
+	golang.org/x/net v0.53.0
2830
 	golang.org/x/sync v0.20.0
2931
 )
3032
 
@@ -66,8 +68,6 @@ require (
6668
 	go.opentelemetry.io/proto/otlp v1.10.0 // indirect
6769
 	go.uber.org/multierr v1.11.0 // indirect
6870
 	go.yaml.in/yaml/v2 v2.4.2 // indirect
69
-	go.yaml.in/yaml/v3 v3.0.4 // indirect
70
-	golang.org/x/net v0.53.0 // indirect
7171
 	golang.org/x/sys v0.43.0 // indirect
7272
 	golang.org/x/text v0.36.0 // indirect
7373
 	google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect
internal/actions/workflow/aliases.goadded
@@ -0,0 +1,44 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package workflow
4
+
5
+// AllowedUsesAliases is the closed allowlist of step `uses:` references
6
+// the v1 parser accepts. Anything else is a parser error.
7
+//
8
+// The decision (campaign §"What we're cribbing" → "implicit checkout"):
9
+// v1 ships with exactly three magic aliases and refuses everything
10
+// else. Community/Docker `uses:` is parked for v2 because it requires
11
+// a marketplace + sandbox-per-action contract we don't have. Limiting
12
+// the allowlist to three keeps the precedent honest.
13
+//
14
+//	actions/checkout@v4         — shallow-clones the repo into the
15
+//	                              workspace before steps run. The
16
+//	                              runner does this with its own git
17
+//	                              binary (S41d), not via a containerized
18
+//	                              external action.
19
+//
20
+//	shithub/upload-artifact@v1  — calls the runner's artifact-upload
21
+//	                              path which signs an S3 PUT URL via
22
+//	                              shithubd's API.
23
+//
24
+//	shithub/download-artifact@v1 — fetches an artifact uploaded earlier
25
+//	                               in the same run (or, post-S41d, from
26
+//	                               a parent run via parent_run_id).
27
+//
28
+// Adding a fourth alias requires:
29
+//  1. Reviewer-required note in the commit message explaining what
30
+//     the alias does and why it can't be a `run:` step.
31
+//  2. Coverage in tests/fixtures/workflows/.
32
+//  3. Update to the migration CHECK constraint
33
+//     (workflow_steps_uses_alias_known) AND a corresponding migration.
34
var AllowedUsesAliases = map[string]struct{}{
	"shithub/download-artifact@v1": {},
	"shithub/upload-artifact@v1":   {},
	"actions/checkout@v4":          {},
}

// IsAllowedUses reports whether ref is a recognized `uses:` alias.
func IsAllowedUses(ref string) bool {
	if _, known := AllowedUsesAliases[ref]; known {
		return true
	}
	return false
}
internal/actions/workflow/dialect.goadded
@@ -0,0 +1,76 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package workflow
4
+
5
+import "strings"
6
+
7
+// Dialect controls the workflow file's accepted expression namespace.
8
+//
9
+// Default ("shithub"): the canonical namespace is `${{ shithub.* }}`.
10
+// `${{ github.* }}` is accepted as an alias and the parser emits a
11
+// Severity=Warning diagnostic so workflow authors are nudged to update.
12
+//
13
+// Strict: only `${{ shithub.* }}` is accepted; `${{ github.* }}`
14
+// produces an error. Operators flip via cfg.Actions.DialectStrict
15
+// when they want to forbid the alias outright (post-migration).
16
type Dialect string

const (
	// DialectDefault accepts `${{ github.* }}` as a deprecated alias
	// for `${{ shithub.* }}` (surfaced as a Warning diagnostic).
	DialectDefault Dialect = "shithub"

	// DialectStrict rejects the `github.*` alias outright with an Error.
	DialectStrict Dialect = "strict"
)
22
+
23
// NormalizeNamespace rewrites a `${{ … }}` body so that every
// `github.` namespace reference becomes `shithub.`. It returns the
// rewritten body and deprecated=true iff at least one rewrite
// happened. The caller (the expression evaluator in S41a expr/eval.go)
// turns deprecated=true into a Warning under DialectDefault and an
// Error under DialectStrict.
//
// Only occurrences at a namespace boundary are rewritten: `github.`
// must start the string or be preceded by a non-identifier byte, so
// identifiers such as `mygithub.foo` are left alone. This is the
// simple, non-tokenizing form used by parse.go; the expression
// evaluator performs the token-aware equivalent.
func NormalizeNamespace(body string) (rewritten string, deprecated bool) {
	const alias = "github."
	// Fast path: most bodies never mention the alias at all, so avoid
	// the builder allocation entirely.
	if !strings.Contains(body, alias) {
		return body, false
	}
	var out strings.Builder
	pos := 0
	for pos < len(body) {
		rel := strings.Index(body[pos:], alias)
		if rel < 0 {
			// No further occurrences — flush the tail and stop.
			out.WriteString(body[pos:])
			break
		}
		hit := pos + rel
		atBoundary := hit == 0
		if !atBoundary {
			prev := body[hit-1]
			isIdent := (prev >= 'a' && prev <= 'z') ||
				(prev >= 'A' && prev <= 'Z') ||
				(prev >= '0' && prev <= '9') ||
				prev == '_'
			atBoundary = !isIdent
		}
		if !atBoundary {
			// Inside a longer identifier (e.g. `mygithub.`): copy it
			// through untouched and keep scanning.
			out.WriteString(body[pos : hit+len(alias)])
			pos = hit + len(alias)
			continue
		}
		out.WriteString(body[pos:hit])
		out.WriteString("shithub.")
		pos = hit + len(alias)
		deprecated = true
	}
	return out.String(), deprecated
}
70
+
71
// isIdentChar reports whether c can appear inside an identifier:
// ASCII letters, digits, or underscore.
func isIdentChar(c byte) bool {
	switch {
	case c >= 'a' && c <= 'z':
		return true
	case c >= 'A' && c <= 'Z':
		return true
	case c >= '0' && c <= '9':
		return true
	default:
		return c == '_'
	}
}
internal/actions/workflow/parse.goadded
@@ -0,0 +1,622 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package workflow
4
+
5
+import (
6
+	"errors"
7
+	"fmt"
8
+	"strconv"
9
+	"strings"
10
+
11
+	"go.yaml.in/yaml/v3"
12
+)
13
+
14
var (
	// ErrTooLarge is returned when the workflow file exceeds
	// MaxWorkflowFileBytes. The cap is enforced before YAML decode so a
	// malicious file can't blow the parser's memory budget.
	ErrTooLarge = errors.New("workflow file exceeds size limit")

	// ErrTooManyAliases is returned when a YAML document expands more
	// than MaxYAMLAliases anchor references — the billion-laughs guard.
	ErrTooManyAliases = errors.New("workflow YAML has too many aliases (anchor-bomb guard)")
)
22
+
23
// Parse decodes a workflow file. It returns the parsed document, the
// list of diagnostics encountered (warnings non-fatal, errors fatal),
// and an error iff the file was unparseable.
//
// The parser is strict: unknown top-level keys, unknown step keys,
// and `uses:` references outside the AllowedUsesAliases set all
// produce diagnostics with Severity=Error and the function returns
// (nil, diagnostics, nil). Callers (S41b trigger pipeline,
// `shithubd admin actions parse` CLI) decide what to do with that.
//
// On a YAML-level error (malformed syntax, anchor bomb, oversized
// file), Parse returns (nil, diagnostics, err).
func Parse(src []byte) (*Workflow, []Diagnostic, error) {
	// Size cap first: reject before any YAML work so a huge file costs
	// nothing beyond a length check.
	if len(src) > MaxWorkflowFileBytes {
		return nil, []Diagnostic{{
			Message:  fmt.Sprintf("workflow file is %d bytes; limit is %d", len(src), MaxWorkflowFileBytes),
			Severity: Error,
		}}, ErrTooLarge
	}

	// We decode first to a yaml.Node so we can preserve doc order and
	// catch anchor abuse. Then we hand-walk into the typed Workflow.
	// NOTE(review): string(src) copies the whole file; bytes.NewReader
	// would avoid the copy — confirm before touching imports.
	var root yaml.Node
	dec := yaml.NewDecoder(strings.NewReader(string(src)))
	if err := dec.Decode(&root); err != nil {
		return nil, []Diagnostic{{
			Message:  "YAML decode: " + err.Error(),
			Severity: Error,
		}}, err
	}
	// A zero Kind means Decode yielded no node.
	// NOTE(review): a fully empty input may instead surface as io.EOF
	// from Decode above and never reach this branch — confirm which
	// path empty files actually take.
	if root.Kind == 0 {
		return nil, []Diagnostic{{
			Message:  "workflow file is empty",
			Severity: Error,
		}}, errors.New("empty workflow")
	}
	if root.Kind != yaml.DocumentNode {
		return nil, []Diagnostic{{
			Message:  "expected YAML document at root",
			Severity: Error,
		}}, errors.New("non-document root")
	}
	// Exactly one top-level mapping: multi-document files and scalar/
	// sequence roots are rejected outright.
	if len(root.Content) != 1 || root.Content[0].Kind != yaml.MappingNode {
		return nil, []Diagnostic{{
			Message:  "workflow must be a YAML mapping at the top level",
			Severity: Error,
		}}, errors.New("non-mapping root")
	}
	// Anchor-bomb guard: count alias dereferences in the decoded tree
	// before walking it.
	if aliases := countAliases(root.Content[0], 0); aliases > MaxYAMLAliases {
		return nil, []Diagnostic{{
			Message:  fmt.Sprintf("workflow has %d alias references; limit is %d", aliases, MaxYAMLAliases),
			Severity: Error,
		}}, ErrTooManyAliases
	}

	w := &Workflow{
		Env:  map[string]Value{},
		Jobs: nil,
	}
	var diags []Diagnostic
	mapping := root.Content[0]

	// Top-level keys are walked deterministically. Unknown keys produce
	// diagnostics so workflow authors catch typos at parse time.
	// mapping.Content alternates key node, value node, key, value, …
	for i := 0; i < len(mapping.Content); i += 2 {
		k := mapping.Content[i]
		v := mapping.Content[i+1]
		switch k.Value {
		case "name":
			if v.Kind != yaml.ScalarNode {
				diags = append(diags, errAt("name", "must be a scalar string"))
				continue
			}
			w.Name = v.Value
		case "on":
			ts, ds := parseOn(v)
			w.On = ts
			diags = append(diags, ds...)
		case "permissions":
			perms, ds := parsePermissions(v, "permissions")
			w.Permissions = perms
			diags = append(diags, ds...)
		case "env":
			env, ds := parseEnv(v, "env")
			w.Env = env
			diags = append(diags, ds...)
		case "concurrency":
			c, ds := parseConcurrency(v, "concurrency")
			w.Concurrency = c
			diags = append(diags, ds...)
		case "jobs":
			jobs, ds := parseJobs(v)
			w.Jobs = jobs
			diags = append(diags, ds...)
		default:
			diags = append(diags, errAt(k.Value, "unknown top-level key (allowed: name, on, permissions, env, concurrency, jobs)"))
		}
	}

	// The "must declare" checks are suppressed once any error exists so
	// a broken file isn't additionally blamed for missing sections.
	// (Note the ordering: if the jobs check fires first, the `on` check
	// is suppressed by it.)
	if len(w.Jobs) == 0 && !hasError(diags) {
		diags = append(diags, errAt("jobs", "workflow must declare at least one job"))
	}
	if !triggerSetIsNonEmpty(w.On) && !hasError(diags) {
		diags = append(diags, errAt("on", "workflow must declare at least one trigger"))
	}

	if hasError(diags) {
		return nil, diags, nil
	}
	return w, diags, nil
}
134
+
135
+// countAliases walks the YAML node graph and returns the number of
136
+// alias dereferences. Used only as the anchor-bomb guard; we don't
137
+// resolve aliases ourselves (yaml.v3 does that during Decode).
138
+func countAliases(n *yaml.Node, depth int) int {
139
+	if n == nil || depth > 100 {
140
+		return 0
141
+	}
142
+	count := 0
143
+	if n.Kind == yaml.AliasNode {
144
+		count++
145
+	}
146
+	for _, c := range n.Content {
147
+		count += countAliases(c, depth+1)
148
+		if count > MaxYAMLAliases {
149
+			return count
150
+		}
151
+	}
152
+	return count
153
+}
154
+
155
+// parseOn handles the `on:` block in its three documented shapes:
156
+//   - shorthand string: `on: push`
157
+//   - shorthand list: `on: [push, pull_request]`
158
+//   - mapping: `on: { push: { branches: [main] }, schedule: [...] }`
159
+func parseOn(n *yaml.Node) (TriggerSet, []Diagnostic) {
160
+	var ts TriggerSet
161
+	var diags []Diagnostic
162
+	switch n.Kind {
163
+	case yaml.ScalarNode:
164
+		applyEventName(&ts, n.Value, &diags, "on")
165
+	case yaml.SequenceNode:
166
+		for _, item := range n.Content {
167
+			if item.Kind != yaml.ScalarNode {
168
+				diags = append(diags, errAt("on", "list items must be event names"))
169
+				continue
170
+			}
171
+			applyEventName(&ts, item.Value, &diags, "on")
172
+		}
173
+	case yaml.MappingNode:
174
+		for i := 0; i < len(n.Content); i += 2 {
175
+			k := n.Content[i]
176
+			v := n.Content[i+1]
177
+			switch k.Value {
178
+			case "push":
179
+				ts.Push = parsePushTrigger(v, &diags)
180
+			case "pull_request":
181
+				ts.PullRequest = parsePullRequestTrigger(v, &diags)
182
+			case "schedule":
183
+				ts.Schedule = parseScheduleTriggers(v, &diags)
184
+			case "workflow_dispatch":
185
+				ts.WorkflowDispatch = parseDispatchTrigger(v, &diags)
186
+			default:
187
+				diags = append(diags, errAt("on."+k.Value, "unknown event type (allowed: push, pull_request, schedule, workflow_dispatch)"))
188
+			}
189
+		}
190
+	default:
191
+		diags = append(diags, errAt("on", "must be a string, sequence, or mapping"))
192
+	}
193
+	return ts, diags
194
+}
195
+
196
+func applyEventName(ts *TriggerSet, name string, diags *[]Diagnostic, path string) {
197
+	switch name {
198
+	case "push":
199
+		if ts.Push == nil {
200
+			ts.Push = &PushTrigger{}
201
+		}
202
+	case "pull_request":
203
+		if ts.PullRequest == nil {
204
+			ts.PullRequest = &PullRequestTrigger{}
205
+		}
206
+	case "workflow_dispatch":
207
+		if ts.WorkflowDispatch == nil {
208
+			ts.WorkflowDispatch = &WorkflowDispatchTrigger{}
209
+		}
210
+	default:
211
+		*diags = append(*diags, errAt(path, "unknown event "+strconv.Quote(name)+" (allowed: push, pull_request, workflow_dispatch — schedule requires the mapping form)"))
212
+	}
213
+}
214
+
215
+func parsePushTrigger(n *yaml.Node, diags *[]Diagnostic) *PushTrigger {
216
+	pt := &PushTrigger{}
217
+	if n.Kind == yaml.ScalarNode && n.Value == "" {
218
+		return pt
219
+	}
220
+	if n.Kind != yaml.MappingNode {
221
+		*diags = append(*diags, errAt("on.push", "must be a mapping"))
222
+		return pt
223
+	}
224
+	for i := 0; i < len(n.Content); i += 2 {
225
+		k := n.Content[i]
226
+		v := n.Content[i+1]
227
+		switch k.Value {
228
+		case "branches":
229
+			pt.Branches = scalarList(v, "on.push.branches", diags)
230
+		case "tags":
231
+			pt.Tags = scalarList(v, "on.push.tags", diags)
232
+		case "paths":
233
+			pt.Paths = scalarList(v, "on.push.paths", diags)
234
+		default:
235
+			*diags = append(*diags, errAt("on.push."+k.Value, "unknown push filter (allowed: branches, tags, paths)"))
236
+		}
237
+	}
238
+	return pt
239
+}
240
+
241
+func parsePullRequestTrigger(n *yaml.Node, diags *[]Diagnostic) *PullRequestTrigger {
242
+	prt := &PullRequestTrigger{}
243
+	if n.Kind == yaml.ScalarNode && n.Value == "" {
244
+		return prt
245
+	}
246
+	if n.Kind != yaml.MappingNode {
247
+		*diags = append(*diags, errAt("on.pull_request", "must be a mapping"))
248
+		return prt
249
+	}
250
+	for i := 0; i < len(n.Content); i += 2 {
251
+		k := n.Content[i]
252
+		v := n.Content[i+1]
253
+		switch k.Value {
254
+		case "types":
255
+			prt.Types = scalarList(v, "on.pull_request.types", diags)
256
+		case "branches":
257
+			prt.Branches = scalarList(v, "on.pull_request.branches", diags)
258
+		case "paths":
259
+			prt.Paths = scalarList(v, "on.pull_request.paths", diags)
260
+		default:
261
+			*diags = append(*diags, errAt("on.pull_request."+k.Value, "unknown filter (allowed: types, branches, paths)"))
262
+		}
263
+	}
264
+	return prt
265
+}
266
+
267
+func parseScheduleTriggers(n *yaml.Node, diags *[]Diagnostic) []ScheduleTrigger {
268
+	if n.Kind != yaml.SequenceNode {
269
+		*diags = append(*diags, errAt("on.schedule", "must be a sequence of cron entries"))
270
+		return nil
271
+	}
272
+	out := make([]ScheduleTrigger, 0, len(n.Content))
273
+	for i, entry := range n.Content {
274
+		if entry.Kind != yaml.MappingNode {
275
+			*diags = append(*diags, errAt(fmt.Sprintf("on.schedule[%d]", i), "must be a mapping with a `cron:` key"))
276
+			continue
277
+		}
278
+		var s ScheduleTrigger
279
+		for j := 0; j < len(entry.Content); j += 2 {
280
+			k := entry.Content[j]
281
+			v := entry.Content[j+1]
282
+			switch k.Value {
283
+			case "cron":
284
+				if v.Kind != yaml.ScalarNode {
285
+					*diags = append(*diags, errAt(fmt.Sprintf("on.schedule[%d].cron", i), "must be a scalar cron expression"))
286
+					continue
287
+				}
288
+				s.Cron = v.Value
289
+			default:
290
+				*diags = append(*diags, errAt(fmt.Sprintf("on.schedule[%d].%s", i, k.Value), "unknown schedule key (allowed: cron)"))
291
+			}
292
+		}
293
+		if s.Cron == "" {
294
+			*diags = append(*diags, errAt(fmt.Sprintf("on.schedule[%d]", i), "missing required cron expression"))
295
+			continue
296
+		}
297
+		out = append(out, s)
298
+	}
299
+	return out
300
+}
301
+
302
+func parseDispatchTrigger(n *yaml.Node, diags *[]Diagnostic) *WorkflowDispatchTrigger {
303
+	wdt := &WorkflowDispatchTrigger{}
304
+	if n.Kind == yaml.ScalarNode && n.Value == "" {
305
+		return wdt
306
+	}
307
+	if n.Kind != yaml.MappingNode {
308
+		*diags = append(*diags, errAt("on.workflow_dispatch", "must be a mapping"))
309
+		return wdt
310
+	}
311
+	for i := 0; i < len(n.Content); i += 2 {
312
+		k := n.Content[i]
313
+		v := n.Content[i+1]
314
+		switch k.Value {
315
+		case "inputs":
316
+			wdt.Inputs = parseDispatchInputs(v, diags)
317
+		default:
318
+			*diags = append(*diags, errAt("on.workflow_dispatch."+k.Value, "unknown dispatch key (allowed: inputs)"))
319
+		}
320
+	}
321
+	return wdt
322
+}
323
+
324
// parseDispatchInputs parses the `on.workflow_dispatch.inputs`
// mapping of input-name → spec into a DispatchInput slice in document
// order. Malformed specs are diagnosed and skipped; `type` defaults
// to "string" when absent.
func parseDispatchInputs(n *yaml.Node, diags *[]Diagnostic) []DispatchInput {
	if n.Kind != yaml.MappingNode {
		*diags = append(*diags, errAt("on.workflow_dispatch.inputs", "must be a mapping of input-name → spec"))
		return nil
	}
	out := make([]DispatchInput, 0, len(n.Content)/2)
	for i := 0; i < len(n.Content); i += 2 {
		nameNode := n.Content[i]
		specNode := n.Content[i+1]
		input := DispatchInput{Name: nameNode.Value}
		if specNode.Kind != yaml.MappingNode {
			*diags = append(*diags, errAt("on.workflow_dispatch.inputs."+nameNode.Value, "must be a mapping"))
			continue
		}
		for j := 0; j < len(specNode.Content); j += 2 {
			k := specNode.Content[j]
			v := specNode.Content[j+1]
			switch k.Value {
			case "description":
				input.Description = v.Value
			case "type":
				// NOTE(review): `type` is not validated against a
				// closed set here — confirm whether unknown types
				// should be diagnosed at parse time or downstream.
				input.Type = v.Value
			case "default":
				input.Default = v.Value
			case "required":
				// NOTE(review): compares against the literal "true",
				// so YAML 1.1 spellings like `yes`/`True` parse as
				// false — confirm this is intended strictness.
				input.Required = v.Value == "true"
			case "options":
				input.Options = scalarList(v, "on.workflow_dispatch.inputs."+nameNode.Value+".options", diags)
			default:
				*diags = append(*diags, errAt("on.workflow_dispatch.inputs."+nameNode.Value+"."+k.Value, "unknown input key"))
			}
		}
		// Absent `type:` defaults to a free-form string input.
		if input.Type == "" {
			input.Type = "string"
		}
		out = append(out, input)
	}
	return out
}
363
+
364
+func parsePermissions(n *yaml.Node, path string) (Permissions, []Diagnostic) {
365
+	var diags []Diagnostic
366
+	p := Permissions{Per: map[string]PermissionLevel{}}
367
+	switch n.Kind {
368
+	case yaml.ScalarNode:
369
+		switch n.Value {
370
+		case "read-all", "write-all", "none":
371
+			p.Mode = n.Value
372
+		default:
373
+			diags = append(diags, errAt(path, "unknown shorthand (allowed: read-all, write-all, none)"))
374
+		}
375
+	case yaml.MappingNode:
376
+		for i := 0; i < len(n.Content); i += 2 {
377
+			k := n.Content[i]
378
+			v := n.Content[i+1]
379
+			lvl := PermissionLevel(v.Value)
380
+			if lvl != PermissionNone && lvl != PermissionRead && lvl != PermissionWrite {
381
+				diags = append(diags, errAt(path+"."+k.Value, "permission level must be none, read, or write"))
382
+				continue
383
+			}
384
+			p.Per[k.Value] = lvl
385
+		}
386
+	default:
387
+		diags = append(diags, errAt(path, "must be a shorthand string or a mapping"))
388
+	}
389
+	return p, diags
390
+}
391
+
392
+func parseEnv(n *yaml.Node, path string) (map[string]Value, []Diagnostic) {
393
+	var diags []Diagnostic
394
+	if n.Kind != yaml.MappingNode {
395
+		diags = append(diags, errAt(path, "must be a mapping"))
396
+		return nil, diags
397
+	}
398
+	out := map[string]Value{}
399
+	for i := 0; i < len(n.Content); i += 2 {
400
+		k := n.Content[i]
401
+		v := n.Content[i+1]
402
+		if v.Kind != yaml.ScalarNode {
403
+			diags = append(diags, errAt(path+"."+k.Value, "env values must be scalars"))
404
+			continue
405
+		}
406
+		// We tag env values literal-trusted here. The expression
407
+		// evaluator (S41a expr/eval.go) walks the Raw at dispatch
408
+		// time and propagates taint when the value contains
409
+		// `${{ shithub.event.X }}` references.
410
+		out[k.Value] = V(v.Value)
411
+	}
412
+	return out, diags
413
+}
414
+
415
+func parseConcurrency(n *yaml.Node, path string) (Concurrency, []Diagnostic) {
416
+	var diags []Diagnostic
417
+	c := Concurrency{}
418
+	switch n.Kind {
419
+	case yaml.ScalarNode:
420
+		c.Group = V(n.Value)
421
+	case yaml.MappingNode:
422
+		for i := 0; i < len(n.Content); i += 2 {
423
+			k := n.Content[i]
424
+			v := n.Content[i+1]
425
+			switch k.Value {
426
+			case "group":
427
+				c.Group = V(v.Value)
428
+			case "cancel-in-progress":
429
+				c.CancelInProgress = v.Value == "true"
430
+			default:
431
+				diags = append(diags, errAt(path+"."+k.Value, "unknown concurrency key (allowed: group, cancel-in-progress)"))
432
+			}
433
+		}
434
+	default:
435
+		diags = append(diags, errAt(path, "must be a string or mapping"))
436
+	}
437
+	return c, diags
438
+}
439
+
440
+func parseJobs(n *yaml.Node) ([]Job, []Diagnostic) {
441
+	var diags []Diagnostic
442
+	if n.Kind != yaml.MappingNode {
443
+		diags = append(diags, errAt("jobs", "must be a mapping of job-key → job-spec"))
444
+		return nil, diags
445
+	}
446
+	jobs := make([]Job, 0, len(n.Content)/2)
447
+	for i := 0; i < len(n.Content); i += 2 {
448
+		k := n.Content[i]
449
+		v := n.Content[i+1]
450
+		j, ds := parseJob(k.Value, v)
451
+		diags = append(diags, ds...)
452
+		jobs = append(jobs, j)
453
+	}
454
+	return jobs, diags
455
+}
456
+
457
+func parseJob(key string, n *yaml.Node) (Job, []Diagnostic) {
458
+	var diags []Diagnostic
459
+	j := Job{Key: key, TimeoutMinutes: 360}
460
+	if n.Kind != yaml.MappingNode {
461
+		diags = append(diags, errAt("jobs."+key, "job spec must be a mapping"))
462
+		return j, diags
463
+	}
464
+	for i := 0; i < len(n.Content); i += 2 {
465
+		k := n.Content[i]
466
+		v := n.Content[i+1]
467
+		path := "jobs." + key + "." + k.Value
468
+		switch k.Value {
469
+		case "name":
470
+			j.Name = v.Value
471
+		case "runs-on":
472
+			j.RunsOn = v.Value
473
+		case "needs":
474
+			if v.Kind == yaml.ScalarNode {
475
+				j.Needs = []string{v.Value}
476
+			} else {
477
+				j.Needs = scalarList(v, path, &diags)
478
+			}
479
+		case "if":
480
+			j.If = v.Value
481
+		case "timeout-minutes":
482
+			n, err := strconv.Atoi(v.Value)
483
+			if err != nil || n < 1 || n > 4320 {
484
+				diags = append(diags, errAt(path, "timeout-minutes must be an integer 1-4320"))
485
+				continue
486
+			}
487
+			j.TimeoutMinutes = n
488
+		case "permissions":
489
+			p, ds := parsePermissions(v, path)
490
+			j.Permissions = p
491
+			diags = append(diags, ds...)
492
+		case "env":
493
+			env, ds := parseEnv(v, path)
494
+			j.Env = env
495
+			diags = append(diags, ds...)
496
+		case "steps":
497
+			steps, ds := parseSteps(v, "jobs."+key)
498
+			j.Steps = steps
499
+			diags = append(diags, ds...)
500
+		default:
501
+			diags = append(diags, errAt(path, "unknown job key (allowed: name, runs-on, needs, if, timeout-minutes, permissions, env, steps)"))
502
+		}
503
+	}
504
+	if j.RunsOn == "" {
505
+		diags = append(diags, errAt("jobs."+key, "job missing required `runs-on:`"))
506
+	}
507
+	if len(j.Steps) == 0 {
508
+		diags = append(diags, errAt("jobs."+key, "job has no steps"))
509
+	}
510
+	return j, diags
511
+}
512
+
513
+func parseSteps(n *yaml.Node, jobPath string) ([]Step, []Diagnostic) {
514
+	var diags []Diagnostic
515
+	if n.Kind != yaml.SequenceNode {
516
+		diags = append(diags, errAt(jobPath+".steps", "must be a sequence"))
517
+		return nil, diags
518
+	}
519
+	steps := make([]Step, 0, len(n.Content))
520
+	for idx, item := range n.Content {
521
+		s, ds := parseStep(idx, item, jobPath)
522
+		diags = append(diags, ds...)
523
+		steps = append(steps, s)
524
+	}
525
+	return steps, diags
526
+}
527
+
528
// parseStep parses one entry of a job's `steps:` sequence. idx is the
// zero-based position, used only to build diagnostic paths. Every
// step must carry exactly one of `run:` or `uses:`, and `uses:` must
// be on the v1 allowlist (see aliases.go).
func parseStep(idx int, n *yaml.Node, jobPath string) (Step, []Diagnostic) {
	var diags []Diagnostic
	s := Step{}
	stepPath := fmt.Sprintf("%s.steps[%d]", jobPath, idx)
	if n.Kind != yaml.MappingNode {
		diags = append(diags, errAt(stepPath, "step must be a mapping"))
		return s, diags
	}
	for i := 0; i < len(n.Content); i += 2 {
		k := n.Content[i]
		v := n.Content[i+1]
		path := stepPath + "." + k.Value
		switch k.Value {
		case "id":
			s.ID = v.Value
		case "name":
			s.Name = v.Value
		case "if":
			s.If = v.Value
		case "run":
			s.Run = v.Value
		case "uses":
			s.Uses = v.Value
		case "with":
			// `with:` reuses parseEnv, so its diagnostics read
			// "env values must be scalars"; the path still points at
			// this step's `with` key.
			env, ds := parseEnv(v, path)
			s.With = env
			diags = append(diags, ds...)
		case "working-directory":
			s.WorkingDirectory = v.Value
		case "env":
			env, ds := parseEnv(v, path)
			s.Env = env
			diags = append(diags, ds...)
		case "continue-on-error":
			// Only the exact literal "true" enables it; any other
			// scalar (including "True"/"yes") yields false.
			s.ContinueOnError = v.Value == "true"
		default:
			diags = append(diags, errAt(path, "unknown step key (allowed: id, name, if, run, uses, with, working-directory, env, continue-on-error)"))
		}
	}
	// Exactly one of run/uses: neither and both are each errors.
	if s.Run == "" && s.Uses == "" {
		diags = append(diags, errAt(stepPath, "step must have either `run:` or `uses:`"))
	}
	if s.Run != "" && s.Uses != "" {
		diags = append(diags, errAt(stepPath, "step cannot have both `run:` and `uses:`"))
	}
	if s.Uses != "" && !IsAllowedUses(s.Uses) {
		diags = append(diags, errAt(stepPath+".uses",
			"unsupported `uses:` reference; v1 supports only "+
				"actions/checkout@v4, shithub/upload-artifact@v1, shithub/download-artifact@v1"))
	}
	return s, diags
}
580
+
581
+// scalarList parses either a single scalar or a sequence of scalars
582
+// into a []string. Used for branches/tags/paths/types-style lists.
583
+func scalarList(n *yaml.Node, path string, diags *[]Diagnostic) []string {
584
+	switch n.Kind {
585
+	case yaml.ScalarNode:
586
+		return []string{n.Value}
587
+	case yaml.SequenceNode:
588
+		out := make([]string, 0, len(n.Content))
589
+		for _, item := range n.Content {
590
+			if item.Kind != yaml.ScalarNode {
591
+				*diags = append(*diags, errAt(path, "list items must be scalars"))
592
+				continue
593
+			}
594
+			out = append(out, item.Value)
595
+		}
596
+		return out
597
+	default:
598
+		*diags = append(*diags, errAt(path, "must be a string or sequence of strings"))
599
+		return nil
600
+	}
601
+}
602
+
603
+func errAt(path, msg string) Diagnostic {
604
+	return Diagnostic{Path: path, Message: msg, Severity: Error}
605
+}
606
+
607
+// triggerSetIsNonEmpty reports whether at least one trigger is declared.
608
+// TriggerSet contains slices, so it isn't comparable; this helper avoids
609
+// per-call boilerplate at the parse-validate site.
610
+func triggerSetIsNonEmpty(ts TriggerSet) bool {
611
+	return ts.Push != nil || ts.PullRequest != nil ||
612
+		len(ts.Schedule) > 0 || ts.WorkflowDispatch != nil
613
+}
614
+
615
+func hasError(diags []Diagnostic) bool {
616
+	for _, d := range diags {
617
+		if d.Severity == Error {
618
+			return true
619
+		}
620
+	}
621
+	return false
622
+}
internal/actions/workflow/types.goadded
@@ -0,0 +1,268 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+// Package workflow parses .shithub/workflows/*.yml files into the typed
+// Workflow tree that S41b's trigger pipeline, S41c's secret resolver, and
+// S41d's runner all consume.
6
+//
7
+// The parser is intentionally strict: unknown top-level keys, unknown
8
+// step keys, unknown `uses:` references, and `${{ … }}` expressions
9
+// outside the allowlist all produce diagnostics. Workflow authors
10
+// catch their mistakes immediately instead of silently shipping a
11
+// workflow that does nothing.
12
+//
13
+// Every value that can carry user-controlled text (event payload
14
+// fields like PR title, branch name, etc.) is tagged with
15
+// Tainted=true. The runner's exec layer (S41d) refuses to interpolate
16
+// tainted values into shell strings — they compile to ${SHITHUB_INPUT_*}
17
+// envvar references set safely by the runner. This is the load-bearing
18
+// contract for expression-injection prevention.
19
+package workflow
20
+
21
// Workflow is the parsed top-level document.
type Workflow struct {
	// Name is the optional human-readable name (`name:` at root). When
	// blank, the UI defaults to the workflow file's basename.
	Name string

	// On lists trigger predicates. The trigger pipeline (S41b) matches
	// domain_events against these.
	On TriggerSet

	// Permissions is the workflow-level permissions block; jobs may
	// further narrow but cannot widen.
	Permissions Permissions

	// Env is the workflow-level env map. Values may carry expressions;
	// resolution + taint propagation happen at dispatch time.
	Env map[string]Value

	// Concurrency is the workflow-level concurrency control. Honored
	// from S41g; carried in v1 so the schema doesn't churn.
	Concurrency Concurrency

	// Jobs holds one entry per `jobs.<key>:` in the YAML document.
	// It is a slice rather than a map so document order is preserved
	// and re-rendering matches the author's layout; look a job up by
	// its Job.Key.
	Jobs []Job
}
47
+
48
// TriggerSet is the parsed `on:` block. We support: push, pull_request,
// schedule (cron), workflow_dispatch.
//
// Pointer fields distinguish "trigger not declared" (nil) from
// "declared with no filters" (non-nil zero value); see
// triggerSetIsNonEmpty in parse.go.
type TriggerSet struct {
	Push             *PushTrigger
	PullRequest      *PullRequestTrigger
	Schedule         []ScheduleTrigger
	WorkflowDispatch *WorkflowDispatchTrigger
}
56
+
57
// PushTrigger filters which pushes match. An empty (nil) Branches,
// Tags, or Paths slice means "all"; non-empty applies the standard
// GHA glob semantics. Both include and exclude (negative-glob with
// leading !) are accepted and are consulted in declaration order.
type PushTrigger struct {
	Branches []string
	Tags     []string
	Paths    []string
}
66
+
67
// PullRequestTrigger filters which PR events match. Types is the
// list of activity types (opened, synchronize, reopened, …); empty
// slices mean "no filter", matching PushTrigger's semantics.
type PullRequestTrigger struct {
	Types    []string
	Branches []string
	Paths    []string
}
74
+
75
// ScheduleTrigger declares a single cron entry (`on.schedule[].cron`).
// Multiple entries are allowed; each fires independently.
type ScheduleTrigger struct {
	// Cron is the raw cron expression as written in the workflow file.
	Cron string
}
80
+
81
// WorkflowDispatchTrigger declares the manual-trigger surface. Inputs
// are typed parameters the dispatcher prompts for; it may be empty
// for a workflow dispatched with no parameters.
type WorkflowDispatchTrigger struct {
	Inputs []DispatchInput
}
86
+
87
// DispatchInput is a single typed input for workflow_dispatch. v1
// accepts string, boolean, choice, environment.
type DispatchInput struct {
	Name        string
	Description string
	Type        string // "string" | "boolean" | "choice" | "environment"
	Default     string // raw string form regardless of Type
	Required    bool
	Options     []string // populated when Type=="choice"
}
97
+
98
// Permissions is the GitHub-Actions-equivalent permissions block.
// The zero value (Mode == "" with nil Per) means "default", which is
// read access to contents. Per keys mirror GHA permission names:
// contents, pull-requests, issues, actions, deployments, packages,
// statuses, security-events, etc.
//
// A workflow may set Mode to "none" to deny everything, or to
// "read-all"/"write-all" to grant everything at that level (subject
// to the actor having repo write).
type Permissions struct {
	Mode string                     // "" | "read-all" | "write-all" | "none"
	Per  map[string]PermissionLevel // keyed by GHA permission name
}
109
+
110
// PermissionLevel mirrors GHA's per-permission grants. It is
// string-typed so the YAML spelling is carried as-is.
type PermissionLevel string

const (
	PermissionNone  PermissionLevel = "none"
	PermissionRead  PermissionLevel = "read"
	PermissionWrite PermissionLevel = "write"
)
118
+
119
// Concurrency is the workflow-level concurrency control. Group is an
// expression evaluated against the trigger context to produce the
// group key. CancelInProgress=true cancels older runs for the same
// group when a new one is enqueued. The zero value (empty Group)
// means no concurrency control is declared.
type Concurrency struct {
	Group            Value
	CancelInProgress bool
}
127
+
128
// Job is one entry under `jobs:`.
type Job struct {
	// Key is the YAML map key: `jobs.<key>:`. Identifier-shape, used
	// for `needs:` references and as the URL slug.
	Key string

	// Name is the optional human-readable name (`jobs.<key>.name:`).
	// Falls back to Key when blank.
	Name string

	// RunsOn is the runner-selector string ("ubuntu-latest", "self-hosted",
	// "nix-flake", etc.). Mapped at runner-claim time to the actual
	// container image / engine.
	RunsOn string

	// Needs lists job keys this job depends on. The trigger pipeline
	// resolves these to job IDs at insert time and the runner respects
	// them at dispatch time (S41b/d).
	Needs []string

	// If is the job-level conditional, carried as the raw expression
	// source. Evaluated against the trigger context just before
	// dispatch; false → skipped.
	If string

	// TimeoutMinutes bounds total job runtime. Default 360 (6h),
	// matches GHA. Range 1-4320 enforced by the parser.
	TimeoutMinutes int

	// Permissions narrows the workflow-level permissions for this job.
	// Cannot widen.
	Permissions Permissions

	// Env is per-job env overlay. Merged on top of workflow Env.
	Env map[string]Value

	// Steps run serially. Order is YAML document order.
	Steps []Step
}
166
+
167
// Step is one entry under a job's `steps:`. Exactly one of Run or
// Uses is non-empty; the parser diagnoses both-set and neither-set.
type Step struct {
	// ID is the optional `id:` for cross-step references via
	// ${{ steps.<id>.outputs.X }}. Outputs themselves are v2; we carry
	// the id field now so the schema doesn't churn.
	ID string

	// Name is the human-readable label. Falls back to a synthesized
	// "Run <first-line-of-run-command>" when blank.
	Name string

	// If is the step-level conditional, carried as the raw expression
	// source. Evaluated mid-job.
	If string

	// Run is the shell command. Empty when this step uses an alias.
	// May contain `${{ … }}` expressions; the parser resolves them
	// into Value{Tainted: …} nodes.
	Run string

	// Uses is the magic-alias slug, restricted to the closed allowlist
	// in aliases.go. Exactly one of Run or Uses is non-empty per the
	// migration's CHECK constraint.
	Uses string

	// With is the input map for `uses:` aliases. Forwarded to the
	// alias-specific runner step (e.g., upload-artifact's `name:`).
	With map[string]Value

	// WorkingDirectory overrides the step's cwd.
	WorkingDirectory string

	// Env is per-step env overlay. Merged on top of job Env.
	Env map[string]Value

	// ContinueOnError lets the job proceed past this step's failure.
	ContinueOnError bool
}
203
+
204
// Value is a parsed value that may have come from a literal string,
// a `${{ … }}` expression, or a mix. Tainted=true when the value
// transitively depends on an untrusted source (event payload fields
// the workflow author doesn't control). The runner refuses to
// interpolate Tainted values into shell strings.
//
// Raw is the original source string — useful for diagnostics and for
// the runner's input-binding logic. The zero Value is an empty,
// untainted literal.
type Value struct {
	Raw     string
	Tainted bool
}
216
+
217
+// V is a tiny constructor for trusted (literal) values, used by the
218
+// parser when it knows the source is the workflow file itself.
219
+func V(raw string) Value { return Value{Raw: raw, Tainted: false} }
220
+
221
+// Tainted is the constructor for untrusted-source values. The
222
+// expression evaluator (internal/actions/expr) calls this when it
223
+// resolves a reference into the `shithub.event.*` namespace.
224
+func Tainted(raw string) Value { return Value{Raw: raw, Tainted: true} }
225
+
226
// Diagnostic is a parser finding. Severity controls whether parsing
// continues; Path is dot-notated for UI display ("jobs.test.steps[2].run").
// Note the zero Severity is Error, so a Diagnostic built without an
// explicit Severity is treated as fatal.
type Diagnostic struct {
	Path     string
	Message  string
	Severity Severity
}
233
+
234
// Severity classifies a diagnostic. The zero value is Error
// (Error == iota == 0) — the safe default for a strict parser.
type Severity int

const (
	// Error stops parsing — the workflow is unusable.
	Error Severity = iota

	// Warning is non-fatal but flagged in the UI. Used for the
	// `${{ github.* }}` deprecation alias.
	Warning
)
245
+
246
+// String renders the diagnostic for the admin parse subcommand and
247
+// future UI surfaces.
248
+func (d Diagnostic) String() string {
249
+	prefix := "error"
250
+	if d.Severity == Warning {
251
+		prefix = "warning"
252
+	}
253
+	if d.Path == "" {
254
+		return prefix + ": " + d.Message
255
+	}
256
+	return prefix + " at " + d.Path + ": " + d.Message
257
+}
258
+
259
// MaxWorkflowFileBytes is the parser-side size cap. Files larger than
// this are rejected before YAML decode begins, bounding decode cost
// up front. 64 KB is described as the GHA limit minus a small margin —
// NOTE(review): confirm against GHA's current documented limit.
const MaxWorkflowFileBytes = 64 * 1024
263
+
264
// MaxYAMLAliases bounds anchor expansions per document — the
// billion-laughs guard. yaml.v3 doesn't expose a direct knob; we
// track aliases at decode time via a custom Unmarshaler in parse.go
// and cap at this value.
const MaxYAMLAliases = 100