| 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | |
| 3 | // Package expr is the strict-allowlist expression evaluator for |
| 4 | // `${{ … }}` blocks in workflow files. |
| 5 | // |
| 6 | // The evaluator is intentionally tiny: |
| 7 | // - Allowed namespaces: secrets, env, vars, shithub.event, shithub.run_id, |
| 8 | // shithub.sha, shithub.ref, shithub.actor. |
| 9 | // - Allowed functions: contains, startsWith, endsWith, |
| 10 | // success(), failure(), always(), cancelled(). |
// - Operators: && || ! == != only — the grammar has no other
//   operators (no arithmetic, no concat operator) in v1.
| 13 | // |
| 14 | // Anything outside that set is an evaluation error. This is the load- |
| 15 | // bearing security surface — the more we accept, the more attack |
| 16 | // surface we open. Future expansion goes through a reviewer-required |
| 17 | // note in the commit message (per the campaign §"Risks": "block any |
| 18 | // S41 PR that adds an evaluator function without a security note"). |
| 19 | // |
| 20 | // Every produced Value carries a Tainted bool. References that |
| 21 | // resolve into the shithub.event.* namespace are tagged Tainted=true; |
| 22 | // taint propagates through string concatenation, comparisons (the |
| 23 | // boolean output isn't tainted, but the comparison operands' values |
| 24 | // are checked), and function returns. |
| 25 | package expr |
| 26 | |
| 27 | import ( |
| 28 | "fmt" |
| 29 | "strings" |
| 30 | "unicode" |
| 31 | "unicode/utf8" |
| 32 | ) |
| 33 | |
// TokenKind classifies a lexed token.
type TokenKind int

const (
	TokInvalid TokenKind = iota
	TokIdent   // foo, secrets, shithub
	TokDot     // .
	TokLParen  // (
	TokRParen  // )
	TokComma   // ,
	TokString  // 'literal' (single-quoted only — GHA convention)
	TokBool    // true | false
	TokNull    // null
	TokAnd     // &&
	TokOr      // ||
	TokNot     // !
	TokEq      // ==
	TokNe      // !=
	TokEOF
)

// Token is a single lexed unit. Pos is the byte offset in the original
// source (useful for diagnostic spans).
type Token struct {
	Kind  TokenKind
	Value string
	Pos   int
}

// tokenNames holds the human-readable name for each kind, indexed by
// the TokenKind value. TokInvalid deliberately has no entry; any kind
// without a name renders as "invalid".
var tokenNames = [...]string{
	TokIdent:  "identifier",
	TokDot:    ".",
	TokLParen: "(",
	TokRParen: ")",
	TokComma:  ",",
	TokString: "string literal",
	TokBool:   "boolean",
	TokNull:   "null",
	TokAnd:    "&&",
	TokOr:     "||",
	TokNot:    "!",
	TokEq:     "==",
	TokNe:     "!=",
	TokEOF:    "end of input",
}

// String renders the kind as the human-readable name used in parser
// diagnostics ("identifier", "&&", "end of input", …). Out-of-range or
// invalid kinds render as "invalid".
func (k TokenKind) String() string {
	if k > TokInvalid && int(k) < len(tokenNames) {
		if name := tokenNames[k]; name != "" {
			return name
		}
	}
	return "invalid"
}
| 96 | |
| 97 | // Lex returns the token stream for src or an error on the first lexical |
| 98 | // problem. Whitespace is skipped silently. The lexer doesn't strip the |
| 99 | // surrounding `${{ … }}` — the caller does that before calling Lex. |
| 100 | // |
| 101 | // The main loop walks runes (not bytes) so multi-byte UTF-8 characters |
| 102 | // in identifiers — e.g., a Greek-letter variable name in an env-bag |
| 103 | // key — produce correct identifier boundaries instead of mangled |
| 104 | // "unexpected character" errors at continuation bytes. |
| 105 | func Lex(src string) ([]Token, error) { |
| 106 | var out []Token |
| 107 | i := 0 |
| 108 | for i < len(src) { |
| 109 | r, size := utf8.DecodeRuneInString(src[i:]) |
| 110 | if r == utf8.RuneError && size <= 1 { |
| 111 | return nil, fmt.Errorf("expr: invalid UTF-8 at offset %d", i) |
| 112 | } |
| 113 | switch { |
| 114 | case r == ' ' || r == '\t' || r == '\n' || r == '\r': |
| 115 | i += size |
| 116 | case r == '.': |
| 117 | out = append(out, Token{Kind: TokDot, Value: ".", Pos: i}) |
| 118 | i += size |
| 119 | case r == '(': |
| 120 | out = append(out, Token{Kind: TokLParen, Value: "(", Pos: i}) |
| 121 | i += size |
| 122 | case r == ')': |
| 123 | out = append(out, Token{Kind: TokRParen, Value: ")", Pos: i}) |
| 124 | i += size |
| 125 | case r == ',': |
| 126 | out = append(out, Token{Kind: TokComma, Value: ",", Pos: i}) |
| 127 | i += size |
| 128 | case r == '\'': |
| 129 | tok, n, err := lexString(src[i:], i) |
| 130 | if err != nil { |
| 131 | return nil, err |
| 132 | } |
| 133 | out = append(out, tok) |
| 134 | i += n |
| 135 | case r == '&': |
| 136 | if i+1 < len(src) && src[i+1] == '&' { |
| 137 | out = append(out, Token{Kind: TokAnd, Value: "&&", Pos: i}) |
| 138 | i += 2 |
| 139 | } else { |
| 140 | return nil, fmt.Errorf("expr: stray '&' at offset %d (expected '&&')", i) |
| 141 | } |
| 142 | case r == '|': |
| 143 | if i+1 < len(src) && src[i+1] == '|' { |
| 144 | out = append(out, Token{Kind: TokOr, Value: "||", Pos: i}) |
| 145 | i += 2 |
| 146 | } else { |
| 147 | return nil, fmt.Errorf("expr: stray '|' at offset %d (expected '||')", i) |
| 148 | } |
| 149 | case r == '!': |
| 150 | if i+1 < len(src) && src[i+1] == '=' { |
| 151 | out = append(out, Token{Kind: TokNe, Value: "!=", Pos: i}) |
| 152 | i += 2 |
| 153 | } else { |
| 154 | out = append(out, Token{Kind: TokNot, Value: "!", Pos: i}) |
| 155 | i += size |
| 156 | } |
| 157 | case r == '=': |
| 158 | if i+1 < len(src) && src[i+1] == '=' { |
| 159 | out = append(out, Token{Kind: TokEq, Value: "==", Pos: i}) |
| 160 | i += 2 |
| 161 | } else { |
| 162 | return nil, fmt.Errorf("expr: stray '=' at offset %d (expected '==')", i) |
| 163 | } |
| 164 | case isIdentStart(r): |
| 165 | tok, n := lexIdent(src[i:], i) |
| 166 | out = append(out, tok) |
| 167 | i += n |
| 168 | default: |
| 169 | return nil, fmt.Errorf("expr: unexpected character %q at offset %d", r, i) |
| 170 | } |
| 171 | } |
| 172 | out = append(out, Token{Kind: TokEOF, Pos: i}) |
| 173 | return out, nil |
| 174 | } |
| 175 | |
| 176 | func lexString(src string, basePos int) (Token, int, error) { |
| 177 | if len(src) < 2 { |
| 178 | return Token{}, 0, fmt.Errorf("expr: unterminated string at offset %d", basePos) |
| 179 | } |
| 180 | // Walk until matching '. GHA expressions do NOT support backslash |
| 181 | // escapes; the only escape is doubling the quote: '' produces '. |
| 182 | var b strings.Builder |
| 183 | i := 1 // skip opening ' |
| 184 | for i < len(src) { |
| 185 | c := src[i] |
| 186 | if c == '\'' { |
| 187 | if i+1 < len(src) && src[i+1] == '\'' { |
| 188 | b.WriteByte('\'') |
| 189 | i += 2 |
| 190 | continue |
| 191 | } |
| 192 | return Token{Kind: TokString, Value: b.String(), Pos: basePos}, i + 1, nil |
| 193 | } |
| 194 | b.WriteByte(c) |
| 195 | i++ |
| 196 | } |
| 197 | return Token{}, 0, fmt.Errorf("expr: unterminated string at offset %d", basePos) |
| 198 | } |
| 199 | |
| 200 | func lexIdent(src string, basePos int) (Token, int) { |
| 201 | i := 0 |
| 202 | for i < len(src) { |
| 203 | r, size := utf8.DecodeRuneInString(src[i:]) |
| 204 | if r == utf8.RuneError { |
| 205 | break |
| 206 | } |
| 207 | if !isIdentChar(r) { |
| 208 | break |
| 209 | } |
| 210 | i += size |
| 211 | } |
| 212 | v := src[:i] |
| 213 | switch v { |
| 214 | case "true", "false": |
| 215 | return Token{Kind: TokBool, Value: v, Pos: basePos}, i |
| 216 | case "null": |
| 217 | return Token{Kind: TokNull, Value: v, Pos: basePos}, i |
| 218 | } |
| 219 | return Token{Kind: TokIdent, Value: v, Pos: basePos}, i |
| 220 | } |
| 221 | |
// isIdentStart reports whether r may begin an identifier: any Unicode
// letter, or an underscore.
func isIdentStart(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r)
}
| 225 | |
// isIdentChar reports whether r may continue an identifier: any
// Unicode letter or digit, or an underscore.
func isIdentChar(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsDigit(r):
		return true
	default:
		return unicode.IsLetter(r)
	}
}
| 229 |