| 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | |
| 3 | // Package expr is the strict-allowlist expression evaluator for |
| 4 | // `${{ … }}` blocks in workflow files. |
| 5 | // |
| 6 | // The evaluator is intentionally tiny: |
| 7 | // - Allowed namespaces: secrets, env, vars, shithub.event, shithub.run_id, |
| 8 | // shithub.sha, shithub.ref, shithub.actor. |
| 9 | // - Allowed functions: contains, startsWith, endsWith, |
| 10 | // success(), failure(), always(), cancelled(). |
// - Operators: && || ! == !=. There is no string concatenation and no
//   arithmetic — nothing beyond this set is supported in v1.
| 13 | // |
| 14 | // Anything outside that set is an evaluation error. This is the load- |
| 15 | // bearing security surface — the more we accept, the more attack |
| 16 | // surface we open. Future expansion goes through a reviewer-required |
| 17 | // note in the commit message (per the campaign §"Risks": "block any |
| 18 | // S41 PR that adds an evaluator function without a security note"). |
| 19 | // |
| 20 | // Every produced Value carries a Tainted bool. References that |
| 21 | // resolve into the shithub.event.* namespace are tagged Tainted=true; |
| 22 | // taint propagates through string concatenation, comparisons (the |
| 23 | // boolean output isn't tainted, but the comparison operands' values |
| 24 | // are checked), and function returns. |
| 25 | package expr |
| 26 | |
| 27 | import ( |
| 28 | "fmt" |
| 29 | "strings" |
| 30 | "unicode" |
| 31 | ) |
| 32 | |
// TokenKind classifies a lexed token.
type TokenKind int

// Token kinds. TokInvalid is the zero value and is never produced by a
// successful Lex — lexical problems surface as errors instead.
const (
	TokInvalid TokenKind = iota
	TokIdent   // foo, secrets, shithub
	TokDot     // .
	TokLParen  // (
	TokRParen  // )
	TokComma   // ,
	TokString  // 'literal' (single-quoted only — GHA convention)
	TokBool    // true | false
	TokNull    // null
	TokAnd     // &&
	TokOr      // ||
	TokNot     // !
	TokEq      // ==
	TokNe      // !=
	TokEOF
)

// Token is a single lexed unit. Pos is the byte offset in the original
// source (useful for diagnostic spans).
type Token struct {
	Kind  TokenKind
	Value string
	Pos   int
}

// String renders the kind as a human-readable fragment for diagnostics
// ("expected X, got Y").
func (k TokenKind) String() string {
	// Indexed composite literal: each kind maps to its display text.
	// TokInvalid stays "" and, like any out-of-range value, falls back
	// to the "invalid" label below.
	names := [...]string{
		TokIdent:  "identifier",
		TokDot:    ".",
		TokLParen: "(",
		TokRParen: ")",
		TokComma:  ",",
		TokString: "string literal",
		TokBool:   "boolean",
		TokNull:   "null",
		TokAnd:    "&&",
		TokOr:     "||",
		TokNot:    "!",
		TokEq:     "==",
		TokNe:     "!=",
		TokEOF:    "end of input",
	}
	if k < 0 || int(k) >= len(names) || names[k] == "" {
		return "invalid"
	}
	return names[k]
}
| 95 | |
| 96 | // Lex returns the token stream for src or an error on the first lexical |
| 97 | // problem. Whitespace is skipped silently. The lexer doesn't strip the |
| 98 | // surrounding `${{ … }}` — the caller does that before calling Lex. |
| 99 | func Lex(src string) ([]Token, error) { |
| 100 | var out []Token |
| 101 | i := 0 |
| 102 | for i < len(src) { |
| 103 | c := src[i] |
| 104 | switch { |
| 105 | case c == ' ' || c == '\t' || c == '\n' || c == '\r': |
| 106 | i++ |
| 107 | case c == '.': |
| 108 | out = append(out, Token{Kind: TokDot, Value: ".", Pos: i}) |
| 109 | i++ |
| 110 | case c == '(': |
| 111 | out = append(out, Token{Kind: TokLParen, Value: "(", Pos: i}) |
| 112 | i++ |
| 113 | case c == ')': |
| 114 | out = append(out, Token{Kind: TokRParen, Value: ")", Pos: i}) |
| 115 | i++ |
| 116 | case c == ',': |
| 117 | out = append(out, Token{Kind: TokComma, Value: ",", Pos: i}) |
| 118 | i++ |
| 119 | case c == '\'': |
| 120 | tok, n, err := lexString(src[i:], i) |
| 121 | if err != nil { |
| 122 | return nil, err |
| 123 | } |
| 124 | out = append(out, tok) |
| 125 | i += n |
| 126 | case c == '&': |
| 127 | if i+1 < len(src) && src[i+1] == '&' { |
| 128 | out = append(out, Token{Kind: TokAnd, Value: "&&", Pos: i}) |
| 129 | i += 2 |
| 130 | } else { |
| 131 | return nil, fmt.Errorf("expr: stray '&' at offset %d (expected '&&')", i) |
| 132 | } |
| 133 | case c == '|': |
| 134 | if i+1 < len(src) && src[i+1] == '|' { |
| 135 | out = append(out, Token{Kind: TokOr, Value: "||", Pos: i}) |
| 136 | i += 2 |
| 137 | } else { |
| 138 | return nil, fmt.Errorf("expr: stray '|' at offset %d (expected '||')", i) |
| 139 | } |
| 140 | case c == '!': |
| 141 | if i+1 < len(src) && src[i+1] == '=' { |
| 142 | out = append(out, Token{Kind: TokNe, Value: "!=", Pos: i}) |
| 143 | i += 2 |
| 144 | } else { |
| 145 | out = append(out, Token{Kind: TokNot, Value: "!", Pos: i}) |
| 146 | i++ |
| 147 | } |
| 148 | case c == '=': |
| 149 | if i+1 < len(src) && src[i+1] == '=' { |
| 150 | out = append(out, Token{Kind: TokEq, Value: "==", Pos: i}) |
| 151 | i += 2 |
| 152 | } else { |
| 153 | return nil, fmt.Errorf("expr: stray '=' at offset %d (expected '==')", i) |
| 154 | } |
| 155 | case isIdentStart(c): |
| 156 | tok, n := lexIdent(src[i:], i) |
| 157 | out = append(out, tok) |
| 158 | i += n |
| 159 | default: |
| 160 | return nil, fmt.Errorf("expr: unexpected character %q at offset %d", c, i) |
| 161 | } |
| 162 | } |
| 163 | out = append(out, Token{Kind: TokEOF, Pos: i}) |
| 164 | return out, nil |
| 165 | } |
| 166 | |
| 167 | func lexString(src string, basePos int) (Token, int, error) { |
| 168 | if len(src) < 2 { |
| 169 | return Token{}, 0, fmt.Errorf("expr: unterminated string at offset %d", basePos) |
| 170 | } |
| 171 | // Walk until matching '. GHA expressions do NOT support backslash |
| 172 | // escapes; the only escape is doubling the quote: '' produces '. |
| 173 | var b strings.Builder |
| 174 | i := 1 // skip opening ' |
| 175 | for i < len(src) { |
| 176 | c := src[i] |
| 177 | if c == '\'' { |
| 178 | if i+1 < len(src) && src[i+1] == '\'' { |
| 179 | b.WriteByte('\'') |
| 180 | i += 2 |
| 181 | continue |
| 182 | } |
| 183 | return Token{Kind: TokString, Value: b.String(), Pos: basePos}, i + 1, nil |
| 184 | } |
| 185 | b.WriteByte(c) |
| 186 | i++ |
| 187 | } |
| 188 | return Token{}, 0, fmt.Errorf("expr: unterminated string at offset %d", basePos) |
| 189 | } |
| 190 | |
| 191 | func lexIdent(src string, basePos int) (Token, int) { |
| 192 | i := 0 |
| 193 | for i < len(src) && isIdentChar(src[i]) { |
| 194 | i++ |
| 195 | } |
| 196 | v := src[:i] |
| 197 | switch v { |
| 198 | case "true", "false": |
| 199 | return Token{Kind: TokBool, Value: v, Pos: basePos}, i |
| 200 | case "null": |
| 201 | return Token{Kind: TokNull, Value: v, Pos: basePos}, i |
| 202 | } |
| 203 | return Token{Kind: TokIdent, Value: v, Pos: basePos}, i |
| 204 | } |
| 205 | |
// isIdentStart reports whether c may begin an identifier.
//
// Identifiers are deliberately ASCII-only ([A-Za-z_]). The lexer walks
// src byte-by-byte, so a byte >= 0x80 is a fragment of a multi-byte
// UTF-8 sequence, not a complete rune; converting it with rune(c)
// reinterprets it as a Latin-1 code point (e.g. 0xC0 -> 'À'), which
// unicode.IsLetter wrongly accepts. The c < 0x80 guard closes that
// hole and keeps the lexed alphabet aligned with the strict allowlist,
// whose namespace and function names are all ASCII.
func isIdentStart(c byte) bool {
	return c < 0x80 && (unicode.IsLetter(rune(c)) || c == '_')
}
| 209 | |
// isIdentChar reports whether c may continue an identifier
// ([A-Za-z0-9_]). As with isIdentStart, the c < 0x80 guard prevents
// UTF-8 continuation/lead bytes from being misread as Latin-1 letters
// via the byte-to-rune conversion — identifiers are ASCII-only.
func isIdentChar(c byte) bool {
	return c < 0x80 && (unicode.IsLetter(rune(c)) || unicode.IsDigit(rune(c)) || c == '_')
}
| 213 |