Go · 6334 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 // Package expr is the strict-allowlist expression evaluator for
4 // `${{ … }}` blocks in workflow files.
5 //
6 // The evaluator is intentionally tiny:
7 // - Allowed namespaces: secrets, env, vars, shithub.event, shithub.run_id,
8 // shithub.sha, shithub.ref, shithub.actor.
9 // - Allowed functions: contains, startsWith, endsWith,
10 // success(), failure(), always(), cancelled().
// - Operators: && || ! == != — and nothing else. There is no string
//   concatenation, arithmetic, or any other operator in v1.
13 //
14 // Anything outside that set is an evaluation error. This is the load-
15 // bearing security surface — the more we accept, the more attack
16 // surface we open. Future expansion goes through a reviewer-required
17 // note in the commit message (per the campaign §"Risks": "block any
18 // S41 PR that adds an evaluator function without a security note").
19 //
20 // Every produced Value carries a Tainted bool. References that
21 // resolve into the shithub.event.* namespace are tagged Tainted=true;
22 // taint propagates through string concatenation, comparisons (the
23 // boolean output isn't tainted, but the comparison operands' values
24 // are checked), and function returns.
25 package expr
26
27 import (
28 "fmt"
29 "strings"
30 "unicode"
31 "unicode/utf8"
32 )
33
// TokenKind classifies a lexed token.
type TokenKind int

// The complete token vocabulary. TokInvalid is deliberately the iota
// zero value so a zero-valued Token is never mistaken for a real kind.
const (
	TokInvalid TokenKind = iota
	TokIdent  // foo, secrets, shithub
	TokDot    // .
	TokLParen // (
	TokRParen // )
	TokComma  // ,
	TokString // 'literal' (single-quoted only — GHA convention)
	TokBool   // true | false
	TokNull   // null
	TokAnd    // &&
	TokOr     // ||
	TokNot    // !
	TokEq     // ==
	TokNe     // !=
	TokEOF    // sentinel appended by Lex after the final real token
)
54
// Token is a single lexed unit. Pos is the byte offset in the original
// source (useful for diagnostic spans).
type Token struct {
	Kind  TokenKind // which class of token this is
	Value string    // token text (escape-decoded for TokString; empty for TokEOF)
	Pos   int       // byte offset of the token's first byte in the source
}
62
63 func (k TokenKind) String() string {
64 switch k {
65 case TokIdent:
66 return "identifier"
67 case TokDot:
68 return "."
69 case TokLParen:
70 return "("
71 case TokRParen:
72 return ")"
73 case TokComma:
74 return ","
75 case TokString:
76 return "string literal"
77 case TokBool:
78 return "boolean"
79 case TokNull:
80 return "null"
81 case TokAnd:
82 return "&&"
83 case TokOr:
84 return "||"
85 case TokNot:
86 return "!"
87 case TokEq:
88 return "=="
89 case TokNe:
90 return "!="
91 case TokEOF:
92 return "end of input"
93 }
94 return "invalid"
95 }
96
97 // Lex returns the token stream for src or an error on the first lexical
98 // problem. Whitespace is skipped silently. The lexer doesn't strip the
99 // surrounding `${{ … }}` — the caller does that before calling Lex.
100 //
101 // The main loop walks runes (not bytes) so multi-byte UTF-8 characters
102 // in identifiers — e.g., a Greek-letter variable name in an env-bag
103 // key — produce correct identifier boundaries instead of mangled
104 // "unexpected character" errors at continuation bytes.
105 func Lex(src string) ([]Token, error) {
106 var out []Token
107 i := 0
108 for i < len(src) {
109 r, size := utf8.DecodeRuneInString(src[i:])
110 if r == utf8.RuneError && size <= 1 {
111 return nil, fmt.Errorf("expr: invalid UTF-8 at offset %d", i)
112 }
113 switch {
114 case r == ' ' || r == '\t' || r == '\n' || r == '\r':
115 i += size
116 case r == '.':
117 out = append(out, Token{Kind: TokDot, Value: ".", Pos: i})
118 i += size
119 case r == '(':
120 out = append(out, Token{Kind: TokLParen, Value: "(", Pos: i})
121 i += size
122 case r == ')':
123 out = append(out, Token{Kind: TokRParen, Value: ")", Pos: i})
124 i += size
125 case r == ',':
126 out = append(out, Token{Kind: TokComma, Value: ",", Pos: i})
127 i += size
128 case r == '\'':
129 tok, n, err := lexString(src[i:], i)
130 if err != nil {
131 return nil, err
132 }
133 out = append(out, tok)
134 i += n
135 case r == '&':
136 if i+1 < len(src) && src[i+1] == '&' {
137 out = append(out, Token{Kind: TokAnd, Value: "&&", Pos: i})
138 i += 2
139 } else {
140 return nil, fmt.Errorf("expr: stray '&' at offset %d (expected '&&')", i)
141 }
142 case r == '|':
143 if i+1 < len(src) && src[i+1] == '|' {
144 out = append(out, Token{Kind: TokOr, Value: "||", Pos: i})
145 i += 2
146 } else {
147 return nil, fmt.Errorf("expr: stray '|' at offset %d (expected '||')", i)
148 }
149 case r == '!':
150 if i+1 < len(src) && src[i+1] == '=' {
151 out = append(out, Token{Kind: TokNe, Value: "!=", Pos: i})
152 i += 2
153 } else {
154 out = append(out, Token{Kind: TokNot, Value: "!", Pos: i})
155 i += size
156 }
157 case r == '=':
158 if i+1 < len(src) && src[i+1] == '=' {
159 out = append(out, Token{Kind: TokEq, Value: "==", Pos: i})
160 i += 2
161 } else {
162 return nil, fmt.Errorf("expr: stray '=' at offset %d (expected '==')", i)
163 }
164 case isIdentStart(r):
165 tok, n := lexIdent(src[i:], i)
166 out = append(out, tok)
167 i += n
168 default:
169 return nil, fmt.Errorf("expr: unexpected character %q at offset %d", r, i)
170 }
171 }
172 out = append(out, Token{Kind: TokEOF, Pos: i})
173 return out, nil
174 }
175
176 func lexString(src string, basePos int) (Token, int, error) {
177 if len(src) < 2 {
178 return Token{}, 0, fmt.Errorf("expr: unterminated string at offset %d", basePos)
179 }
180 // Walk until matching '. GHA expressions do NOT support backslash
181 // escapes; the only escape is doubling the quote: '' produces '.
182 var b strings.Builder
183 i := 1 // skip opening '
184 for i < len(src) {
185 c := src[i]
186 if c == '\'' {
187 if i+1 < len(src) && src[i+1] == '\'' {
188 b.WriteByte('\'')
189 i += 2
190 continue
191 }
192 return Token{Kind: TokString, Value: b.String(), Pos: basePos}, i + 1, nil
193 }
194 b.WriteByte(c)
195 i++
196 }
197 return Token{}, 0, fmt.Errorf("expr: unterminated string at offset %d", basePos)
198 }
199
200 func lexIdent(src string, basePos int) (Token, int) {
201 i := 0
202 for i < len(src) {
203 r, size := utf8.DecodeRuneInString(src[i:])
204 if r == utf8.RuneError {
205 break
206 }
207 if !isIdentChar(r) {
208 break
209 }
210 i += size
211 }
212 v := src[:i]
213 switch v {
214 case "true", "false":
215 return Token{Kind: TokBool, Value: v, Pos: basePos}, i
216 case "null":
217 return Token{Kind: TokNull, Value: v, Pos: basePos}, i
218 }
219 return Token{Kind: TokIdent, Value: v, Pos: basePos}, i
220 }
221
// isIdentStart reports whether r may begin an identifier: any Unicode
// letter, or an underscore. Digits may not lead an identifier.
func isIdentStart(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r)
}
225
// isIdentChar reports whether r may continue an identifier: any
// Unicode letter or digit, or an underscore.
func isIdentChar(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsDigit(r):
		return true
	default:
		return unicode.IsLetter(r)
	}
}
229