1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 // Package expr is the strict-allowlist expression evaluator for
4 // `${{ … }}` blocks in workflow files.
5 //
6 // The evaluator is intentionally tiny:
7 // - Allowed namespaces: secrets, env, vars, shithub.event, shithub.run_id,
8 // shithub.sha, shithub.ref, shithub.actor.
9 // - Allowed functions: contains, startsWith, endsWith,
10 // success(), failure(), always(), cancelled().
// - Operators: && || ! == !=. No binary string concat, no
//   arithmetic — nothing else is supported in v1.
13 //
14 // Anything outside that set is an evaluation error. This is the load-
15 // bearing security surface — the more we accept, the more attack
16 // surface we open. Future expansion goes through a reviewer-required
17 // note in the commit message (per the campaign §"Risks": "block any
18 // S41 PR that adds an evaluator function without a security note").
19 //
20 // Every produced Value carries a Tainted bool. References that
21 // resolve into the shithub.event.* namespace are tagged Tainted=true;
22 // taint propagates through string concatenation, comparisons (the
23 // boolean output isn't tainted, but the comparison operands' values
24 // are checked), and function returns.
25 package expr
26
27 import (
28 "fmt"
29 "strings"
30 "unicode"
31 )
32
// TokenKind classifies a lexed token.
type TokenKind int

const (
	TokInvalid TokenKind = iota
	TokIdent // foo, secrets, shithub
	TokDot // .
	TokLParen // (
	TokRParen // )
	TokComma // ,
	TokString // 'literal' (single-quoted only — GHA convention)
	TokBool // true | false
	TokNull // null
	TokAnd // &&
	TokOr // ||
	TokNot // !
	TokEq // ==
	TokNe // !=
	TokEOF
)

// Token is a single lexed unit. Pos is the byte offset in the original
// source (useful for diagnostic spans).
type Token struct {
	Kind  TokenKind
	Value string
	Pos   int
}

// String returns a human-readable name for the token kind, suitable
// for diagnostics ("expected X, found Y"). TokInvalid and any value
// outside the declared range render as "invalid".
func (k TokenKind) String() string {
	// Table indexed by kind. The strings are constants, so the array
	// lives on the stack; no per-call heap allocation.
	names := [...]string{
		TokInvalid: "invalid",
		TokIdent:   "identifier",
		TokDot:     ".",
		TokLParen:  "(",
		TokRParen:  ")",
		TokComma:   ",",
		TokString:  "string literal",
		TokBool:    "boolean",
		TokNull:    "null",
		TokAnd:     "&&",
		TokOr:      "||",
		TokNot:     "!",
		TokEq:      "==",
		TokNe:      "!=",
		TokEOF:     "end of input",
	}
	if k < TokInvalid || int(k) >= len(names) {
		return "invalid"
	}
	return names[k]
}
95
96 // Lex returns the token stream for src or an error on the first lexical
97 // problem. Whitespace is skipped silently. The lexer doesn't strip the
98 // surrounding `${{ … }}` — the caller does that before calling Lex.
99 func Lex(src string) ([]Token, error) {
100 var out []Token
101 i := 0
102 for i < len(src) {
103 c := src[i]
104 switch {
105 case c == ' ' || c == '\t' || c == '\n' || c == '\r':
106 i++
107 case c == '.':
108 out = append(out, Token{Kind: TokDot, Value: ".", Pos: i})
109 i++
110 case c == '(':
111 out = append(out, Token{Kind: TokLParen, Value: "(", Pos: i})
112 i++
113 case c == ')':
114 out = append(out, Token{Kind: TokRParen, Value: ")", Pos: i})
115 i++
116 case c == ',':
117 out = append(out, Token{Kind: TokComma, Value: ",", Pos: i})
118 i++
119 case c == '\'':
120 tok, n, err := lexString(src[i:], i)
121 if err != nil {
122 return nil, err
123 }
124 out = append(out, tok)
125 i += n
126 case c == '&':
127 if i+1 < len(src) && src[i+1] == '&' {
128 out = append(out, Token{Kind: TokAnd, Value: "&&", Pos: i})
129 i += 2
130 } else {
131 return nil, fmt.Errorf("expr: stray '&' at offset %d (expected '&&')", i)
132 }
133 case c == '|':
134 if i+1 < len(src) && src[i+1] == '|' {
135 out = append(out, Token{Kind: TokOr, Value: "||", Pos: i})
136 i += 2
137 } else {
138 return nil, fmt.Errorf("expr: stray '|' at offset %d (expected '||')", i)
139 }
140 case c == '!':
141 if i+1 < len(src) && src[i+1] == '=' {
142 out = append(out, Token{Kind: TokNe, Value: "!=", Pos: i})
143 i += 2
144 } else {
145 out = append(out, Token{Kind: TokNot, Value: "!", Pos: i})
146 i++
147 }
148 case c == '=':
149 if i+1 < len(src) && src[i+1] == '=' {
150 out = append(out, Token{Kind: TokEq, Value: "==", Pos: i})
151 i += 2
152 } else {
153 return nil, fmt.Errorf("expr: stray '=' at offset %d (expected '==')", i)
154 }
155 case isIdentStart(c):
156 tok, n := lexIdent(src[i:], i)
157 out = append(out, tok)
158 i += n
159 default:
160 return nil, fmt.Errorf("expr: unexpected character %q at offset %d", c, i)
161 }
162 }
163 out = append(out, Token{Kind: TokEOF, Pos: i})
164 return out, nil
165 }
166
167 func lexString(src string, basePos int) (Token, int, error) {
168 if len(src) < 2 {
169 return Token{}, 0, fmt.Errorf("expr: unterminated string at offset %d", basePos)
170 }
171 // Walk until matching '. GHA expressions do NOT support backslash
172 // escapes; the only escape is doubling the quote: '' produces '.
173 var b strings.Builder
174 i := 1 // skip opening '
175 for i < len(src) {
176 c := src[i]
177 if c == '\'' {
178 if i+1 < len(src) && src[i+1] == '\'' {
179 b.WriteByte('\'')
180 i += 2
181 continue
182 }
183 return Token{Kind: TokString, Value: b.String(), Pos: basePos}, i + 1, nil
184 }
185 b.WriteByte(c)
186 i++
187 }
188 return Token{}, 0, fmt.Errorf("expr: unterminated string at offset %d", basePos)
189 }
190
191 func lexIdent(src string, basePos int) (Token, int) {
192 i := 0
193 for i < len(src) && isIdentChar(src[i]) {
194 i++
195 }
196 v := src[:i]
197 switch v {
198 case "true", "false":
199 return Token{Kind: TokBool, Value: v, Pos: basePos}, i
200 case "null":
201 return Token{Kind: TokNull, Value: v, Pos: basePos}, i
202 }
203 return Token{Kind: TokIdent, Value: v, Pos: basePos}, i
204 }
205
// isIdentStart reports whether c may begin an identifier. Only ASCII
// letters and '_' are accepted: the lexer walks the input byte by
// byte, and promoting a raw byte to a rune (as the unprotected
// unicode.IsLetter(rune(c)) did) misreads individual bytes of a
// multibyte UTF-8 character as Latin-1 code points — e.g. 0xC3, the
// lead byte of 'é', looks like the letter 'Ã' — accepting fragments
// of characters into identifiers. All allowlisted namespaces
// (secrets, env, vars, shithub) are ASCII, so rejecting bytes >= 0x80
// outright loses nothing valid and keeps the accepted surface tight.
func isIdentStart(c byte) bool {
	return c == '_' || (c < 0x80 && unicode.IsLetter(rune(c)))
}
209
// isIdentChar reports whether c may appear after the first byte of an
// identifier: ASCII letters, ASCII digits, and '_'. The c < 0x80
// guard matters because the lexer hands this single bytes: without
// it, unicode.IsLetter(rune(c)) would interpret bytes 0x80–0xFF as
// Latin-1 code points and accept stray lead/continuation bytes of
// multibyte UTF-8 characters as identifier text. Identifiers in the
// allowlisted namespaces are all ASCII, so non-ASCII bytes are
// rejected outright.
func isIdentChar(c byte) bool {
	return c == '_' || (c < 0x80 && (unicode.IsLetter(rune(c)) || unicode.IsDigit(rune(c))))
}
213