| 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | |
| 3 | // Package expr is the strict-allowlist expression evaluator for |
| 4 | // `${{ … }}` blocks in workflow files. |
| 5 | // |
| 6 | // The evaluator is intentionally tiny: |
| 7 | // - Allowed namespaces: secrets, env, vars, shithub.event, shithub.run_id, |
| 8 | // shithub.sha, shithub.ref, shithub.actor. |
| 9 | // - Allowed functions: contains, startsWith, endsWith, |
| 10 | // success(), failure(), always(), cancelled(). |
// - Operators: && || ! == !=. There is no string concatenation and no
//   arithmetic — nothing beyond this set is supported in v1.
| 13 | // |
| 14 | // Anything outside that set is an evaluation error. This is the load- |
| 15 | // bearing security surface — the more we accept, the more attack |
| 16 | // surface we open. Future expansion goes through a reviewer-required |
| 17 | // note in the commit message (per the campaign §"Risks": "block any |
| 18 | // S41 PR that adds an evaluator function without a security note"). |
| 19 | // |
| 20 | // Every produced Value carries a Tainted bool. References that |
| 21 | // resolve into the shithub.event.* namespace are tagged Tainted=true; |
| 22 | // taint propagates through string concatenation, comparisons (the |
| 23 | // boolean output isn't tainted, but the comparison operands' values |
| 24 | // are checked), and function returns. |
| 25 | package expr |
| 26 | |
| 27 | import ( |
| 28 | "fmt" |
| 29 | "strings" |
| 30 | "unicode" |
| 31 | ) |
| 32 | |
// TokenKind classifies a lexed token.
type TokenKind int

// Token kinds. TokInvalid is the zero value and is never produced by a
// successful Lex — lexical problems surface as errors instead.
const (
	TokInvalid TokenKind = iota
	TokIdent   // foo, secrets, shithub
	TokDot     // .
	TokLParen  // (
	TokRParen  // )
	TokComma   // ,
	TokString  // 'literal' (single-quoted only — GHA convention)
	TokBool    // true | false
	TokNull    // null
	TokAnd     // &&
	TokOr      // ||
	TokNot     // !
	TokEq      // ==
	TokNe      // !=
	TokEOF
)

// Token is a single lexed unit. Pos is the byte offset in the original
// source (useful for diagnostic spans).
type Token struct {
	Kind  TokenKind
	Value string
	Pos   int
}

// String renders the kind as a human-readable fragment for diagnostics
// ("expected X, got Y").
func (k TokenKind) String() string {
	// Indexed composite literal: each kind maps to its display text.
	// TokInvalid stays "" and, like any out-of-range value, falls back
	// to the "invalid" label below.
	names := [...]string{
		TokIdent:  "identifier",
		TokDot:    ".",
		TokLParen: "(",
		TokRParen: ")",
		TokComma:  ",",
		TokString: "string literal",
		TokBool:   "boolean",
		TokNull:   "null",
		TokAnd:    "&&",
		TokOr:     "||",
		TokNot:    "!",
		TokEq:     "==",
		TokNe:     "!=",
		TokEOF:    "end of input",
	}
	if k < 0 || int(k) >= len(names) || names[k] == "" {
		return "invalid"
	}
	return names[k]
}
| 95 | |
| 96 | // Lex returns the token stream for src or an error on the first lexical |
| 97 | // problem. Whitespace is skipped silently. The lexer doesn't strip the |
| 98 | // surrounding `${{ … }}` — the caller does that before calling Lex. |
| 99 | func Lex(src string) ([]Token, error) { |
| 100 | var out []Token |
| 101 | i := 0 |
| 102 | for i < len(src) { |
| 103 | c := src[i] |
| 104 | switch { |
| 105 | case c == ' ' || c == '\t' || c == '\n' || c == '\r': |
| 106 | i++ |
| 107 | case c == '.': |
| 108 | out = append(out, Token{Kind: TokDot, Value: ".", Pos: i}) |
| 109 | i++ |
| 110 | case c == '(': |
| 111 | out = append(out, Token{Kind: TokLParen, Value: "(", Pos: i}) |
| 112 | i++ |
| 113 | case c == ')': |
| 114 | out = append(out, Token{Kind: TokRParen, Value: ")", Pos: i}) |
| 115 | i++ |
| 116 | case c == ',': |
| 117 | out = append(out, Token{Kind: TokComma, Value: ",", Pos: i}) |
| 118 | i++ |
| 119 | case c == '\'': |
| 120 | tok, n, err := lexString(src[i:], i) |
| 121 | if err != nil { |
| 122 | return nil, err |
| 123 | } |
| 124 | out = append(out, tok) |
| 125 | i += n |
| 126 | case c == '&': |
| 127 | if i+1 < len(src) && src[i+1] == '&' { |
| 128 | out = append(out, Token{Kind: TokAnd, Value: "&&", Pos: i}) |
| 129 | i += 2 |
| 130 | } else { |
| 131 | return nil, fmt.Errorf("expr: stray '&' at offset %d (expected '&&')", i) |
| 132 | } |
| 133 | case c == '|': |
| 134 | if i+1 < len(src) && src[i+1] == '|' { |
| 135 | out = append(out, Token{Kind: TokOr, Value: "||", Pos: i}) |
| 136 | i += 2 |
| 137 | } else { |
| 138 | return nil, fmt.Errorf("expr: stray '|' at offset %d (expected '||')", i) |
| 139 | } |
| 140 | case c == '!': |
| 141 | if i+1 < len(src) && src[i+1] == '=' { |
| 142 | out = append(out, Token{Kind: TokNe, Value: "!=", Pos: i}) |
| 143 | i += 2 |
| 144 | } else { |
| 145 | out = append(out, Token{Kind: TokNot, Value: "!", Pos: i}) |
| 146 | i++ |
| 147 | } |
| 148 | case c == '=': |
| 149 | if i+1 < len(src) && src[i+1] == '=' { |
| 150 | out = append(out, Token{Kind: TokEq, Value: "==", Pos: i}) |
| 151 | i += 2 |
| 152 | } else { |
| 153 | return nil, fmt.Errorf("expr: stray '=' at offset %d (expected '==')", i) |
| 154 | } |
| 155 | case isIdentStart(c): |
| 156 | tok, n := lexIdent(src[i:], i) |
| 157 | out = append(out, tok) |
| 158 | i += n |
| 159 | default: |
| 160 | return nil, fmt.Errorf("expr: unexpected character %q at offset %d", c, i) |
| 161 | } |
| 162 | } |
| 163 | out = append(out, Token{Kind: TokEOF, Pos: i}) |
| 164 | return out, nil |
| 165 | } |
| 166 | |
| 167 | func lexString(src string, basePos int) (Token, int, error) { |
| 168 | if len(src) < 2 { |
| 169 | return Token{}, 0, fmt.Errorf("expr: unterminated string at offset %d", basePos) |
| 170 | } |
| 171 | // Walk until matching '. GHA expressions do NOT support backslash |
| 172 | // escapes; the only escape is doubling the quote: '' produces '. |
| 173 | var b strings.Builder |
| 174 | i := 1 // skip opening ' |
| 175 | for i < len(src) { |
| 176 | c := src[i] |
| 177 | if c == '\'' { |
| 178 | if i+1 < len(src) && src[i+1] == '\'' { |
| 179 | b.WriteByte('\'') |
| 180 | i += 2 |
| 181 | continue |
| 182 | } |
| 183 | return Token{Kind: TokString, Value: b.String(), Pos: basePos}, i + 1, nil |
| 184 | } |
| 185 | b.WriteByte(c) |
| 186 | i++ |
| 187 | } |
| 188 | return Token{}, 0, fmt.Errorf("expr: unterminated string at offset %d", basePos) |
| 189 | } |
| 190 | |
| 191 | func lexIdent(src string, basePos int) (Token, int) { |
| 192 | i := 0 |
| 193 | for i < len(src) && isIdentChar(src[i]) { |
| 194 | i++ |
| 195 | } |
| 196 | v := src[:i] |
| 197 | switch v { |
| 198 | case "true", "false": |
| 199 | return Token{Kind: TokBool, Value: v, Pos: basePos}, i |
| 200 | case "null": |
| 201 | return Token{Kind: TokNull, Value: v, Pos: basePos}, i |
| 202 | } |
| 203 | return Token{Kind: TokIdent, Value: v, Pos: basePos}, i |
| 204 | } |
| 205 | |
// isIdentStart reports whether c may begin an identifier.
//
// Identifiers are deliberately ASCII-only ([A-Za-z_]). The lexer walks
// src byte-by-byte, so a byte >= 0x80 is a fragment of a multi-byte
// UTF-8 sequence, not a complete rune; converting it with rune(c)
// reinterprets it as a Latin-1 code point (e.g. 0xC0 -> 'À'), which
// unicode.IsLetter wrongly accepts. The c < 0x80 guard closes that
// hole and keeps the lexed alphabet aligned with the strict allowlist,
// whose namespace and function names are all ASCII.
func isIdentStart(c byte) bool {
	return c < 0x80 && (unicode.IsLetter(rune(c)) || c == '_')
}
| 209 | |
// isIdentChar reports whether c may continue an identifier
// ([A-Za-z0-9_]). As with isIdentStart, the c < 0x80 guard prevents
// UTF-8 continuation/lead bytes from being misread as Latin-1 letters
// via the byte-to-rune conversion — identifiers are ASCII-only.
func isIdentChar(c byte) bool {
	return c < 0x80 && (unicode.IsLetter(rune(c)) || unicode.IsDigit(rune(c)) || c == '_')
}
| 213 |