Go · 11540 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 package markdown
4
5 import (
6 "context"
7 "strings"
8 "testing"
9 )
10
11 // TestRender_HostileInputs is the XSS-vector cheatsheet. Every
12 // fixture is a markdown body that *attempts* to inject executable
13 // JS through a different vector. The pass condition: the rendered
14 // HTML contains no `<script` tag, no `javascript:` URL, no event
15 // handler attribute (`on*`), and no `data:` URI.
16 //
17 // Add new vectors here when a CVE / advisory lands in goldmark or
18 // bluemonday — they're cheap to keep.
19 func TestRender_HostileInputs(t *testing.T) {
20 t.Parallel()
21 vectors := []string{
22 // Direct script tag.
23 `<script>alert(1)</script>`,
24 `<SCRIPT>alert(1)</SCRIPT>`,
25 `<script src="//evil.com/x.js"></script>`,
26 // Inline event handlers.
27 `<img src="x" onerror="alert(1)">`,
28 `<img src=x onerror=alert(1)>`,
29 `<a onmouseover="alert(1)">x</a>`,
30 `<body onload="alert(1)">`,
31 // Style with expressions.
32 `<style>body{background:url("javascript:alert(1)")}</style>`,
33 `<div style="background:url(javascript:alert(1))">x</div>`,
34 // javascript: links.
35 `[click](javascript:alert(1))`,
36 `<a href="javascript:alert(1)">x</a>`,
37 `<a href="JaVaScRiPt:alert(1)">x</a>`,
38 `[click](JAVASCRIPT:alert(1))`,
39 // data: URIs (we disallow even data:image).
40 `<img src="data:image/svg+xml;base64,PHN2Zz4=">`,
41 `[x](data:text/html,<script>alert(1)</script>)`,
42 // vbscript:.
43 `<a href="vbscript:msgbox(1)">x</a>`,
44 // SVG-embedded scripts.
45 `<svg><script>alert(1)</script></svg>`,
46 `<svg onload="alert(1)"></svg>`,
47 // iframes.
48 `<iframe src="//evil.com"></iframe>`,
49 `<iframe srcdoc="<script>alert(1)</script>"></iframe>`,
50 // HTML in markdown link text doesn't escape sanitizer.
51 `[<script>alert(1)</script>](https://example.com)`,
52 // Mutation XSS via mismatched quotes.
53 `<a href="x"onmouseover="alert(1)">x</a>`,
54 // Encoded payloads.
55 `<a href="&#x6A;avascript:alert(1)">x</a>`,
56 `<a href="&#106;avascript:alert(1)">x</a>`,
57 // Backticked code-like content shouldn't escape.
58 "`<script>alert(1)</script>`",
59 // Embedded in autolinks.
60 `<javascript:alert(1)>`,
61 // Object/embed.
62 `<object data="x.swf"></object>`,
63 `<embed src="x.swf">`,
64 // Form/button with formaction.
65 `<form><button formaction="javascript:alert(1)">x</button></form>`,
66 // Meta refresh.
67 `<meta http-equiv="refresh" content="0; url=javascript:alert(1)">`,
68 // Base href hijack.
69 `<base href="javascript:">`,
70 // MathML / annotation.
71 `<math><annotation-xml encoding="text/html"><script>alert(1)</script></annotation-xml></math>`,
72 // CSS expression (legacy IE).
73 `<div style="width: expression(alert(1))">x</div>`,
74 // Nested fenced code with a script.
75 "```\n<script>alert(1)</script>\n```",
76 // Markdown link href with newlines.
77 "[x](\njavascript:alert(1))",
78 // Image with javascript:.
79 `![x](javascript:alert(1))`,
80 // HTML entities in URI.
81 `[x](java&#0000115;cript:alert(1))`,
82 // Hex / decimal entities in href attribute.
83 `<a href="javasc&#x72;ipt:alert(1)">x</a>`,
84 // Tab/newline obfuscation.
85 "<a href=\"java\tscript:alert(1)\">x</a>",
86 "<a href=\"java\nscript:alert(1)\">x</a>",
87 // Polyglot HTML+SVG.
88 `<svg/onload=alert(1)>`,
89 // Anchor with target=_blank but no rel (we want rel auto-set).
90 `<a href="https://evil.com" target="_blank">x</a>`,
91 }
92 for i, src := range vectors {
93 out, _, _, err := Render(context.Background(), []byte(src), Options{})
94 if err != nil {
95 t.Fatalf("vector %d render error: %v", i, err)
96 }
97 // Lower-case for case-insensitive substring search. We
98 // distinguish "executable surface" from "harmless text".
99 // Plain-text "javascript:" in prose is safe; "javascript:"
100 // inside href/src is an XSS — guard the latter shape only.
101 s := strings.ToLower(string(out))
102 for _, bad := range []string{
103 "<script", "</script>",
104 `href="javascript:`, `href='javascript:`,
105 `src="javascript:`, `src='javascript:`,
106 `href="vbscript:`, `src="vbscript:`,
107 `href="data:`, `src="data:text`, `src="data:image`,
108 " onerror=", " onload=", " onclick=", " onmouseover=",
109 "<iframe", "<object", "<embed",
110 "<style", "<base ", "<meta ",
111 "<annotation-xml", "expression(",
112 } {
113 if strings.Contains(s, bad) {
114 t.Errorf("vector %d (%q): rendered HTML contains %q\nout=%q", i, src, bad, out)
115 }
116 }
117 }
118 }
119
120 // TestRender_AllowsSafeHTML ensures the strict policy doesn't strip
121 // `<details>`, `<summary>`, `<kbd>`, `<sup>`, `<sub>`, task-list
122 // checkboxes, language-* class on code blocks, or auto-heading IDs.
123 func TestRender_AllowsSafeHTML(t *testing.T) {
124 t.Parallel()
125 cases := []struct {
126 name string
127 src string
128 mustContain []string
129 }{
130 {
131 "details + summary",
132 "<details><summary>click</summary>secret</details>",
133 []string{"<details>", "<summary>", "click", "secret"},
134 },
135 {
136 "kbd",
137 "press <kbd>Ctrl</kbd>+<kbd>C</kbd>",
138 []string{"<kbd>Ctrl</kbd>", "<kbd>C</kbd>"},
139 },
140 {
141 "sup/sub",
142 "x<sup>2</sup> + y<sub>i</sub>",
143 []string{"<sup>2</sup>", "<sub>i</sub>"},
144 },
145 {
146 "task list",
147 "- [x] done\n- [ ] not yet\n",
148 []string{"<input", "checkbox", "disabled"},
149 },
150 {
151 "fenced code with language",
152 "```go\nfmt.Println(\"hi\")\n```",
153 []string{`class="language-go"`},
154 },
155 {
156 "heading anchor id",
157 "# Hello world",
158 []string{`id="hello-world"`},
159 },
160 {
161 "GFM table",
162 "| a | b |\n|---|---|\n| 1 | 2 |\n",
163 []string{"<table>", "<th>a</th>", "<td>1</td>"},
164 },
165 {
166 "strikethrough",
167 "~~obsolete~~",
168 []string{"<del>obsolete</del>"},
169 },
170 {
171 "autolink",
172 "https://example.com",
173 []string{`href="https://example.com"`},
174 },
175 }
176 for _, c := range cases {
177 c := c
178 t.Run(c.name, func(t *testing.T) {
179 t.Parallel()
180 out, _, _, err := Render(context.Background(), []byte(c.src), Options{})
181 if err != nil {
182 t.Fatalf("render: %v", err)
183 }
184 s := string(out)
185 for _, want := range c.mustContain {
186 if !strings.Contains(s, want) {
187 t.Errorf("expected %q in output, got %q", want, s)
188 }
189 }
190 })
191 }
192 }
193
194 // TestRender_MentionResolution checks that @user resolves when the
195 // resolver returns ok and stays plain text otherwise.
196 func TestRender_MentionResolution(t *testing.T) {
197 t.Parallel()
198 resolver := func(_ context.Context, name string) (string, bool) {
199 if name == "alice" {
200 return "/alice", true
201 }
202 return "", false
203 }
204 out, _, mentions, err := Render(context.Background(), []byte("hi @alice and @bob"), Options{
205 Resolvers: Resolvers{User: resolver},
206 })
207 if err != nil {
208 t.Fatalf("render: %v", err)
209 }
210 s := string(out)
211 if !strings.Contains(s, `href="/alice"`) {
212 t.Errorf("expected @alice link, got %q", s)
213 }
214 if strings.Contains(s, `href="/bob"`) {
215 t.Errorf("@bob should not link, got %q", s)
216 }
217 if len(mentions) != 1 || mentions[0].Username != "alice" {
218 t.Errorf("expected 1 mention (alice), got %v", mentions)
219 }
220 }
221
222 // TestRender_TeamMentionResolution: @org/team renders via the Team
223 // resolver and falls back to plain text when the resolver declines
224 // (e.g. secret team invisible to viewer). S31.
225 func TestRender_TeamMentionResolution(t *testing.T) {
226 t.Parallel()
227 teamResolver := func(_ context.Context, org, team string, _ int64) (string, bool) {
228 if org == "acme" && team == "eng" {
229 return "/acme/teams/eng", true
230 }
231 return "", false
232 }
233 out, _, _, err := Render(context.Background(), []byte("ping @acme/eng and @acme/secret here"), Options{
234 Resolvers: Resolvers{Team: teamResolver},
235 })
236 if err != nil {
237 t.Fatalf("render: %v", err)
238 }
239 s := string(out)
240 if !strings.Contains(s, `href="/acme/teams/eng"`) {
241 t.Errorf("expected @acme/eng link, got %q", s)
242 }
243 if strings.Contains(s, `href="/acme/teams/secret"`) {
244 t.Errorf("@acme/secret should not link, got %q", s)
245 }
246 }
247
248 // TestRender_IssueRefResolution checks both same-repo and cross-repo
249 // refs, and that an unresolvable ref renders as plain text (no link).
250 func TestRender_IssueRefResolution(t *testing.T) {
251 t.Parallel()
252 resolver := func(_ context.Context, owner, name string, num int64, _ int64) (string, bool) {
253 // Same-repo refs leave owner+name empty.
254 if owner == "" && name == "" && num == 7 {
255 return "/o/r/issues/7", true
256 }
257 if owner == "alice" && name == "proj" && num == 3 {
258 return "/alice/proj/issues/3", true
259 }
260 return "", false
261 }
262 out, refs, _, err := Render(context.Background(), []byte("see #7 and alice/proj#3, but not bob/x#9"), Options{
263 Resolvers: Resolvers{Issue: resolver},
264 })
265 if err != nil {
266 t.Fatalf("render: %v", err)
267 }
268 s := string(out)
269 if !strings.Contains(s, `href="/o/r/issues/7"`) {
270 t.Errorf("expected #7 link, got %q", s)
271 }
272 if !strings.Contains(s, `href="/alice/proj/issues/3"`) {
273 t.Errorf("expected alice/proj#3 link, got %q", s)
274 }
275 if strings.Contains(s, `href="/bob/x/issues/9"`) {
276 t.Errorf("bob/x#9 should not link, got %q", s)
277 }
278 if len(refs) != 2 {
279 t.Errorf("expected 2 refs, got %v", refs)
280 }
281 }
282
283 // TestRender_RefsInsideCodeAreInert confirms that #N inside inline
284 // code or fenced code stays as text.
285 func TestRender_RefsInsideCodeAreInert(t *testing.T) {
286 t.Parallel()
287 resolver := func(_ context.Context, owner, name string, num int64, _ int64) (string, bool) {
288 return "/should/not/appear", true
289 }
290 src := "Inline `#7` and:\n\n```\nblock #7 here\n```"
291 out, refs, _, err := Render(context.Background(), []byte(src), Options{
292 Resolvers: Resolvers{Issue: resolver},
293 })
294 if err != nil {
295 t.Fatalf("render: %v", err)
296 }
297 if strings.Contains(string(out), "/should/not/appear") {
298 t.Errorf("ref leaked into code block: %q", out)
299 }
300 if len(refs) != 0 {
301 t.Errorf("expected 0 refs inside code, got %v", refs)
302 }
303 }
304
305 // TestRender_EmojiShortcodes checks the curated set works.
306 func TestRender_EmojiShortcodes(t *testing.T) {
307 t.Parallel()
308 out, _, _, err := Render(context.Background(), []byte("ship it :rocket: :+1: :notrealemoji:"), Options{})
309 if err != nil {
310 t.Fatalf("render: %v", err)
311 }
312 s := string(out)
313 if !strings.Contains(s, "🚀") {
314 t.Errorf("expected rocket emoji in output, got %q", s)
315 }
316 if !strings.Contains(s, "👍") {
317 t.Errorf("expected +1 emoji in output, got %q", s)
318 }
319 if !strings.Contains(s, ":notrealemoji:") {
320 t.Errorf("unknown shortcode should pass through, got %q", s)
321 }
322 }
323
324 // TestRender_InputTooLarge enforces the renderer's defensive cap.
325 func TestRender_InputTooLarge(t *testing.T) {
326 t.Parallel()
327 big := make([]byte, MaxRenderInputBytes+1)
328 for i := range big {
329 big[i] = 'x'
330 }
331 if _, _, _, err := Render(context.Background(), big, Options{}); err == nil {
332 t.Errorf("expected ErrInputTooLarge")
333 }
334 }
335
336 // TestRender_SoftBreakAsBR controls the comment-vs-readme newline
337 // handling.
338 func TestRender_SoftBreakAsBR(t *testing.T) {
339 t.Parallel()
340 src := "line one\nline two\n"
341 br, _, _, _ := Render(context.Background(), []byte(src), Options{SoftBreakAsBR: true})
342 noBR, _, _, _ := Render(context.Background(), []byte(src), Options{SoftBreakAsBR: false})
343 if !strings.Contains(string(br), "<br") {
344 t.Errorf("SoftBreakAsBR=true: expected <br>, got %q", br)
345 }
346 if strings.Contains(string(noBR), "<br") {
347 t.Errorf("SoftBreakAsBR=false: should not contain <br>, got %q", noBR)
348 }
349 }
350
351 // TestRender_BackCompatRenderHTML keeps the old shim working so the
352 // interim S17/S21/S22 callers don't need rewrite during S25.
353 func TestRender_BackCompatRenderHTML(t *testing.T) {
354 t.Parallel()
355 html, err := RenderHTML([]byte("**bold** text"))
356 if err != nil {
357 t.Fatalf("RenderHTML: %v", err)
358 }
359 if !strings.Contains(html, "<strong>bold</strong>") {
360 t.Errorf("expected bold, got %q", html)
361 }
362 }
363