Go · 11782 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 package markdown
4
5 import (
6 "context"
7 "strings"
8 "testing"
9 )
10
11 // TestRender_HostileInputs is the XSS-vector cheatsheet. Every
12 // fixture is a markdown body that *attempts* to inject executable
13 // JS through a different vector. The pass condition: the rendered
14 // HTML contains no `<script` tag, no `javascript:` URL, no event
15 // handler attribute (`on*`), and no `data:` URI.
16 //
17 // Add new vectors here when a CVE / advisory lands in goldmark or
18 // bluemonday — they're cheap to keep.
19 func TestRender_HostileInputs(t *testing.T) {
20 t.Parallel()
21 vectors := []string{
22 // Direct script tag.
23 `<script>alert(1)</script>`,
24 `<SCRIPT>alert(1)</SCRIPT>`,
25 `<script src="//evil.com/x.js"></script>`,
26 // Inline event handlers.
27 `<img src="x" onerror="alert(1)">`,
28 `<img src=x onerror=alert(1)>`,
29 `<a onmouseover="alert(1)">x</a>`,
30 `<body onload="alert(1)">`,
31 // Style with expressions.
32 `<style>body{background:url("javascript:alert(1)")}</style>`,
33 `<div style="background:url(javascript:alert(1))">x</div>`,
34 // javascript: links.
35 `[click](javascript:alert(1))`,
36 `<a href="javascript:alert(1)">x</a>`,
37 `<a href="JaVaScRiPt:alert(1)">x</a>`,
38 `[click](JAVASCRIPT:alert(1))`,
39 // data: URIs (we disallow even data:image).
40 `<img src="data:image/svg+xml;base64,PHN2Zz4=">`,
41 `[x](data:text/html,<script>alert(1)</script>)`,
42 // vbscript:.
43 `<a href="vbscript:msgbox(1)">x</a>`,
44 // SVG-embedded scripts.
45 `<svg><script>alert(1)</script></svg>`,
46 `<svg onload="alert(1)"></svg>`,
47 // iframes.
48 `<iframe src="//evil.com"></iframe>`,
49 `<iframe srcdoc="<script>alert(1)</script>"></iframe>`,
50 // HTML in markdown link text doesn't escape sanitizer.
51 `[<script>alert(1)</script>](https://example.com)`,
52 // Mutation XSS via mismatched quotes.
53 `<a href="x"onmouseover="alert(1)">x</a>`,
54 // Encoded payloads.
55 `<a href="&#x6A;avascript:alert(1)">x</a>`,
56 `<a href="&#106;avascript:alert(1)">x</a>`,
57 // Backticked code-like content shouldn't escape.
58 "`<script>alert(1)</script>`",
59 // Embedded in autolinks.
60 `<javascript:alert(1)>`,
61 // Object/embed.
62 `<object data="x.swf"></object>`,
63 `<embed src="x.swf">`,
64 // Form/button with formaction.
65 `<form><button formaction="javascript:alert(1)">x</button></form>`,
66 // Meta refresh.
67 `<meta http-equiv="refresh" content="0; url=javascript:alert(1)">`,
68 // Base href hijack.
69 `<base href="javascript:">`,
70 // MathML / annotation.
71 `<math><annotation-xml encoding="text/html"><script>alert(1)</script></annotation-xml></math>`,
72 // CSS expression (legacy IE).
73 `<div style="width: expression(alert(1))">x</div>`,
74 // Nested fenced code with a script.
75 "```\n<script>alert(1)</script>\n```",
76 // Markdown link href with newlines.
77 "[x](\njavascript:alert(1))",
78 // Image with javascript:.
79 `![x](javascript:alert(1))`,
80 // HTML entities in URI.
81 `[x](java&#0000115;cript:alert(1))`,
82 // Hex / decimal entities in href attribute.
83 `<a href="javasc&#x72;ipt:alert(1)">x</a>`,
84 // Tab/newline obfuscation.
85 "<a href=\"java\tscript:alert(1)\">x</a>",
86 "<a href=\"java\nscript:alert(1)\">x</a>",
87 // Polyglot HTML+SVG.
88 `<svg/onload=alert(1)>`,
89 // Anchor with target=_blank but no rel (we want rel auto-set).
90 `<a href="https://evil.com" target="_blank">x</a>`,
91 }
92 for i, src := range vectors {
93 out, _, _, err := Render(context.Background(), []byte(src), Options{})
94 if err != nil {
95 t.Fatalf("vector %d render error: %v", i, err)
96 }
97 // Lower-case for case-insensitive substring search. We
98 // distinguish "executable surface" from "harmless text".
99 // Plain-text "javascript:" in prose is safe; "javascript:"
100 // inside href/src is an XSS — guard the latter shape only.
101 s := strings.ToLower(string(out))
102 for _, bad := range []string{
103 "<script", "</script>",
104 `href="javascript:`, `href='javascript:`,
105 `src="javascript:`, `src='javascript:`,
106 `href="vbscript:`, `src="vbscript:`,
107 `href="data:`, `src="data:text`, `src="data:image`,
108 " onerror=", " onload=", " onclick=", " onmouseover=",
109 "<iframe", "<object", "<embed",
110 "<style", "<base ", "<meta ",
111 "<annotation-xml", "expression(",
112 } {
113 if strings.Contains(s, bad) {
114 t.Errorf("vector %d (%q): rendered HTML contains %q\nout=%q", i, src, bad, out)
115 }
116 }
117 }
118 }
119
120 // TestRender_AllowsSafeHTML ensures the strict policy doesn't strip
121 // `<details>`, `<summary>`, `<kbd>`, `<sup>`, `<sub>`, task-list
122 // checkboxes, language-* class on code blocks, or auto-heading IDs.
123 func TestRender_AllowsSafeHTML(t *testing.T) {
124 t.Parallel()
125 cases := []struct {
126 name string
127 src string
128 mustContain []string
129 }{
130 {
131 "details + summary",
132 "<details><summary>click</summary>secret</details>",
133 []string{"<details>", "<summary>", "click", "secret"},
134 },
135 {
136 "kbd",
137 "press <kbd>Ctrl</kbd>+<kbd>C</kbd>",
138 []string{"<kbd>Ctrl</kbd>", "<kbd>C</kbd>"},
139 },
140 {
141 "sup/sub",
142 "x<sup>2</sup> + y<sub>i</sub>",
143 []string{"<sup>2</sup>", "<sub>i</sub>"},
144 },
145 {
146 "task list",
147 "- [x] done\n- [ ] not yet\n",
148 []string{"<input", "checkbox", "disabled"},
149 },
150 {
151 "fenced code with language",
152 "```go\nfmt.Println(\"hi\")\n```",
153 []string{`class="language-go"`},
154 },
155 {
156 "heading anchor id",
157 "# Hello world",
158 []string{`id="hello-world"`},
159 },
160 {
161 "readme presentation html",
162 `<p align="center"><img src="logo.svg" alt="" width="120"></p><h1 align="center">shithub</h1>`,
163 []string{`<p align="center">`, `<img`, `src="logo.svg"`, `width="120"`, `<h1 align="center">`},
164 },
165 {
166 "GFM table",
167 "| a | b |\n|---|---|\n| 1 | 2 |\n",
168 []string{"<table>", "<th>a</th>", "<td>1</td>"},
169 },
170 {
171 "strikethrough",
172 "~~obsolete~~",
173 []string{"<del>obsolete</del>"},
174 },
175 {
176 "autolink",
177 "https://example.com",
178 []string{`href="https://example.com"`},
179 },
180 }
181 for _, c := range cases {
182 c := c
183 t.Run(c.name, func(t *testing.T) {
184 t.Parallel()
185 out, _, _, err := Render(context.Background(), []byte(c.src), Options{})
186 if err != nil {
187 t.Fatalf("render: %v", err)
188 }
189 s := string(out)
190 for _, want := range c.mustContain {
191 if !strings.Contains(s, want) {
192 t.Errorf("expected %q in output, got %q", want, s)
193 }
194 }
195 })
196 }
197 }
198
199 // TestRender_MentionResolution checks that @user resolves when the
200 // resolver returns ok and stays plain text otherwise.
201 func TestRender_MentionResolution(t *testing.T) {
202 t.Parallel()
203 resolver := func(_ context.Context, name string) (string, bool) {
204 if name == "alice" {
205 return "/alice", true
206 }
207 return "", false
208 }
209 out, _, mentions, err := Render(context.Background(), []byte("hi @alice and @bob"), Options{
210 Resolvers: Resolvers{User: resolver},
211 })
212 if err != nil {
213 t.Fatalf("render: %v", err)
214 }
215 s := string(out)
216 if !strings.Contains(s, `href="/alice"`) {
217 t.Errorf("expected @alice link, got %q", s)
218 }
219 if strings.Contains(s, `href="/bob"`) {
220 t.Errorf("@bob should not link, got %q", s)
221 }
222 if len(mentions) != 1 || mentions[0].Username != "alice" {
223 t.Errorf("expected 1 mention (alice), got %v", mentions)
224 }
225 }
226
227 // TestRender_TeamMentionResolution: @org/team renders via the Team
228 // resolver and falls back to plain text when the resolver declines
229 // (e.g. secret team invisible to viewer). S31.
230 func TestRender_TeamMentionResolution(t *testing.T) {
231 t.Parallel()
232 teamResolver := func(_ context.Context, org, team string, _ int64) (string, bool) {
233 if org == "acme" && team == "eng" {
234 return "/acme/teams/eng", true
235 }
236 return "", false
237 }
238 out, _, _, err := Render(context.Background(), []byte("ping @acme/eng and @acme/secret here"), Options{
239 Resolvers: Resolvers{Team: teamResolver},
240 })
241 if err != nil {
242 t.Fatalf("render: %v", err)
243 }
244 s := string(out)
245 if !strings.Contains(s, `href="/acme/teams/eng"`) {
246 t.Errorf("expected @acme/eng link, got %q", s)
247 }
248 if strings.Contains(s, `href="/acme/teams/secret"`) {
249 t.Errorf("@acme/secret should not link, got %q", s)
250 }
251 }
252
253 // TestRender_IssueRefResolution checks both same-repo and cross-repo
254 // refs, and that an unresolvable ref renders as plain text (no link).
255 func TestRender_IssueRefResolution(t *testing.T) {
256 t.Parallel()
257 resolver := func(_ context.Context, owner, name string, num int64, _ int64) (string, bool) {
258 // Same-repo refs leave owner+name empty.
259 if owner == "" && name == "" && num == 7 {
260 return "/o/r/issues/7", true
261 }
262 if owner == "alice" && name == "proj" && num == 3 {
263 return "/alice/proj/issues/3", true
264 }
265 return "", false
266 }
267 out, refs, _, err := Render(context.Background(), []byte("see #7 and alice/proj#3, but not bob/x#9"), Options{
268 Resolvers: Resolvers{Issue: resolver},
269 })
270 if err != nil {
271 t.Fatalf("render: %v", err)
272 }
273 s := string(out)
274 if !strings.Contains(s, `href="/o/r/issues/7"`) {
275 t.Errorf("expected #7 link, got %q", s)
276 }
277 if !strings.Contains(s, `href="/alice/proj/issues/3"`) {
278 t.Errorf("expected alice/proj#3 link, got %q", s)
279 }
280 if strings.Contains(s, `href="/bob/x/issues/9"`) {
281 t.Errorf("bob/x#9 should not link, got %q", s)
282 }
283 if len(refs) != 2 {
284 t.Errorf("expected 2 refs, got %v", refs)
285 }
286 }
287
288 // TestRender_RefsInsideCodeAreInert confirms that #N inside inline
289 // code or fenced code stays as text.
290 func TestRender_RefsInsideCodeAreInert(t *testing.T) {
291 t.Parallel()
292 resolver := func(_ context.Context, owner, name string, num int64, _ int64) (string, bool) {
293 return "/should/not/appear", true
294 }
295 src := "Inline `#7` and:\n\n```\nblock #7 here\n```"
296 out, refs, _, err := Render(context.Background(), []byte(src), Options{
297 Resolvers: Resolvers{Issue: resolver},
298 })
299 if err != nil {
300 t.Fatalf("render: %v", err)
301 }
302 if strings.Contains(string(out), "/should/not/appear") {
303 t.Errorf("ref leaked into code block: %q", out)
304 }
305 if len(refs) != 0 {
306 t.Errorf("expected 0 refs inside code, got %v", refs)
307 }
308 }
309
310 // TestRender_EmojiShortcodes checks the curated set works.
311 func TestRender_EmojiShortcodes(t *testing.T) {
312 t.Parallel()
313 out, _, _, err := Render(context.Background(), []byte("ship it :rocket: :+1: :notrealemoji:"), Options{})
314 if err != nil {
315 t.Fatalf("render: %v", err)
316 }
317 s := string(out)
318 if !strings.Contains(s, "🚀") {
319 t.Errorf("expected rocket emoji in output, got %q", s)
320 }
321 if !strings.Contains(s, "👍") {
322 t.Errorf("expected +1 emoji in output, got %q", s)
323 }
324 if !strings.Contains(s, ":notrealemoji:") {
325 t.Errorf("unknown shortcode should pass through, got %q", s)
326 }
327 }
328
329 // TestRender_InputTooLarge enforces the renderer's defensive cap.
330 func TestRender_InputTooLarge(t *testing.T) {
331 t.Parallel()
332 big := make([]byte, MaxRenderInputBytes+1)
333 for i := range big {
334 big[i] = 'x'
335 }
336 if _, _, _, err := Render(context.Background(), big, Options{}); err == nil {
337 t.Errorf("expected ErrInputTooLarge")
338 }
339 }
340
341 // TestRender_SoftBreakAsBR controls the comment-vs-readme newline
342 // handling.
343 func TestRender_SoftBreakAsBR(t *testing.T) {
344 t.Parallel()
345 src := "line one\nline two\n"
346 br, _, _, _ := Render(context.Background(), []byte(src), Options{SoftBreakAsBR: true})
347 noBR, _, _, _ := Render(context.Background(), []byte(src), Options{SoftBreakAsBR: false})
348 if !strings.Contains(string(br), "<br") {
349 t.Errorf("SoftBreakAsBR=true: expected <br>, got %q", br)
350 }
351 if strings.Contains(string(noBR), "<br") {
352 t.Errorf("SoftBreakAsBR=false: should not contain <br>, got %q", noBR)
353 }
354 }
355
356 // TestRender_BackCompatRenderHTML keeps the old shim working so the
357 // interim S17/S21/S22 callers don't need rewrite during S25.
358 func TestRender_BackCompatRenderHTML(t *testing.T) {
359 t.Parallel()
360 html, err := RenderHTML([]byte("**bold** text"))
361 if err != nil {
362 t.Fatalf("RenderHTML: %v", err)
363 }
364 if !strings.Contains(html, "<strong>bold</strong>") {
365 t.Errorf("expected bold, got %q", html)
366 }
367 }
368