Go · 12676 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 // Package extensions hosts the AST transformer that adds shithub-
4 // specific inline patterns (`@user`, `#N`, `owner/repo#N`, commit
5 // SHAs, emoji shortcodes) to Goldmark's parsed text without ever
6 // touching the contents of code blocks or inline code.
7 //
8 // Approach: a single `parser.ASTTransformer` walks the document
9 // after parsing, visiting only `*ast.Text` nodes whose ancestors
10 // are NOT code/codespan/autolink/link nodes. Each visited text node
11 // is run through one combined regex; matches are replaced with
12 // `*ast.Link` (mention/ref/commit) or `*ast.String` (emoji) nodes,
13 // with the surrounding text preserved as `*ast.String` segments.
14 //
15 // Why an ASTTransformer instead of inline parsers: inline parsers
16 // run during the main parse pass and need a `Trigger()` byte set
17 // plus careful interaction with Goldmark's existing inline
18 // disambiguation. The transformer approach is simpler, well-trodden
19 // in other Go markdown stacks, and produces equivalent output for
20 // every input we care about.
21 package extensions
22
23 import (
24 "bytes"
25 "context"
26 "regexp"
27 "strconv"
28
29 "github.com/yuin/goldmark"
30 "github.com/yuin/goldmark/ast"
31 "github.com/yuin/goldmark/parser"
32 "github.com/yuin/goldmark/text"
33 "github.com/yuin/goldmark/util"
34 )
35
// Resolvers wires the transformer against the runtime. The fields
// are independent so the parent package can decide which flavors
// to enable. A nil resolver means "render this kind as plain text"
// (no link, no error).
//
// All resolvers MUST be visibility-aware. The transformer does not
// re-check visibility — it trusts the resolver's `ok` to gate
// existence: when `ok` is false the matched text is emitted
// unchanged and nothing is recorded in Refs/Mentions.
type Resolvers struct {
	// User resolves an `@username` mention to a profile link.
	// NOTE(review): unlike Issue and Team, User receives no viewer
	// ID, so any visibility decision has to come from ctx or be
	// viewer-independent — confirm this is intended.
	User func(ctx context.Context, username string) (href string, ok bool)
	// Issue covers both same-repo (#N, called with ownerHint == ""
	// and repoHint == "") and cross-repo (owner/repo#N) references.
	// viewerUserID is Options.ViewerUserID.
	Issue func(ctx context.Context, ownerHint, repoHint string, number int64, viewerUserID int64) (href string, ok bool)
	// Commit is invoked only when RepoOwner+RepoName are both set
	// (a same-repo render) and the matched token is a 7-40 char
	// lowercase hex string at a word boundary. fullSHA is recorded
	// in the emitted Ref.
	Commit func(ctx context.Context, repoOwner, repoName, shaPrefix string) (href, fullSHA string, ok bool)
	// Team resolves an `@org/team` mention to the team page link.
	// Visibility-aware: a secret team the viewer can't see should
	// return `ok=false` so the renderer falls back to plain text
	// (no existence leak).
	Team func(ctx context.Context, orgSlug, teamSlug string, viewerUserID int64) (href string, ok bool)
}
59
// Options is the per-render config consumed by the transformer.
type Options struct {
	// Ctx is handed unchanged to every resolver call.
	Ctx context.Context
	// RepoOwner and RepoName identify the repository being rendered.
	// Commit links are only attempted when both are non-empty.
	RepoOwner string
	RepoName  string
	// ViewerUserID is forwarded to the Issue and Team resolvers.
	ViewerUserID int64
	// Resolvers holds the lookup callbacks; nil entries disable
	// linking for that kind.
	Resolvers Resolvers
	// Refs and Mentions accumulate resolved references for the caller.
	// Pointers so the transformer can append. A nil pointer disables
	// collection for that kind. Refs receives resolved issue and
	// commit references; Mentions receives resolved user mentions.
	Refs     *[]Ref
	Mentions *[]Mention
}
72
// Ref / Mention mirror the parent-package types; we redeclare to
// avoid an import cycle.
//
// Ref describes one resolved issue or commit reference appended to
// Options.Refs by the transformer.
type Ref struct {
	// Kind is "issue" or "commit" as written by this package.
	Kind string
	// Owner and Repo are the hints exactly as typed for issue refs;
	// both are empty for same-repo `#N` refs and for commit refs.
	Owner string
	Repo  string
	// Number is the parsed issue number (issue refs only).
	Number int64
	// FullSHA is the resolver-supplied full hash (commit refs only).
	FullSHA string
	// Href is the link target returned by the resolver.
	Href string
}
83
// Mention describes one resolved `@username` mention appended to
// Options.Mentions by the transformer.
type Mention struct {
	// Username is the matched name without the leading `@`.
	Username string
	// Href is the link target returned by the User resolver.
	Href string
}
88
// reCombined matches every pattern in one pass. Order in the
// alternation matters because the `@org/team` branch is more
// specific than `@user` and must come first — otherwise `@org` is
// captured by the user branch and the trailing `/team` is left
// behind as unstructured text.
//
// Most branches consume one leading boundary character (or match at
// the start of the text); the caller re-emits that character as plain
// text. The emoji branch has no such boundary.
//
// Capture-index map (each FindAllSubmatchIndex hit is a flat slice of
// start/end offset pairs; indices below are the START of the named
// group, and a negative value means the group did not participate):
//
//	#2 / #4 / #6   cross-repo: owner / repo / number
//	#8             same-repo: number
//	#10 / #12      team mention: org / team (S31)
//	#14            user mention: username
//	#16            commit prefix
//	#18            emoji name
var reCombined = regexp.MustCompile(`` +
	// cross-repo: alice/proj#3 — left boundary required so we don't
	// chew into a preceding word.
	`(?:^|[^\w/])([A-Za-z0-9][A-Za-z0-9._-]*)/([A-Za-z0-9][A-Za-z0-9._-]*)#([0-9]{1,9})\b` +
	// or same-repo: #3
	`|(?:^|[^\w/])#([0-9]{1,9})\b` +
	// or team mention: @org/team — comes BEFORE @user so the
	// trailing `/team` doesn't get split off as text. Slug shape
	// matches users.username + teams.slug.
	`|(?:^|[^\w])@([a-z0-9](?:[a-z0-9-]{0,37}[a-z0-9])?)/([a-z0-9](?:[a-z0-9._-]{0,48}[a-z0-9])?)\b` +
	// or user mention: @alice
	`|(?:^|[^\w])@([A-Za-z0-9][A-Za-z0-9_-]{0,38})\b` +
	// or commit SHA: 7–40 lowercase hex
	`|(?:^|[^\w/])([0-9a-f]{7,40})\b` +
	// or emoji shortcode: :smile:
	`|:([a-z0-9_+\-]+):`,
)
121
// Extension is a goldmark.Extender that registers the AST transformer.
// Opts is shared with the transformer it registers; a nil Opts makes
// the transformer a no-op.
type Extension struct{ Opts *Options }
124
125 // New constructs the extender with the given options.
126 func New(opts *Options) goldmark.Extender { return &Extension{Opts: opts} }
127
128 // Extend implements goldmark.Extender.
129 func (e *Extension) Extend(m goldmark.Markdown) {
130 m.Parser().AddOptions(parser.WithASTTransformers(
131 util.Prioritized(&transformer{opts: e.Opts}, 999),
132 ))
133 }
134
// transformer is the parser.ASTTransformer registered by Extension.
// opts is the per-render configuration; Transform does nothing when
// it is nil.
type transformer struct{ opts *Options }
136
137 // Transform walks the document and replaces matched text segments.
138 func (t *transformer) Transform(doc *ast.Document, reader text.Reader, _ parser.Context) {
139 if t.opts == nil {
140 return
141 }
142 source := reader.Source()
143 _ = ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
144 if !entering {
145 return ast.WalkContinue, nil
146 }
147 // Skip subtrees that should never be linkified.
148 switch n.(type) {
149 case *ast.CodeSpan, *ast.AutoLink, *ast.Link, *ast.Image,
150 *ast.FencedCodeBlock, *ast.CodeBlock, *ast.RawHTML, *ast.HTMLBlock:
151 return ast.WalkSkipChildren, nil
152 }
153 txt, ok := n.(*ast.Text)
154 if !ok {
155 return ast.WalkContinue, nil
156 }
157 t.replaceText(txt, source)
158 return ast.WalkContinue, nil
159 })
160 }
161
162 // replaceText finds matches in the segment of `txt` and inserts
163 // new sibling nodes (string runs + links) before the original text;
164 // the original text is removed once everything's stitched in.
165 func (t *transformer) replaceText(txt *ast.Text, source []byte) {
166 body := txt.Segment.Value(source)
167 matches := reCombined.FindAllSubmatchIndex(body, -1)
168 if len(matches) == 0 {
169 return
170 }
171 parent := txt.Parent()
172 if parent == nil {
173 return
174 }
175
176 cursor := 0
177 for _, m := range matches {
178 matchStart, matchEnd := m[0], m[1]
179
180 // Determine which alternation captured + where the visible
181 // content starts (excluding the regex-consumed boundary
182 // char, if any).
183 var (
184 isCrossRepo = m[2] >= 0
185 isSameRepo = m[8] >= 0
186 isTeamMen = m[10] >= 0
187 isMention = m[14] >= 0
188 isCommit = m[16] >= 0
189 isEmoji = m[18] >= 0
190 )
191 var contentStart int
192 switch {
193 case isCrossRepo:
194 // m[2] is the owner-start position (after the regex's
195 // leading non-word boundary char). Same shape as same-repo
196 // — emit the boundary char as plain text, then the link.
197 contentStart = m[2]
198 case isSameRepo:
199 contentStart = m[8] - 1 // include `#`
200 case isTeamMen:
201 contentStart = m[10] - 1 // include `@`
202 case isMention:
203 contentStart = m[14] - 1 // include `@`
204 case isCommit:
205 contentStart = m[16]
206 case isEmoji:
207 contentStart = m[18] - 1 // include leading `:`
208 }
209
210 // Emit (a) any text between the previous cursor and the
211 // match start, then (b) the consumed-but-not-content
212 // boundary char (when contentStart > matchStart). Both into
213 // the parent before the original text node.
214 if matchStart > cursor {
215 t.insertText(parent, txt, body[cursor:matchStart])
216 }
217 if contentStart > matchStart {
218 t.insertText(parent, txt, body[matchStart:contentStart])
219 }
220
221 // Now emit the resolved (or fallback-plain) match content.
222 display := body[contentStart:matchEnd]
223 switch {
224 case isCrossRepo:
225 owner := string(body[m[2]:m[3]])
226 repo := string(body[m[4]:m[5]])
227 numStr := string(body[m[6]:m[7]])
228 if !t.appendIssueLink(parent, txt, owner, repo, numStr, display) {
229 t.insertText(parent, txt, display)
230 }
231 case isSameRepo:
232 numStr := string(body[m[8]:m[9]])
233 if !t.appendIssueLink(parent, txt, "", "", numStr, display) {
234 t.insertText(parent, txt, display)
235 }
236 case isTeamMen:
237 orgSlug := string(body[m[10]:m[11]])
238 teamSlug := string(body[m[12]:m[13]])
239 if !t.appendTeamMentionLink(parent, txt, orgSlug, teamSlug, display) {
240 t.insertText(parent, txt, display)
241 }
242 case isMention:
243 name := string(body[m[14]:m[15]])
244 if !t.appendMentionLink(parent, txt, name, display) {
245 t.insertText(parent, txt, display)
246 }
247 case isCommit:
248 sha := string(body[m[16]:m[17]])
249 if !t.appendCommitLink(parent, txt, sha, display) {
250 t.insertText(parent, txt, display)
251 }
252 case isEmoji:
253 name := string(body[m[18]:m[19]])
254 if uni, ok := lookupEmoji(name); ok {
255 t.insertText(parent, txt, []byte(uni))
256 } else {
257 t.insertText(parent, txt, display)
258 }
259 }
260 cursor = matchEnd
261 }
262 // Trailing text after the last match.
263 if cursor < len(body) {
264 t.insertText(parent, txt, body[cursor:])
265 }
266 parent.RemoveChild(parent, txt)
267 }
268
269 // insertText appends a string node before the original text node
270 // (which is removed at the end of replaceText).
271 func (t *transformer) insertText(parent, before ast.Node, b []byte) {
272 if len(b) == 0 {
273 return
274 }
275 s := ast.NewString(append([]byte(nil), b...))
276 parent.InsertBefore(parent, before, s)
277 }
278
279 // appendIssueLink resolves an issue/PR ref and inserts a Link node.
280 // `display` is the visible text the user typed (e.g. "#42" or
281 // "alice/proj#5"). Returns false when the resolver declines (in
282 // which case the caller renders the display text as plain text —
283 // no link, no existence leak).
284 func (t *transformer) appendIssueLink(parent, before ast.Node, owner, repo, numStr string, display []byte) bool {
285 if t.opts.Resolvers.Issue == nil {
286 return false
287 }
288 num, err := strconv.ParseInt(numStr, 10, 64)
289 if err != nil {
290 return false
291 }
292 href, ok := t.opts.Resolvers.Issue(t.opts.Ctx, owner, repo, num, t.opts.ViewerUserID)
293 if !ok {
294 return false
295 }
296 link := ast.NewLink()
297 link.Destination = []byte(href)
298 link.AppendChild(link, ast.NewString(append([]byte(nil), display...)))
299 parent.InsertBefore(parent, before, link)
300
301 if t.opts.Refs != nil {
302 *t.opts.Refs = append(*t.opts.Refs, Ref{
303 Kind: "issue",
304 Owner: owner,
305 Repo: repo,
306 Number: num,
307 Href: href,
308 })
309 }
310 return true
311 }
312
313 // appendTeamMentionLink resolves an @org/team and inserts a Link
314 // node. Returns false on any failure (unknown org, secret team
315 // invisible to viewer, no resolver wired) — the caller renders the
316 // matched text as-is.
317 func (t *transformer) appendTeamMentionLink(parent, before ast.Node, orgSlug, teamSlug string, display []byte) bool {
318 if t.opts.Resolvers.Team == nil {
319 return false
320 }
321 href, ok := t.opts.Resolvers.Team(t.opts.Ctx, orgSlug, teamSlug, t.opts.ViewerUserID)
322 if !ok {
323 return false
324 }
325 link := ast.NewLink()
326 link.Destination = []byte(href)
327 link.AppendChild(link, ast.NewString(append([]byte(nil), display...)))
328 parent.InsertBefore(parent, before, link)
329 return true
330 }
331
332 // appendMentionLink resolves a @username and inserts a Link node.
333 func (t *transformer) appendMentionLink(parent, before ast.Node, username string, display []byte) bool {
334 if t.opts.Resolvers.User == nil {
335 return false
336 }
337 href, ok := t.opts.Resolvers.User(t.opts.Ctx, username)
338 if !ok {
339 return false
340 }
341 link := ast.NewLink()
342 link.Destination = []byte(href)
343 link.AppendChild(link, ast.NewString(append([]byte(nil), display...)))
344 parent.InsertBefore(parent, before, link)
345 if t.opts.Mentions != nil {
346 *t.opts.Mentions = append(*t.opts.Mentions, Mention{
347 Username: username,
348 Href: href,
349 })
350 }
351 return true
352 }
353
354 // appendCommitLink resolves a commit SHA prefix in the current repo.
355 func (t *transformer) appendCommitLink(parent, before ast.Node, shaPrefix string, display []byte) bool {
356 if t.opts.Resolvers.Commit == nil || t.opts.RepoOwner == "" || t.opts.RepoName == "" {
357 return false
358 }
359 href, full, ok := t.opts.Resolvers.Commit(t.opts.Ctx, t.opts.RepoOwner, t.opts.RepoName, shaPrefix)
360 if !ok {
361 return false
362 }
363 link := ast.NewLink()
364 link.Destination = []byte(href)
365 // Display the SHA as <code>; preserve the user's typed length.
366 codeText := append([]byte(nil), display...)
367 codeText = bytes.TrimSpace(codeText)
368 link.AppendChild(link, ast.NewCodeSpan())
369 cs := link.LastChild().(*ast.CodeSpan)
370 cs.AppendChild(cs, ast.NewString(codeText))
371 parent.InsertBefore(parent, before, link)
372
373 if t.opts.Refs != nil {
374 *t.opts.Refs = append(*t.opts.Refs, Ref{
375 Kind: "commit",
376 FullSHA: full,
377 Href: href,
378 })
379 }
380 return true
381 }
382
383