Go · 11185 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 // Package extensions hosts the AST transformer that adds shithub-
4 // specific inline patterns (`@user`, `#N`, `owner/repo#N`, commit
5 // SHAs, emoji shortcodes) to Goldmark's parsed text without ever
6 // touching the contents of code blocks or inline code.
7 //
// Approach: a single `parser.ASTTransformer` walks the document
// after parsing, visiting only `*ast.Text` nodes that are not inside
// a code span, code block, autolink, link, image, or raw HTML node.
// Each visited text node is run through one combined regex; matches
// are replaced with `*ast.Link` (mention/ref/commit) or `*ast.String`
// (emoji) nodes, with the surrounding text preserved as `*ast.String`
// segments.
14 //
15 // Why an ASTTransformer instead of inline parsers: inline parsers
16 // run during the main parse pass and need a `Trigger()` byte set
17 // plus careful interaction with Goldmark's existing inline
18 // disambiguation. The transformer approach is simpler, well-trodden
19 // in other Go markdown stacks, and produces equivalent output for
20 // every input we care about.
21 package extensions
22
23 import (
24 "bytes"
25 "context"
26 "regexp"
27 "strconv"
28 "strings"
29
30 "github.com/yuin/goldmark"
31 "github.com/yuin/goldmark/ast"
32 "github.com/yuin/goldmark/parser"
33 "github.com/yuin/goldmark/text"
34 "github.com/yuin/goldmark/util"
35 )
36
// Resolvers bundles the lookup callbacks the transformer uses to turn
// a matched token into a link target. Each field is optional and
// independent of the others: a nil callback disables linking for that
// kind of token, which is then emitted as plain text (no link, no
// error).
//
// Every callback MUST apply visibility rules itself. The transformer
// performs no visibility check of its own; it treats a false `ok` as
// "do not link" and nothing more.
type Resolvers struct {
	// User resolves an `@username` mention to an href.
	User func(ctx context.Context, username string) (href string, ok bool)

	// Issue resolves both same-repo references (`#N`, for which
	// ownerHint and repoHint are empty) and cross-repo references
	// (`owner/repo#N`).
	Issue func(ctx context.Context, ownerHint, repoHint string, number, viewerUserID int64) (href string, ok bool)

	// Commit resolves a commit SHA prefix. It is only called when
	// Options.RepoOwner and Options.RepoName are both set (a same-repo
	// render), and only for tokens that are 7-40 lowercase hex
	// characters at a word boundary.
	Commit func(ctx context.Context, repoOwner, repoName, shaPrefix string) (href, fullSHA string, ok bool)
}
55
56 // Options is the per-render config consumed by the transformer.
57 type Options struct {
58 Ctx context.Context
59 RepoOwner string
60 RepoName string
61 ViewerUserID int64
62 Resolvers Resolvers
63 // Refs and Mentions accumulate resolved references for the caller.
64 // Pointers so the transformer can append.
65 Refs *[]Ref
66 Mentions *[]Mention
67 }
68
// Ref records one resolved issue or commit reference. Like Mention,
// it mirrors the parent-package type and is redeclared here so this
// package does not import its parent (which would be an import
// cycle).
type Ref struct {
	// Kind is "issue" or "commit". Owner and Repo are the hints as
	// typed by the author; they are empty for same-repo `#N`
	// references and for commits.
	Kind, Owner, Repo string

	// Number is the issue number (issue refs only).
	Number int64

	// FullSHA is the resolved full commit SHA (commit refs only);
	// Href is the link target.
	FullSHA, Href string
}
79
// Mention records one resolved `@username` mention. It mirrors the
// parent-package type and is redeclared here to avoid an import
// cycle.
type Mention struct {
	Username, Href string
}
84
// reCombined recognises every supported shorthand in a single pass
// over a text segment. It is an alternation; the alternatives are
// tried in the order listed below. Several alternatives begin with a
// non-capturing left boundary, `(?:^|[^\w/])` or `(?:^|[^\w])`: the
// boundary character is part of the overall match, but not of any
// capture group, so the caller has to re-emit it as plain text.
//
// Capture groups:
//
//	#1-#3  cross-repo reference: owner, repo, number
//	#4     same-repo reference: number
//	#5     mention: username
//	#6     commit SHA prefix
//	#7     emoji shortcode name
var reCombined = func() *regexp.Regexp {
	alternatives := [...]string{
		// Cross-repo reference: alice/proj#3.
		`([A-Za-z0-9][A-Za-z0-9._-]*)/([A-Za-z0-9][A-Za-z0-9._-]*)#([0-9]{1,9})\b`,
		// Same-repo reference: #3. The character to its left must be
		// neither a word character nor `/`.
		`(?:^|[^\w/])#([0-9]{1,9})\b`,
		// Mention: @alice. The character to its left must not be a
		// word character.
		`(?:^|[^\w])@([A-Za-z0-9][A-Za-z0-9_-]{0,38})\b`,
		// Commit SHA: 7-40 lowercase hex characters with a word
		// boundary on both sides.
		`(?:^|[^\w/])([0-9a-f]{7,40})\b`,
		// Emoji shortcode: :smile:.
		`:([a-z0-9_+\-]+):`,
	}
	pattern := alternatives[0]
	for _, alt := range alternatives[1:] {
		pattern += `|` + alt
	}
	return regexp.MustCompile(pattern)
}()
107
108 // Extension is a goldmark.Extender that registers the AST transformer.
109 type Extension struct{ Opts *Options }
110
111 // New constructs the extender with the given options.
112 func New(opts *Options) goldmark.Extender { return &Extension{Opts: opts} }
113
114 // Extend implements goldmark.Extender.
115 func (e *Extension) Extend(m goldmark.Markdown) {
116 m.Parser().AddOptions(parser.WithASTTransformers(
117 util.Prioritized(&transformer{opts: e.Opts}, 999),
118 ))
119 }
120
121 type transformer struct{ opts *Options }
122
123 // Transform walks the document and replaces matched text segments.
124 func (t *transformer) Transform(doc *ast.Document, reader text.Reader, _ parser.Context) {
125 if t.opts == nil {
126 return
127 }
128 source := reader.Source()
129 _ = ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
130 if !entering {
131 return ast.WalkContinue, nil
132 }
133 // Skip subtrees that should never be linkified.
134 switch n.(type) {
135 case *ast.CodeSpan, *ast.AutoLink, *ast.Link, *ast.Image,
136 *ast.FencedCodeBlock, *ast.CodeBlock, *ast.RawHTML, *ast.HTMLBlock:
137 return ast.WalkSkipChildren, nil
138 }
139 txt, ok := n.(*ast.Text)
140 if !ok {
141 return ast.WalkContinue, nil
142 }
143 t.replaceText(txt, source)
144 return ast.WalkContinue, nil
145 })
146 }
147
148 // replaceText finds matches in the segment of `txt` and inserts
149 // new sibling nodes (string runs + links) before the original text;
150 // the original text is removed once everything's stitched in.
151 func (t *transformer) replaceText(txt *ast.Text, source []byte) {
152 body := txt.Segment.Value(source)
153 matches := reCombined.FindAllSubmatchIndex(body, -1)
154 if len(matches) == 0 {
155 return
156 }
157 parent := txt.Parent()
158 if parent == nil {
159 return
160 }
161
162 cursor := 0
163 for _, m := range matches {
164 matchStart, matchEnd := m[0], m[1]
165
166 // Determine which alternation captured + where the visible
167 // content starts (excluding the regex-consumed boundary
168 // char, if any).
169 var (
170 isCrossRepo = m[2] >= 0
171 isSameRepo = m[8] >= 0
172 isMention = m[10] >= 0
173 isCommit = m[12] >= 0
174 isEmoji = m[14] >= 0
175 )
176 var contentStart int
177 switch {
178 case isCrossRepo:
179 contentStart = m[2]
180 case isSameRepo:
181 contentStart = m[8] - 1 // include `#`
182 case isMention:
183 contentStart = m[10] - 1 // include `@`
184 case isCommit:
185 contentStart = m[12]
186 case isEmoji:
187 contentStart = m[14] - 1 // include leading `:`
188 }
189
190 // Emit (a) any text between the previous cursor and the
191 // match start, then (b) the consumed-but-not-content
192 // boundary char (when contentStart > matchStart). Both into
193 // the parent before the original text node.
194 if matchStart > cursor {
195 t.insertText(parent, txt, body[cursor:matchStart])
196 }
197 if contentStart > matchStart {
198 t.insertText(parent, txt, body[matchStart:contentStart])
199 }
200
201 // Now emit the resolved (or fallback-plain) match content.
202 display := body[contentStart:matchEnd]
203 switch {
204 case isCrossRepo:
205 owner := string(body[m[2]:m[3]])
206 repo := string(body[m[4]:m[5]])
207 numStr := string(body[m[6]:m[7]])
208 if !t.appendIssueLink(parent, txt, owner, repo, numStr, display) {
209 t.insertText(parent, txt, display)
210 }
211 case isSameRepo:
212 numStr := string(body[m[8]:m[9]])
213 if !t.appendIssueLink(parent, txt, "", "", numStr, display) {
214 t.insertText(parent, txt, display)
215 }
216 case isMention:
217 name := string(body[m[10]:m[11]])
218 if !t.appendMentionLink(parent, txt, name, display) {
219 t.insertText(parent, txt, display)
220 }
221 case isCommit:
222 sha := string(body[m[12]:m[13]])
223 if !t.appendCommitLink(parent, txt, sha, display) {
224 t.insertText(parent, txt, display)
225 }
226 case isEmoji:
227 name := string(body[m[14]:m[15]])
228 if uni, ok := lookupEmoji(name); ok {
229 t.insertText(parent, txt, []byte(uni))
230 } else {
231 t.insertText(parent, txt, display)
232 }
233 }
234 cursor = matchEnd
235 }
236 // Trailing text after the last match.
237 if cursor < len(body) {
238 t.insertText(parent, txt, body[cursor:])
239 }
240 parent.RemoveChild(parent, txt)
241 }
242
243 // insertText appends a string node before the original text node
244 // (which is removed at the end of replaceText).
245 func (t *transformer) insertText(parent, before ast.Node, b []byte) {
246 if len(b) == 0 {
247 return
248 }
249 s := ast.NewString(append([]byte(nil), b...))
250 parent.InsertBefore(parent, before, s)
251 }
252
253 // appendIssueLink resolves an issue/PR ref and inserts a Link node.
254 // `display` is the visible text the user typed (e.g. "#42" or
255 // "alice/proj#5"). Returns false when the resolver declines (in
256 // which case the caller renders the display text as plain text —
257 // no link, no existence leak).
258 func (t *transformer) appendIssueLink(parent, before ast.Node, owner, repo, numStr string, display []byte) bool {
259 if t.opts.Resolvers.Issue == nil {
260 return false
261 }
262 num, err := strconv.ParseInt(numStr, 10, 64)
263 if err != nil {
264 return false
265 }
266 href, ok := t.opts.Resolvers.Issue(t.opts.Ctx, owner, repo, num, t.opts.ViewerUserID)
267 if !ok {
268 return false
269 }
270 link := ast.NewLink()
271 link.Destination = []byte(href)
272 link.AppendChild(link, ast.NewString(append([]byte(nil), display...)))
273 parent.InsertBefore(parent, before, link)
274
275 if t.opts.Refs != nil {
276 *t.opts.Refs = append(*t.opts.Refs, Ref{
277 Kind: "issue",
278 Owner: owner,
279 Repo: repo,
280 Number: num,
281 Href: href,
282 })
283 }
284 return true
285 }
286
287 // appendMentionLink resolves a @username and inserts a Link node.
288 func (t *transformer) appendMentionLink(parent, before ast.Node, username string, display []byte) bool {
289 if t.opts.Resolvers.User == nil {
290 return false
291 }
292 href, ok := t.opts.Resolvers.User(t.opts.Ctx, username)
293 if !ok {
294 return false
295 }
296 link := ast.NewLink()
297 link.Destination = []byte(href)
298 link.AppendChild(link, ast.NewString(append([]byte(nil), display...)))
299 parent.InsertBefore(parent, before, link)
300 if t.opts.Mentions != nil {
301 *t.opts.Mentions = append(*t.opts.Mentions, Mention{
302 Username: username,
303 Href: href,
304 })
305 }
306 return true
307 }
308
309 // appendCommitLink resolves a commit SHA prefix in the current repo.
310 func (t *transformer) appendCommitLink(parent, before ast.Node, shaPrefix string, display []byte) bool {
311 if t.opts.Resolvers.Commit == nil || t.opts.RepoOwner == "" || t.opts.RepoName == "" {
312 return false
313 }
314 href, full, ok := t.opts.Resolvers.Commit(t.opts.Ctx, t.opts.RepoOwner, t.opts.RepoName, shaPrefix)
315 if !ok {
316 return false
317 }
318 link := ast.NewLink()
319 link.Destination = []byte(href)
320 // Display the SHA as <code>; preserve the user's typed length.
321 codeText := append([]byte(nil), display...)
322 codeText = bytes.TrimSpace(codeText)
323 link.AppendChild(link, ast.NewCodeSpan())
324 cs := link.LastChild().(*ast.CodeSpan)
325 cs.AppendChild(cs, ast.NewString(codeText))
326 parent.InsertBefore(parent, before, link)
327
328 if t.opts.Refs != nil {
329 *t.opts.Refs = append(*t.opts.Refs, Ref{
330 Kind: "commit",
331 FullSHA: full,
332 Href: href,
333 })
334 }
335 return true
336 }
337
338 // trimLeadingNonWord drops the leading boundary char(s) — used when
339 // a same-repo or mention token is rendered as plain text fallback.
340 func trimLeadingNonWord(b []byte) []byte {
341 for len(b) > 0 && !isWordByte(b[0]) {
342 b = b[1:]
343 }
344 return b
345 }
346
// isWordByte reports whether c is an ASCII letter, an ASCII digit or
// an underscore.
func isWordByte(c byte) bool {
	switch {
	case c == '_':
		return true
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
350
351 // silence unused-import warnings in a stripped build.
352 var (
353 _ = strings.Builder{}
354 _ = trimLeadingNonWord
355 )
356