| 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | |
| 3 | // Package extensions hosts the AST transformer that adds shithub- |
| 4 | // specific inline patterns (`@user`, `#N`, `owner/repo#N`, commit |
| 5 | // SHAs, emoji shortcodes) to Goldmark's parsed text without ever |
| 6 | // touching the contents of code blocks or inline code. |
| 7 | // |
// Approach: a single `parser.ASTTransformer` walks the document
// after parsing, visiting only `*ast.Text` nodes that are NOT inside
// a code span, code block, autolink, link, image, or raw-HTML node.
// Each visited text node is run through one combined regex; matches
// are replaced with `*ast.Link` (mention/ref/commit) or `*ast.String`
// (emoji) nodes, with the surrounding text preserved as `*ast.String`
// segments.
| 14 | // |
| 15 | // Why an ASTTransformer instead of inline parsers: inline parsers |
| 16 | // run during the main parse pass and need a `Trigger()` byte set |
| 17 | // plus careful interaction with Goldmark's existing inline |
| 18 | // disambiguation. The transformer approach is simpler, well-trodden |
| 19 | // in other Go markdown stacks, and produces equivalent output for |
| 20 | // every input we care about. |
| 21 | package extensions |
| 22 | |
| 23 | import ( |
| 24 | "bytes" |
| 25 | "context" |
| 26 | "regexp" |
| 27 | "strconv" |
| 28 | "strings" |
| 29 | |
| 30 | "github.com/yuin/goldmark" |
| 31 | "github.com/yuin/goldmark/ast" |
| 32 | "github.com/yuin/goldmark/parser" |
| 33 | "github.com/yuin/goldmark/text" |
| 34 | "github.com/yuin/goldmark/util" |
| 35 | ) |
| 36 | |
// Resolvers wires the transformer against the runtime. The fields
// are independent so the parent package can decide which flavors
// to enable. A nil resolver means "render this kind as plain text"
// (no link, no error).
//
// All resolvers MUST be visibility-aware. The transformer does not
// re-check visibility — it trusts the resolver's `ok` to gate
// existence: when `ok` is false the token is emitted as plain text.
type Resolvers struct {
	// User resolves an `@username` mention. The username is passed
	// without the leading `@`.
	User func(ctx context.Context, username string) (href string, ok bool)
	// Issue covers both same-repo (`#N`, called with ownerHint and
	// repoHint both "") and cross-repo (`owner/repo#N`) references.
	// viewerUserID is the ViewerUserID from Options.
	Issue func(ctx context.Context, ownerHint, repoHint string, number int64, viewerUserID int64) (href string, ok bool)
	// Commit is invoked only when RepoOwner+RepoName are both set
	// (a same-repo render) and the matched token is a 7-40 char
	// lowercase hex string at a word boundary. fullSHA is recorded
	// in the collected Ref.
	Commit func(ctx context.Context, repoOwner, repoName, shaPrefix string) (href, fullSHA string, ok bool)
}
| 55 | |
| 56 | // Options is the per-render config consumed by the transformer. |
| 57 | type Options struct { |
| 58 | Ctx context.Context |
| 59 | RepoOwner string |
| 60 | RepoName string |
| 61 | ViewerUserID int64 |
| 62 | Resolvers Resolvers |
| 63 | // Refs and Mentions accumulate resolved references for the caller. |
| 64 | // Pointers so the transformer can append. |
| 65 | Refs *[]Ref |
| 66 | Mentions *[]Mention |
| 67 | } |
| 68 | |
// Ref records one resolved issue or commit reference. It mirrors the
// parent-package type and is redeclared here to avoid an import cycle.
type Ref struct {
	// Kind is "issue" or "commit".
	Kind string
	// Owner and Repo are the hints taken from an `owner/repo#N` token;
	// both are empty for same-repo issue refs and for commit refs.
	Owner, Repo string
	// Number is the issue number (issue refs only).
	Number int64
	// FullSHA is the full hash returned by the commit resolver
	// (commit refs only).
	FullSHA string
	// Href is the link target returned by the resolver.
	Href string
}
| 79 | |
// Mention records one resolved `@username` mention. Like Ref, it
// mirrors the parent-package type and is redeclared here to avoid an
// import cycle.
type Mention struct {
	// Username is the mentioned name without the leading `@`; Href is
	// the link target returned by the User resolver.
	Username, Href string
}
| 84 | |
// reCombined matches every supported inline pattern in a single pass.
// The alternatives are tried in the order listed below. Some of them
// start with a non-capturing boundary `(?:^|[^\w/])` (or `(?:^|[^\w])`
// for mentions): that boundary byte is part of the overall match but
// not of any capture group, so callers re-emit it as plain text.
//
// Capture groups, with the index pair each occupies in the slices
// returned by FindAllSubmatchIndex:
//
//	#1-#3  cross-repo owner / repo / number   m[2:4], m[4:6], m[6:8]
//	#4     same-repo number                   m[8:10]
//	#5     mention username                   m[10:12]
//	#6     commit SHA prefix                  m[12:14]
//	#7     emoji shortcode name               m[14:16]
var reCombined = regexp.MustCompile(strings.Join([]string{
	// cross-repo: alice/proj#3
	`([A-Za-z0-9][A-Za-z0-9._-]*)/([A-Za-z0-9][A-Za-z0-9._-]*)#([0-9]{1,9})\b`,
	// same-repo: #3 — needs a non-word, non-`/` byte (or start) on the left
	`(?:^|[^\w/])#([0-9]{1,9})\b`,
	// mention: @alice — needs a non-word byte (or start) on the left
	`(?:^|[^\w])@([A-Za-z0-9][A-Za-z0-9_-]{0,38})\b`,
	// commit SHA: 7–40 lowercase hex, word boundary on both sides
	`(?:^|[^\w/])([0-9a-f]{7,40})\b`,
	// emoji shortcode: :smile:
	`:([a-z0-9_+\-]+):`,
}, "|"))
| 107 | |
| 108 | // Extension is a goldmark.Extender that registers the AST transformer. |
| 109 | type Extension struct{ Opts *Options } |
| 110 | |
| 111 | // New constructs the extender with the given options. |
| 112 | func New(opts *Options) goldmark.Extender { return &Extension{Opts: opts} } |
| 113 | |
| 114 | // Extend implements goldmark.Extender. |
| 115 | func (e *Extension) Extend(m goldmark.Markdown) { |
| 116 | m.Parser().AddOptions(parser.WithASTTransformers( |
| 117 | util.Prioritized(&transformer{opts: e.Opts}, 999), |
| 118 | )) |
| 119 | } |
| 120 | |
| 121 | type transformer struct{ opts *Options } |
| 122 | |
| 123 | // Transform walks the document and replaces matched text segments. |
| 124 | func (t *transformer) Transform(doc *ast.Document, reader text.Reader, _ parser.Context) { |
| 125 | if t.opts == nil { |
| 126 | return |
| 127 | } |
| 128 | source := reader.Source() |
| 129 | _ = ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) { |
| 130 | if !entering { |
| 131 | return ast.WalkContinue, nil |
| 132 | } |
| 133 | // Skip subtrees that should never be linkified. |
| 134 | switch n.(type) { |
| 135 | case *ast.CodeSpan, *ast.AutoLink, *ast.Link, *ast.Image, |
| 136 | *ast.FencedCodeBlock, *ast.CodeBlock, *ast.RawHTML, *ast.HTMLBlock: |
| 137 | return ast.WalkSkipChildren, nil |
| 138 | } |
| 139 | txt, ok := n.(*ast.Text) |
| 140 | if !ok { |
| 141 | return ast.WalkContinue, nil |
| 142 | } |
| 143 | t.replaceText(txt, source) |
| 144 | return ast.WalkContinue, nil |
| 145 | }) |
| 146 | } |
| 147 | |
| 148 | // replaceText finds matches in the segment of `txt` and inserts |
| 149 | // new sibling nodes (string runs + links) before the original text; |
| 150 | // the original text is removed once everything's stitched in. |
| 151 | func (t *transformer) replaceText(txt *ast.Text, source []byte) { |
| 152 | body := txt.Segment.Value(source) |
| 153 | matches := reCombined.FindAllSubmatchIndex(body, -1) |
| 154 | if len(matches) == 0 { |
| 155 | return |
| 156 | } |
| 157 | parent := txt.Parent() |
| 158 | if parent == nil { |
| 159 | return |
| 160 | } |
| 161 | |
| 162 | cursor := 0 |
| 163 | for _, m := range matches { |
| 164 | matchStart, matchEnd := m[0], m[1] |
| 165 | |
| 166 | // Determine which alternation captured + where the visible |
| 167 | // content starts (excluding the regex-consumed boundary |
| 168 | // char, if any). |
| 169 | var ( |
| 170 | isCrossRepo = m[2] >= 0 |
| 171 | isSameRepo = m[8] >= 0 |
| 172 | isMention = m[10] >= 0 |
| 173 | isCommit = m[12] >= 0 |
| 174 | isEmoji = m[14] >= 0 |
| 175 | ) |
| 176 | var contentStart int |
| 177 | switch { |
| 178 | case isCrossRepo: |
| 179 | contentStart = m[2] |
| 180 | case isSameRepo: |
| 181 | contentStart = m[8] - 1 // include `#` |
| 182 | case isMention: |
| 183 | contentStart = m[10] - 1 // include `@` |
| 184 | case isCommit: |
| 185 | contentStart = m[12] |
| 186 | case isEmoji: |
| 187 | contentStart = m[14] - 1 // include leading `:` |
| 188 | } |
| 189 | |
| 190 | // Emit (a) any text between the previous cursor and the |
| 191 | // match start, then (b) the consumed-but-not-content |
| 192 | // boundary char (when contentStart > matchStart). Both into |
| 193 | // the parent before the original text node. |
| 194 | if matchStart > cursor { |
| 195 | t.insertText(parent, txt, body[cursor:matchStart]) |
| 196 | } |
| 197 | if contentStart > matchStart { |
| 198 | t.insertText(parent, txt, body[matchStart:contentStart]) |
| 199 | } |
| 200 | |
| 201 | // Now emit the resolved (or fallback-plain) match content. |
| 202 | display := body[contentStart:matchEnd] |
| 203 | switch { |
| 204 | case isCrossRepo: |
| 205 | owner := string(body[m[2]:m[3]]) |
| 206 | repo := string(body[m[4]:m[5]]) |
| 207 | numStr := string(body[m[6]:m[7]]) |
| 208 | if !t.appendIssueLink(parent, txt, owner, repo, numStr, display) { |
| 209 | t.insertText(parent, txt, display) |
| 210 | } |
| 211 | case isSameRepo: |
| 212 | numStr := string(body[m[8]:m[9]]) |
| 213 | if !t.appendIssueLink(parent, txt, "", "", numStr, display) { |
| 214 | t.insertText(parent, txt, display) |
| 215 | } |
| 216 | case isMention: |
| 217 | name := string(body[m[10]:m[11]]) |
| 218 | if !t.appendMentionLink(parent, txt, name, display) { |
| 219 | t.insertText(parent, txt, display) |
| 220 | } |
| 221 | case isCommit: |
| 222 | sha := string(body[m[12]:m[13]]) |
| 223 | if !t.appendCommitLink(parent, txt, sha, display) { |
| 224 | t.insertText(parent, txt, display) |
| 225 | } |
| 226 | case isEmoji: |
| 227 | name := string(body[m[14]:m[15]]) |
| 228 | if uni, ok := lookupEmoji(name); ok { |
| 229 | t.insertText(parent, txt, []byte(uni)) |
| 230 | } else { |
| 231 | t.insertText(parent, txt, display) |
| 232 | } |
| 233 | } |
| 234 | cursor = matchEnd |
| 235 | } |
| 236 | // Trailing text after the last match. |
| 237 | if cursor < len(body) { |
| 238 | t.insertText(parent, txt, body[cursor:]) |
| 239 | } |
| 240 | parent.RemoveChild(parent, txt) |
| 241 | } |
| 242 | |
| 243 | // insertText appends a string node before the original text node |
| 244 | // (which is removed at the end of replaceText). |
| 245 | func (t *transformer) insertText(parent, before ast.Node, b []byte) { |
| 246 | if len(b) == 0 { |
| 247 | return |
| 248 | } |
| 249 | s := ast.NewString(append([]byte(nil), b...)) |
| 250 | parent.InsertBefore(parent, before, s) |
| 251 | } |
| 252 | |
| 253 | // appendIssueLink resolves an issue/PR ref and inserts a Link node. |
| 254 | // `display` is the visible text the user typed (e.g. "#42" or |
| 255 | // "alice/proj#5"). Returns false when the resolver declines (in |
| 256 | // which case the caller renders the display text as plain text — |
| 257 | // no link, no existence leak). |
| 258 | func (t *transformer) appendIssueLink(parent, before ast.Node, owner, repo, numStr string, display []byte) bool { |
| 259 | if t.opts.Resolvers.Issue == nil { |
| 260 | return false |
| 261 | } |
| 262 | num, err := strconv.ParseInt(numStr, 10, 64) |
| 263 | if err != nil { |
| 264 | return false |
| 265 | } |
| 266 | href, ok := t.opts.Resolvers.Issue(t.opts.Ctx, owner, repo, num, t.opts.ViewerUserID) |
| 267 | if !ok { |
| 268 | return false |
| 269 | } |
| 270 | link := ast.NewLink() |
| 271 | link.Destination = []byte(href) |
| 272 | link.AppendChild(link, ast.NewString(append([]byte(nil), display...))) |
| 273 | parent.InsertBefore(parent, before, link) |
| 274 | |
| 275 | if t.opts.Refs != nil { |
| 276 | *t.opts.Refs = append(*t.opts.Refs, Ref{ |
| 277 | Kind: "issue", |
| 278 | Owner: owner, |
| 279 | Repo: repo, |
| 280 | Number: num, |
| 281 | Href: href, |
| 282 | }) |
| 283 | } |
| 284 | return true |
| 285 | } |
| 286 | |
| 287 | // appendMentionLink resolves a @username and inserts a Link node. |
| 288 | func (t *transformer) appendMentionLink(parent, before ast.Node, username string, display []byte) bool { |
| 289 | if t.opts.Resolvers.User == nil { |
| 290 | return false |
| 291 | } |
| 292 | href, ok := t.opts.Resolvers.User(t.opts.Ctx, username) |
| 293 | if !ok { |
| 294 | return false |
| 295 | } |
| 296 | link := ast.NewLink() |
| 297 | link.Destination = []byte(href) |
| 298 | link.AppendChild(link, ast.NewString(append([]byte(nil), display...))) |
| 299 | parent.InsertBefore(parent, before, link) |
| 300 | if t.opts.Mentions != nil { |
| 301 | *t.opts.Mentions = append(*t.opts.Mentions, Mention{ |
| 302 | Username: username, |
| 303 | Href: href, |
| 304 | }) |
| 305 | } |
| 306 | return true |
| 307 | } |
| 308 | |
| 309 | // appendCommitLink resolves a commit SHA prefix in the current repo. |
| 310 | func (t *transformer) appendCommitLink(parent, before ast.Node, shaPrefix string, display []byte) bool { |
| 311 | if t.opts.Resolvers.Commit == nil || t.opts.RepoOwner == "" || t.opts.RepoName == "" { |
| 312 | return false |
| 313 | } |
| 314 | href, full, ok := t.opts.Resolvers.Commit(t.opts.Ctx, t.opts.RepoOwner, t.opts.RepoName, shaPrefix) |
| 315 | if !ok { |
| 316 | return false |
| 317 | } |
| 318 | link := ast.NewLink() |
| 319 | link.Destination = []byte(href) |
| 320 | // Display the SHA as <code>; preserve the user's typed length. |
| 321 | codeText := append([]byte(nil), display...) |
| 322 | codeText = bytes.TrimSpace(codeText) |
| 323 | link.AppendChild(link, ast.NewCodeSpan()) |
| 324 | cs := link.LastChild().(*ast.CodeSpan) |
| 325 | cs.AppendChild(cs, ast.NewString(codeText)) |
| 326 | parent.InsertBefore(parent, before, link) |
| 327 | |
| 328 | if t.opts.Refs != nil { |
| 329 | *t.opts.Refs = append(*t.opts.Refs, Ref{ |
| 330 | Kind: "commit", |
| 331 | FullSHA: full, |
| 332 | Href: href, |
| 333 | }) |
| 334 | } |
| 335 | return true |
| 336 | } |
| 337 | |
| 338 | // trimLeadingNonWord drops the leading boundary char(s) — used when |
| 339 | // a same-repo or mention token is rendered as plain text fallback. |
| 340 | func trimLeadingNonWord(b []byte) []byte { |
| 341 | for len(b) > 0 && !isWordByte(b[0]) { |
| 342 | b = b[1:] |
| 343 | } |
| 344 | return b |
| 345 | } |
| 346 | |
// isWordByte reports whether c is an ASCII letter, an ASCII digit, or
// an underscore.
func isWordByte(c byte) bool {
	switch {
	case c == '_':
		return true
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
| 350 | |
| 351 | // silence unused-import warnings in a stripped build. |
| 352 | var ( |
| 353 | _ = strings.Builder{} |
| 354 | _ = trimLeadingNonWord |
| 355 | ) |
| 356 |