| 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | |
| 3 | // Package extensions hosts the AST transformer that adds shithub- |
| 4 | // specific inline patterns (`@user`, `#N`, `owner/repo#N`, commit |
| 5 | // SHAs, emoji shortcodes) to Goldmark's parsed text without ever |
| 6 | // touching the contents of code blocks or inline code. |
| 7 | // |
| 8 | // Approach: a single `parser.ASTTransformer` walks the document |
| 9 | // after parsing, visiting only `*ast.Text` nodes whose ancestors |
| 10 | // are NOT code/codespan/autolink/link nodes. Each visited text node |
| 11 | // is run through one combined regex; matches are replaced with |
| 12 | // `*ast.Link` (mention/ref/commit) or `*ast.String` (emoji) nodes, |
| 13 | // with the surrounding text preserved as `*ast.String` segments. |
| 14 | // |
| 15 | // Why an ASTTransformer instead of inline parsers: inline parsers |
| 16 | // run during the main parse pass and need a `Trigger()` byte set |
| 17 | // plus careful interaction with Goldmark's existing inline |
| 18 | // disambiguation. The transformer approach is simpler, well-trodden |
| 19 | // in other Go markdown stacks, and produces equivalent output for |
| 20 | // every input we care about. |
| 21 | package extensions |
| 22 | |
| 23 | import ( |
| 24 | "bytes" |
| 25 | "context" |
| 26 | "regexp" |
| 27 | "strconv" |
| 28 | |
| 29 | "github.com/yuin/goldmark" |
| 30 | "github.com/yuin/goldmark/ast" |
| 31 | "github.com/yuin/goldmark/parser" |
| 32 | "github.com/yuin/goldmark/text" |
| 33 | "github.com/yuin/goldmark/util" |
| 34 | ) |
| 35 | |
// Resolvers wires the transformer against the runtime. The fields
// are independent so the parent package can decide which flavors
// to enable. A nil resolver means "render this kind as plain text"
// (no link, no error).
//
// All resolvers MUST be visibility-aware. The transformer does not
// re-check visibility — it trusts the resolver's `ok` to gate
// existence. When `ok` is false the matched text is emitted
// unchanged, so a declined lookup is indistinguishable from a
// target that does not exist.
type Resolvers struct {
	// User resolves an `@username` mention to the link target for
	// that user.
	User func(ctx context.Context, username string) (href string, ok bool)
	// Issue covers both same-repo (#N, where ownerHint and repoHint
	// are both "") and cross-repo (owner/repo#N) references.
	Issue func(ctx context.Context, ownerHint, repoHint string, number int64, viewerUserID int64) (href string, ok bool)
	// Commit is invoked only when RepoOwner+RepoName are both set
	// (a same-repo render) and the matched token is a 7-40 char
	// lowercase hex string at a word boundary. fullSHA is recorded
	// on the accumulated Ref; the visible text stays as typed.
	Commit func(ctx context.Context, repoOwner, repoName, shaPrefix string) (href, fullSHA string, ok bool)
	// Team resolves an `@org/team` mention to the team page link.
	// Visibility-aware: a secret team the viewer can't see should
	// return `ok=false` so the renderer falls back to plain text
	// (no existence leak).
	Team func(ctx context.Context, orgSlug, teamSlug string, viewerUserID int64) (href string, ok bool)
}
| 59 | |
// Options is the per-render config consumed by the transformer.
//
// Ctx is handed to every resolver call as-is. RepoOwner and RepoName
// identify the repository being rendered; commit-SHA linking is
// disabled unless both are non-empty. ViewerUserID is forwarded to
// the Issue and Team resolvers.
type Options struct {
	Ctx          context.Context
	RepoOwner    string
	RepoName     string
	ViewerUserID int64
	Resolvers    Resolvers
	// Refs and Mentions accumulate resolved references for the caller.
	// Pointers so the transformer can append. Either may be nil, in
	// which case that kind of reference is linked but not recorded.
	Refs     *[]Ref
	Mentions *[]Mention
}
| 72 | |
// Ref describes one resolved issue or commit reference. Ref and
// Mention mirror the parent-package types; we redeclare them here to
// avoid an import cycle.
//
// As populated by this package: Kind is "issue" or "commit". Issue
// refs set Owner, Repo (both "" for a same-repo `#N`), Number and
// Href. Commit refs set FullSHA and Href only.
type Ref struct {
	Kind    string
	Owner   string
	Repo    string
	Number  int64
	FullSHA string
	Href    string
}
| 83 | |
// Mention describes one `@username` mention that the User resolver
// accepted: the username as typed (without the `@`) and the link
// target the resolver returned.
type Mention struct {
	Username string
	Href     string
}
| 88 | |
// reCombined matches every pattern in one pass. Order in the
// alternation matters because the `@org/team` branch is more
// specific than `@user` and must come first — otherwise `@org` is
// captured by the user branch and the trailing `/team` is left
// behind as unstructured text.
//
// Every branch except the emoji one starts with a non-capturing
// left boundary, `(?:^|[^\w/])` or `(?:^|[^\w])`. When that boundary
// is a real character it is part of the overall match (m[0]) but
// not of any capture group, so callers must re-emit it as plain
// text.
//
// Capture-index map (each FindAllSubmatchIndex hit is a flat slice;
// indices below are the START of the named group, and are -1 when
// the group's branch did not match):
//
//	#2 / #4 / #6   cross-repo: owner / repo / number
//	#8             same-repo: number
//	#10 / #12      team mention: org / team (S31)
//	#14            user mention: username
//	#16            commit prefix
//	#18            emoji name
var reCombined = regexp.MustCompile(`` +
	// cross-repo: alice/proj#3 — left boundary required so we don't
	// chew into a preceding word.
	`(?:^|[^\w/])([A-Za-z0-9][A-Za-z0-9._-]*)/([A-Za-z0-9][A-Za-z0-9._-]*)#([0-9]{1,9})\b` +
	// or same-repo: #3
	`|(?:^|[^\w/])#([0-9]{1,9})\b` +
	// or team mention: @org/team — comes BEFORE @user so the
	// trailing `/team` doesn't get split off as text. Slug shape
	// matches users.username + teams.slug.
	`|(?:^|[^\w])@([a-z0-9](?:[a-z0-9-]{0,37}[a-z0-9])?)/([a-z0-9](?:[a-z0-9._-]{0,48}[a-z0-9])?)\b` +
	// or user mention: @alice
	`|(?:^|[^\w])@([A-Za-z0-9][A-Za-z0-9_-]{0,38})\b` +
	// or commit SHA: 7–40 lowercase hex
	`|(?:^|[^\w/])([0-9a-f]{7,40})\b` +
	// or emoji shortcode: :smile: (no left boundary required)
	`|:([a-z0-9_+\-]+):`,
)
| 121 | |
// Extension is a goldmark.Extender that registers the AST transformer.
// Opts is passed to the transformer unchanged; a nil Opts makes the
// transformer a no-op.
type Extension struct{ Opts *Options }
| 124 | |
| 125 | // New constructs the extender with the given options. |
| 126 | func New(opts *Options) goldmark.Extender { return &Extension{Opts: opts} } |
| 127 | |
| 128 | // Extend implements goldmark.Extender. |
| 129 | func (e *Extension) Extend(m goldmark.Markdown) { |
| 130 | m.Parser().AddOptions(parser.WithASTTransformers( |
| 131 | util.Prioritized(&transformer{opts: e.Opts}, 999), |
| 132 | )) |
| 133 | } |
| 134 | |
// transformer is the parser.ASTTransformer registered by Extension.
// opts carries the per-render configuration; when it is nil,
// Transform returns without touching the document.
type transformer struct{ opts *Options }
| 136 | |
| 137 | // Transform walks the document and replaces matched text segments. |
| 138 | func (t *transformer) Transform(doc *ast.Document, reader text.Reader, _ parser.Context) { |
| 139 | if t.opts == nil { |
| 140 | return |
| 141 | } |
| 142 | source := reader.Source() |
| 143 | _ = ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) { |
| 144 | if !entering { |
| 145 | return ast.WalkContinue, nil |
| 146 | } |
| 147 | // Skip subtrees that should never be linkified. |
| 148 | switch n.(type) { |
| 149 | case *ast.CodeSpan, *ast.AutoLink, *ast.Link, *ast.Image, |
| 150 | *ast.FencedCodeBlock, *ast.CodeBlock, *ast.RawHTML, *ast.HTMLBlock: |
| 151 | return ast.WalkSkipChildren, nil |
| 152 | } |
| 153 | txt, ok := n.(*ast.Text) |
| 154 | if !ok { |
| 155 | return ast.WalkContinue, nil |
| 156 | } |
| 157 | t.replaceText(txt, source) |
| 158 | return ast.WalkContinue, nil |
| 159 | }) |
| 160 | } |
| 161 | |
| 162 | // replaceText finds matches in the segment of `txt` and inserts |
| 163 | // new sibling nodes (string runs + links) before the original text; |
| 164 | // the original text is removed once everything's stitched in. |
| 165 | func (t *transformer) replaceText(txt *ast.Text, source []byte) { |
| 166 | body := txt.Segment.Value(source) |
| 167 | matches := reCombined.FindAllSubmatchIndex(body, -1) |
| 168 | if len(matches) == 0 { |
| 169 | return |
| 170 | } |
| 171 | parent := txt.Parent() |
| 172 | if parent == nil { |
| 173 | return |
| 174 | } |
| 175 | |
| 176 | cursor := 0 |
| 177 | for _, m := range matches { |
| 178 | matchStart, matchEnd := m[0], m[1] |
| 179 | |
| 180 | // Determine which alternation captured + where the visible |
| 181 | // content starts (excluding the regex-consumed boundary |
| 182 | // char, if any). |
| 183 | var ( |
| 184 | isCrossRepo = m[2] >= 0 |
| 185 | isSameRepo = m[8] >= 0 |
| 186 | isTeamMen = m[10] >= 0 |
| 187 | isMention = m[14] >= 0 |
| 188 | isCommit = m[16] >= 0 |
| 189 | isEmoji = m[18] >= 0 |
| 190 | ) |
| 191 | var contentStart int |
| 192 | switch { |
| 193 | case isCrossRepo: |
| 194 | // m[2] is the owner-start position (after the regex's |
| 195 | // leading non-word boundary char). Same shape as same-repo |
| 196 | // — emit the boundary char as plain text, then the link. |
| 197 | contentStart = m[2] |
| 198 | case isSameRepo: |
| 199 | contentStart = m[8] - 1 // include `#` |
| 200 | case isTeamMen: |
| 201 | contentStart = m[10] - 1 // include `@` |
| 202 | case isMention: |
| 203 | contentStart = m[14] - 1 // include `@` |
| 204 | case isCommit: |
| 205 | contentStart = m[16] |
| 206 | case isEmoji: |
| 207 | contentStart = m[18] - 1 // include leading `:` |
| 208 | } |
| 209 | |
| 210 | // Emit (a) any text between the previous cursor and the |
| 211 | // match start, then (b) the consumed-but-not-content |
| 212 | // boundary char (when contentStart > matchStart). Both into |
| 213 | // the parent before the original text node. |
| 214 | if matchStart > cursor { |
| 215 | t.insertText(parent, txt, body[cursor:matchStart]) |
| 216 | } |
| 217 | if contentStart > matchStart { |
| 218 | t.insertText(parent, txt, body[matchStart:contentStart]) |
| 219 | } |
| 220 | |
| 221 | // Now emit the resolved (or fallback-plain) match content. |
| 222 | display := body[contentStart:matchEnd] |
| 223 | switch { |
| 224 | case isCrossRepo: |
| 225 | owner := string(body[m[2]:m[3]]) |
| 226 | repo := string(body[m[4]:m[5]]) |
| 227 | numStr := string(body[m[6]:m[7]]) |
| 228 | if !t.appendIssueLink(parent, txt, owner, repo, numStr, display) { |
| 229 | t.insertText(parent, txt, display) |
| 230 | } |
| 231 | case isSameRepo: |
| 232 | numStr := string(body[m[8]:m[9]]) |
| 233 | if !t.appendIssueLink(parent, txt, "", "", numStr, display) { |
| 234 | t.insertText(parent, txt, display) |
| 235 | } |
| 236 | case isTeamMen: |
| 237 | orgSlug := string(body[m[10]:m[11]]) |
| 238 | teamSlug := string(body[m[12]:m[13]]) |
| 239 | if !t.appendTeamMentionLink(parent, txt, orgSlug, teamSlug, display) { |
| 240 | t.insertText(parent, txt, display) |
| 241 | } |
| 242 | case isMention: |
| 243 | name := string(body[m[14]:m[15]]) |
| 244 | if !t.appendMentionLink(parent, txt, name, display) { |
| 245 | t.insertText(parent, txt, display) |
| 246 | } |
| 247 | case isCommit: |
| 248 | sha := string(body[m[16]:m[17]]) |
| 249 | if !t.appendCommitLink(parent, txt, sha, display) { |
| 250 | t.insertText(parent, txt, display) |
| 251 | } |
| 252 | case isEmoji: |
| 253 | name := string(body[m[18]:m[19]]) |
| 254 | if uni, ok := lookupEmoji(name); ok { |
| 255 | t.insertText(parent, txt, []byte(uni)) |
| 256 | } else { |
| 257 | t.insertText(parent, txt, display) |
| 258 | } |
| 259 | } |
| 260 | cursor = matchEnd |
| 261 | } |
| 262 | // Trailing text after the last match. |
| 263 | if cursor < len(body) { |
| 264 | t.insertText(parent, txt, body[cursor:]) |
| 265 | } |
| 266 | parent.RemoveChild(parent, txt) |
| 267 | } |
| 268 | |
| 269 | // insertText appends a string node before the original text node |
| 270 | // (which is removed at the end of replaceText). |
| 271 | func (t *transformer) insertText(parent, before ast.Node, b []byte) { |
| 272 | if len(b) == 0 { |
| 273 | return |
| 274 | } |
| 275 | s := ast.NewString(append([]byte(nil), b...)) |
| 276 | parent.InsertBefore(parent, before, s) |
| 277 | } |
| 278 | |
| 279 | // appendIssueLink resolves an issue/PR ref and inserts a Link node. |
| 280 | // `display` is the visible text the user typed (e.g. "#42" or |
| 281 | // "alice/proj#5"). Returns false when the resolver declines (in |
| 282 | // which case the caller renders the display text as plain text — |
| 283 | // no link, no existence leak). |
| 284 | func (t *transformer) appendIssueLink(parent, before ast.Node, owner, repo, numStr string, display []byte) bool { |
| 285 | if t.opts.Resolvers.Issue == nil { |
| 286 | return false |
| 287 | } |
| 288 | num, err := strconv.ParseInt(numStr, 10, 64) |
| 289 | if err != nil { |
| 290 | return false |
| 291 | } |
| 292 | href, ok := t.opts.Resolvers.Issue(t.opts.Ctx, owner, repo, num, t.opts.ViewerUserID) |
| 293 | if !ok { |
| 294 | return false |
| 295 | } |
| 296 | link := ast.NewLink() |
| 297 | link.Destination = []byte(href) |
| 298 | link.AppendChild(link, ast.NewString(append([]byte(nil), display...))) |
| 299 | parent.InsertBefore(parent, before, link) |
| 300 | |
| 301 | if t.opts.Refs != nil { |
| 302 | *t.opts.Refs = append(*t.opts.Refs, Ref{ |
| 303 | Kind: "issue", |
| 304 | Owner: owner, |
| 305 | Repo: repo, |
| 306 | Number: num, |
| 307 | Href: href, |
| 308 | }) |
| 309 | } |
| 310 | return true |
| 311 | } |
| 312 | |
| 313 | // appendTeamMentionLink resolves an @org/team and inserts a Link |
| 314 | // node. Returns false on any failure (unknown org, secret team |
| 315 | // invisible to viewer, no resolver wired) — the caller renders the |
| 316 | // matched text as-is. |
| 317 | func (t *transformer) appendTeamMentionLink(parent, before ast.Node, orgSlug, teamSlug string, display []byte) bool { |
| 318 | if t.opts.Resolvers.Team == nil { |
| 319 | return false |
| 320 | } |
| 321 | href, ok := t.opts.Resolvers.Team(t.opts.Ctx, orgSlug, teamSlug, t.opts.ViewerUserID) |
| 322 | if !ok { |
| 323 | return false |
| 324 | } |
| 325 | link := ast.NewLink() |
| 326 | link.Destination = []byte(href) |
| 327 | link.AppendChild(link, ast.NewString(append([]byte(nil), display...))) |
| 328 | parent.InsertBefore(parent, before, link) |
| 329 | return true |
| 330 | } |
| 331 | |
| 332 | // appendMentionLink resolves a @username and inserts a Link node. |
| 333 | func (t *transformer) appendMentionLink(parent, before ast.Node, username string, display []byte) bool { |
| 334 | if t.opts.Resolvers.User == nil { |
| 335 | return false |
| 336 | } |
| 337 | href, ok := t.opts.Resolvers.User(t.opts.Ctx, username) |
| 338 | if !ok { |
| 339 | return false |
| 340 | } |
| 341 | link := ast.NewLink() |
| 342 | link.Destination = []byte(href) |
| 343 | link.AppendChild(link, ast.NewString(append([]byte(nil), display...))) |
| 344 | parent.InsertBefore(parent, before, link) |
| 345 | if t.opts.Mentions != nil { |
| 346 | *t.opts.Mentions = append(*t.opts.Mentions, Mention{ |
| 347 | Username: username, |
| 348 | Href: href, |
| 349 | }) |
| 350 | } |
| 351 | return true |
| 352 | } |
| 353 | |
| 354 | // appendCommitLink resolves a commit SHA prefix in the current repo. |
| 355 | func (t *transformer) appendCommitLink(parent, before ast.Node, shaPrefix string, display []byte) bool { |
| 356 | if t.opts.Resolvers.Commit == nil || t.opts.RepoOwner == "" || t.opts.RepoName == "" { |
| 357 | return false |
| 358 | } |
| 359 | href, full, ok := t.opts.Resolvers.Commit(t.opts.Ctx, t.opts.RepoOwner, t.opts.RepoName, shaPrefix) |
| 360 | if !ok { |
| 361 | return false |
| 362 | } |
| 363 | link := ast.NewLink() |
| 364 | link.Destination = []byte(href) |
| 365 | // Display the SHA as <code>; preserve the user's typed length. |
| 366 | codeText := append([]byte(nil), display...) |
| 367 | codeText = bytes.TrimSpace(codeText) |
| 368 | link.AppendChild(link, ast.NewCodeSpan()) |
| 369 | cs := link.LastChild().(*ast.CodeSpan) |
| 370 | cs.AppendChild(cs, ast.NewString(codeText)) |
| 371 | parent.InsertBefore(parent, before, link) |
| 372 | |
| 373 | if t.opts.Refs != nil { |
| 374 | *t.opts.Refs = append(*t.opts.Refs, Ref{ |
| 375 | Kind: "commit", |
| 376 | FullSHA: full, |
| 377 | Href: href, |
| 378 | }) |
| 379 | } |
| 380 | return true |
| 381 | } |
| 382 | |
| 383 |