@@ -1,14 +1,17 @@ |
| 1 | 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | 2 | |
| 3 | 3 | // Package highlight wraps Chroma so the rest of the project doesn't |
| 4 | | -// import it directly. The returned HTML is Chroma's standard "html" |
| 5 | | -// formatter output with line numbers; the caller embeds it in the |
| 6 | | -// blob template inside a code-styled wrapper. |
| 4 | +// import it directly. RenderLines returns one HTML fragment per |
| 5 | +// source line — the caller composes the row + gutter table itself |
| 6 | +// (this is the GitHub-classic / Forgejo / Gitea pattern; chroma's |
| 7 | +// own table mode is bypassed for layout-control reasons documented |
| 8 | +// in RenderLines). |
| 7 | 9 | package highlight |
| 8 | 10 | |
| 9 | 11 | import ( |
| 10 | 12 | "bytes" |
| 11 | 13 | stdhtml "html" |
| 14 | + "html/template" |
| 12 | 15 | "path/filepath" |
| 13 | 16 | "strings" |
| 14 | 17 | |
@@ -18,20 +21,25 @@ import ( |
| 18 | 21 | "github.com/alecthomas/chroma/v2/styles" |
| 19 | 22 | ) |
| 20 | 23 | |
| 21 | | -// Render returns syntax-highlighted HTML for source. filename is used |
| 22 | | -// to guess the lexer; on miss we fall back to content sniffing, then |
| 23 | | -// finally to plain text (no highlighting). Line numbers are always on. |
| 24 | +// RenderLines tokenizes source via Chroma and returns one HTML |
| 25 | +// fragment per line, with no surrounding `<pre>`/`<code>`/table. The |
| 26 | +// caller composes the gutter + line table itself (S33 blob refactor). |
| 24 | 27 | // |
| 25 | | -// The output is a `<pre class="chroma">…</pre>` block ready to embed |
| 26 | | -// in the page; line-number cells are linkable via Chroma's `LineLinks` |
| 27 | | -// option (rendered as `#L42`). |
| 28 | | -func Render(filename, source string) string { |
| 28 | +// Per-line splitting respects multi-line tokens: a docstring or block |
| 29 | +// comment that spans 5 lines yields 5 fragments, each with the open |
| 30 | +// `<span class="…">` re-emitted at the start and a `</span>` closer |
| 31 | +// at the end, so every fragment is independently well-formed and the |
| 32 | +// surrounding row table can intersperse other markup safely. |
| 33 | +// |
| 34 | +// `filename` only drives lexer selection; the returned fragments |
| 35 | +// don't reference it. |
| 36 | +func RenderLines(filename, source string) []template.HTML { |
| 29 | 37 | lexer := lexers.Match(filename) |
| 30 | 38 | if lexer == nil { |
| 31 | 39 | lexer = lexers.Analyse(source) |
| 32 | 40 | } |
| 33 | 41 | if lexer == nil { |
| 34 | | - return plainPre(source) |
| 42 | + return plainLines(source) |
| 35 | 43 | } |
| 36 | 44 | lexer = chroma.Coalesce(lexer) |
| 37 | 45 | style := styles.Get("github") |
@@ -39,20 +47,18 @@ func Render(filename, source string) string { |
| 39 | 47 | style = styles.Fallback |
| 40 | 48 | } |
| 41 | 49 | formatter := chromahtml.New( |
| 42 | | - chromahtml.WithLineNumbers(true), |
| 43 | | - chromahtml.WithLinkableLineNumbers(true, "L"), |
| 44 | | - chromahtml.LineNumbersInTable(true), |
| 45 | 50 | chromahtml.WithClasses(true), |
| 51 | + chromahtml.PreventSurroundingPre(true), |
| 46 | 52 | ) |
| 47 | 53 | iter, err := lexer.Tokenise(nil, source) |
| 48 | 54 | if err != nil { |
| 49 | | - return plainPre(source) |
| 55 | + return plainLines(source) |
| 50 | 56 | } |
| 51 | 57 | var buf bytes.Buffer |
| 52 | 58 | if err := formatter.Format(&buf, style, iter); err != nil { |
| 53 | | - return plainPre(source) |
| 59 | + return plainLines(source) |
| 54 | 60 | } |
| 55 | | - return buf.String() |
| 61 | + return splitChromaLines(buf.String()) |
| 56 | 62 | } |
| 57 | 63 | |
| 58 | 64 | // CSS returns the `<style>`-wrappable CSS for the highlight theme so |
@@ -84,7 +90,6 @@ func writeStyleCSS(name string) string { |
| 84 | 90 | } |
| 85 | 91 | formatter := chromahtml.New( |
| 86 | 92 | chromahtml.WithClasses(true), |
| 87 | | - chromahtml.LineNumbersInTable(true), |
| 88 | 93 | ) |
| 89 | 94 | var buf bytes.Buffer |
| 90 | 95 | _ = formatter.WriteCSS(&buf, style) |
@@ -153,47 +158,91 @@ func splitTopLevelRules(css string) []string { |
| 153 | 158 | return rules |
| 154 | 159 | } |
| 155 | 160 | |
| 156 | | -// plainPre escapes source and wraps it in a <pre> for the no-lexer |
| 157 | | -// fallback. We still provide line numbers via a <table> so the blob |
| 158 | | -// template renders consistently. |
| 159 | | -func plainPre(source string) string { |
| 160 | | - lines := strings.Split(source, "\n") |
| 161 | | - var lineNums, code bytes.Buffer |
| 162 | | - for i := range lines { |
| 163 | | - lineNums.WriteString("<a href=\"#L") |
| 164 | | - lineNums.WriteString(itoa(i + 1)) |
| 165 | | - lineNums.WriteString("\">") |
| 166 | | - lineNums.WriteString(itoa(i + 1)) |
| 167 | | - lineNums.WriteString("</a>\n") |
| 168 | | - } |
| 169 | | - for i, l := range lines { |
| 170 | | - code.WriteString("<span id=\"L") |
| 171 | | - code.WriteString(itoa(i + 1)) |
| 172 | | - code.WriteString("\">") |
| 173 | | - code.WriteString(stdhtml.EscapeString(l)) |
| 174 | | - code.WriteString("</span>\n") |
| 175 | | - } |
| 176 | | - return `<div class="chroma"><table><tr><td class="lntable"><pre class="chroma"><code>` + |
| 177 | | - lineNums.String() + |
| 178 | | - `</code></pre></td><td><pre class="chroma"><code>` + |
| 179 | | - code.String() + |
| 180 | | - `</code></pre></td></tr></table></div>` |
// plainLines produces the unhighlighted rendering of source: the text
// is cut at every '\n' and each piece is HTML-escaped on its own. The
// result has the same shape as the highlighted path (one fragment per
// row), so the caller can lay out gutter and code identically.
//
// An empty source yields exactly one empty fragment, because
// strings.Split never returns fewer than one element for a non-empty
// separator; a source ending in '\n' yields a final empty fragment.
func plainLines(source string) []template.HTML {
	pieces := strings.Split(source, "\n")
	frags := make([]template.HTML, 0, len(pieces))
	for _, text := range pieces {
		escaped := stdhtml.EscapeString(text)
		frags = append(frags, template.HTML(escaped)) //nolint:gosec // EscapeString output is safe HTML
	}
	return frags
}
| 182 | 177 | |
| 183 | | -// itoa is a tiny int-to-string used inside plainPre to avoid pulling |
| 184 | | -// fmt for the hot path. |
| 185 | | -func itoa(n int) string { |
| 186 | | - if n == 0 { |
| 187 | | - return "0" |
| 188 | | - } |
| 189 | | - var buf [20]byte |
| 190 | | - i := len(buf) |
| 191 | | - for n > 0 { |
| 192 | | - i-- |
| 193 | | - buf[i] = byte('0' + n%10) |
| 194 | | - n /= 10 |
| 195 | | - } |
| 196 | | - return string(buf[i:]) |
// splitChromaLines walks chroma's classes-mode HTML and returns one
// fragment per source line. The wrinkle: a single
// `<span class="…">…</span>` may cross line boundaries (multi-line
// tokens such as docstrings or block comments, or simply a token whose
// text ends with the newline itself). A naive strings.Split on '\n'
// would leave half-open spans in some lines and orphan `</span>` in
// others, breaking the row table.
//
// The walker tracks the open-span stack: at every '\n' it closes any
// currently-open spans, emits the line, then reopens the same spans at
// the start of the next line. Each fragment is therefore independently
// well-formed, and a multi-line token carries the same CSS class on
// every line it touches.
//
// A trailing fragment (text after the last '\n') is emitted only when
// it holds real content. Spans that were merely carried over the final
// newline and then closed do not count, so the number of fragments does
// not depend on whether the last newline happened to sit inside a
// span. Empty input yields a single empty fragment.
func splitChromaLines(html string) []template.HTML {
	const (
		openPrefix = "<span"
		closeTag   = "</span>"
	)
	var (
		lines    []template.HTML
		openTags []string // verbatim "<span …>" strings, used to reopen
		cur      strings.Builder
		// hasContent is true once the current line holds anything other
		// than reopened spans and the closers that match them.
		hasContent bool
	)
	// flush closes every span still open, emits the current line, and
	// clears the builder. The open-span stack itself is left untouched
	// so the caller can reopen the same spans on the next line.
	flush := func() {
		for range openTags {
			cur.WriteString(closeTag)
		}
		lines = append(lines, template.HTML(cur.String())) //nolint:gosec // assembled from chroma + escaped tokens
		cur.Reset()
	}

	for i := 0; i < len(html); {
		rest := html[i:]
		switch {
		case strings.HasPrefix(rest, openPrefix):
			end := strings.IndexByte(rest, '>')
			if end < 0 {
				// Malformed; copy the remainder through so the caller
				// at least gets the text on a single line.
				cur.WriteString(rest)
				hasContent = true
				i = len(html)
				break
			}
			tag := rest[:end+1]
			cur.WriteString(tag)
			openTags = append(openTags, tag)
			hasContent = true
			i += end + 1
		case strings.HasPrefix(rest, closeTag):
			cur.WriteString(closeTag)
			if len(openTags) > 0 {
				openTags = openTags[:len(openTags)-1]
			} else {
				// Orphan closer: nothing of ours matches it, so it is
				// input content that must be preserved verbatim.
				hasContent = true
			}
			i += len(closeTag)
		case rest[0] == '\n':
			flush()
			for _, t := range openTags {
				cur.WriteString(t)
			}
			hasContent = false
			i++
		default:
			// Plain text: copy the whole run up to the next byte that
			// could start a tag or end the line.
			n := strings.IndexAny(rest[1:], "<\n")
			if n < 0 {
				n = len(rest) - 1
			}
			cur.WriteString(rest[:n+1])
			hasContent = true
			i += n + 1
		}
	}
	// Trailing line (no terminating \n). Skip it when it would consist
	// solely of carried-over spans, but always return at least one
	// fragment.
	if hasContent || len(lines) == 0 {
		flush()
	}
	return lines
}
| 198 | 247 | |
| 199 | 248 | // LanguageGuess returns the human-readable language name (or "Text" |