tenseleyflow/shithub / 30f2c74

Browse files

highlight: replace Render with per-line RenderLines (no chroma table)

Authored by espadonne
SHA
30f2c745eaa8ef6d7b84dfea0d0ca0520924ede2
Parents
205190f
Tree
d844e46

1 changed file

Status | File | + | -
M internal/repos/highlight/chroma.go 106 57
internal/repos/highlight/chroma.go (modified)
@@ -1,14 +1,17 @@
11
 // SPDX-License-Identifier: AGPL-3.0-or-later
22
 
33
 // Package highlight wraps Chroma so the rest of the project doesn't
4
-// import it directly. The returned HTML is Chroma's standard "html"
5
-// formatter output with line numbers; the caller embeds it in the
6
-// blob template inside a code-styled wrapper.
4
+// import it directly. RenderLines returns one HTML fragment per
5
+// source line — the caller composes the row + gutter table itself
6
+// (this is the GitHub-classic / Forgejo / Gitea pattern; chroma's
7
+// own table mode is bypassed for layout-control reasons documented
8
+// in RenderLines).
79
 package highlight
810
 
911
 import (
1012
 	"bytes"
1113
 	stdhtml "html"
14
+	"html/template"
1215
 	"path/filepath"
1316
 	"strings"
1417
 
@@ -18,20 +21,25 @@ import (
1821
 	"github.com/alecthomas/chroma/v2/styles"
1922
 )
2023
 
21
-// Render returns syntax-highlighted HTML for source. filename is used
22
-// to guess the lexer; on miss we fall back to content sniffing, then
23
-// finally to plain text (no highlighting). Line numbers are always on.
24
+// RenderLines tokenizes source via Chroma and returns one HTML
25
+// fragment per line, with no surrounding `<pre>`/`<code>`/table. The
26
+// caller composes the gutter + line table itself (S33 blob refactor).
2427
 //
25
-// The output is a `<pre class="chroma">…</pre>` block ready to embed
26
-// in the page; line-number cells are linkable via Chroma's `LineLinks`
27
-// option (rendered as `#L42`).
28
-func Render(filename, source string) string {
28
+// Per-line splitting respects multi-line tokens: a docstring or block
29
+// comment that spans 5 lines yields 5 fragments, each with the open
30
+// `<span class="…">` re-emitted at the start and a `</span>` closer
31
+// at the end, so every fragment is independently well-formed and the
32
+// surrounding row table can intersperse other markup safely.
33
+//
34
+// `filename` only drives lexer selection; the returned fragments
35
+// don't reference it.
36
+func RenderLines(filename, source string) []template.HTML {
2937
 	lexer := lexers.Match(filename)
3038
 	if lexer == nil {
3139
 		lexer = lexers.Analyse(source)
3240
 	}
3341
 	if lexer == nil {
34
-		return plainPre(source)
42
+		return plainLines(source)
3543
 	}
3644
 	lexer = chroma.Coalesce(lexer)
3745
 	style := styles.Get("github")
@@ -39,20 +47,18 @@ func Render(filename, source string) string {
3947
 		style = styles.Fallback
4048
 	}
4149
 	formatter := chromahtml.New(
42
-		chromahtml.WithLineNumbers(true),
43
-		chromahtml.WithLinkableLineNumbers(true, "L"),
44
-		chromahtml.LineNumbersInTable(true),
4550
 		chromahtml.WithClasses(true),
51
+		chromahtml.PreventSurroundingPre(true),
4652
 	)
4753
 	iter, err := lexer.Tokenise(nil, source)
4854
 	if err != nil {
49
-		return plainPre(source)
55
+		return plainLines(source)
5056
 	}
5157
 	var buf bytes.Buffer
5258
 	if err := formatter.Format(&buf, style, iter); err != nil {
53
-		return plainPre(source)
59
+		return plainLines(source)
5460
 	}
55
-	return buf.String()
61
+	return splitChromaLines(buf.String())
5662
 }
5763
 
5864
 // CSS returns the `<style>`-wrappable CSS for the highlight theme so
@@ -84,7 +90,6 @@ func writeStyleCSS(name string) string {
8490
 	}
8591
 	formatter := chromahtml.New(
8692
 		chromahtml.WithClasses(true),
87
-		chromahtml.LineNumbersInTable(true),
8893
 	)
8994
 	var buf bytes.Buffer
9095
 	_ = formatter.WriteCSS(&buf, style)
@@ -153,47 +158,91 @@ func splitTopLevelRules(css string) []string {
153158
 	return rules
154159
 }
155160
 
156
-// plainPre escapes source and wraps it in a <pre> for the no-lexer
157
-// fallback. We still provide line numbers via a <table> so the blob
158
-// template renders consistently.
159
-func plainPre(source string) string {
160
-	lines := strings.Split(source, "\n")
161
-	var lineNums, code bytes.Buffer
162
-	for i := range lines {
163
-		lineNums.WriteString("<a href=\"#L")
164
-		lineNums.WriteString(itoa(i + 1))
165
-		lineNums.WriteString("\">")
166
-		lineNums.WriteString(itoa(i + 1))
167
-		lineNums.WriteString("</a>\n")
168
-	}
169
-	for i, l := range lines {
170
-		code.WriteString("<span id=\"L")
171
-		code.WriteString(itoa(i + 1))
172
-		code.WriteString("\">")
173
-		code.WriteString(stdhtml.EscapeString(l))
174
-		code.WriteString("</span>\n")
175
-	}
176
-	return `<div class="chroma"><table><tr><td class="lntable"><pre class="chroma"><code>` +
177
-		lineNums.String() +
178
-		`</code></pre></td><td><pre class="chroma"><code>` +
179
-		code.String() +
180
-		`</code></pre></td></tr></table></div>`
161
+// plainLines is the no-lexer fallback: HTML-escape each line and
162
+// hand it back. No syntax highlighting; the row table handles the
163
+// gutter + line layout the same way it does for a chroma'd file.
164
+func plainLines(source string) []template.HTML {
165
+	if source == "" {
166
+		// A truly empty file still gets one row so the panel chrome
167
+		// renders consistently. The line is the empty string.
168
+		return []template.HTML{template.HTML("")}
169
+	}
170
+	raw := strings.Split(source, "\n")
171
+	out := make([]template.HTML, len(raw))
172
+	for i, l := range raw {
173
+		out[i] = template.HTML(stdhtml.EscapeString(l)) //nolint:gosec // EscapeString output is safe HTML
174
+	}
175
+	return out
181176
 }
182177
 
183
-// itoa is a tiny int-to-string used inside plainPre to avoid pulling
184
-// fmt for the hot path.
185
-func itoa(n int) string {
186
-	if n == 0 {
187
-		return "0"
188
-	}
189
-	var buf [20]byte
190
-	i := len(buf)
191
-	for n > 0 {
192
-		i--
193
-		buf[i] = byte('0' + n%10)
194
-		n /= 10
195
-	}
196
-	return string(buf[i:])
178
+// splitChromaLines walks chroma's classes-mode HTML and returns one
179
+// fragment per source line. The wrinkle: chroma may wrap a multi-line
180
+// token (docstring, block comment, raw string literal) in a single
181
+// `<span class="…">…</span>` that crosses line boundaries. A naive
182
+// strings.Split on '\n' would leave half-open spans in some lines and
183
+// orphan `</span>` in others, breaking the row table.
184
+//
185
+// The walker tracks the open-span stack: at every '\n' it closes any
186
+// currently-open spans, emits the line, then reopens the same spans
187
+// at the start of the next line. The result: each line's HTML is
188
+// independently well-formed, and a multi-line token still carries
189
+// the same CSS class on every line it touches.
190
+func splitChromaLines(html string) []template.HTML {
191
+	var (
192
+		lines    []template.HTML
193
+		openTags []string // verbatim "<span …>" strings, used to reopen
194
+		cur      strings.Builder
195
+	)
196
+	closeAll := func() {
197
+		for range openTags {
198
+			cur.WriteString("</span>")
199
+		}
200
+	}
201
+	reopenAll := func() {
202
+		for _, t := range openTags {
203
+			cur.WriteString(t)
204
+		}
205
+	}
206
+
207
+	i := 0
208
+	for i < len(html) {
209
+		switch {
210
+		case strings.HasPrefix(html[i:], "<span"):
211
+			end := strings.IndexByte(html[i:], '>')
212
+			if end < 0 {
213
+				// Malformed; bail to a single-line emit so the caller
214
+				// at least gets unbroken markup.
215
+				cur.WriteString(html[i:])
216
+				i = len(html)
217
+				continue
218
+			}
219
+			tag := html[i : i+end+1]
220
+			cur.WriteString(tag)
221
+			openTags = append(openTags, tag)
222
+			i += end + 1
223
+		case strings.HasPrefix(html[i:], "</span>"):
224
+			cur.WriteString("</span>")
225
+			if len(openTags) > 0 {
226
+				openTags = openTags[:len(openTags)-1]
227
+			}
228
+			i += len("</span>")
229
+		case html[i] == '\n':
230
+			closeAll()
231
+			lines = append(lines, template.HTML(cur.String())) //nolint:gosec // assembled from chroma + escaped tokens
232
+			cur.Reset()
233
+			reopenAll()
234
+			i++
235
+		default:
236
+			cur.WriteByte(html[i])
237
+			i++
238
+		}
239
+	}
240
+	// Trailing line (no terminating \n).
241
+	closeAll()
242
+	if cur.Len() > 0 || len(lines) == 0 {
243
+		lines = append(lines, template.HTML(cur.String())) //nolint:gosec // see above
244
+	}
245
+	return lines
197246
 }
198247
 
199248
 // LanguageGuess returns the human-readable language name (or "Text"