Go · 8162 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 // Package highlight wraps Chroma so the rest of the project doesn't
4 // import it directly. RenderLines returns one HTML fragment per
5 // source line — the caller composes the row + gutter table itself
6 // (this is the GitHub-classic / Forgejo / Gitea pattern; chroma's
7 // own table mode is bypassed for layout-control reasons documented
8 // in RenderLines).
9 package highlight
10
11 import (
12 "bytes"
13 stdhtml "html"
14 "html/template"
15 "path/filepath"
16 "strings"
17
18 "github.com/alecthomas/chroma/v2"
19 chromahtml "github.com/alecthomas/chroma/v2/formatters/html"
20 "github.com/alecthomas/chroma/v2/lexers"
21 "github.com/alecthomas/chroma/v2/styles"
22 )
23
24 // RenderLines tokenizes source via Chroma and returns one HTML
25 // fragment per line, with no surrounding `<pre>`/`<code>`/table. The
26 // caller composes the gutter + line table itself (S33 blob refactor).
27 //
28 // Per-line splitting respects multi-line tokens: a docstring or block
29 // comment that spans 5 lines yields 5 fragments, each with the open
30 // `<span class="…">` re-emitted at the start and a `</span>` closer
31 // at the end, so every fragment is independently well-formed and the
32 // surrounding row table can intersperse other markup safely.
33 //
34 // `filename` only drives lexer selection; the returned fragments
35 // don't reference it.
36 func RenderLines(filename, source string) []template.HTML {
37 lexer := lexers.Match(filename)
38 if lexer == nil {
39 lexer = lexers.Analyse(source)
40 }
41 if lexer == nil {
42 return plainLines(source)
43 }
44 lexer = chroma.Coalesce(lexer)
45 style := styles.Get("github")
46 if style == nil {
47 style = styles.Fallback
48 }
49 formatter := chromahtml.New(
50 chromahtml.WithClasses(true),
51 chromahtml.PreventSurroundingPre(true),
52 )
53 iter, err := lexer.Tokenise(nil, source)
54 if err != nil {
55 return plainLines(source)
56 }
57 var buf bytes.Buffer
58 if err := formatter.Format(&buf, style, iter); err != nil {
59 return plainLines(source)
60 }
61 return splitChromaLines(buf.String())
62 }
63
64 // CSS returns the `<style>`-wrappable CSS for the highlight theme so
65 // the operator can serve it once at /static/css/chroma.css. Generated
66 // from BOTH the light (`github`) and dark (`github-dark`) Chroma styles
67 // so blob views render correctly under either theme. Each block is
68 // gated by `[data-theme="…"]` (the layout sets that on <html>) so only
69 // one set of rules is active per view. Without the dark variant the
70 // blob viewer renders code on a light background regardless of the
71 // page's theme — invisible text in dark mode.
72 func CSS() string {
73 light := writeStyleCSS("github")
74 dark := writeStyleCSS("github-dark")
75
76 var buf bytes.Buffer
77 buf.WriteString("/* light (default) — applies when [data-theme] is unset or 'light' */\n")
78 buf.WriteString(prefixChromaSelectors(light, `[data-theme="light"] `, ""))
79 buf.WriteString("\n/* dark */\n")
80 buf.WriteString(prefixChromaSelectors(dark, `[data-theme="dark"] `, ""))
81 return buf.String()
82 }
83
84 // writeStyleCSS emits Chroma's classes-mode CSS for a named style.
85 // Falls back to the Fallback style when the name is unknown.
86 func writeStyleCSS(name string) string {
87 style := styles.Get(name)
88 if style == nil {
89 style = styles.Fallback
90 }
91 formatter := chromahtml.New(
92 chromahtml.WithClasses(true),
93 )
94 var buf bytes.Buffer
95 _ = formatter.WriteCSS(&buf, style)
96 return buf.String()
97 }
98
99 // prefixChromaSelectors prefixes every selector in css with `prefix`
100 // so the rule only applies under the given theme attribute. Chroma's
101 // CSS rules all start with `.chroma` (or its line-number child
102 // classes); we walk top-level rules and prefix each.
103 //
104 // `_` is a placeholder for a future per-theme suffix (e.g. !important
105 // on borders) — currently unused.
106 func prefixChromaSelectors(css, prefix, _ string) string {
107 var out bytes.Buffer
108 for _, raw := range splitTopLevelRules(css) {
109 rule := strings.TrimSpace(raw)
110 if rule == "" {
111 continue
112 }
113 brace := strings.IndexByte(rule, '{')
114 if brace < 0 {
115 out.WriteString(rule)
116 continue
117 }
118 selectors := rule[:brace]
119 body := rule[brace:]
120 // Selector lists like ".chroma .nx, .chroma .nf" — prefix each.
121 parts := strings.Split(selectors, ",")
122 for i, p := range parts {
123 parts[i] = prefix + strings.TrimSpace(p)
124 }
125 out.WriteString(strings.Join(parts, ", "))
126 out.WriteString(" ")
127 out.WriteString(body)
128 out.WriteByte('\n')
129 }
130 return out.String()
131 }
132
// splitTopLevelRules cuts css into chunks, each ending at the `}` that
// brings the brace nesting depth back to zero; that brace stays with
// the chunk it closes. Chunks are returned untrimmed, so all but the
// first usually begin with the whitespace that separated the rules.
//
// Any text left after the last such brace is appended as a final
// chunk, trimmed of surrounding whitespace, unless it is blank.
//
// Braces are counted byte by byte with no awareness of CSS comments or
// strings, which is enough for the flat rule lists this package feeds
// it.
func splitTopLevelRules(css string) []string {
	var (
		chunks  []string
		from    int // start of the chunk being scanned
		nesting int // current `{` depth
	)
	for pos := 0; pos < len(css); pos++ {
		c := css[pos]
		if c == '{' {
			nesting++
			continue
		}
		if c != '}' {
			continue
		}
		nesting--
		if nesting == 0 {
			chunks = append(chunks, css[from:pos+1])
			from = pos + 1
		}
	}

	// Leftover text after the final closing brace, if any.
	if rest := strings.TrimSpace(css[from:]); rest != "" {
		chunks = append(chunks, rest)
	}
	return chunks
}
160
// plainLines is the fallback used when no highlighting is available:
// it returns source as one HTML-escaped fragment per line, with no
// markup added. The caller lays the fragments out in the same row
// table it uses for highlighted files.
//
// Line handling is kept in step with the highlighted path so a file
// produces the same number of rows either way:
//
//   - "\r\n" line endings are normalised to "\n", so rows do not carry
//     a stray carriage return (Chroma's tokeniser does the same by
//     default via its EnsureLF option);
//   - a single terminating newline does not open an extra, empty row:
//     "a\nb\n" yields two fragments, not three. Blank lines before the
//     terminator are preserved ("a\n\n" yields "a" and "").
//
// An empty source (or one consisting only of a line terminator)
// yields exactly one empty fragment so the surrounding panel still
// renders a row.
func plainLines(source string) []template.HTML {
	source = strings.ReplaceAll(source, "\r\n", "\n")
	// The final "\n" terminates the last line; it does not start a new one.
	source = strings.TrimSuffix(source, "\n")
	if source == "" {
		return []template.HTML{""}
	}

	raw := strings.Split(source, "\n")
	out := make([]template.HTML, len(raw))
	for i, l := range raw {
		out[i] = template.HTML(stdhtml.EscapeString(l)) //nolint:gosec // EscapeString output is safe HTML
	}
	return out
}
177
// splitChromaLines cuts Chroma's classes-mode HTML into one fragment
// per source line. The input is expected to consist of escaped text
// plus `<span …>` / `</span>` tags only.
//
// A plain split on '\n' is not enough because a span may contain a
// newline: the text before the newline would be left with an unclosed
// span and the text after it with an orphan `</span>`. The walker
// therefore keeps a stack of the currently open span tags. At each
// '\n' it closes every span it has written on the current line and
// emits the line; the spans stay on the stack and are written again
// on the next line.
//
// Reopening is lazy: a span carried over from the previous line is
// only written once the new line actually receives content (text or a
// nested span). If the span is closed before that — the common
// `…\n</span>` shape, where a token's text ends with the newline — it
// is simply dropped. This avoids empty `<span …></span>` pairs at the
// start of lines and, at end of input, avoids emitting a phantom extra
// line that contains nothing but such a pair.
//
// The newline characters themselves are not part of any fragment. A
// terminating newline does not produce a trailing empty fragment, but
// an empty input still yields one empty fragment.
func splitChromaLines(html string) []template.HTML {
	var (
		lines    []template.HTML
		openTags []string // verbatim "<span …>" tags currently open, outermost first
		written  int      // how many of openTags have been written on the current line
		cur      strings.Builder
	)
	// flushOpen writes the open tags that have not yet been written on
	// the current line. Called right before any content is appended.
	flushOpen := func() {
		for _, t := range openTags[written:] {
			cur.WriteString(t)
		}
		written = len(openTags)
	}
	// closeWritten closes every tag written on the current line, so the
	// line is balanced. The tags stay on the stack for the next line.
	closeWritten := func() {
		for ; written > 0; written-- {
			cur.WriteString("</span>")
		}
	}

	for i := 0; i < len(html); {
		rest := html[i:]
		switch {
		case strings.HasPrefix(rest, "<span"):
			flushOpen()
			end := strings.IndexByte(rest, '>')
			if end < 0 {
				// Malformed (unterminated tag): copy the remainder
				// through on the current line and stop scanning.
				cur.WriteString(rest)
				i = len(html)
				break
			}
			tag := rest[:end+1]
			cur.WriteString(tag)
			openTags = append(openTags, tag)
			written = len(openTags)
			i += end + 1
		case strings.HasPrefix(rest, "</span>"):
			switch {
			case len(openTags) == 0:
				// Orphan closer in the input: pass it through untouched.
				cur.WriteString("</span>")
			case written == len(openTags):
				// The innermost span was written on this line: close it.
				cur.WriteString("</span>")
				written--
				openTags = openTags[:len(openTags)-1]
			default:
				// The innermost span was carried over but never
				// reopened on this line; forget it without output.
				openTags = openTags[:len(openTags)-1]
			}
			i += len("</span>")
		case html[i] == '\n':
			closeWritten()
			lines = append(lines, template.HTML(cur.String())) //nolint:gosec // assembled from chroma + escaped tokens
			cur.Reset()
			i++
		default:
			flushOpen()
			cur.WriteByte(html[i])
			i++
		}
	}

	// Trailing line (input did not end with '\n'), or empty input.
	closeWritten()
	if cur.Len() > 0 || len(lines) == 0 {
		lines = append(lines, template.HTML(cur.String())) //nolint:gosec // see above
	}
	return lines
}
247
248 // LanguageGuess returns the human-readable language name (or "Text"
249 // fallback) for display in the blob viewer's header.
250 func LanguageGuess(filename string) string {
251 if lexer := lexers.Match(filename); lexer != nil {
252 return lexer.Config().Name
253 }
254 if ext := filepath.Ext(filename); ext != "" {
255 if l := lexers.Get(strings.TrimPrefix(ext, ".")); l != nil {
256 return l.Config().Name
257 }
258 }
259 return "Text"
260 }
261