| 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | |
| 3 | // Package highlight wraps Chroma so the rest of the project doesn't |
| 4 | // import it directly. RenderLines returns one HTML fragment per |
| 5 | // source line — the caller composes the row + gutter table itself |
| 6 | // (this is the GitHub-classic / Forgejo / Gitea pattern; chroma's |
| 7 | // own table mode is bypassed for layout-control reasons documented |
| 8 | // in RenderLines). |
| 9 | package highlight |
| 10 | |
| 11 | import ( |
| 12 | "bytes" |
| 13 | stdhtml "html" |
| 14 | "html/template" |
| 15 | "path/filepath" |
| 16 | "strings" |
| 17 | |
| 18 | "github.com/alecthomas/chroma/v2" |
| 19 | chromahtml "github.com/alecthomas/chroma/v2/formatters/html" |
| 20 | "github.com/alecthomas/chroma/v2/lexers" |
| 21 | "github.com/alecthomas/chroma/v2/styles" |
| 22 | ) |
| 23 | |
| 24 | // RenderLines tokenizes source via Chroma and returns one HTML |
| 25 | // fragment per line, with no surrounding `<pre>`/`<code>`/table. The |
| 26 | // caller composes the gutter + line table itself (S33 blob refactor). |
| 27 | // |
| 28 | // Per-line splitting respects multi-line tokens: a docstring or block |
| 29 | // comment that spans 5 lines yields 5 fragments, each with the open |
| 30 | // `<span class="…">` re-emitted at the start and a `</span>` closer |
| 31 | // at the end, so every fragment is independently well-formed and the |
| 32 | // surrounding row table can intersperse other markup safely. |
| 33 | // |
| 34 | // `filename` only drives lexer selection; the returned fragments |
| 35 | // don't reference it. |
| 36 | func RenderLines(filename, source string) []template.HTML { |
| 37 | lexer := lexers.Match(filename) |
| 38 | if lexer == nil { |
| 39 | lexer = lexers.Analyse(source) |
| 40 | } |
| 41 | if lexer == nil { |
| 42 | return plainLines(source) |
| 43 | } |
| 44 | lexer = chroma.Coalesce(lexer) |
| 45 | style := styles.Get("github") |
| 46 | if style == nil { |
| 47 | style = styles.Fallback |
| 48 | } |
| 49 | formatter := chromahtml.New( |
| 50 | chromahtml.WithClasses(true), |
| 51 | chromahtml.PreventSurroundingPre(true), |
| 52 | ) |
| 53 | iter, err := lexer.Tokenise(nil, source) |
| 54 | if err != nil { |
| 55 | return plainLines(source) |
| 56 | } |
| 57 | var buf bytes.Buffer |
| 58 | if err := formatter.Format(&buf, style, iter); err != nil { |
| 59 | return plainLines(source) |
| 60 | } |
| 61 | return splitChromaLines(buf.String()) |
| 62 | } |
| 63 | |
| 64 | // CSS returns the `<style>`-wrappable CSS for the highlight theme so |
| 65 | // the operator can serve it once at /static/css/chroma.css. Generated |
| 66 | // from BOTH the light (`github`) and dark (`github-dark`) Chroma styles |
| 67 | // so blob views render correctly under either theme. Each block is |
| 68 | // gated by `[data-theme="…"]` (the layout sets that on <html>) so only |
| 69 | // one set of rules is active per view. Without the dark variant the |
| 70 | // blob viewer renders code on a light background regardless of the |
| 71 | // page's theme — invisible text in dark mode. |
| 72 | func CSS() string { |
| 73 | light := writeStyleCSS("github") |
| 74 | dark := writeStyleCSS("github-dark") |
| 75 | |
| 76 | var buf bytes.Buffer |
| 77 | buf.WriteString("/* light (default) — applies when [data-theme] is unset or 'light' */\n") |
| 78 | buf.WriteString(prefixChromaSelectors(light, `[data-theme="light"] `, "")) |
| 79 | buf.WriteString("\n/* dark */\n") |
| 80 | buf.WriteString(prefixChromaSelectors(dark, `[data-theme="dark"] `, "")) |
| 81 | return buf.String() |
| 82 | } |
| 83 | |
| 84 | // writeStyleCSS emits Chroma's classes-mode CSS for a named style. |
| 85 | // Falls back to the Fallback style when the name is unknown. |
| 86 | func writeStyleCSS(name string) string { |
| 87 | style := styles.Get(name) |
| 88 | if style == nil { |
| 89 | style = styles.Fallback |
| 90 | } |
| 91 | formatter := chromahtml.New( |
| 92 | chromahtml.WithClasses(true), |
| 93 | ) |
| 94 | var buf bytes.Buffer |
| 95 | _ = formatter.WriteCSS(&buf, style) |
| 96 | return buf.String() |
| 97 | } |
| 98 | |
| 99 | // prefixChromaSelectors prefixes every selector in css with `prefix` |
| 100 | // so the rule only applies under the given theme attribute. Chroma's |
| 101 | // CSS rules all start with `.chroma` (or its line-number child |
| 102 | // classes); we walk top-level rules and prefix each. |
| 103 | // |
| 104 | // `_` is a placeholder for a future per-theme suffix (e.g. !important |
| 105 | // on borders) — currently unused. |
| 106 | func prefixChromaSelectors(css, prefix, _ string) string { |
| 107 | var out bytes.Buffer |
| 108 | for _, raw := range splitTopLevelRules(css) { |
| 109 | rule := strings.TrimSpace(raw) |
| 110 | if rule == "" { |
| 111 | continue |
| 112 | } |
| 113 | brace := strings.IndexByte(rule, '{') |
| 114 | if brace < 0 { |
| 115 | out.WriteString(rule) |
| 116 | continue |
| 117 | } |
| 118 | selectors := rule[:brace] |
| 119 | body := rule[brace:] |
| 120 | // Selector lists like ".chroma .nx, .chroma .nf" — prefix each. |
| 121 | parts := strings.Split(selectors, ",") |
| 122 | for i, p := range parts { |
| 123 | parts[i] = prefix + strings.TrimSpace(p) |
| 124 | } |
| 125 | out.WriteString(strings.Join(parts, ", ")) |
| 126 | out.WriteString(" ") |
| 127 | out.WriteString(body) |
| 128 | out.WriteByte('\n') |
| 129 | } |
| 130 | return out.String() |
| 131 | } |
| 132 | |
// splitTopLevelRules cuts a CSS blob into top-level rules. A rule ends
// at the `}` that brings the brace depth back to zero, and that brace
// stays attached to the rule it closes. Nested blocks are therefore
// kept whole inside their enclosing rule.
//
// Rules are returned exactly as sliced (leading whitespace between
// rules is preserved). Any text after the last closing brace is
// returned as a final, whitespace-trimmed element when non-empty.
//
// An unmatched `}` met at depth zero is ignored for depth tracking, so
// a stray closer cannot push the depth negative and stop every later
// rule from being split. Braces inside CSS comments or string literals
// are NOT recognised as such; they are counted like any other brace.
func splitTopLevelRules(css string) []string {
	var rules []string
	start, depth := 0, 0
	for i := 0; i < len(css); i++ {
		switch css[i] {
		case '{':
			depth++
		case '}':
			if depth == 0 {
				// Stray closer: leave it in the text but do not let it
				// unbalance the depth counter.
				continue
			}
			depth--
			if depth == 0 {
				rules = append(rules, css[start:i+1])
				start = i + 1
			}
		}
	}
	if tail := strings.TrimSpace(css[start:]); tail != "" {
		rules = append(rules, tail)
	}
	return rules
}
| 160 | |
// plainLines is the no-lexer fallback: it splits source on "\n",
// HTML-escapes each line and returns one fragment per line. There is
// no syntax highlighting; the caller lays out gutter and rows exactly
// as it does for highlighted output.
//
// A single trailing "\r" is removed from every line so that CRLF files
// do not leak a carriage return into each fragment (HTML parsers turn
// a CR into a line break, which shows up as an extra blank line in
// pre-formatted rows). The number of returned lines is unaffected by
// this; a "\r" that is not directly before a "\n" or the end of input
// is left in place.
//
// An empty source yields exactly one empty fragment so the surrounding
// panel still renders a row. A source ending in "\n" yields a final
// empty fragment, as strings.Split does.
func plainLines(source string) []template.HTML {
	if source == "" {
		// A truly empty file still gets one row so the panel chrome
		// renders consistently. The line is the empty string.
		return []template.HTML{template.HTML("")}
	}
	raw := strings.Split(source, "\n")
	out := make([]template.HTML, len(raw))
	for i, l := range raw {
		l = strings.TrimSuffix(l, "\r") // CRLF line ending
		out[i] = template.HTML(stdhtml.EscapeString(l)) //nolint:gosec // EscapeString output is safe HTML
	}
	return out
}
| 177 | |
// splitChromaLines cuts span-based highlighter HTML into one fragment
// per '\n', keeping every fragment balanced.
//
// A plain split on '\n' is not enough: a token that spans several
// lines sits inside a single `<span …>…</span>`, so cutting through it
// would leave an unclosed span on one line and an orphan `</span>` on
// the next. To avoid that, a stack of currently open `<span …>` tags is
// maintained. At each newline one `</span>` is appended per open tag,
// the fragment is emitted, and the next fragment starts by replaying
// the same opening tags verbatim (attributes included).
//
// Details worth knowing:
//   - Anything starting with `<span` up to the next `>` is treated as
//     an opening tag; `</span>` pops the stack (a closer with nothing
//     open is copied through and otherwise ignored).
//   - A `<span` with no closing `>` ends the scan: the remainder is
//     appended to the current fragment as-is.
//   - Input that ends right after a newline with nothing open does not
//     produce a trailing empty fragment, but empty input still yields
//     exactly one empty fragment.
func splitChromaLines(html string) []template.HTML {
	const (
		opener = "<span"
		closer = "</span>"
	)
	var (
		out   []template.HTML
		stack []string // opening tags in nesting order, outermost first
		line  strings.Builder
	)
	// sealLine appends one closer for every span that is still open.
	sealLine := func() {
		for n := len(stack); n > 0; n-- {
			line.WriteString(closer)
		}
	}

	rest := html
	for len(rest) > 0 {
		if rest[0] == '\n' {
			sealLine()
			out = append(out, template.HTML(line.String())) //nolint:gosec // assembled from chroma + escaped tokens
			line.Reset()
			// Carry the open spans over to the next fragment.
			for _, tag := range stack {
				line.WriteString(tag)
			}
			rest = rest[1:]
			continue
		}
		if strings.HasPrefix(rest, closer) {
			line.WriteString(closer)
			if n := len(stack); n > 0 {
				stack = stack[:n-1]
			}
			rest = rest[len(closer):]
			continue
		}
		if strings.HasPrefix(rest, opener) {
			gt := strings.IndexByte(rest, '>')
			if gt < 0 {
				// Truncated tag: give up splitting and keep the rest
				// together so nothing is lost.
				line.WriteString(rest)
				break
			}
			tag := rest[:gt+1]
			line.WriteString(tag)
			stack = append(stack, tag)
			rest = rest[gt+1:]
			continue
		}
		line.WriteByte(rest[0])
		rest = rest[1:]
	}

	// Whatever is left is the final line (input had no terminating '\n',
	// or spans were still open / closed after the last one).
	sealLine()
	if line.Len() > 0 || len(out) == 0 {
		out = append(out, template.HTML(line.String())) //nolint:gosec // see above
	}
	return out
}
| 247 | |
| 248 | // LanguageGuess returns the human-readable language name (or "Text" |
| 249 | // fallback) for display in the blob viewer's header. |
| 250 | func LanguageGuess(filename string) string { |
| 251 | if lexer := lexers.Match(filename); lexer != nil { |
| 252 | return lexer.Config().Name |
| 253 | } |
| 254 | if ext := filepath.Ext(filename); ext != "" { |
| 255 | if l := lexers.Get(strings.TrimPrefix(ext, ".")); l != nil { |
| 256 | return l.Config().Name |
| 257 | } |
| 258 | } |
| 259 | return "Text" |
| 260 | } |
| 261 |