tenseleyflow/shithub / 8065d54

Browse files

S17: git treeops, finder, chroma highlight, goldmark markdown packages

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
8065d548f0e5d064a83d63210e9b995524ad7a03
Parents
cb638ba
Tree
c3fc8e3

6 changed files

StatusFile+-
A internal/repos/finder/finder.go 123 0
A internal/repos/finder/finder_test.go 75 0
A internal/repos/git/treeops.go 335 0
A internal/repos/git/treeops_test.go 63 0
A internal/repos/highlight/chroma.go 130 0
A internal/repos/markdown/render.go 67 0
internal/repos/finder/finder.goadded
@@ -0,0 +1,123 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+// Package finder implements the "Go to file" fuzzy match used by the
4
+// /find/{ref} endpoint. The matcher is a simple subsequence scorer
5
+// with bonuses for path-segment boundaries — close enough to feel like
6
+// VS Code's quickopen for a few thousand entries without pulling a
7
+// full fuzzy library.
8
+package finder
9
+
10
+import (
11
+	"sort"
12
+	"strings"
13
+	"unicode"
14
+)
15
+
16
+// Match is one row in the finder result list.
17
+type Match struct {
18
+	Path  string
19
+	Score int
20
+}
21
+
22
+// Filter returns the top `limit` matches against query, scored highest
23
+// first. A blank query returns the first `limit` paths in input order.
24
+//
25
+// The matcher is case-insensitive and rewards:
26
+//   - consecutive characters (longer runs score higher)
27
+//   - matches at path-segment starts (after `/` or at index 0)
28
+//   - matches at filename basename
29
+//
30
+// Designed for input sizes up to ~50k paths; past that, restrict the
31
+// callable surface or paginate.
32
+func Filter(paths []string, query string, limit int) []Match {
33
+	q := strings.TrimSpace(query)
34
+	if q == "" {
35
+		out := make([]Match, 0, min(limit, len(paths)))
36
+		for i, p := range paths {
37
+			if i >= limit {
38
+				break
39
+			}
40
+			out = append(out, Match{Path: p, Score: 0})
41
+		}
42
+		return out
43
+	}
44
+	qLower := []rune(strings.ToLower(q))
45
+	matches := make([]Match, 0, 64)
46
+	for _, p := range paths {
47
+		if score, ok := score(p, qLower); ok {
48
+			matches = append(matches, Match{Path: p, Score: score})
49
+		}
50
+	}
51
+	sort.SliceStable(matches, func(i, j int) bool {
52
+		if matches[i].Score != matches[j].Score {
53
+			return matches[i].Score > matches[j].Score
54
+		}
55
+		// Tiebreaker: shorter paths first (more specific matches).
56
+		if len(matches[i].Path) != len(matches[j].Path) {
57
+			return len(matches[i].Path) < len(matches[j].Path)
58
+		}
59
+		return matches[i].Path < matches[j].Path
60
+	})
61
+	if len(matches) > limit {
62
+		matches = matches[:limit]
63
+	}
64
+	return matches
65
+}
66
+
67
+// score is a single subsequence-with-bonus pass. Returns (score, true)
68
+// when every query rune is consumed in order; otherwise (0, false).
69
+func score(path string, q []rune) (int, bool) {
70
+	if len(q) == 0 {
71
+		return 0, true
72
+	}
73
+	pLower := []rune(strings.ToLower(path))
74
+	score := 0
75
+	qi := 0
76
+	prevMatchedAt := -2 // forces "consecutive" check to fail on first hit
77
+	for i := 0; i < len(pLower) && qi < len(q); i++ {
78
+		if pLower[i] != q[qi] {
79
+			continue
80
+		}
81
+		// Base hit.
82
+		score += 1
83
+		// Consecutive run bonus.
84
+		if i == prevMatchedAt+1 {
85
+			score += 4
86
+		}
87
+		// Boundary bonus: start of path or after `/`.
88
+		if i == 0 || pLower[i-1] == '/' {
89
+			score += 6
90
+		}
91
+		// Camel/kebab boundary: lowercase-after-uppercase is a weaker
92
+		// boundary than `/`, but worth a smaller bonus.
93
+		if i > 0 && unicode.IsUpper(rune(path[i])) && unicode.IsLower(rune(path[i-1])) {
94
+			score += 3
95
+		}
96
+		prevMatchedAt = i
97
+		qi++
98
+	}
99
+	if qi != len(q) {
100
+		return 0, false
101
+	}
102
+	// Filename-basename bonus: query that fully matches the basename
103
+	// gets a kicker so `repo.go` ranks above `repo_settings_form.html`.
104
+	if base := basename(path); strings.Contains(strings.ToLower(base), string(q)) {
105
+		score += 8
106
+	}
107
+	return score, true
108
+}
109
+
110
// basename returns the final segment of a slash-separated path (the
// whole string when it contains no slash).
func basename(p string) string {
	slash := strings.LastIndexByte(p, '/')
	// slash is -1 when absent, so p[0:] returns p unchanged.
	return p[slash+1:]
}
117
+
118
// min returns the smaller of its two arguments.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
internal/repos/finder/finder_test.goadded
@@ -0,0 +1,75 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package finder_test
4
+
5
+import (
6
+	"testing"
7
+
8
+	"github.com/tenseleyFlow/shithub/internal/repos/finder"
9
+)
10
+
11
+func TestFilter_Empty(t *testing.T) {
12
+	t.Parallel()
13
+	paths := []string{"a", "b", "c"}
14
+	got := finder.Filter(paths, "", 100)
15
+	if len(got) != 3 {
16
+		t.Errorf("len = %d, want 3", len(got))
17
+	}
18
+}
19
+
20
+func TestFilter_PrefersBoundaryAndConsecutive(t *testing.T) {
21
+	t.Parallel()
22
+	paths := []string{
23
+		"internal/web/handlers/repo/repo.go",
24
+		"internal/repos/git/treeops.go",
25
+		"docs/internal/repo-lifecycle.md",
26
+		"internal/repos/lifecycle/rename.go",
27
+	}
28
+	got := finder.Filter(paths, "rename", 5)
29
+	if len(got) == 0 {
30
+		t.Fatalf("no matches")
31
+	}
32
+	if got[0].Path != "internal/repos/lifecycle/rename.go" {
33
+		t.Errorf("top match = %q, want rename.go", got[0].Path)
34
+	}
35
+}
36
+
37
+func TestFilter_Subsequence(t *testing.T) {
38
+	t.Parallel()
39
+	paths := []string{"main.go", "foo/bar/Main.go", "manifest.json"}
40
+	got := finder.Filter(paths, "main", 5)
41
+	if len(got) < 2 {
42
+		t.Fatalf("got=%d, want at least 2", len(got))
43
+	}
44
+	// Both main.go variants should match; manifest.json doesn't (no
45
+	// 'in' subsequence after 'a' break — actually m-a-n is there, but
46
+	// the boundary score should prefer main.go).
47
+	want := map[string]bool{"main.go": false, "foo/bar/Main.go": false}
48
+	for _, m := range got {
49
+		if _, ok := want[m.Path]; ok {
50
+			want[m.Path] = true
51
+		}
52
+	}
53
+	for p, seen := range want {
54
+		if !seen {
55
+			t.Errorf("expected %q in matches", p)
56
+		}
57
+	}
58
+}
59
+
60
+func TestFilter_NoMatchesEmpty(t *testing.T) {
61
+	t.Parallel()
62
+	got := finder.Filter([]string{"abc", "def"}, "xyz", 5)
63
+	if len(got) != 0 {
64
+		t.Errorf("unexpected matches: %+v", got)
65
+	}
66
+}
67
+
68
+func TestFilter_LimitRespected(t *testing.T) {
69
+	t.Parallel()
70
+	paths := []string{"a.go", "ab.go", "abc.go", "abcd.go", "abcde.go"}
71
+	got := finder.Filter(paths, "a", 3)
72
+	if len(got) != 3 {
73
+		t.Errorf("len=%d, want 3", len(got))
74
+	}
75
+}
internal/repos/git/treeops.goadded
@@ -0,0 +1,335 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package git
4
+
5
+import (
6
+	"bytes"
7
+	"context"
8
+	"errors"
9
+	"fmt"
10
+	"io"
11
+	"os/exec"
12
+	"sort"
13
+	"strconv"
14
+	"strings"
15
+)
16
+
17
// RefListing groups the refs returned by ListRefs — every ref under
// refs/heads/ and refs/tags/ — split into separate lists so callers
// can fork on UX (branches vs tags). Used by the ref-resolver to do
// longest-prefix matching against URLs like /tree/feature/x/sub/dir.
type RefListing struct {
	Branches []RefEntry // refs/heads/<name>
	Tags     []RefEntry // refs/tags/<name>
}
26
+
27
// RefEntry is a single ref row parsed from `git for-each-ref` output.
type RefEntry struct {
	Name string // short name (without the refs/heads/ or refs/tags/ prefix)
	OID  string // object name as printed by git (40-char hex sha for SHA-1 repos)
}
32
+
33
+// ListRefs enumerates branches and tags. Empty repos return empty
34
+// slices, not an error.
35
+func ListRefs(ctx context.Context, gitDir string) (RefListing, error) {
36
+	cmd := exec.CommandContext(ctx, "git", "-C", gitDir,
37
+		"for-each-ref", "--format=%(refname)\x1f%(objectname)",
38
+		"refs/heads/", "refs/tags/")
39
+	out, err := cmd.Output()
40
+	if err != nil {
41
+		return RefListing{}, wrapExecErr(err)
42
+	}
43
+	var rl RefListing
44
+	for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") {
45
+		if line == "" {
46
+			continue
47
+		}
48
+		name, oid, ok := strings.Cut(line, "\x1f")
49
+		if !ok {
50
+			continue
51
+		}
52
+		switch {
53
+		case strings.HasPrefix(name, "refs/heads/"):
54
+			rl.Branches = append(rl.Branches, RefEntry{Name: strings.TrimPrefix(name, "refs/heads/"), OID: oid})
55
+		case strings.HasPrefix(name, "refs/tags/"):
56
+			rl.Tags = append(rl.Tags, RefEntry{Name: strings.TrimPrefix(name, "refs/tags/"), OID: oid})
57
+		}
58
+	}
59
+	sort.Slice(rl.Branches, func(i, j int) bool { return rl.Branches[i].Name < rl.Branches[j].Name })
60
+	sort.Slice(rl.Tags, func(i, j int) bool { return rl.Tags[i].Name < rl.Tags[j].Name })
61
+	return rl, nil
62
+}
63
+
64
+// ResolveRef takes the URL segments after /tree/ or /blob/ and finds
65
+// the longest-prefix match against the supplied refs. Hex-SHA shortcut:
66
+// if the first segment is exactly 40 hex chars, treat it as a SHA. The
67
+// returned `path` is the joined remainder after the matched ref (no
68
+// leading slash).
69
+//
70
+// Example: refs = ["main", "feature/x"], URL = ["feature", "x", "sub", "f.go"]
71
+// → ref="feature/x", path="sub/f.go".
72
+func ResolveRef(refs []string, segments []string) (ref, path string, ok bool) {
73
+	if len(segments) == 0 {
74
+		return "", "", false
75
+	}
76
+	// Hex-SHA shortcut.
77
+	if len(segments[0]) == 40 && isHex(segments[0]) {
78
+		return segments[0], strings.Join(segments[1:], "/"), true
79
+	}
80
+	// Longest prefix wins. Sort the refs by descending length so the
81
+	// first match is the longest.
82
+	candidates := append([]string(nil), refs...)
83
+	sort.Slice(candidates, func(i, j int) bool { return len(candidates[i]) > len(candidates[j]) })
84
+	joined := strings.Join(segments, "/")
85
+	for _, r := range candidates {
86
+		if joined == r {
87
+			return r, "", true
88
+		}
89
+		if strings.HasPrefix(joined, r+"/") {
90
+			return r, strings.TrimPrefix(joined, r+"/"), true
91
+		}
92
+	}
93
+	return "", "", false
94
+}
95
+
96
// isHex reports whether s consists solely of hexadecimal digits
// (either case). The empty string vacuously counts as hex.
func isHex(s string) bool {
	for _, c := range s {
		digit := c >= '0' && c <= '9'
		lower := c >= 'a' && c <= 'f'
		upper := c >= 'A' && c <= 'F'
		if !digit && !lower && !upper {
			return false
		}
	}
	return true
}
106
+
107
// TreeEntryKind is one of the four shapes git tree entries take.
type TreeEntryKind string

const (
	EntryTree    TreeEntryKind = "tree"    // directory
	EntryBlob    TreeEntryKind = "blob"    // regular file
	EntrySubmod  TreeEntryKind = "commit"  // a "commit" entry in a tree is a submodule pointer
	EntrySymlink TreeEntryKind = "symlink" // mode-120000 blob, split out by classifyEntry
)
116
+
117
// TreeEntry is one row from `git ls-tree --long --full-tree`.
type TreeEntry struct {
	Kind TreeEntryKind // classified via classifyEntry (symlinks split out of blobs)
	Mode string        // octal mode string: 100644, 100755, 040000, 160000, 120000
	OID  string        // object id (hex)
	Size int64         // blob size in bytes; -1 when N/A (trees, submodules)
	Name string        // basename relative to the listed path
}
125
+
126
// LsTree lists the entries at <ref>:<path>, directories first and
// then alphabetically. An empty path lists the repo root.
//
// Errors: when git reports the target is not a tree (the path is a
// blob) or the object name is invalid, ErrNotATree is returned so
// callers can fall back to BlobInfo-style handling; other failures
// pass through wrapExecErr. NOTE(review): despite the original
// "returns an empty slice when the path doesn't exist" description,
// a missing path surfaces as ErrNotATree here, not as an empty list.
//
// NOTE(review): names with special characters are C-quoted by git
// (this call does not use -z/NUL termination), so such names arrive
// escaped — confirm whether the UI needs unquoting.
func LsTree(ctx context.Context, gitDir, ref, path string) ([]TreeEntry, error) {
	target := ref + ":" + path
	if path == "" {
		target = ref + ":"
	}
	cmd := exec.CommandContext(ctx, "git", "-C", gitDir,
		"ls-tree", "--long", "--full-tree", target)
	out, err := cmd.Output()
	if err != nil {
		// Distinguish "target isn't a tree" (blob path, bad object)
		// from genuine failures by sniffing git's stderr text.
		var ee *exec.ExitError
		if errors.As(err, &ee) {
			stderr := string(ee.Stderr)
			if strings.Contains(stderr, "Not a valid object name") || strings.Contains(stderr, "not a tree") {
				return nil, ErrNotATree
			}
		}
		return nil, wrapExecErr(err)
	}

	entries := make([]TreeEntry, 0, 32)
	for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") {
		if line == "" {
			continue
		}
		// --long format: "<mode> <type> <oid> <size>\t<name>".
		// Size is "-" for trees and submodules. Split at the first
		// tab: everything after it is the (possibly space-containing)
		// name.
		tabIdx := strings.IndexByte(line, '\t')
		if tabIdx < 0 {
			continue
		}
		left, name := line[:tabIdx], line[tabIdx+1:]
		fields := strings.Fields(left)
		if len(fields) != 4 {
			continue
		}
		var size int64 = -1
		if fields[3] != "-" {
			// Parse error deliberately ignored: a malformed size
			// degrades to 0 rather than dropping the entry.
			size, _ = strconv.ParseInt(fields[3], 10, 64)
		}
		kind := classifyEntry(fields[0], fields[1])
		entries = append(entries, TreeEntry{
			Kind: kind, Mode: fields[0], OID: fields[2], Size: size, Name: name,
		})
	}
	// Spec: directories first, then files alphabetically. Stable sort
	// keeps git's name order within each group before the tiebreak.
	sort.SliceStable(entries, func(i, j int) bool {
		if entries[i].Kind == EntryTree && entries[j].Kind != EntryTree {
			return true
		}
		if entries[i].Kind != EntryTree && entries[j].Kind == EntryTree {
			return false
		}
		return entries[i].Name < entries[j].Name
	})
	return entries, nil
}
186
+
187
+// classifyEntry maps git's mode+type fields to our four kinds.
188
+// Symlinks come in with mode 120000 type=blob; we surface them as
189
+// symlink so the UI can avoid Reading them.
190
+func classifyEntry(mode, gitType string) TreeEntryKind {
191
+	if mode == "120000" {
192
+		return EntrySymlink
193
+	}
194
+	switch gitType {
195
+	case "tree":
196
+		return EntryTree
197
+	case "commit":
198
+		return EntrySubmod
199
+	default:
200
+		return EntryBlob
201
+	}
202
+}
203
+
204
// ErrNotATree is returned by LsTree when the requested target is not
// a tree (e.g. the path resolves to a blob, or the object name is
// invalid); callers should fall through to blob handling.
var ErrNotATree = errors.New("git: not a tree")
207
+
208
// ErrNotABlob is the inverse of ErrNotATree. NOTE(review): nothing in
// this file returns it — presumably reserved for a blob reader
// elsewhere; confirm a caller exists before relying on or removing it.
var ErrNotABlob = errors.New("git: not a blob")
210
+
211
// ErrPathNotFound is returned by StatPath when `<ref>:<path>` does
// not name an object on the ref.
var ErrPathNotFound = errors.New("git: path not found")
213
+
214
// BlobInfo pairs a blob's object id with its size in bytes.
// NOTE(review): no function in this file constructs or returns
// BlobInfo (StatPath returns kind/oid/size as separate values) —
// verify external callers before changing or removing it.
type BlobInfo struct {
	OID  string
	Size int64
}
219
+
220
// StatPath reports the kind and, for blobs, the size of `<ref>:<path>`
// so the handler can decide tree-vs-blob rendering without a second
// round-trip. Size is -1 for trees and submodules.
//
// NOTE(review): despite the original "kind + OID + size" description,
// the oid result is always "" on every return path below — callers
// must not rely on it until it is actually populated (e.g. via
// `git rev-parse <target>`).
func StatPath(ctx context.Context, gitDir, ref, path string) (kind TreeEntryKind, oid string, size int64, err error) {
	target := ref + ":" + path
	if path == "" {
		target = ref + ":"
	}
	// First round-trip: `git cat-file -t` yields the object type.
	tCmd := exec.CommandContext(ctx, "git", "-C", gitDir, "cat-file", "-t", target)
	tOut, tErr := tCmd.Output()
	if tErr != nil {
		// Sniff stderr to distinguish "no such path/object" from
		// genuine failures.
		var ee *exec.ExitError
		if errors.As(tErr, &ee) {
			stderr := string(ee.Stderr)
			if strings.Contains(stderr, "Not a valid object name") ||
				strings.Contains(stderr, "does not exist") ||
				strings.Contains(stderr, "fatal: path") {
				return "", "", 0, ErrPathNotFound
			}
		}
		return "", "", 0, wrapExecErr(tErr)
	}
	gitType := strings.TrimSpace(string(tOut))

	switch gitType {
	case "tree":
		return EntryTree, "", -1, nil
	case "commit":
		// A commit object referenced from a tree is a submodule pointer.
		return EntrySubmod, "", -1, nil
	case "blob":
		// Fall through: blobs need a second call for their size.
	default:
		return "", "", 0, fmt.Errorf("git: unexpected type %q", gitType)
	}

	// Second round-trip: `git cat-file -s` yields the blob size.
	sCmd := exec.CommandContext(ctx, "git", "-C", gitDir, "cat-file", "-s", target)
	sOut, err := sCmd.Output()
	if err != nil {
		return "", "", 0, wrapExecErr(err)
	}
	sz, err := strconv.ParseInt(strings.TrimSpace(string(sOut)), 10, 64)
	if err != nil {
		return "", "", 0, fmt.Errorf("git: parse size: %w", err)
	}
	return EntryBlob, "", sz, nil
}
267
+
268
+// ReadBlobBytes reads the entire blob at `<ref>:<path>`. Caller-imposed
269
+// max-size limit is the right guard — git itself doesn't bound the
270
+// stream. Pass 0 for "no cap"; otherwise an oversize read returns
271
+// ErrBlobTooLarge.
272
+func ReadBlobBytes(ctx context.Context, gitDir, ref, path string, maxBytes int64) ([]byte, error) {
273
+	target := ref + ":" + path
274
+	cmd := exec.CommandContext(ctx, "git", "-C", gitDir, "cat-file", "-p", target)
275
+	stdout, err := cmd.StdoutPipe()
276
+	if err != nil {
277
+		return nil, err
278
+	}
279
+	if err := cmd.Start(); err != nil {
280
+		return nil, err
281
+	}
282
+	defer func() { _ = cmd.Wait() }()
283
+	var r io.Reader = stdout
284
+	if maxBytes > 0 {
285
+		// LimitReader so giant blobs don't OOM us.
286
+		r = io.LimitReader(stdout, maxBytes+1)
287
+	}
288
+	body, err := io.ReadAll(r)
289
+	if err != nil {
290
+		return nil, err
291
+	}
292
+	if maxBytes > 0 && int64(len(body)) > maxBytes {
293
+		return body[:maxBytes], ErrBlobTooLarge
294
+	}
295
+	return body, nil
296
+}
297
+
298
// StreamBlob writes the blob at `<ref>:<path>` directly to w with no
// intermediate buffering, so raw downloads stream as fast as
// `git cat-file -p` produces output.
func StreamBlob(ctx context.Context, gitDir, ref, path string, w io.Writer) error {
	var errBuf bytes.Buffer
	cmd := exec.CommandContext(ctx, "git", "-C", gitDir, "cat-file", "-p", ref+":"+path)
	cmd.Stdout = w
	cmd.Stderr = &errBuf
	err := cmd.Run()
	if err == nil {
		return nil
	}
	return fmt.Errorf("git cat-file: %w (%s)", err, errBuf.String())
}
311
+
312
// ErrBlobTooLarge is returned by ReadBlobBytes when the blob exceeds
// the caller-supplied maxBytes cap; the truncated prefix is returned
// alongside it.
var ErrBlobTooLarge = errors.New("git: blob exceeds size cap")
314
+
315
+// ListAllPaths runs `git ls-tree -r --name-only` and returns every
316
+// blob path under the ref. Used by the "Go to file" finder. Filters
317
+// out submodule-style entries (commit type) which shouldn't surface
318
+// in the file finder.
319
+func ListAllPaths(ctx context.Context, gitDir, ref string) ([]string, error) {
320
+	cmd := exec.CommandContext(ctx, "git", "-C", gitDir,
321
+		"ls-tree", "-r", "--full-tree", "--name-only", ref)
322
+	out, err := cmd.Output()
323
+	if err != nil {
324
+		return nil, wrapExecErr(err)
325
+	}
326
+	lines := strings.Split(strings.TrimRight(string(out), "\n"), "\n")
327
+	out2 := make([]string, 0, len(lines))
328
+	for _, l := range lines {
329
+		if l == "" {
330
+			continue
331
+		}
332
+		out2 = append(out2, l)
333
+	}
334
+	return out2, nil
335
+}
internal/repos/git/treeops_test.goadded
@@ -0,0 +1,63 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package git_test
4
+
5
+import (
6
+	"testing"
7
+
8
+	gitops "github.com/tenseleyFlow/shithub/internal/repos/git"
9
+)
10
+
11
+func TestResolveRef_LongestPrefixWins(t *testing.T) {
12
+	t.Parallel()
13
+	refs := []string{"main", "feature/x", "release/v1.0/beta"}
14
+	cases := []struct {
15
+		segs     []string
16
+		wantRef  string
17
+		wantPath string
18
+		wantOK   bool
19
+	}{
20
+		{[]string{"main"}, "main", "", true},
21
+		{[]string{"main", "src", "f.go"}, "main", "src/f.go", true},
22
+		{[]string{"feature", "x"}, "feature/x", "", true},
23
+		{[]string{"feature", "x", "sub", "f.go"}, "feature/x", "sub/f.go", true},
24
+		{[]string{"release", "v1.0", "beta"}, "release/v1.0/beta", "", true},
25
+		{[]string{"release", "v1.0", "beta", "README.md"}, "release/v1.0/beta", "README.md", true},
26
+		{[]string{"missing"}, "", "", false},
27
+		{[]string{}, "", "", false},
28
+	}
29
+	for _, c := range cases {
30
+		ref, path, ok := gitops.ResolveRef(refs, c.segs)
31
+		if ok != c.wantOK || ref != c.wantRef || path != c.wantPath {
32
+			t.Errorf("segs=%v: got (%q, %q, %v), want (%q, %q, %v)",
33
+				c.segs, ref, path, ok, c.wantRef, c.wantPath, c.wantOK)
34
+		}
35
+	}
36
+}
37
+
38
+func TestResolveRef_HexShortcut(t *testing.T) {
39
+	t.Parallel()
40
+	refs := []string{"main"}
41
+	sha := "abcdef0123456789abcdef0123456789abcdef01"
42
+	ref, path, ok := gitops.ResolveRef(refs, []string{sha, "src", "f.go"})
43
+	if !ok || ref != sha || path != "src/f.go" {
44
+		t.Errorf("sha shortcut: got (%q, %q, %v)", ref, path, ok)
45
+	}
46
+}
47
+
48
+func TestResolveRef_HexLooksLikeBranch(t *testing.T) {
49
+	t.Parallel()
50
+	// A branch named like a 40-hex string would be unusual; the spec
51
+	// says ref-lookup takes priority. Here we don't list it as a ref,
52
+	// so the SHA shortcut wins.
53
+	sha := "abcdef0123456789abcdef0123456789abcdef01"
54
+	ref, _, ok := gitops.ResolveRef([]string{"main"}, []string{sha})
55
+	if !ok || ref != sha {
56
+		t.Errorf("expected SHA shortcut, got %q", ref)
57
+	}
58
+	// When the ref list contains the same string, ref-lookup wins.
59
+	ref, path, ok := gitops.ResolveRef([]string{"main", sha}, []string{sha, "x"})
60
+	if !ok || ref != sha || path != "x" {
61
+		t.Errorf("ref-lookup should win: got (%q, %q, %v)", ref, path, ok)
62
+	}
63
+}
internal/repos/highlight/chroma.goadded
@@ -0,0 +1,130 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+// Package highlight wraps Chroma so the rest of the project doesn't
4
+// import it directly. The returned HTML is Chroma's standard "html"
5
+// formatter output with line numbers; the caller embeds it in the
6
+// blob template inside a code-styled wrapper.
7
+package highlight
8
+
9
+import (
10
+	"bytes"
11
+	stdhtml "html"
12
+	"path/filepath"
13
+	"strings"
14
+
15
+	"github.com/alecthomas/chroma/v2"
16
+	chromahtml "github.com/alecthomas/chroma/v2/formatters/html"
17
+	"github.com/alecthomas/chroma/v2/lexers"
18
+	"github.com/alecthomas/chroma/v2/styles"
19
+)
20
+
21
+// Render returns syntax-highlighted HTML for source. filename is used
22
+// to guess the lexer; on miss we fall back to content sniffing, then
23
+// finally to plain text (no highlighting). Line numbers are always on.
24
+//
25
+// The output is a `<pre class="chroma">…</pre>` block ready to embed
26
+// in the page; line-number cells are linkable via Chroma's `LineLinks`
27
+// option (rendered as `#L42`).
28
+func Render(filename, source string) string {
29
+	lexer := lexers.Match(filename)
30
+	if lexer == nil {
31
+		lexer = lexers.Analyse(source)
32
+	}
33
+	if lexer == nil {
34
+		return plainPre(source)
35
+	}
36
+	lexer = chroma.Coalesce(lexer)
37
+	style := styles.Get("github")
38
+	if style == nil {
39
+		style = styles.Fallback
40
+	}
41
+	formatter := chromahtml.New(
42
+		chromahtml.WithLineNumbers(true),
43
+		chromahtml.WithLinkableLineNumbers(true, "L"),
44
+		chromahtml.LineNumbersInTable(true),
45
+		chromahtml.WithClasses(true),
46
+	)
47
+	iter, err := lexer.Tokenise(nil, source)
48
+	if err != nil {
49
+		return plainPre(source)
50
+	}
51
+	var buf bytes.Buffer
52
+	if err := formatter.Format(&buf, style, iter); err != nil {
53
+		return plainPre(source)
54
+	}
55
+	return buf.String()
56
+}
57
+
58
+// CSS returns the `<style>`-wrappable CSS for the highlight theme so
59
+// the operator can serve it once at /static/css/chroma.css. Generated
60
+// from the same `github` style Render uses, so colors stay consistent.
61
+func CSS() string {
62
+	style := styles.Get("github")
63
+	if style == nil {
64
+		style = styles.Fallback
65
+	}
66
+	formatter := chromahtml.New(
67
+		chromahtml.WithClasses(true),
68
+		chromahtml.LineNumbersInTable(true),
69
+	)
70
+	var buf bytes.Buffer
71
+	_ = formatter.WriteCSS(&buf, style)
72
+	return buf.String()
73
+}
74
+
75
+// plainPre escapes source and wraps it in a <pre> for the no-lexer
76
+// fallback. We still provide line numbers via a <table> so the blob
77
+// template renders consistently.
78
+func plainPre(source string) string {
79
+	lines := strings.Split(source, "\n")
80
+	var lineNums, code bytes.Buffer
81
+	for i := range lines {
82
+		lineNums.WriteString("<a href=\"#L")
83
+		lineNums.WriteString(itoa(i + 1))
84
+		lineNums.WriteString("\">")
85
+		lineNums.WriteString(itoa(i + 1))
86
+		lineNums.WriteString("</a>\n")
87
+	}
88
+	for i, l := range lines {
89
+		code.WriteString("<span id=\"L")
90
+		code.WriteString(itoa(i + 1))
91
+		code.WriteString("\">")
92
+		code.WriteString(stdhtml.EscapeString(l))
93
+		code.WriteString("</span>\n")
94
+	}
95
+	return `<div class="chroma"><table><tr><td class="lntable"><pre class="chroma"><code>` +
96
+		lineNums.String() +
97
+		`</code></pre></td><td><pre class="chroma"><code>` +
98
+		code.String() +
99
+		`</code></pre></td></tr></table></div>`
100
+}
101
+
102
// itoa is a tiny int-to-string used inside plainPre to avoid pulling
// fmt for the hot path.
//
// Fix: the original returned "" for negative input (its loop was
// guarded by n > 0). Callers only pass line numbers ≥ 1 today, but
// the helper now handles the full int range — including math.MinInt,
// which is why digits are extracted without negating n.
func itoa(n int) string {
	if n == 0 {
		return "0"
	}
	neg := n < 0
	var buf [21]byte // optional sign + up to 20 digits for 64-bit ints
	i := len(buf)
	for n != 0 {
		i--
		d := n % 10
		if d < 0 {
			d = -d
		}
		buf[i] = byte('0' + d)
		n /= 10
	}
	if neg {
		i--
		buf[i] = '-'
	}
	return string(buf[i:])
}
117
+
118
+// LanguageGuess returns the human-readable language name (or "Text"
119
+// fallback) for display in the blob viewer's header.
120
+func LanguageGuess(filename string) string {
121
+	if lexer := lexers.Match(filename); lexer != nil {
122
+		return lexer.Config().Name
123
+	}
124
+	if ext := filepath.Ext(filename); ext != "" {
125
+		if l := lexers.Get(strings.TrimPrefix(ext, ".")); l != nil {
126
+			return l.Config().Name
127
+		}
128
+	}
129
+	return "Text"
130
+}
internal/repos/markdown/render.goadded
@@ -0,0 +1,67 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+// Package markdown wraps Goldmark + bluemonday for safe README
4
+// rendering. S25 will broaden this with auto-mention, issue-ref
5
+// linking, and cross-repo extensions; S17 ships only what's needed
6
+// for tree-page README rendering.
7
+package markdown
8
+
9
+import (
10
+	"bytes"
11
+
12
+	"github.com/microcosm-cc/bluemonday"
13
+	"github.com/yuin/goldmark"
14
+	"github.com/yuin/goldmark/extension"
15
+	"github.com/yuin/goldmark/parser"
16
+	"github.com/yuin/goldmark/renderer/html"
17
+)
18
+
19
// gm is the shared Goldmark instance used by RenderHTML: CommonMark
// plus GFM (tables, strikethrough, autolinks, task lists) and
// footnotes, with auto-generated heading IDs for in-page anchors.
// Raw-HTML passthrough is deliberately NOT enabled (no
// html.WithUnsafe), so inline HTML in user content is escaped rather
// than rendered.
var gm = goldmark.New(
	goldmark.WithExtensions(
		extension.GFM,
		extension.Footnote,
	),
	goldmark.WithParserOptions(parser.WithAutoHeadingID()),
	goldmark.WithRendererOptions(
		html.WithHardWraps(), // single newline in source → <br>, GitHub-comment style
		html.WithXHTML(),
	),
)
34
+
35
// sanitizer is bluemonday's UGC policy with two adjustments:
//   - class attributes allowed on code/pre/span (Goldmark emits
//     language-foo classes on fenced code blocks)
//   - id allowed on h1-h6 so heading anchor links survive
//
// Goldmark has raw-HTML rendering disabled, so user-injected literal
// HTML never reaches this policy unescaped; bluemonday is the second
// layer of defense.
//
// NOTE(review): AllowImages() permits <img> generally — including
// remote src URLs — so the original "disallow remote images outright"
// comment described an intent this policy does NOT enforce; add URL
// restrictions if that behavior is actually wanted.
var sanitizer = func() *bluemonday.Policy {
	p := bluemonday.UGCPolicy()
	p.AllowAttrs("class").Matching(bluemonday.SpaceSeparatedTokens).OnElements("code", "pre", "span")
	p.AllowAttrs("id").OnElements("h1", "h2", "h3", "h4", "h5", "h6")
	p.AllowImages()
	return p
}()
52
+
53
+// RenderHTML returns sanitized HTML for the given markdown bytes.
54
+// Empty input returns an empty string. The output is suitable for
55
+// inserting into a template via `{{ . | safeHTML }}` — every byte has
56
+// passed bluemonday.
57
+func RenderHTML(src []byte) (string, error) {
58
+	if len(src) == 0 {
59
+		return "", nil
60
+	}
61
+	var buf bytes.Buffer
62
+	if err := gm.Convert(src, &buf); err != nil {
63
+		return "", err
64
+	}
65
+	clean := sanitizer.SanitizeBytes(buf.Bytes())
66
+	return string(clean), nil
67
+}