tenseleyflow/shithub / 8065d54

Browse files

S17: git treeops, finder, chroma highlight, goldmark markdown packages

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
8065d548f0e5d064a83d63210e9b995524ad7a03
Parents
cb638ba
Tree
c3fc8e3

6 changed files

StatusFile+-
A internal/repos/finder/finder.go 123 0
A internal/repos/finder/finder_test.go 75 0
A internal/repos/git/treeops.go 335 0
A internal/repos/git/treeops_test.go 63 0
A internal/repos/highlight/chroma.go 130 0
A internal/repos/markdown/render.go 67 0
internal/repos/finder/finder.goadded
@@ -0,0 +1,123 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+// Package finder implements the "Go to file" fuzzy match used by the
4
+// /find/{ref} endpoint. The matcher is a simple subsequence scorer
5
+// with bonuses for path-segment boundaries — close enough to feel like
6
+// VS Code's quickopen for a few thousand entries without pulling a
7
+// full fuzzy library.
8
+package finder
9
+
10
+import (
11
+	"sort"
12
+	"strings"
13
+	"unicode"
14
+)
15
+
16
+// Match is one row in the finder result list.
17
+type Match struct {
18
+	Path  string
19
+	Score int
20
+}
21
+
22
+// Filter returns the top `limit` matches against query, scored highest
23
+// first. A blank query returns the first `limit` paths in input order.
24
+//
25
+// The matcher is case-insensitive and rewards:
26
+//   - consecutive characters (longer runs score higher)
27
+//   - matches at path-segment starts (after `/` or at index 0)
28
+//   - matches at filename basename
29
+//
30
+// Designed for input sizes up to ~50k paths; past that, restrict the
31
+// callable surface or paginate.
32
+func Filter(paths []string, query string, limit int) []Match {
33
+	q := strings.TrimSpace(query)
34
+	if q == "" {
35
+		out := make([]Match, 0, min(limit, len(paths)))
36
+		for i, p := range paths {
37
+			if i >= limit {
38
+				break
39
+			}
40
+			out = append(out, Match{Path: p, Score: 0})
41
+		}
42
+		return out
43
+	}
44
+	qLower := []rune(strings.ToLower(q))
45
+	matches := make([]Match, 0, 64)
46
+	for _, p := range paths {
47
+		if score, ok := score(p, qLower); ok {
48
+			matches = append(matches, Match{Path: p, Score: score})
49
+		}
50
+	}
51
+	sort.SliceStable(matches, func(i, j int) bool {
52
+		if matches[i].Score != matches[j].Score {
53
+			return matches[i].Score > matches[j].Score
54
+		}
55
+		// Tiebreaker: shorter paths first (more specific matches).
56
+		if len(matches[i].Path) != len(matches[j].Path) {
57
+			return len(matches[i].Path) < len(matches[j].Path)
58
+		}
59
+		return matches[i].Path < matches[j].Path
60
+	})
61
+	if len(matches) > limit {
62
+		matches = matches[:limit]
63
+	}
64
+	return matches
65
+}
66
+
67
+// score is a single subsequence-with-bonus pass. Returns (score, true)
68
+// when every query rune is consumed in order; otherwise (0, false).
69
+func score(path string, q []rune) (int, bool) {
70
+	if len(q) == 0 {
71
+		return 0, true
72
+	}
73
+	pLower := []rune(strings.ToLower(path))
74
+	score := 0
75
+	qi := 0
76
+	prevMatchedAt := -2 // forces "consecutive" check to fail on first hit
77
+	for i := 0; i < len(pLower) && qi < len(q); i++ {
78
+		if pLower[i] != q[qi] {
79
+			continue
80
+		}
81
+		// Base hit.
82
+		score += 1
83
+		// Consecutive run bonus.
84
+		if i == prevMatchedAt+1 {
85
+			score += 4
86
+		}
87
+		// Boundary bonus: start of path or after `/`.
88
+		if i == 0 || pLower[i-1] == '/' {
89
+			score += 6
90
+		}
91
+		// Camel/kebab boundary: lowercase-after-uppercase is a weaker
92
+		// boundary than `/`, but worth a smaller bonus.
93
+		if i > 0 && unicode.IsUpper(rune(path[i])) && unicode.IsLower(rune(path[i-1])) {
94
+			score += 3
95
+		}
96
+		prevMatchedAt = i
97
+		qi++
98
+	}
99
+	if qi != len(q) {
100
+		return 0, false
101
+	}
102
+	// Filename-basename bonus: query that fully matches the basename
103
+	// gets a kicker so `repo.go` ranks above `repo_settings_form.html`.
104
+	if base := basename(path); strings.Contains(strings.ToLower(base), string(q)) {
105
+		score += 8
106
+	}
107
+	return score, true
108
+}
109
+
110
// basename returns the final segment of a slash-separated path (the
// whole string when it contains no slash).
func basename(p string) string {
	slash := strings.LastIndexByte(p, '/')
	// slash is -1 when absent, so p[0:] returns p unchanged.
	return p[slash+1:]
}
117
+
118
// min returns the smaller of its two arguments.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
internal/repos/finder/finder_test.goadded
@@ -0,0 +1,75 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package finder_test
4
+
5
+import (
6
+	"testing"
7
+
8
+	"github.com/tenseleyFlow/shithub/internal/repos/finder"
9
+)
10
+
11
+func TestFilter_Empty(t *testing.T) {
12
+	t.Parallel()
13
+	paths := []string{"a", "b", "c"}
14
+	got := finder.Filter(paths, "", 100)
15
+	if len(got) != 3 {
16
+		t.Errorf("len = %d, want 3", len(got))
17
+	}
18
+}
19
+
20
+func TestFilter_PrefersBoundaryAndConsecutive(t *testing.T) {
21
+	t.Parallel()
22
+	paths := []string{
23
+		"internal/web/handlers/repo/repo.go",
24
+		"internal/repos/git/treeops.go",
25
+		"docs/internal/repo-lifecycle.md",
26
+		"internal/repos/lifecycle/rename.go",
27
+	}
28
+	got := finder.Filter(paths, "rename", 5)
29
+	if len(got) == 0 {
30
+		t.Fatalf("no matches")
31
+	}
32
+	if got[0].Path != "internal/repos/lifecycle/rename.go" {
33
+		t.Errorf("top match = %q, want rename.go", got[0].Path)
34
+	}
35
+}
36
+
37
+func TestFilter_Subsequence(t *testing.T) {
38
+	t.Parallel()
39
+	paths := []string{"main.go", "foo/bar/Main.go", "manifest.json"}
40
+	got := finder.Filter(paths, "main", 5)
41
+	if len(got) < 2 {
42
+		t.Fatalf("got=%d, want at least 2", len(got))
43
+	}
44
+	// Both main.go variants should match; manifest.json doesn't (no
45
+	// 'in' subsequence after 'a' break — actually m-a-n is there, but
46
+	// the boundary score should prefer main.go).
47
+	want := map[string]bool{"main.go": false, "foo/bar/Main.go": false}
48
+	for _, m := range got {
49
+		if _, ok := want[m.Path]; ok {
50
+			want[m.Path] = true
51
+		}
52
+	}
53
+	for p, seen := range want {
54
+		if !seen {
55
+			t.Errorf("expected %q in matches", p)
56
+		}
57
+	}
58
+}
59
+
60
+func TestFilter_NoMatchesEmpty(t *testing.T) {
61
+	t.Parallel()
62
+	got := finder.Filter([]string{"abc", "def"}, "xyz", 5)
63
+	if len(got) != 0 {
64
+		t.Errorf("unexpected matches: %+v", got)
65
+	}
66
+}
67
+
68
+func TestFilter_LimitRespected(t *testing.T) {
69
+	t.Parallel()
70
+	paths := []string{"a.go", "ab.go", "abc.go", "abcd.go", "abcde.go"}
71
+	got := finder.Filter(paths, "a", 3)
72
+	if len(got) != 3 {
73
+		t.Errorf("len=%d, want 3", len(got))
74
+	}
75
+}
internal/repos/git/treeops.goadded
@@ -0,0 +1,335 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package git
4
+
5
+import (
6
+	"bytes"
7
+	"context"
8
+	"errors"
9
+	"fmt"
10
+	"io"
11
+	"os/exec"
12
+	"sort"
13
+	"strconv"
14
+	"strings"
15
+)
16
+
17
// RefListing groups the refs returned by ListRefs — every ref under
// refs/heads/ and refs/tags/ — split into separate lists so callers
// can fork on UX (branches vs tags). Used by the ref-resolver to do
// longest-prefix matching against URLs like /tree/feature/x/sub/dir.
type RefListing struct {
	Branches []RefEntry // refs/heads/<name>
	Tags     []RefEntry // refs/tags/<name>
}
26
+
27
// RefEntry is a single ref row parsed from `git for-each-ref` output.
type RefEntry struct {
	Name string // short name (without the refs/heads/ or refs/tags/ prefix)
	OID  string // object name as printed by git (40-char hex sha for SHA-1 repos)
}
32
+
33
+// ListRefs enumerates branches and tags. Empty repos return empty
34
+// slices, not an error.
35
+func ListRefs(ctx context.Context, gitDir string) (RefListing, error) {
36
+	cmd := exec.CommandContext(ctx, "git", "-C", gitDir,
37
+		"for-each-ref", "--format=%(refname)\x1f%(objectname)",
38
+		"refs/heads/", "refs/tags/")
39
+	out, err := cmd.Output()
40
+	if err != nil {
41
+		return RefListing{}, wrapExecErr(err)
42
+	}
43
+	var rl RefListing
44
+	for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") {
45
+		if line == "" {
46
+			continue
47
+		}
48
+		name, oid, ok := strings.Cut(line, "\x1f")
49
+		if !ok {
50
+			continue
51
+		}
52
+		switch {
53
+		case strings.HasPrefix(name, "refs/heads/"):
54
+			rl.Branches = append(rl.Branches, RefEntry{Name: strings.TrimPrefix(name, "refs/heads/"), OID: oid})
55
+		case strings.HasPrefix(name, "refs/tags/"):
56
+			rl.Tags = append(rl.Tags, RefEntry{Name: strings.TrimPrefix(name, "refs/tags/"), OID: oid})
57
+		}
58
+	}
59
+	sort.Slice(rl.Branches, func(i, j int) bool { return rl.Branches[i].Name < rl.Branches[j].Name })
60
+	sort.Slice(rl.Tags, func(i, j int) bool { return rl.Tags[i].Name < rl.Tags[j].Name })
61
+	return rl, nil
62
+}
63
+
64
+// ResolveRef takes the URL segments after /tree/ or /blob/ and finds
65
+// the longest-prefix match against the supplied refs. Hex-SHA shortcut:
66
+// if the first segment is exactly 40 hex chars, treat it as a SHA. The
67
+// returned `path` is the joined remainder after the matched ref (no
68
+// leading slash).
69
+//
70
+// Example: refs = ["main", "feature/x"], URL = ["feature", "x", "sub", "f.go"]
71
+// → ref="feature/x", path="sub/f.go".
72
+func ResolveRef(refs []string, segments []string) (ref, path string, ok bool) {
73
+	if len(segments) == 0 {
74
+		return "", "", false
75
+	}
76
+	// Hex-SHA shortcut.
77
+	if len(segments[0]) == 40 && isHex(segments[0]) {
78
+		return segments[0], strings.Join(segments[1:], "/"), true
79
+	}
80
+	// Longest prefix wins. Sort the refs by descending length so the
81
+	// first match is the longest.
82
+	candidates := append([]string(nil), refs...)
83
+	sort.Slice(candidates, func(i, j int) bool { return len(candidates[i]) > len(candidates[j]) })
84
+	joined := strings.Join(segments, "/")
85
+	for _, r := range candidates {
86
+		if joined == r {
87
+			return r, "", true
88
+		}
89
+		if strings.HasPrefix(joined, r+"/") {
90
+			return r, strings.TrimPrefix(joined, r+"/"), true
91
+		}
92
+	}
93
+	return "", "", false
94
+}
95
+
96
// isHex reports whether s consists solely of hexadecimal digits
// (either case). The empty string vacuously counts as hex.
func isHex(s string) bool {
	for _, c := range s {
		digit := c >= '0' && c <= '9'
		lower := c >= 'a' && c <= 'f'
		upper := c >= 'A' && c <= 'F'
		if !digit && !lower && !upper {
			return false
		}
	}
	return true
}
106
+
107
// TreeEntryKind is one of the four shapes git tree entries take.
type TreeEntryKind string

const (
	EntryTree    TreeEntryKind = "tree"    // directory
	EntryBlob    TreeEntryKind = "blob"    // regular file
	EntrySubmod  TreeEntryKind = "commit"  // a "commit" entry in a tree is a submodule pointer
	EntrySymlink TreeEntryKind = "symlink" // mode-120000 blob, split out by classifyEntry
)
116
+
117
// TreeEntry is one row from `git ls-tree --long --full-tree`.
type TreeEntry struct {
	Kind TreeEntryKind // classified via classifyEntry (symlinks split out of blobs)
	Mode string        // octal mode string: 100644, 100755, 040000, 160000, 120000
	OID  string        // object id (hex)
	Size int64         // blob size in bytes; -1 when N/A (trees, submodules)
	Name string        // basename relative to the listed path
}
125
+
126
// LsTree lists the entries at <ref>:<path>, directories first and
// then alphabetically. An empty path lists the repo root.
//
// Errors: when git reports the target is not a tree (the path is a
// blob) or the object name is invalid, ErrNotATree is returned so
// callers can fall back to BlobInfo-style handling; other failures
// pass through wrapExecErr. NOTE(review): despite the original
// "returns an empty slice when the path doesn't exist" description,
// a missing path surfaces as ErrNotATree here, not as an empty list.
//
// NOTE(review): names with special characters are C-quoted by git
// (this call does not use -z/NUL termination), so such names arrive
// escaped — confirm whether the UI needs unquoting.
func LsTree(ctx context.Context, gitDir, ref, path string) ([]TreeEntry, error) {
	target := ref + ":" + path
	if path == "" {
		target = ref + ":"
	}
	cmd := exec.CommandContext(ctx, "git", "-C", gitDir,
		"ls-tree", "--long", "--full-tree", target)
	out, err := cmd.Output()
	if err != nil {
		// Distinguish "target isn't a tree" (blob path, bad object)
		// from genuine failures by sniffing git's stderr text.
		var ee *exec.ExitError
		if errors.As(err, &ee) {
			stderr := string(ee.Stderr)
			if strings.Contains(stderr, "Not a valid object name") || strings.Contains(stderr, "not a tree") {
				return nil, ErrNotATree
			}
		}
		return nil, wrapExecErr(err)
	}

	entries := make([]TreeEntry, 0, 32)
	for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") {
		if line == "" {
			continue
		}
		// --long format: "<mode> <type> <oid> <size>\t<name>".
		// Size is "-" for trees and submodules. Split at the first
		// tab: everything after it is the (possibly space-containing)
		// name.
		tabIdx := strings.IndexByte(line, '\t')
		if tabIdx < 0 {
			continue
		}
		left, name := line[:tabIdx], line[tabIdx+1:]
		fields := strings.Fields(left)
		if len(fields) != 4 {
			continue
		}
		var size int64 = -1
		if fields[3] != "-" {
			// Parse error deliberately ignored: a malformed size
			// degrades to 0 rather than dropping the entry.
			size, _ = strconv.ParseInt(fields[3], 10, 64)
		}
		kind := classifyEntry(fields[0], fields[1])
		entries = append(entries, TreeEntry{
			Kind: kind, Mode: fields[0], OID: fields[2], Size: size, Name: name,
		})
	}
	// Spec: directories first, then files alphabetically. Stable sort
	// keeps git's name order within each group before the tiebreak.
	sort.SliceStable(entries, func(i, j int) bool {
		if entries[i].Kind == EntryTree && entries[j].Kind != EntryTree {
			return true
		}
		if entries[i].Kind != EntryTree && entries[j].Kind == EntryTree {
			return false
		}
		return entries[i].Name < entries[j].Name
	})
	return entries, nil
}
186
+
187
+// classifyEntry maps git's mode+type fields to our four kinds.
188
+// Symlinks come in with mode 120000 type=blob; we surface them as
189
+// symlink so the UI can avoid Reading them.
190
+func classifyEntry(mode, gitType string) TreeEntryKind {
191
+	if mode == "120000" {
192
+		return EntrySymlink
193
+	}
194
+	switch gitType {
195
+	case "tree":
196
+		return EntryTree
197
+	case "commit":
198
+		return EntrySubmod
199
+	default:
200
+		return EntryBlob
201
+	}
202
+}
203
+
204
// ErrNotATree is returned by LsTree when the requested target is not
// a tree (e.g. the path resolves to a blob, or the object name is
// invalid); callers should fall through to blob handling.
var ErrNotATree = errors.New("git: not a tree")
207
+
208
// ErrNotABlob is the inverse of ErrNotATree. NOTE(review): nothing in
// this file returns it — presumably reserved for a blob reader
// elsewhere; confirm a caller exists before relying on or removing it.
var ErrNotABlob = errors.New("git: not a blob")
210
+
211
// ErrPathNotFound is returned by StatPath when `<ref>:<path>` does
// not name an object on the ref.
var ErrPathNotFound = errors.New("git: path not found")
213
+
214
// BlobInfo pairs a blob's object id with its size in bytes.
// NOTE(review): no function in this file constructs or returns
// BlobInfo (StatPath returns kind/oid/size as separate values) —
// verify external callers before changing or removing it.
type BlobInfo struct {
	OID  string
	Size int64
}
219
+
220
// StatPath reports the kind and, for blobs, the size of `<ref>:<path>`
// so the handler can decide tree-vs-blob rendering without a second
// round-trip. Size is -1 for trees and submodules.
//
// NOTE(review): despite the original "kind + OID + size" description,
// the oid result is always "" on every return path below — callers
// must not rely on it until it is actually populated (e.g. via
// `git rev-parse <target>`).
func StatPath(ctx context.Context, gitDir, ref, path string) (kind TreeEntryKind, oid string, size int64, err error) {
	target := ref + ":" + path
	if path == "" {
		target = ref + ":"
	}
	// First round-trip: `git cat-file -t` yields the object type.
	tCmd := exec.CommandContext(ctx, "git", "-C", gitDir, "cat-file", "-t", target)
	tOut, tErr := tCmd.Output()
	if tErr != nil {
		// Sniff stderr to distinguish "no such path/object" from
		// genuine failures.
		var ee *exec.ExitError
		if errors.As(tErr, &ee) {
			stderr := string(ee.Stderr)
			if strings.Contains(stderr, "Not a valid object name") ||
				strings.Contains(stderr, "does not exist") ||
				strings.Contains(stderr, "fatal: path") {
				return "", "", 0, ErrPathNotFound
			}
		}
		return "", "", 0, wrapExecErr(tErr)
	}
	gitType := strings.TrimSpace(string(tOut))

	switch gitType {
	case "tree":
		return EntryTree, "", -1, nil
	case "commit":
		// A commit object referenced from a tree is a submodule pointer.
		return EntrySubmod, "", -1, nil
	case "blob":
		// Fall through: blobs need a second call for their size.
	default:
		return "", "", 0, fmt.Errorf("git: unexpected type %q", gitType)
	}

	// Second round-trip: `git cat-file -s` yields the blob size.
	sCmd := exec.CommandContext(ctx, "git", "-C", gitDir, "cat-file", "-s", target)
	sOut, err := sCmd.Output()
	if err != nil {
		return "", "", 0, wrapExecErr(err)
	}
	sz, err := strconv.ParseInt(strings.TrimSpace(string(sOut)), 10, 64)
	if err != nil {
		return "", "", 0, fmt.Errorf("git: parse size: %w", err)
	}
	return EntryBlob, "", sz, nil
}
267
+
268
+// ReadBlobBytes reads the entire blob at `<ref>:<path>`. Caller-imposed
269
+// max-size limit is the right guard — git itself doesn't bound the
270
+// stream. Pass 0 for "no cap"; otherwise an oversize read returns
271
+// ErrBlobTooLarge.
272
+func ReadBlobBytes(ctx context.Context, gitDir, ref, path string, maxBytes int64) ([]byte, error) {
273
+	target := ref + ":" + path
274
+	cmd := exec.CommandContext(ctx, "git", "-C", gitDir, "cat-file", "-p", target)
275
+	stdout, err := cmd.StdoutPipe()
276
+	if err != nil {
277
+		return nil, err
278
+	}
279
+	if err := cmd.Start(); err != nil {
280
+		return nil, err
281
+	}
282
+	defer func() { _ = cmd.Wait() }()
283
+	var r io.Reader = stdout
284
+	if maxBytes > 0 {
285
+		// LimitReader so giant blobs don't OOM us.
286
+		r = io.LimitReader(stdout, maxBytes+1)
287
+	}
288
+	body, err := io.ReadAll(r)
289
+	if err != nil {
290
+		return nil, err
291
+	}
292
+	if maxBytes > 0 && int64(len(body)) > maxBytes {
293
+		return body[:maxBytes], ErrBlobTooLarge
294
+	}
295
+	return body, nil
296
+}
297
+
298
// StreamBlob writes the blob at `<ref>:<path>` directly to w with no
// intermediate buffering, so raw downloads stream as fast as
// `git cat-file -p` produces output.
func StreamBlob(ctx context.Context, gitDir, ref, path string, w io.Writer) error {
	var errBuf bytes.Buffer
	cmd := exec.CommandContext(ctx, "git", "-C", gitDir, "cat-file", "-p", ref+":"+path)
	cmd.Stdout = w
	cmd.Stderr = &errBuf
	err := cmd.Run()
	if err == nil {
		return nil
	}
	return fmt.Errorf("git cat-file: %w (%s)", err, errBuf.String())
}
311
+
312
// ErrBlobTooLarge is returned by ReadBlobBytes when the blob exceeds
// the caller-supplied maxBytes cap; the truncated prefix is returned
// alongside it.
var ErrBlobTooLarge = errors.New("git: blob exceeds size cap")
314
+
315
+// ListAllPaths runs `git ls-tree -r --name-only` and returns every
316
+// blob path under the ref. Used by the "Go to file" finder. Filters
317
+// out submodule-style entries (commit type) which shouldn't surface
318
+// in the file finder.
319
+func ListAllPaths(ctx context.Context, gitDir, ref string) ([]string, error) {
320
+	cmd := exec.CommandContext(ctx, "git", "-C", gitDir,
321
+		"ls-tree", "-r", "--full-tree", "--name-only", ref)
322
+	out, err := cmd.Output()
323
+	if err != nil {
324
+		return nil, wrapExecErr(err)
325
+	}
326
+	lines := strings.Split(strings.TrimRight(string(out), "\n"), "\n")
327
+	out2 := make([]string, 0, len(lines))
328
+	for _, l := range lines {
329
+		if l == "" {
330
+			continue
331
+		}
332
+		out2 = append(out2, l)
333
+	}
334
+	return out2, nil
335
+}
internal/repos/git/treeops_test.goadded
@@ -0,0 +1,63 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package git_test
4
+
5
+import (
6
+	"testing"
7
+
8
+	gitops "github.com/tenseleyFlow/shithub/internal/repos/git"
9
+)
10
+
11
+func TestResolveRef_LongestPrefixWins(t *testing.T) {
12
+	t.Parallel()
13
+	refs := []string{"main", "feature/x", "release/v1.0/beta"}
14
+	cases := []struct {
15
+		segs     []string
16
+		wantRef  string
17
+		wantPath string
18
+		wantOK   bool
19
+	}{
20
+		{[]string{"main"}, "main", "", true},
21
+		{[]string{"main", "src", "f.go"}, "main", "src/f.go", true},
22
+		{[]string{"feature", "x"}, "feature/x", "", true},
23
+		{[]string{"feature", "x", "sub", "f.go"}, "feature/x", "sub/f.go", true},
24
+		{[]string{"release", "v1.0", "beta"}, "release/v1.0/beta", "", true},
25
+		{[]string{"release", "v1.0", "beta", "README.md"}, "release/v1.0/beta", "README.md", true},
26
+		{[]string{"missing"}, "", "", false},
27
+		{[]string{}, "", "", false},
28
+	}
29
+	for _, c := range cases {
30
+		ref, path, ok := gitops.ResolveRef(refs, c.segs)
31
+		if ok != c.wantOK || ref != c.wantRef || path != c.wantPath {
32
+			t.Errorf("segs=%v: got (%q, %q, %v), want (%q, %q, %v)",
33
+				c.segs, ref, path, ok, c.wantRef, c.wantPath, c.wantOK)
34
+		}
35
+	}
36
+}
37
+
38
+func TestResolveRef_HexShortcut(t *testing.T) {
39
+	t.Parallel()
40
+	refs := []string{"main"}
41
+	sha := "abcdef0123456789abcdef0123456789abcdef01"
42
+	ref, path, ok := gitops.ResolveRef(refs, []string{sha, "src", "f.go"})
43
+	if !ok || ref != sha || path != "src/f.go" {
44
+		t.Errorf("sha shortcut: got (%q, %q, %v)", ref, path, ok)
45
+	}
46
+}
47
+
48
+func TestResolveRef_HexLooksLikeBranch(t *testing.T) {
49
+	t.Parallel()
50
+	// A branch named like a 40-hex string would be unusual; the spec
51
+	// says ref-lookup takes priority. Here we don't list it as a ref,
52
+	// so the SHA shortcut wins.
53
+	sha := "abcdef0123456789abcdef0123456789abcdef01"
54
+	ref, _, ok := gitops.ResolveRef([]string{"main"}, []string{sha})
55
+	if !ok || ref != sha {
56
+		t.Errorf("expected SHA shortcut, got %q", ref)
57
+	}
58
+	// When the ref list contains the same string, ref-lookup wins.
59
+	ref, path, ok := gitops.ResolveRef([]string{"main", sha}, []string{sha, "x"})
60
+	if !ok || ref != sha || path != "x" {
61
+		t.Errorf("ref-lookup should win: got (%q, %q, %v)", ref, path, ok)
62
+	}
63
+}
internal/repos/highlight/chroma.goadded
@@ -0,0 +1,130 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+// Package highlight wraps Chroma so the rest of the project doesn't
4
+// import it directly. The returned HTML is Chroma's standard "html"
5
+// formatter output with line numbers; the caller embeds it in the
6
+// blob template inside a code-styled wrapper.
7
+package highlight
8
+
9
+import (
10
+	"bytes"
11
+	stdhtml "html"
12
+	"path/filepath"
13
+	"strings"
14
+
15
+	"github.com/alecthomas/chroma/v2"
16
+	chromahtml "github.com/alecthomas/chroma/v2/formatters/html"
17
+	"github.com/alecthomas/chroma/v2/lexers"
18
+	"github.com/alecthomas/chroma/v2/styles"
19
+)
20
+
21
+// Render returns syntax-highlighted HTML for source. filename is used
22
+// to guess the lexer; on miss we fall back to content sniffing, then
23
+// finally to plain text (no highlighting). Line numbers are always on.
24
+//
25
+// The output is a `<pre class="chroma">…</pre>` block ready to embed
26
+// in the page; line-number cells are linkable via Chroma's `LineLinks`
27
+// option (rendered as `#L42`).
28
+func Render(filename, source string) string {
29
+	lexer := lexers.Match(filename)
30
+	if lexer == nil {
31
+		lexer = lexers.Analyse(source)
32
+	}
33
+	if lexer == nil {
34
+		return plainPre(source)
35
+	}
36
+	lexer = chroma.Coalesce(lexer)
37
+	style := styles.Get("github")
38
+	if style == nil {
39
+		style = styles.Fallback
40
+	}
41
+	formatter := chromahtml.New(
42
+		chromahtml.WithLineNumbers(true),
43
+		chromahtml.WithLinkableLineNumbers(true, "L"),
44
+		chromahtml.LineNumbersInTable(true),
45
+		chromahtml.WithClasses(true),
46
+	)
47
+	iter, err := lexer.Tokenise(nil, source)
48
+	if err != nil {
49
+		return plainPre(source)
50
+	}
51
+	var buf bytes.Buffer
52
+	if err := formatter.Format(&buf, style, iter); err != nil {
53
+		return plainPre(source)
54
+	}
55
+	return buf.String()
56
+}
57
+
58
+// CSS returns the `<style>`-wrappable CSS for the highlight theme so
59
+// the operator can serve it once at /static/css/chroma.css. Generated
60
+// from the same `github` style Render uses, so colors stay consistent.
61
+func CSS() string {
62
+	style := styles.Get("github")
63
+	if style == nil {
64
+		style = styles.Fallback
65
+	}
66
+	formatter := chromahtml.New(
67
+		chromahtml.WithClasses(true),
68
+		chromahtml.LineNumbersInTable(true),
69
+	)
70
+	var buf bytes.Buffer
71
+	_ = formatter.WriteCSS(&buf, style)
72
+	return buf.String()
73
+}
74
+
75
+// plainPre escapes source and wraps it in a <pre> for the no-lexer
76
+// fallback. We still provide line numbers via a <table> so the blob
77
+// template renders consistently.
78
+func plainPre(source string) string {
79
+	lines := strings.Split(source, "\n")
80
+	var lineNums, code bytes.Buffer
81
+	for i := range lines {
82
+		lineNums.WriteString("<a href=\"#L")
83
+		lineNums.WriteString(itoa(i + 1))
84
+		lineNums.WriteString("\">")
85
+		lineNums.WriteString(itoa(i + 1))
86
+		lineNums.WriteString("</a>\n")
87
+	}
88
+	for i, l := range lines {
89
+		code.WriteString("<span id=\"L")
90
+		code.WriteString(itoa(i + 1))
91
+		code.WriteString("\">")
92
+		code.WriteString(stdhtml.EscapeString(l))
93
+		code.WriteString("</span>\n")
94
+	}
95
+	return `<div class="chroma"><table><tr><td class="lntable"><pre class="chroma"><code>` +
96
+		lineNums.String() +
97
+		`</code></pre></td><td><pre class="chroma"><code>` +
98
+		code.String() +
99
+		`</code></pre></td></tr></table></div>`
100
+}
101
+
102
// itoa is a tiny int-to-string used inside plainPre to avoid pulling
// fmt for the hot path.
//
// Fix: the original returned "" for negative input (its loop was
// guarded by n > 0). Callers only pass line numbers ≥ 1 today, but
// the helper now handles the full int range — including math.MinInt,
// which is why digits are extracted without negating n.
func itoa(n int) string {
	if n == 0 {
		return "0"
	}
	neg := n < 0
	var buf [21]byte // optional sign + up to 20 digits for 64-bit ints
	i := len(buf)
	for n != 0 {
		i--
		d := n % 10
		if d < 0 {
			d = -d
		}
		buf[i] = byte('0' + d)
		n /= 10
	}
	if neg {
		i--
		buf[i] = '-'
	}
	return string(buf[i:])
}
117
+
118
+// LanguageGuess returns the human-readable language name (or "Text"
119
+// fallback) for display in the blob viewer's header.
120
+func LanguageGuess(filename string) string {
121
+	if lexer := lexers.Match(filename); lexer != nil {
122
+		return lexer.Config().Name
123
+	}
124
+	if ext := filepath.Ext(filename); ext != "" {
125
+		if l := lexers.Get(strings.TrimPrefix(ext, ".")); l != nil {
126
+			return l.Config().Name
127
+		}
128
+	}
129
+	return "Text"
130
+}
internal/repos/markdown/render.goadded
@@ -0,0 +1,67 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+// Package markdown wraps Goldmark + bluemonday for safe README
4
+// rendering. S25 will broaden this with auto-mention, issue-ref
5
+// linking, and cross-repo extensions; S17 ships only what's needed
6
+// for tree-page README rendering.
7
+package markdown
8
+
9
+import (
10
+	"bytes"
11
+
12
+	"github.com/microcosm-cc/bluemonday"
13
+	"github.com/yuin/goldmark"
14
+	"github.com/yuin/goldmark/extension"
15
+	"github.com/yuin/goldmark/parser"
16
+	"github.com/yuin/goldmark/renderer/html"
17
+)
18
+
19
// gm is the shared Goldmark instance used by RenderHTML: CommonMark
// plus GFM (tables, strikethrough, autolinks, task lists) and
// footnotes, with auto-generated heading IDs for in-page anchors.
// Raw-HTML passthrough is deliberately NOT enabled (no
// html.WithUnsafe), so inline HTML in user content is escaped rather
// than rendered.
var gm = goldmark.New(
	goldmark.WithExtensions(
		extension.GFM,
		extension.Footnote,
	),
	goldmark.WithParserOptions(parser.WithAutoHeadingID()),
	goldmark.WithRendererOptions(
		html.WithHardWraps(), // single newline in source → <br>, GitHub-comment style
		html.WithXHTML(),
	),
)
34
+
35
// sanitizer is bluemonday's UGC policy with two adjustments:
//   - class attributes allowed on code/pre/span (Goldmark emits
//     language-foo classes on fenced code blocks)
//   - id allowed on h1-h6 so heading anchor links survive
//
// Goldmark has raw-HTML rendering disabled, so user-injected literal
// HTML never reaches this policy unescaped; bluemonday is the second
// layer of defense.
//
// NOTE(review): AllowImages() permits <img> generally — including
// remote src URLs — so the original "disallow remote images outright"
// comment described an intent this policy does NOT enforce; add URL
// restrictions if that behavior is actually wanted.
var sanitizer = func() *bluemonday.Policy {
	p := bluemonday.UGCPolicy()
	p.AllowAttrs("class").Matching(bluemonday.SpaceSeparatedTokens).OnElements("code", "pre", "span")
	p.AllowAttrs("id").OnElements("h1", "h2", "h3", "h4", "h5", "h6")
	p.AllowImages()
	return p
}()
52
+
53
+// RenderHTML returns sanitized HTML for the given markdown bytes.
54
+// Empty input returns an empty string. The output is suitable for
55
+// inserting into a template via `{{ . | safeHTML }}` — every byte has
56
+// passed bluemonday.
57
+func RenderHTML(src []byte) (string, error) {
58
+	if len(src) == 0 {
59
+		return "", nil
60
+	}
61
+	var buf bytes.Buffer
62
+	if err := gm.Convert(src, &buf); err != nil {
63
+		return "", err
64
+	}
65
+	clean := sanitizer.SanitizeBytes(buf.Bytes())
66
+	return string(clean), nil
67
+}