@@ -0,0 +1,335 @@ |
| 1 | +// SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | + |
| 3 | +package git |
| 4 | + |
| 5 | +import ( |
| 6 | + "bytes" |
| 7 | + "context" |
| 8 | + "errors" |
| 9 | + "fmt" |
| 10 | + "io" |
| 11 | + "os/exec" |
| 12 | + "sort" |
| 13 | + "strconv" |
| 14 | + "strings" |
| 15 | +) |
| 16 | + |
// RefListing is the result of ListRefs: every ref found under
// refs/heads/ and refs/tags/. Used by the ref-resolver to do
// longest-prefix matching against URLs like /tree/feature/x/sub/dir.
//
// Branches and tags are kept in separate lists so callers can fork on
// UX (branches vs tags). ListRefs returns each list sorted by Name.
type RefListing struct {
	Branches []RefEntry // refs/heads/<name>
	Tags     []RefEntry // refs/tags/<name>
}
| 26 | + |
// RefEntry is one ref from for-each-ref: the ref's short name paired
// with the object id git printed for it via %(objectname).
type RefEntry struct {
	Name string // short name (without refs/heads/ or refs/tags/)
	OID  string // 40-char hex sha
}
| 32 | + |
| 33 | +// ListRefs enumerates branches and tags. Empty repos return empty |
| 34 | +// slices, not an error. |
| 35 | +func ListRefs(ctx context.Context, gitDir string) (RefListing, error) { |
| 36 | + cmd := exec.CommandContext(ctx, "git", "-C", gitDir, |
| 37 | + "for-each-ref", "--format=%(refname)\x1f%(objectname)", |
| 38 | + "refs/heads/", "refs/tags/") |
| 39 | + out, err := cmd.Output() |
| 40 | + if err != nil { |
| 41 | + return RefListing{}, wrapExecErr(err) |
| 42 | + } |
| 43 | + var rl RefListing |
| 44 | + for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") { |
| 45 | + if line == "" { |
| 46 | + continue |
| 47 | + } |
| 48 | + name, oid, ok := strings.Cut(line, "\x1f") |
| 49 | + if !ok { |
| 50 | + continue |
| 51 | + } |
| 52 | + switch { |
| 53 | + case strings.HasPrefix(name, "refs/heads/"): |
| 54 | + rl.Branches = append(rl.Branches, RefEntry{Name: strings.TrimPrefix(name, "refs/heads/"), OID: oid}) |
| 55 | + case strings.HasPrefix(name, "refs/tags/"): |
| 56 | + rl.Tags = append(rl.Tags, RefEntry{Name: strings.TrimPrefix(name, "refs/tags/"), OID: oid}) |
| 57 | + } |
| 58 | + } |
| 59 | + sort.Slice(rl.Branches, func(i, j int) bool { return rl.Branches[i].Name < rl.Branches[j].Name }) |
| 60 | + sort.Slice(rl.Tags, func(i, j int) bool { return rl.Tags[i].Name < rl.Tags[j].Name }) |
| 61 | + return rl, nil |
| 62 | +} |
| 63 | + |
// ResolveRef splits the URL segments that follow /tree/ or /blob/ into
// a ref name and the path inside that ref.
//
// Resolution order:
//  1. Hex-SHA shortcut: if the first segment is exactly 40 hex chars it
//     is treated as a SHA and refs is not consulted.
//  2. Otherwise the longest entry of refs that either equals the joined
//     segments or is followed by "/" in them wins.
//
// The returned path is the joined remainder after the matched ref (no
// leading slash); it is empty when the segments name the ref itself.
// ok is false when segments is empty or nothing matches. refs is only
// read: it is neither copied nor reordered.
//
// Example: refs = ["main", "feature/x"], URL = ["feature", "x", "sub", "f.go"]
// → ref="feature/x", path="sub/f.go".
func ResolveRef(refs []string, segments []string) (ref, path string, ok bool) {
	if len(segments) == 0 {
		return "", "", false
	}
	// Hex-SHA shortcut.
	if first := segments[0]; len(first) == 40 && isHex(first) {
		return first, strings.Join(segments[1:], "/"), true
	}

	joined := strings.Join(segments, "/")

	// Longest prefix wins. One pass that remembers the longest match so
	// far replaces copying and sorting the whole ref list per request.
	best, found := "", false
	for _, r := range refs {
		if found && len(r) <= len(best) {
			continue // cannot beat the current match
		}
		exact := joined == r
		// r must be followed by a separator so "main" never matches "mainline".
		prefixed := len(joined) > len(r) && joined[len(r)] == '/' && strings.HasPrefix(joined, r)
		if exact || prefixed {
			best, found = r, true
		}
	}
	if !found {
		return "", "", false
	}
	if len(best) == len(joined) {
		return best, "", true
	}
	// Skip the matched ref and the "/" that follows it.
	return best, joined[len(best)+1:], true
}

// isHex reports whether every byte of s is an ASCII hex digit, in
// either case. The empty string is vacuously hex; callers check the
// length separately.
func isHex(s string) bool {
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c >= '0' && c <= '9', c >= 'a' && c <= 'f', c >= 'A' && c <= 'F':
		default:
			return false
		}
	}
	return true
}
| 106 | + |
// TreeEntryKind is one of the four shapes git tree entries take.
type TreeEntryKind string

// The kinds reuse git's own type names where one exists. EntrySymlink
// has no git type of its own: git reports a symlink as a blob with mode
// 120000, and classifyEntry turns that into EntrySymlink.
const (
	EntryTree    TreeEntryKind = "tree"
	EntryBlob    TreeEntryKind = "blob"
	EntrySubmod  TreeEntryKind = "commit" // a "commit" entry in a tree is a submodule pointer
	EntrySymlink TreeEntryKind = "symlink"
)
| 116 | + |
// TreeEntry is one row from `git ls-tree --long --full-tree`, as
// produced by LsTree.
type TreeEntry struct {
	Kind TreeEntryKind // derived from Mode and git's type column by classifyEntry
	Mode string        // 100644, 100755, 040000, 160000, 120000
	OID  string        // object id column of the row
	Size int64         // -1 when N/A (trees, submodules)
	Name string        // basename relative to the listed path
}
| 125 | + |
| 126 | +// LsTree lists entries at <ref>:<path>. Empty path lists the repo root. |
| 127 | +// Returns an empty slice when the path doesn't exist or is itself a |
| 128 | +// blob — callers should fall back to BlobInfo. |
| 129 | +func LsTree(ctx context.Context, gitDir, ref, path string) ([]TreeEntry, error) { |
| 130 | + target := ref + ":" + path |
| 131 | + if path == "" { |
| 132 | + target = ref + ":" |
| 133 | + } |
| 134 | + cmd := exec.CommandContext(ctx, "git", "-C", gitDir, |
| 135 | + "ls-tree", "--long", "--full-tree", target) |
| 136 | + out, err := cmd.Output() |
| 137 | + if err != nil { |
| 138 | + // `fatal: not a tree object` for a blob path; surface a typed err. |
| 139 | + var ee *exec.ExitError |
| 140 | + if errors.As(err, &ee) { |
| 141 | + stderr := string(ee.Stderr) |
| 142 | + if strings.Contains(stderr, "Not a valid object name") || strings.Contains(stderr, "not a tree") { |
| 143 | + return nil, ErrNotATree |
| 144 | + } |
| 145 | + } |
| 146 | + return nil, wrapExecErr(err) |
| 147 | + } |
| 148 | + |
| 149 | + entries := make([]TreeEntry, 0, 32) |
| 150 | + for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") { |
| 151 | + if line == "" { |
| 152 | + continue |
| 153 | + } |
| 154 | + // Format: "<mode> <type> <oid> <size>\t<name>" |
| 155 | + // Size is "-" for trees and submodules. |
| 156 | + tabIdx := strings.IndexByte(line, '\t') |
| 157 | + if tabIdx < 0 { |
| 158 | + continue |
| 159 | + } |
| 160 | + left, name := line[:tabIdx], line[tabIdx+1:] |
| 161 | + fields := strings.Fields(left) |
| 162 | + if len(fields) != 4 { |
| 163 | + continue |
| 164 | + } |
| 165 | + var size int64 = -1 |
| 166 | + if fields[3] != "-" { |
| 167 | + size, _ = strconv.ParseInt(fields[3], 10, 64) |
| 168 | + } |
| 169 | + kind := classifyEntry(fields[0], fields[1]) |
| 170 | + entries = append(entries, TreeEntry{ |
| 171 | + Kind: kind, Mode: fields[0], OID: fields[2], Size: size, Name: name, |
| 172 | + }) |
| 173 | + } |
| 174 | + // Spec: directories first, then files alphabetically. |
| 175 | + sort.SliceStable(entries, func(i, j int) bool { |
| 176 | + if entries[i].Kind == EntryTree && entries[j].Kind != EntryTree { |
| 177 | + return true |
| 178 | + } |
| 179 | + if entries[i].Kind != EntryTree && entries[j].Kind == EntryTree { |
| 180 | + return false |
| 181 | + } |
| 182 | + return entries[i].Name < entries[j].Name |
| 183 | + }) |
| 184 | + return entries, nil |
| 185 | +} |
| 186 | + |
| 187 | +// classifyEntry maps git's mode+type fields to our four kinds. |
| 188 | +// Symlinks come in with mode 120000 type=blob; we surface them as |
| 189 | +// symlink so the UI can avoid Reading them. |
| 190 | +func classifyEntry(mode, gitType string) TreeEntryKind { |
| 191 | + if mode == "120000" { |
| 192 | + return EntrySymlink |
| 193 | + } |
| 194 | + switch gitType { |
| 195 | + case "tree": |
| 196 | + return EntryTree |
| 197 | + case "commit": |
| 198 | + return EntrySubmod |
| 199 | + default: |
| 200 | + return EntryBlob |
| 201 | + } |
| 202 | +} |
| 203 | + |
// Sentinel errors for object lookups under <ref>:<path>.
var (
	// ErrNotATree is returned by LsTree when git reports that the
	// target is not a tree or not a valid object name, e.g. because the
	// path is a blob. Callers should then fall through to blob handling.
	ErrNotATree = errors.New("git: not a tree")

	// ErrNotABlob is the inverse of ErrNotATree, for callers that
	// needed a blob.
	// NOTE(review): nothing in the visible part of this file returns
	// it; the original comment attributes it to ReadBlob. Confirm.
	ErrNotABlob = errors.New("git: not a blob")

	// ErrPathNotFound is for paths that don't exist on the ref; StatPath
	// returns it.
	ErrPathNotFound = errors.New("git: path not found")
)
| 213 | + |
// BlobInfo is the result of `git cat-file -e -p` style introspection:
// the identity and size of a single blob.
// NOTE(review): no function in the visible part of this file produces
// a BlobInfo; confirm where it is filled in.
type BlobInfo struct {
	OID  string // object id of the blob
	Size int64  // presumably the blob size in bytes; verify against the producer
}
| 219 | + |
| 220 | +// StatPath returns the kind + OID + size for `<ref>:<path>`. Used by |
| 221 | +// the handler to decide whether to render tree or blob without a |
| 222 | +// second round-trip. |
| 223 | +func StatPath(ctx context.Context, gitDir, ref, path string) (kind TreeEntryKind, oid string, size int64, err error) { |
| 224 | + target := ref + ":" + path |
| 225 | + if path == "" { |
| 226 | + target = ref + ":" |
| 227 | + } |
| 228 | + // `git cat-file -t <ref>:<path>` returns the type. |
| 229 | + tCmd := exec.CommandContext(ctx, "git", "-C", gitDir, "cat-file", "-t", target) |
| 230 | + tOut, tErr := tCmd.Output() |
| 231 | + if tErr != nil { |
| 232 | + var ee *exec.ExitError |
| 233 | + if errors.As(tErr, &ee) { |
| 234 | + stderr := string(ee.Stderr) |
| 235 | + if strings.Contains(stderr, "Not a valid object name") || |
| 236 | + strings.Contains(stderr, "does not exist") || |
| 237 | + strings.Contains(stderr, "fatal: path") { |
| 238 | + return "", "", 0, ErrPathNotFound |
| 239 | + } |
| 240 | + } |
| 241 | + return "", "", 0, wrapExecErr(tErr) |
| 242 | + } |
| 243 | + gitType := strings.TrimSpace(string(tOut)) |
| 244 | + |
| 245 | + switch gitType { |
| 246 | + case "tree": |
| 247 | + return EntryTree, "", -1, nil |
| 248 | + case "commit": |
| 249 | + // commit object referenced from a tree → submodule pointer. |
| 250 | + return EntrySubmod, "", -1, nil |
| 251 | + case "blob": |
| 252 | + default: |
| 253 | + return "", "", 0, fmt.Errorf("git: unexpected type %q", gitType) |
| 254 | + } |
| 255 | + |
| 256 | + sCmd := exec.CommandContext(ctx, "git", "-C", gitDir, "cat-file", "-s", target) |
| 257 | + sOut, err := sCmd.Output() |
| 258 | + if err != nil { |
| 259 | + return "", "", 0, wrapExecErr(err) |
| 260 | + } |
| 261 | + sz, err := strconv.ParseInt(strings.TrimSpace(string(sOut)), 10, 64) |
| 262 | + if err != nil { |
| 263 | + return "", "", 0, fmt.Errorf("git: parse size: %w", err) |
| 264 | + } |
| 265 | + return EntryBlob, "", sz, nil |
| 266 | +} |
| 267 | + |
| 268 | +// ReadBlobBytes reads the entire blob at `<ref>:<path>`. Caller-imposed |
| 269 | +// max-size limit is the right guard — git itself doesn't bound the |
| 270 | +// stream. Pass 0 for "no cap"; otherwise an oversize read returns |
| 271 | +// ErrBlobTooLarge. |
| 272 | +func ReadBlobBytes(ctx context.Context, gitDir, ref, path string, maxBytes int64) ([]byte, error) { |
| 273 | + target := ref + ":" + path |
| 274 | + cmd := exec.CommandContext(ctx, "git", "-C", gitDir, "cat-file", "-p", target) |
| 275 | + stdout, err := cmd.StdoutPipe() |
| 276 | + if err != nil { |
| 277 | + return nil, err |
| 278 | + } |
| 279 | + if err := cmd.Start(); err != nil { |
| 280 | + return nil, err |
| 281 | + } |
| 282 | + defer func() { _ = cmd.Wait() }() |
| 283 | + var r io.Reader = stdout |
| 284 | + if maxBytes > 0 { |
| 285 | + // LimitReader so giant blobs don't OOM us. |
| 286 | + r = io.LimitReader(stdout, maxBytes+1) |
| 287 | + } |
| 288 | + body, err := io.ReadAll(r) |
| 289 | + if err != nil { |
| 290 | + return nil, err |
| 291 | + } |
| 292 | + if maxBytes > 0 && int64(len(body)) > maxBytes { |
| 293 | + return body[:maxBytes], ErrBlobTooLarge |
| 294 | + } |
| 295 | + return body, nil |
| 296 | +} |
| 297 | + |
// StreamBlob runs `git cat-file -p <ref>:<path>` with w as the
// command's stdout, so raw downloads are never collected into a byte
// slice here and the response streams as git produces it.
//
// On failure the returned error wraps the exec error and appends
// whatever git printed on stderr.
func StreamBlob(ctx context.Context, gitDir, ref, path string, w io.Writer) error {
	var errOut bytes.Buffer
	git := exec.CommandContext(ctx, "git", "-C", gitDir, "cat-file", "-p", ref+":"+path)
	git.Stdout, git.Stderr = w, &errOut

	runErr := git.Run()
	if runErr == nil {
		return nil
	}
	return fmt.Errorf("git cat-file: %w (%s)", runErr, errOut.String())
}
| 311 | + |
// ErrBlobTooLarge is returned when a maxBytes cap is hit on
// ReadBlobBytes. The bytes returned alongside it are the blob's first
// maxBytes bytes, not the whole blob.
var ErrBlobTooLarge = errors.New("git: blob exceeds size cap")
| 314 | + |
| 315 | +// ListAllPaths runs `git ls-tree -r --name-only` and returns every |
| 316 | +// blob path under the ref. Used by the "Go to file" finder. Filters |
| 317 | +// out submodule-style entries (commit type) which shouldn't surface |
| 318 | +// in the file finder. |
| 319 | +func ListAllPaths(ctx context.Context, gitDir, ref string) ([]string, error) { |
| 320 | + cmd := exec.CommandContext(ctx, "git", "-C", gitDir, |
| 321 | + "ls-tree", "-r", "--full-tree", "--name-only", ref) |
| 322 | + out, err := cmd.Output() |
| 323 | + if err != nil { |
| 324 | + return nil, wrapExecErr(err) |
| 325 | + } |
| 326 | + lines := strings.Split(strings.TrimRight(string(out), "\n"), "\n") |
| 327 | + out2 := make([]string, 0, len(lines)) |
| 328 | + for _, l := range lines { |
| 329 | + if l == "" { |
| 330 | + continue |
| 331 | + } |
| 332 | + out2 = append(out2, l) |
| 333 | + } |
| 334 | + return out2, nil |
| 335 | +} |