Go · 14638 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 package storage
4
5 import (
6 "context"
7 "errors"
8 "fmt"
9 "io/fs"
10 "os"
11 "os/exec"
12 "path/filepath"
13 "regexp"
14 "strings"
15 )
16
// RepoFS is the single owner of the on-disk layout for bare git
// repositories. Every caller that needs a repo path goes through this
// type, which keeps the path-validation rules in one place.
type RepoFS struct {
	// root is the cleaned, absolute directory all repo paths live under.
	root string
}

// NewRepoFS builds a RepoFS rooted at root.
//
// root must be non-empty, absolute, and name an existing directory;
// otherwise an error describing the failed check is returned. The stored
// root is the cleaned form of the argument.
func NewRepoFS(root string) (*RepoFS, error) {
	switch {
	case root == "":
		return nil, errors.New("storage: repofs: root required")
	case !filepath.IsAbs(root):
		return nil, fmt.Errorf("storage: repofs: root must be absolute, got %q", root)
	}

	cleaned := filepath.Clean(root)
	resolved, err := filepath.Abs(cleaned)
	if err != nil {
		return nil, fmt.Errorf("storage: repofs: clean root: %w", err)
	}

	st, err := os.Stat(resolved)
	switch {
	case err != nil:
		return nil, fmt.Errorf("storage: repofs: stat root: %w", err)
	case !st.IsDir():
		return nil, fmt.Errorf("storage: repofs: root %q is not a directory", resolved)
	}

	return &RepoFS{root: resolved}, nil
}

// Root reports the absolute root path, e.g. for logging and
// `storage check`.
func (r *RepoFS) Root() string {
	return r.root
}
49
// Name whitelists. Both patterns are fully anchored, so a match means the
// whole string is acceptable.
var (
	// ownerNameRE accepts owner names of 1..39 characters (GitHub's
	// username limit) made of lowercase ASCII letters, digits, and
	// hyphens. The first and last characters must be a letter or digit.
	ownerNameRE = regexp.MustCompile(`^[a-z0-9](?:[a-z0-9-]{0,37}[a-z0-9])?$`)

	// repoNameRE accepts repository names of 1..100 characters (GitHub's
	// limit) made of lowercase ASCII letters, digits, hyphens, dots, and
	// underscores. The first character must be a letter or digit; the
	// last must be a letter, digit, or underscore, so a trailing hyphen
	// or dot is rejected while a trailing underscore is allowed.
	repoNameRE = regexp.MustCompile(`^[a-z0-9](?:[a-z0-9._-]{0,98}[a-z0-9_])?$`)
)
59
60 // validateName enforces the per-kind whitelist. Returns ErrInvalidPath
61 // wrapped with a precise reason on failure.
62 func validateName(kind, name string) error {
63 if name == "" {
64 return fmt.Errorf("%w: %s empty", ErrInvalidPath, kind)
65 }
66 maxLen, re, alphabet := 39, ownerNameRE, "[a-z0-9-]"
67 if kind == "repo" {
68 maxLen, re, alphabet = 100, repoNameRE, "[a-z0-9._-]"
69 }
70 if len(name) > maxLen {
71 return fmt.Errorf("%w: %s %q too long (max %d)", ErrInvalidPath, kind, name, maxLen)
72 }
73 if name != strings.ToLower(name) {
74 return fmt.Errorf("%w: %s %q must be lowercase", ErrInvalidPath, kind, name)
75 }
76 if strings.Contains(name, "..") {
77 return fmt.Errorf("%w: %s contains dot-dot", ErrInvalidPath, kind)
78 }
79 if strings.HasPrefix(name, ".") {
80 return fmt.Errorf("%w: %s starts with dot", ErrInvalidPath, kind)
81 }
82 if filepath.IsAbs(name) {
83 return fmt.Errorf("%w: %s is absolute", ErrInvalidPath, kind)
84 }
85 if !re.MatchString(name) {
86 return fmt.Errorf("%w: %s %q fails whitelist %s", ErrInvalidPath, kind, name, alphabet)
87 }
88 return nil
89 }
90
// shardOf derives the two-character shard directory name from owner: the
// first two bytes of owner, right-padded with `_` when owner is shorter
// than that so the shard is always exactly two characters.
func shardOf(owner string) string {
	const pad = "__"
	if len(owner) >= len(pad) {
		return owner[:len(pad)]
	}
	// Append only as much padding as is needed to reach two characters.
	return owner + pad[len(owner):]
}
103
104 // RepoPath returns the absolute disk path for the bare repository at
105 // (owner, name). Validates inputs and guarantees the result is rooted at
106 // r.root. Both inputs are lowercased before path construction.
107 func (r *RepoFS) RepoPath(owner, name string) (string, error) {
108 owner = strings.ToLower(owner)
109 name = strings.ToLower(name)
110 if err := validateName("owner", owner); err != nil {
111 return "", err
112 }
113 if err := validateName("repo", name); err != nil {
114 return "", err
115 }
116 p := filepath.Join(r.root, shardOf(owner), owner, name+".git")
117 if err := r.containedInRoot(p); err != nil {
118 return "", err
119 }
120 return p, nil
121 }
122
123 // DeletedRepoPath returns the internal tombstone path used while a
124 // soft-deleted repo is inside its restore grace window. Keeping
125 // tombstones outside the canonical <owner>/<name>.git path lets a new
126 // active repo reuse the name without losing the old row's restore data.
127 func (r *RepoFS) DeletedRepoPath(owner, name string, repoID int64) (string, error) {
128 if repoID <= 0 {
129 return "", fmt.Errorf("%w: repo id required", ErrInvalidPath)
130 }
131 canonical, err := r.RepoPath(owner, name)
132 if err != nil {
133 return "", err
134 }
135 p := filepath.Join(filepath.Dir(canonical), ".deleted", fmt.Sprintf("%d.git", repoID))
136 if err := r.containedInRoot(p); err != nil {
137 return "", err
138 }
139 return p, nil
140 }
141
142 // containedInRoot returns ErrEscapesRoot when p does not resolve under r.root.
143 // Defense-in-depth: validateName already rejects ".." and absolute paths,
144 // but a future caller might compose paths differently.
145 func (r *RepoFS) containedInRoot(p string) error {
146 clean := filepath.Clean(p)
147 if !strings.HasPrefix(clean, r.root+string(filepath.Separator)) && clean != r.root {
148 return fmt.Errorf("%w: %s not under %s", ErrEscapesRoot, clean, r.root)
149 }
150 return nil
151 }
152
153 // Exists reports whether path exists. Validates that path is under root.
154 func (r *RepoFS) Exists(path string) (bool, error) {
155 if err := r.containedInRoot(path); err != nil {
156 return false, err
157 }
158 _, err := os.Stat(path)
159 if err == nil {
160 return true, nil
161 }
162 if errors.Is(err, os.ErrNotExist) {
163 return false, nil
164 }
165 return false, fmt.Errorf("storage: repofs: stat %s: %w", path, err)
166 }
167
168 // DiskUsageBytes sums the byte size of every regular file under path.
169 // It is used for quota gates and repo:size_recalc, so it validates that
170 // the target stays inside the configured repo root and never follows
171 // directory symlinks.
172 func (r *RepoFS) DiskUsageBytes(ctx context.Context, path string) (int64, error) {
173 if err := r.containedInRoot(path); err != nil {
174 return 0, err
175 }
176 var total int64
177 err := filepath.WalkDir(path, func(_ string, d fs.DirEntry, walkErr error) error {
178 if walkErr != nil {
179 return walkErr
180 }
181 if err := ctx.Err(); err != nil {
182 return err
183 }
184 if d.IsDir() {
185 return nil
186 }
187 info, err := d.Info()
188 if err != nil {
189 return err
190 }
191 if info.Mode().IsRegular() {
192 total += info.Size()
193 }
194 return nil
195 })
196 return total, err
197 }
198
199 // InitBare creates a bare git repository at path. Default branch is
200 // "trunk" — there is no path through this package that creates a bare
201 // repo with a different initial branch.
202 //
203 // The parent directory tree is created on demand. ErrAlreadyExists is
204 // returned if path is non-empty.
205 //
206 // The repo is initialized with `--shared=group`, which:
207 //
208 // - persists `core.sharedRepository=group` in config
209 // - sets the setgid bit on directories (2775)
210 // - keeps group-writable mode bits on files (0664)
211 //
212 // Both shithubd-web (web pushes via the HTTPS handler, runs as the
213 // `shithub` user) and the SSH `git` user (the AuthorizedKeysCommand
214 // dispatches into a process running as `git`, which is in the
215 // `shithub` group) write to the same bare repo on disk. Without
216 // `--shared=group`, git-receive-pack via SSH fails with
217 // "unable to create temporary object directory" because objects/
218 // is 0755 and group write isn't set.
219 func (r *RepoFS) InitBare(ctx context.Context, path string) error {
220 if err := r.containedInRoot(path); err != nil {
221 return err
222 }
223 if entries, err := os.ReadDir(path); err == nil && len(entries) > 0 {
224 return fmt.Errorf("%w: %s", ErrAlreadyExists, path)
225 }
226 if err := os.MkdirAll(filepath.Dir(path), 0o2750); err != nil {
227 return fmt.Errorf("storage: repofs: mkdir parent: %w", err)
228 }
229 if err := os.MkdirAll(path, 0o2750); err != nil {
230 return fmt.Errorf("storage: repofs: mkdir target: %w", err)
231 }
232 // G204: path is constructed via RepoPath (strict whitelist) and verified
233 // to live under r.root. Caller cannot inject arbitrary args.
234 cmd := exec.CommandContext(ctx, "git", "init", "--bare", "--shared=group", "--initial-branch=trunk", path) //nolint:gosec
235 out, err := cmd.CombinedOutput()
236 if err != nil {
237 return fmt.Errorf("storage: repofs: git init --bare: %w (output: %s)", err, strings.TrimSpace(string(out)))
238 }
239 return nil
240 }
241
242 // CloneBareShared clones src → dst as a bare repo with object
243 // alternates pointing back at src. Disk usage of the result is
244 // essentially refs + a small overhead; objects live in src's
245 // `objects/` until the fork is detached (S16 hard-delete cascade
246 // repacks each fork before removing the source).
247 //
248 // Both paths must be contained in r.root and on the same volume —
249 // the same-volume requirement is what makes alternates safe (S04).
250 //
251 // On success the dst directory exists with `git init --bare` shape
252 // plus an `objects/info/alternates` file pointing at src/objects.
253 // On failure the dst directory is removed so a retry sees a clean
254 // slate.
255 func (r *RepoFS) CloneBareShared(ctx context.Context, src, dst string) error {
256 if err := r.containedInRoot(src); err != nil {
257 return err
258 }
259 if err := r.containedInRoot(dst); err != nil {
260 return err
261 }
262 if entries, err := os.ReadDir(dst); err == nil && len(entries) > 0 {
263 return fmt.Errorf("%w: %s", ErrAlreadyExists, dst)
264 }
265 if err := os.MkdirAll(filepath.Dir(dst), 0o2750); err != nil {
266 return fmt.Errorf("storage: repofs: mkdir parent: %w", err)
267 }
268 // `git clone --shared` (here: object-alternates flag, NOT a perms
269 // flag — same name, different sense than init's --shared=group).
270 // To get group-writable perms we set core.sharedRepository=group
271 // via -c so the cloned config has it from byte zero. Without this,
272 // SSH-git push to a fork hits the same EACCES on objects/ that
273 // PR for SR2 #287 fixed for `git init --bare` (see InitBare).
274 //
275 // G204: src/dst are RepoPath-derived, both verified under r.root.
276 cmd := exec.CommandContext(ctx, "git", "-c", "core.sharedRepository=group", "clone", "--bare", "--shared", src, dst) //nolint:gosec
277 out, err := cmd.CombinedOutput()
278 if err != nil {
279 // Best-effort cleanup; if removal fails too, surface the
280 // original clone error since that's the actionable signal.
281 _ = os.RemoveAll(dst)
282 return fmt.Errorf("storage: repofs: git clone --bare --shared: %w (output: %s)", err, strings.TrimSpace(string(out)))
283 }
284 return nil
285 }
286
287 // RepairSharedPerms brings an existing bare repo to the
288 // `--shared=group` contract InitBare now produces from byte zero
289 // (SR2 #287). Idempotent: a repo already at the contract is left
290 // alone except for explicitly setting the config (cheap).
291 //
292 // Steps:
293 // 1. `git config core.sharedRepository=group`
294 // 2. `chmod -R g+w` and `find -type d -exec chmod g+s` so future
295 // writes inherit the group on creation.
296 //
297 // Group ownership itself is NOT changed — the shipped invariant is
298 // that all repos are owned by the `shithub` group already (the
299 // shithub user creates them). If a repo's group is wrong, that's a
300 // separate provisioning bug; this method's job is only the bits.
301 //
302 // Runs git with safe.directory=* injected via env so the operator
303 // (typically root over SSH) can operate on repos owned by the
304 // `shithub` user. The same trick is used by the SSH dispatcher.
305 // Without it, git 2.35+ emits "fatal: not in a git directory" as
306 // part of the dubious-ownership early exit.
307 func (r *RepoFS) RepairSharedPerms(ctx context.Context, path string) error {
308 if err := r.containedInRoot(path); err != nil {
309 return err
310 }
311 if _, err := os.Stat(path); err != nil {
312 return fmt.Errorf("storage: repofs: stat %s: %w", path, err)
313 }
314 // Persist the contract in config.
315 cfg := exec.CommandContext(ctx, "git", "-C", path, "config", "core.sharedRepository", "group") //nolint:gosec
316 cfg.Env = append(
317 os.Environ(),
318 "GIT_CONFIG_COUNT=1",
319 "GIT_CONFIG_KEY_0=safe.directory",
320 "GIT_CONFIG_VALUE_0=*",
321 )
322 if out, err := cfg.CombinedOutput(); err != nil {
323 return fmt.Errorf("storage: repofs: git config sharedRepository: %w (output: %s)", err, strings.TrimSpace(string(out)))
324 }
325 // Walk the tree once: directories get +g+s, files get +g+w.
326 // path is verified contained-in-root above; no symlinks span out
327 // of the repo (bare repos don't ship with symlinks under .git/).
328 // G122: filepath.Walk + os.Chmod is race-prone in adversarial
329 // trees, but our writer (this process running as root or shithub)
330 // is also the only writer for these paths, and the trees are not
331 // user-influenced beyond the validated owner/name slugs. Operator-
332 // only command, not user-triggered.
333 if err := filepath.Walk(path, func(p string, info os.FileInfo, err error) error { //nolint:gosec
334 if err != nil {
335 return err
336 }
337 mode := info.Mode()
338 newMode := mode | 0o060 // group rw
339 if info.IsDir() {
340 newMode |= os.ModeSetgid // g+s
341 }
342 if newMode == mode {
343 return nil
344 }
345 return os.Chmod(p, newMode) //nolint:gosec
346 }); err != nil {
347 return fmt.Errorf("storage: repofs: walk chmod: %w", err)
348 }
349 return nil
350 }
351
352 // SetPreciousObjects marks a bare repo's objects as not-prunable. The
353 // canonical foot-gun for forks is source-repo `git gc` removing
354 // objects that forks reach via alternates; setting this on the source
355 // after a fork is created prevents that. Idempotent.
356 func (r *RepoFS) SetPreciousObjects(ctx context.Context, path string) error {
357 if err := r.containedInRoot(path); err != nil {
358 return err
359 }
360 cmd := exec.CommandContext(ctx, "git", "-C", path, "config", "extensions.preciousObjects", "true") //nolint:gosec
361 if out, err := cmd.CombinedOutput(); err != nil {
362 return fmt.Errorf("storage: repofs: set preciousObjects: %w (output: %s)", err, strings.TrimSpace(string(out)))
363 }
364 return nil
365 }
366
367 // Move atomically renames oldPath to newPath. Both must be under root.
368 // If newPath already exists, returns ErrAlreadyExists rather than
369 // overwriting (avoids silent corruption on concurrent moves).
370 func (r *RepoFS) Move(oldPath, newPath string) error {
371 if err := r.containedInRoot(oldPath); err != nil {
372 return err
373 }
374 if err := r.containedInRoot(newPath); err != nil {
375 return err
376 }
377 if _, err := os.Stat(newPath); err == nil {
378 return fmt.Errorf("%w: %s", ErrAlreadyExists, newPath)
379 } else if !errors.Is(err, os.ErrNotExist) {
380 return fmt.Errorf("storage: repofs: stat dest: %w", err)
381 }
382 if err := os.MkdirAll(filepath.Dir(newPath), 0o750); err != nil {
383 return fmt.Errorf("storage: repofs: mkdir parent: %w", err)
384 }
385 if err := os.Rename(oldPath, newPath); err != nil {
386 return fmt.Errorf("storage: repofs: rename: %w", err)
387 }
388 return nil
389 }
390
391 // Delete removes the bare repo at path. Refuses paths outside root.
392 func (r *RepoFS) Delete(path string) error {
393 if err := r.containedInRoot(path); err != nil {
394 return err
395 }
396 if path == r.root {
397 return fmt.Errorf("%w: refusing to delete root", ErrEscapesRoot)
398 }
399 if err := os.RemoveAll(path); err != nil {
400 return fmt.Errorf("storage: repofs: remove: %w", err)
401 }
402 return nil
403 }
404