Go · 13849 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 package storage
4
5 import (
6 "context"
7 "errors"
8 "fmt"
9 "os"
10 "os/exec"
11 "path/filepath"
12 "regexp"
13 "strings"
14 )
15
// RepoFS is the single owner of the on-disk layout for bare git
// repositories. Callers that need a repository path go through this
// type, which keeps the path-validation rules in one place.
type RepoFS struct {
	// root is the absolute, cleaned storage root set by NewRepoFS.
	root string
}

// NewRepoFS builds a RepoFS rooted at root.
//
// root must be non-empty and absolute, and must name an existing
// directory (checked with os.Stat, so a symlink to a directory is
// accepted). The stored root is the cleaned form of the argument. An
// error describing the first failed requirement is returned otherwise.
func NewRepoFS(root string) (*RepoFS, error) {
	switch {
	case root == "":
		return nil, errors.New("storage: repofs: root required")
	case !filepath.IsAbs(root):
		return nil, fmt.Errorf("storage: repofs: root must be absolute, got %q", root)
	}

	cleaned, err := filepath.Abs(filepath.Clean(root))
	if err != nil {
		return nil, fmt.Errorf("storage: repofs: clean root: %w", err)
	}

	stat, err := os.Stat(cleaned)
	switch {
	case err != nil:
		return nil, fmt.Errorf("storage: repofs: stat root: %w", err)
	case !stat.IsDir():
		return nil, fmt.Errorf("storage: repofs: root %q is not a directory", cleaned)
	}

	fs := &RepoFS{root: cleaned}
	return fs, nil
}

// Root reports the absolute storage root, e.g. for logging and
// `storage check`.
func (r *RepoFS) Root() string {
	return r.root
}
48
// Whitelists for the two path components a repository location is
// built from. Both are anchored, so a match covers the whole name.
var (
	// ownerNameRE accepts owner names of 1..39 characters made of
	// lowercase ASCII letters, digits, and hyphens, where the first and
	// last character must be a letter or digit (matches GitHub's
	// username constraint).
	ownerNameRE = regexp.MustCompile(`^[a-z0-9](?:[a-z0-9-]{0,37}[a-z0-9])?$`)

	// repoNameRE accepts repository names of 1..100 characters made of
	// lowercase ASCII letters, digits, hyphens, dots, and underscores.
	// The first character must be a letter or digit; the last must be a
	// letter, digit, or underscore (so a trailing dot or hyphen is
	// rejected).
	repoNameRE = regexp.MustCompile(`^[a-z0-9](?:[a-z0-9._-]{0,98}[a-z0-9_])?$`)
)
58
59 // validateName enforces the per-kind whitelist. Returns ErrInvalidPath
60 // wrapped with a precise reason on failure.
61 func validateName(kind, name string) error {
62 if name == "" {
63 return fmt.Errorf("%w: %s empty", ErrInvalidPath, kind)
64 }
65 maxLen, re, alphabet := 39, ownerNameRE, "[a-z0-9-]"
66 if kind == "repo" {
67 maxLen, re, alphabet = 100, repoNameRE, "[a-z0-9._-]"
68 }
69 if len(name) > maxLen {
70 return fmt.Errorf("%w: %s %q too long (max %d)", ErrInvalidPath, kind, name, maxLen)
71 }
72 if name != strings.ToLower(name) {
73 return fmt.Errorf("%w: %s %q must be lowercase", ErrInvalidPath, kind, name)
74 }
75 if strings.Contains(name, "..") {
76 return fmt.Errorf("%w: %s contains dot-dot", ErrInvalidPath, kind)
77 }
78 if strings.HasPrefix(name, ".") {
79 return fmt.Errorf("%w: %s starts with dot", ErrInvalidPath, kind)
80 }
81 if filepath.IsAbs(name) {
82 return fmt.Errorf("%w: %s is absolute", ErrInvalidPath, kind)
83 }
84 if !re.MatchString(name) {
85 return fmt.Errorf("%w: %s %q fails whitelist %s", ErrInvalidPath, kind, name, alphabet)
86 }
87 return nil
88 }
89
// shardOf derives the two-character shard directory for owner: its
// first two bytes, right-padded with `_` when owner has fewer than two
// (so "" maps to "__" and "a" to "a_"). The mapping is stable for a
// given owner.
func shardOf(owner string) string {
	// Padding first guarantees at least two bytes to slice.
	padded := owner + "__"
	return padded[:2]
}
102
103 // RepoPath returns the absolute disk path for the bare repository at
104 // (owner, name). Validates inputs and guarantees the result is rooted at
105 // r.root. Both inputs are lowercased before path construction.
106 func (r *RepoFS) RepoPath(owner, name string) (string, error) {
107 owner = strings.ToLower(owner)
108 name = strings.ToLower(name)
109 if err := validateName("owner", owner); err != nil {
110 return "", err
111 }
112 if err := validateName("repo", name); err != nil {
113 return "", err
114 }
115 p := filepath.Join(r.root, shardOf(owner), owner, name+".git")
116 if err := r.containedInRoot(p); err != nil {
117 return "", err
118 }
119 return p, nil
120 }
121
122 // DeletedRepoPath returns the internal tombstone path used while a
123 // soft-deleted repo is inside its restore grace window. Keeping
124 // tombstones outside the canonical <owner>/<name>.git path lets a new
125 // active repo reuse the name without losing the old row's restore data.
126 func (r *RepoFS) DeletedRepoPath(owner, name string, repoID int64) (string, error) {
127 if repoID <= 0 {
128 return "", fmt.Errorf("%w: repo id required", ErrInvalidPath)
129 }
130 canonical, err := r.RepoPath(owner, name)
131 if err != nil {
132 return "", err
133 }
134 p := filepath.Join(filepath.Dir(canonical), ".deleted", fmt.Sprintf("%d.git", repoID))
135 if err := r.containedInRoot(p); err != nil {
136 return "", err
137 }
138 return p, nil
139 }
140
141 // containedInRoot returns ErrEscapesRoot when p does not resolve under r.root.
142 // Defense-in-depth: validateName already rejects ".." and absolute paths,
143 // but a future caller might compose paths differently.
144 func (r *RepoFS) containedInRoot(p string) error {
145 clean := filepath.Clean(p)
146 if !strings.HasPrefix(clean, r.root+string(filepath.Separator)) && clean != r.root {
147 return fmt.Errorf("%w: %s not under %s", ErrEscapesRoot, clean, r.root)
148 }
149 return nil
150 }
151
152 // Exists reports whether path exists. Validates that path is under root.
153 func (r *RepoFS) Exists(path string) (bool, error) {
154 if err := r.containedInRoot(path); err != nil {
155 return false, err
156 }
157 _, err := os.Stat(path)
158 if err == nil {
159 return true, nil
160 }
161 if errors.Is(err, os.ErrNotExist) {
162 return false, nil
163 }
164 return false, fmt.Errorf("storage: repofs: stat %s: %w", path, err)
165 }
166
167 // InitBare creates a bare git repository at path. Default branch is
168 // "trunk" — there is no path through this package that creates a bare
169 // repo with a different initial branch.
170 //
171 // The parent directory tree is created on demand. ErrAlreadyExists is
172 // returned if path is non-empty.
173 //
174 // The repo is initialized with `--shared=group`, which:
175 //
176 // - persists `core.sharedRepository=group` in config
177 // - sets the setgid bit on directories (2775)
178 // - keeps group-writable mode bits on files (0664)
179 //
180 // Both shithubd-web (web pushes via the HTTPS handler, runs as the
181 // `shithub` user) and the SSH `git` user (the AuthorizedKeysCommand
182 // dispatches into a process running as `git`, which is in the
183 // `shithub` group) write to the same bare repo on disk. Without
184 // `--shared=group`, git-receive-pack via SSH fails with
185 // "unable to create temporary object directory" because objects/
186 // is 0755 and group write isn't set.
187 func (r *RepoFS) InitBare(ctx context.Context, path string) error {
188 if err := r.containedInRoot(path); err != nil {
189 return err
190 }
191 if entries, err := os.ReadDir(path); err == nil && len(entries) > 0 {
192 return fmt.Errorf("%w: %s", ErrAlreadyExists, path)
193 }
194 if err := os.MkdirAll(filepath.Dir(path), 0o2750); err != nil {
195 return fmt.Errorf("storage: repofs: mkdir parent: %w", err)
196 }
197 if err := os.MkdirAll(path, 0o2750); err != nil {
198 return fmt.Errorf("storage: repofs: mkdir target: %w", err)
199 }
200 // G204: path is constructed via RepoPath (strict whitelist) and verified
201 // to live under r.root. Caller cannot inject arbitrary args.
202 cmd := exec.CommandContext(ctx, "git", "init", "--bare", "--shared=group", "--initial-branch=trunk", path) //nolint:gosec
203 out, err := cmd.CombinedOutput()
204 if err != nil {
205 return fmt.Errorf("storage: repofs: git init --bare: %w (output: %s)", err, strings.TrimSpace(string(out)))
206 }
207 return nil
208 }
209
210 // CloneBareShared clones src → dst as a bare repo with object
211 // alternates pointing back at src. Disk usage of the result is
212 // essentially refs + a small overhead; objects live in src's
213 // `objects/` until the fork is detached (S16 hard-delete cascade
214 // repacks each fork before removing the source).
215 //
216 // Both paths must be contained in r.root and on the same volume —
217 // the same-volume requirement is what makes alternates safe (S04).
218 //
219 // On success the dst directory exists with `git init --bare` shape
220 // plus an `objects/info/alternates` file pointing at src/objects.
221 // On failure the dst directory is removed so a retry sees a clean
222 // slate.
223 func (r *RepoFS) CloneBareShared(ctx context.Context, src, dst string) error {
224 if err := r.containedInRoot(src); err != nil {
225 return err
226 }
227 if err := r.containedInRoot(dst); err != nil {
228 return err
229 }
230 if entries, err := os.ReadDir(dst); err == nil && len(entries) > 0 {
231 return fmt.Errorf("%w: %s", ErrAlreadyExists, dst)
232 }
233 if err := os.MkdirAll(filepath.Dir(dst), 0o2750); err != nil {
234 return fmt.Errorf("storage: repofs: mkdir parent: %w", err)
235 }
236 // `git clone --shared` (here: object-alternates flag, NOT a perms
237 // flag — same name, different sense than init's --shared=group).
238 // To get group-writable perms we set core.sharedRepository=group
239 // via -c so the cloned config has it from byte zero. Without this,
240 // SSH-git push to a fork hits the same EACCES on objects/ that
241 // PR for SR2 #287 fixed for `git init --bare` (see InitBare).
242 //
243 // G204: src/dst are RepoPath-derived, both verified under r.root.
244 cmd := exec.CommandContext(ctx, "git", "-c", "core.sharedRepository=group", "clone", "--bare", "--shared", src, dst) //nolint:gosec
245 out, err := cmd.CombinedOutput()
246 if err != nil {
247 // Best-effort cleanup; if removal fails too, surface the
248 // original clone error since that's the actionable signal.
249 _ = os.RemoveAll(dst)
250 return fmt.Errorf("storage: repofs: git clone --bare --shared: %w (output: %s)", err, strings.TrimSpace(string(out)))
251 }
252 return nil
253 }
254
255 // RepairSharedPerms brings an existing bare repo to the
256 // `--shared=group` contract InitBare now produces from byte zero
257 // (SR2 #287). Idempotent: a repo already at the contract is left
258 // alone except for explicitly setting the config (cheap).
259 //
260 // Steps:
261 // 1. `git config core.sharedRepository=group`
262 // 2. `chmod -R g+w` and `find -type d -exec chmod g+s` so future
263 // writes inherit the group on creation.
264 //
265 // Group ownership itself is NOT changed — the shipped invariant is
266 // that all repos are owned by the `shithub` group already (the
267 // shithub user creates them). If a repo's group is wrong, that's a
268 // separate provisioning bug; this method's job is only the bits.
269 //
270 // Runs git with safe.directory=* injected via env so the operator
271 // (typically root over SSH) can operate on repos owned by the
272 // `shithub` user. The same trick is used by the SSH dispatcher.
273 // Without it, git 2.35+ emits "fatal: not in a git directory" as
274 // part of the dubious-ownership early exit.
275 func (r *RepoFS) RepairSharedPerms(ctx context.Context, path string) error {
276 if err := r.containedInRoot(path); err != nil {
277 return err
278 }
279 if _, err := os.Stat(path); err != nil {
280 return fmt.Errorf("storage: repofs: stat %s: %w", path, err)
281 }
282 // Persist the contract in config.
283 cfg := exec.CommandContext(ctx, "git", "-C", path, "config", "core.sharedRepository", "group") //nolint:gosec
284 cfg.Env = append(
285 os.Environ(),
286 "GIT_CONFIG_COUNT=1",
287 "GIT_CONFIG_KEY_0=safe.directory",
288 "GIT_CONFIG_VALUE_0=*",
289 )
290 if out, err := cfg.CombinedOutput(); err != nil {
291 return fmt.Errorf("storage: repofs: git config sharedRepository: %w (output: %s)", err, strings.TrimSpace(string(out)))
292 }
293 // Walk the tree once: directories get +g+s, files get +g+w.
294 // path is verified contained-in-root above; no symlinks span out
295 // of the repo (bare repos don't ship with symlinks under .git/).
296 // G122: filepath.Walk + os.Chmod is race-prone in adversarial
297 // trees, but our writer (this process running as root or shithub)
298 // is also the only writer for these paths, and the trees are not
299 // user-influenced beyond the validated owner/name slugs. Operator-
300 // only command, not user-triggered.
301 if err := filepath.Walk(path, func(p string, info os.FileInfo, err error) error { //nolint:gosec
302 if err != nil {
303 return err
304 }
305 mode := info.Mode()
306 newMode := mode | 0o060 // group rw
307 if info.IsDir() {
308 newMode |= os.ModeSetgid // g+s
309 }
310 if newMode == mode {
311 return nil
312 }
313 return os.Chmod(p, newMode) //nolint:gosec
314 }); err != nil {
315 return fmt.Errorf("storage: repofs: walk chmod: %w", err)
316 }
317 return nil
318 }
319
320 // SetPreciousObjects marks a bare repo's objects as not-prunable. The
321 // canonical foot-gun for forks is source-repo `git gc` removing
322 // objects that forks reach via alternates; setting this on the source
323 // after a fork is created prevents that. Idempotent.
324 func (r *RepoFS) SetPreciousObjects(ctx context.Context, path string) error {
325 if err := r.containedInRoot(path); err != nil {
326 return err
327 }
328 cmd := exec.CommandContext(ctx, "git", "-C", path, "config", "extensions.preciousObjects", "true") //nolint:gosec
329 if out, err := cmd.CombinedOutput(); err != nil {
330 return fmt.Errorf("storage: repofs: set preciousObjects: %w (output: %s)", err, strings.TrimSpace(string(out)))
331 }
332 return nil
333 }
334
335 // Move atomically renames oldPath to newPath. Both must be under root.
336 // If newPath already exists, returns ErrAlreadyExists rather than
337 // overwriting (avoids silent corruption on concurrent moves).
338 func (r *RepoFS) Move(oldPath, newPath string) error {
339 if err := r.containedInRoot(oldPath); err != nil {
340 return err
341 }
342 if err := r.containedInRoot(newPath); err != nil {
343 return err
344 }
345 if _, err := os.Stat(newPath); err == nil {
346 return fmt.Errorf("%w: %s", ErrAlreadyExists, newPath)
347 } else if !errors.Is(err, os.ErrNotExist) {
348 return fmt.Errorf("storage: repofs: stat dest: %w", err)
349 }
350 if err := os.MkdirAll(filepath.Dir(newPath), 0o750); err != nil {
351 return fmt.Errorf("storage: repofs: mkdir parent: %w", err)
352 }
353 if err := os.Rename(oldPath, newPath); err != nil {
354 return fmt.Errorf("storage: repofs: rename: %w", err)
355 }
356 return nil
357 }
358
359 // Delete removes the bare repo at path. Refuses paths outside root.
360 func (r *RepoFS) Delete(path string) error {
361 if err := r.containedInRoot(path); err != nil {
362 return err
363 }
364 if path == r.root {
365 return fmt.Errorf("%w: refusing to delete root", ErrEscapesRoot)
366 }
367 if err := os.RemoveAll(path); err != nil {
368 return fmt.Errorf("storage: repofs: remove: %w", err)
369 }
370 return nil
371 }
372