Go · 15435 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 package repos
4
5 import (
6 "context"
7 "errors"
8 "fmt"
9 "log/slog"
10 "os"
11 "strings"
12 "time"
13
14 "github.com/jackc/pgx/v5"
15 "github.com/jackc/pgx/v5/pgconn"
16 "github.com/jackc/pgx/v5/pgtype"
17 "github.com/jackc/pgx/v5/pgxpool"
18
19 "github.com/tenseleyFlow/shithub/internal/auth/audit"
20 "github.com/tenseleyFlow/shithub/internal/auth/throttle"
21 "github.com/tenseleyFlow/shithub/internal/entitlements"
22 "github.com/tenseleyFlow/shithub/internal/git/hooks"
23 "github.com/tenseleyFlow/shithub/internal/infra/storage"
24 "github.com/tenseleyFlow/shithub/internal/issues"
25 issuesdb "github.com/tenseleyFlow/shithub/internal/issues/sqlc"
26 "github.com/tenseleyFlow/shithub/internal/notif"
27 repogit "github.com/tenseleyFlow/shithub/internal/repos/git"
28 reposdb "github.com/tenseleyFlow/shithub/internal/repos/sqlc"
29 "github.com/tenseleyFlow/shithub/internal/repos/templates"
30 usersdb "github.com/tenseleyFlow/shithub/internal/users/sqlc"
31 )
32
// Per-actor repo-create rate limit, applied by Create. 20/hour leaves
// headroom for bulk-migrating an existing org without being a license to
// spam. Create skips the limit entirely for site admins and for callers
// that set Params.BypassCreateRateLimit.
const (
	// CreateRateLimitMax is the cap on creates per actor per window.
	CreateRateLimitMax = 20
	// CreateRateLimitWindow is the length of the rate-limit window.
	CreateRateLimitWindow = time.Hour
)
40
// Deps wires the repo orchestrator. Inject from the web layer; no
// global state.
type Deps struct {
	// Pool, RepoFS, Audit and Limiter are required: Create returns an
	// error up front if any of them is nil.
	Pool    *pgxpool.Pool
	RepoFS  *storage.RepoFS
	Audit   *audit.Recorder
	Limiter *throttle.Limiter
	// Logger is optional. When nil, post-commit notification and audit
	// failures in Create are dropped silently instead of being logged.
	Logger *slog.Logger
	// Now is optional; Create falls back to time.Now when it is nil.
	Now func() time.Time
	// ShithubdPath is the absolute path to the running shithubd binary,
	// baked into the hook shims so push -> hook -> shithubd round-trip
	// works in dev and prod. Empty disables hook installation (tests
	// that don't care about hooks; the full E2E happy path provides it).
	ShithubdPath string
}
56
// Params describes one repo-create request as it arrives from the
// handler, normalized but not yet validated against the DB.
//
// Owner is XOR (S30): exactly one of OwnerUserID / OwnerOrgID must be
// non-zero; Create rejects both-set and neither-set. OwnerUsername (for
// user-owned repos) or OwnerSlug (for org-owned repos) is the per-owner
// directory name handed to the FS layer for path generation.
type Params struct {
	OwnerUserID   int64
	OwnerUsername string
	OwnerOrgID    int64
	OwnerSlug     string

	// ActorUserID is the user performing the create. Used for
	// audit-log + rate-limiting + initial-commit author. Defaults to
	// OwnerUserID for personal repos when zero; required (non-zero)
	// for org-owned creates.
	ActorUserID int64

	// ActorIsSiteAdmin, when true, bypasses the per-actor create
	// rate-limit. Site admins are trusted operators (bulk migration,
	// fixture seeding) and the cap exists to deter abuse from regular
	// accounts, not to throttle staff.
	ActorIsSiteAdmin bool

	// BypassCreateRateLimit lets trusted server-side bulk operations
	// create many repos for the same actor without tripping the browser
	// anti-abuse throttle. Keep false for direct user submits.
	BypassCreateRateLimit bool

	Name        string // already lowercased + trimmed
	Description string
	Visibility  string // "public" | "private"; anything else is rejected

	// InitReadme, LicenseKey and GitignoreKey select the files of the
	// initial commit. If all three are unset no initial commit is built.
	// Non-empty keys must be known to the templates package.
	InitReadme   bool
	LicenseKey   string // "" = none
	GitignoreKey string // "" = none

	// Optional override for the initial commit timestamp; tests pin this
	// for determinism. Production callers leave it zero and let the
	// orchestrator default to deps.Now().
	InitialCommitWhen time.Time
}
100
// Result is what Create returns on success. On error Create returns the
// zero Result.
type Result struct {
	// Repo is the row returned by the CreateRepo insert.
	Repo reposdb.Repo
	// InitialCommitOID is the OID of the seed commit.
	InitialCommitOID string // "" when InitReadme/License/Gitignore were all unset
	// DiskPath is the path RepoFS computed for the repo.
	DiskPath string // bare-repo on-disk path
}
107
108 // Create validates, rate-limits, inserts the DB row, initializes the
109 // bare repo on disk, optionally builds the initial commit, audit-logs,
110 // and returns. On post-DB failure the tx rolls back and the partial
111 // repo dir is best-effort removed.
112 func Create(ctx context.Context, deps Deps, p Params) (Result, error) {
113 if deps.Pool == nil || deps.RepoFS == nil || deps.Audit == nil || deps.Limiter == nil {
114 return Result{}, errors.New("repos: Deps missing required field")
115 }
116 now := deps.Now
117 if now == nil {
118 now = time.Now
119 }
120
121 if err := ValidateName(p.Name); err != nil {
122 return Result{}, err
123 }
124 if err := ValidateDescription(p.Description); err != nil {
125 return Result{}, err
126 }
127 if p.Visibility != "public" && p.Visibility != "private" {
128 return Result{}, fmt.Errorf("repos: visibility must be public or private (got %q)", p.Visibility)
129 }
130 if p.LicenseKey != "" && !templates.HasLicense(p.LicenseKey) {
131 return Result{}, fmt.Errorf("%w: %s", ErrUnknownLicense, p.LicenseKey)
132 }
133 if p.GitignoreKey != "" && !templates.HasGitignore(p.GitignoreKey) {
134 return Result{}, fmt.Errorf("%w: %s", ErrUnknownGitignore, p.GitignoreKey)
135 }
136
137 // Owner XOR — exactly one kind. Org-owner path: actor must be set
138 // (so we know who initiated for audit + initial commit).
139 switch {
140 case p.OwnerUserID != 0 && p.OwnerOrgID == 0:
141 if p.ActorUserID == 0 {
142 p.ActorUserID = p.OwnerUserID
143 }
144 case p.OwnerOrgID != 0 && p.OwnerUserID == 0:
145 if p.ActorUserID == 0 {
146 return Result{}, errors.New("repos: ActorUserID required for org-owned create")
147 }
148 default:
149 return Result{}, errors.New("repos: owner is XOR — set OwnerUserID OR OwnerOrgID, not both")
150 }
151 if p.OwnerOrgID != 0 && p.Visibility == "private" {
152 check, err := entitlements.CheckPrivateRepositoryCreation(ctx, entitlements.Deps{Pool: deps.Pool}, p.OwnerOrgID)
153 if err != nil {
154 return Result{}, err
155 }
156 if err := check.Err(); err != nil {
157 return Result{}, err
158 }
159 }
160
161 // Rate-limit per actor (NOT per owner) so a user can't bypass the
162 // per-account cap by spreading creates across orgs they manage.
163 // Site admins skip the cap entirely.
164 if !p.ActorIsSiteAdmin && !p.BypassCreateRateLimit {
165 if err := deps.Limiter.Hit(ctx, deps.Pool, throttle.Limit{
166 Scope: "repo_create",
167 Identifier: fmt.Sprintf("user:%d", p.ActorUserID),
168 Max: CreateRateLimitMax,
169 Window: CreateRateLimitWindow,
170 }); err != nil {
171 return Result{}, err
172 }
173 }
174
175 // Resolve author identity for the initial commit. The actor (the
176 // human who clicked "create") is the author — even on org repos,
177 // the seed commit attributes to them.
178 authorName, authorEmail, err := resolveAuthor(ctx, deps.Pool, p.ActorUserID)
179 wantInit := p.InitReadme || p.LicenseKey != "" || p.GitignoreKey != ""
180 if wantInit && err != nil {
181 return Result{}, err
182 }
183
184 // Pre-compute disk path from RepoFS. Doing this before the tx avoids
185 // inserting a DB row for a name that fails the path-validation
186 // whitelist (which mostly mirrors our own ValidateName, but
187 // defense-in-depth never hurts). Org-owned repos use the org slug
188 // as the per-owner directory — same shape as user-owned, no
189 // `org/` prefix on disk (matches the GitHub URL layout).
190 ownerSlug := p.OwnerUsername
191 if p.OwnerOrgID != 0 {
192 ownerSlug = p.OwnerSlug
193 }
194 diskPath, err := deps.RepoFS.RepoPath(ownerSlug, p.Name)
195 if err != nil {
196 return Result{}, fmt.Errorf("%w: %v", ErrInvalidName, err)
197 }
198
199 tx, err := deps.Pool.Begin(ctx)
200 if err != nil {
201 return Result{}, fmt.Errorf("repos: begin tx: %w", err)
202 }
203 committed := false
204 defer func() {
205 if !committed {
206 _ = tx.Rollback(ctx)
207 }
208 }()
209
210 q := reposdb.New()
211 lockKey, err := createRepoNameLockKey(p)
212 if err != nil {
213 return Result{}, err
214 }
215 if err := q.LockRepoOwnerName(ctx, tx, lockKey); err != nil {
216 return Result{}, fmt.Errorf("repos: lock owner/name: %w", err)
217 }
218 row, err := q.CreateRepo(ctx, tx, reposdb.CreateRepoParams{
219 OwnerUserID: pgtype.Int8{Int64: p.OwnerUserID, Valid: p.OwnerUserID != 0},
220 OwnerOrgID: pgtype.Int8{Int64: p.OwnerOrgID, Valid: p.OwnerOrgID != 0},
221 Name: p.Name,
222 Description: p.Description,
223 Visibility: reposdb.RepoVisibility(p.Visibility),
224 DefaultBranch: "trunk",
225 LicenseKey: pgtype.Text{String: p.LicenseKey, Valid: p.LicenseKey != ""},
226 PrimaryLanguage: pgtype.Text{Valid: false},
227 })
228 if err != nil {
229 if isUniqueViolation(err) {
230 return Result{}, ErrTaken
231 }
232 return Result{}, fmt.Errorf("repos: insert: %w", err)
233 }
234
235 // FS init AFTER DB insert. If this fails the deferred Rollback
236 // reverses the row; we also best-effort RemoveAll the directory in
237 // case it got partially created.
238 if err := deps.RepoFS.InitBare(ctx, diskPath); err != nil {
239 if errors.Is(err, storage.ErrAlreadyExists) {
240 displaced, displaceErr := displaceDeletedRepoPath(ctx, deps, q, tx, p, ownerSlug, diskPath)
241 if displaceErr != nil {
242 return Result{}, fmt.Errorf("repos: reclaim deleted repo path: %w", displaceErr)
243 }
244 if displaced {
245 err = deps.RepoFS.InitBare(ctx, diskPath)
246 }
247 }
248 if err != nil {
249 if !errors.Is(err, storage.ErrAlreadyExists) {
250 _ = os.RemoveAll(diskPath)
251 }
252 return Result{}, fmt.Errorf("repos: init bare: %w", err)
253 }
254 }
255
256 // Install push-pipeline hooks. Skipped when ShithubdPath is empty
257 // (test fixtures that exercise repo creation without the hook
258 // stack). The plumbing-driven initial commit doesn't fire hooks —
259 // hooks only run on user-driven pushes — so this is the right
260 // boundary.
261 if deps.ShithubdPath != "" {
262 if err := hooks.Install(diskPath, deps.ShithubdPath); err != nil {
263 _ = os.RemoveAll(diskPath)
264 return Result{}, fmt.Errorf("repos: install hooks: %w", err)
265 }
266 }
267
268 // Seed the issue subsystem state for the new repo: counter row +
269 // default label set. Runs inside the create tx so a failed seed
270 // rolls the whole repo back. Cheap (10 inserts), and folding it in
271 // here keeps the "fresh repo is fully usable" invariant. Issues
272 // orchestrator's SeedDefaultLabels swallows unique-violations so a
273 // re-run is a no-op (defensive against partially-seeded migrations).
274 iq := issuesdb.New()
275 if err := iq.EnsureRepoIssueCounter(ctx, tx, row.ID); err != nil {
276 return Result{}, fmt.Errorf("repos: issue counter: %w", err)
277 }
278 if err := issues.SeedDefaultLabels(ctx, tx, row.ID); err != nil {
279 return Result{}, fmt.Errorf("repos: seed labels: %w", err)
280 }
281
282 var commitOID string
283 if wantInit {
284 commitWhen := p.InitialCommitWhen
285 if commitWhen.IsZero() {
286 commitWhen = now()
287 }
288 oid, err := buildInitialCommit(ctx, repogit.InitialCommit{
289 GitDir: diskPath,
290 AuthorName: authorName,
291 AuthorEmail: authorEmail,
292 Branch: "trunk",
293 When: commitWhen,
294 Files: initFiles(p, authorName, commitWhen.Year()),
295 })
296 if err != nil {
297 _ = os.RemoveAll(diskPath)
298 return Result{}, fmt.Errorf("repos: initial commit: %w", err)
299 }
300 commitOID = oid
301 }
302
303 if err := tx.Commit(ctx); err != nil {
304 _ = os.RemoveAll(diskPath)
305 return Result{}, fmt.Errorf("repos: commit tx: %w", err)
306 }
307 committed = true
308
309 if err := notif.Emit(ctx, deps.Pool, notif.Event{
310 ActorUserID: p.ActorUserID,
311 Kind: "repo_created",
312 RepoID: row.ID,
313 SourceKind: "repo",
314 SourceID: row.ID,
315 Public: p.Visibility == "public",
316 Extra: map[string]any{
317 "repo_name": p.Name,
318 },
319 }); err != nil && deps.Logger != nil {
320 deps.Logger.WarnContext(ctx, "repos: emit repo_created", "repo_id", row.ID, "error", err)
321 }
322
323 if err := deps.Audit.Record(ctx, deps.Pool, p.ActorUserID,
324 audit.ActionRepoCreated, audit.TargetRepo, row.ID, map[string]any{
325 "name": p.Name,
326 "visibility": p.Visibility,
327 "init": wantInit,
328 "license": p.LicenseKey,
329 "gitignore": p.GitignoreKey,
330 }); err != nil {
331 if deps.Logger != nil {
332 deps.Logger.WarnContext(ctx, "repos: audit", "error", err)
333 }
334 }
335
336 return Result{Repo: row, InitialCommitOID: commitOID, DiskPath: diskPath}, nil
337 }
338
339 // initFiles assembles the FileEntry slice for the initial commit based
340 // on which init checkboxes the user ticked.
341 func initFiles(p Params, author string, year int) []repogit.FileEntry {
342 var files []repogit.FileEntry
343 if p.InitReadme {
344 files = append(files, repogit.FileEntry{
345 Path: "README.md",
346 Body: []byte(templates.ReadmeText(p.Name, p.Description)),
347 })
348 }
349 if p.LicenseKey != "" {
350 body, err := templates.LicenseText(p.LicenseKey, year, author)
351 if err == nil {
352 files = append(files, repogit.FileEntry{
353 Path: "LICENSE",
354 Body: []byte(body),
355 })
356 }
357 }
358 if p.GitignoreKey != "" {
359 body, err := templates.GitignoreText(p.GitignoreKey)
360 if err == nil {
361 files = append(files, repogit.FileEntry{
362 Path: ".gitignore",
363 Body: []byte(body),
364 })
365 }
366 }
367 return files
368 }
369
370 // buildInitialCommit is a thin pass-through so tests can swap it (post-MVP).
371 var buildInitialCommit = func(ctx context.Context, ic repogit.InitialCommit) (string, error) {
372 return ic.Build(ctx)
373 }
374
375 // resolveAuthor reads the user's display name + verified primary email.
376 // Returns ErrNoVerifiedEmail if the user has no primary email or the
377 // primary isn't verified.
378 func resolveAuthor(ctx context.Context, pool *pgxpool.Pool, userID int64) (name, addr string, err error) {
379 uq := usersdb.New()
380 user, err := uq.GetUserByID(ctx, pool, userID)
381 if err != nil {
382 return "", "", fmt.Errorf("repos: load user: %w", err)
383 }
384 if !user.PrimaryEmailID.Valid {
385 return "", "", ErrNoVerifiedEmail
386 }
387 em, err := uq.GetUserEmailByID(ctx, pool, user.PrimaryEmailID.Int64)
388 if err != nil {
389 return "", "", fmt.Errorf("repos: load primary email: %w", err)
390 }
391 if !em.Verified {
392 return "", "", ErrNoVerifiedEmail
393 }
394 display := strings.TrimSpace(user.DisplayName)
395 if display == "" {
396 display = user.Username
397 }
398 return display, string(em.Email), nil
399 }
400
401 func createRepoNameLockKey(p Params) (string, error) {
402 name := strings.ToLower(p.Name)
403 switch {
404 case p.OwnerUserID != 0 && p.OwnerOrgID == 0:
405 return fmt.Sprintf("repo-name:user:%d:%s", p.OwnerUserID, name), nil
406 case p.OwnerOrgID != 0 && p.OwnerUserID == 0:
407 return fmt.Sprintf("repo-name:org:%d:%s", p.OwnerOrgID, name), nil
408 default:
409 return "", errors.New("repos: owner is XOR — set OwnerUserID OR OwnerOrgID, not both")
410 }
411 }
412
413 func displaceDeletedRepoPath(
414 ctx context.Context,
415 deps Deps,
416 q *reposdb.Queries,
417 db reposdb.DBTX,
418 p Params,
419 ownerSlug string,
420 diskPath string,
421 ) (bool, error) {
422 deleted, err := softDeletedRepoForCreate(ctx, q, db, p)
423 if errors.Is(err, pgx.ErrNoRows) {
424 return false, nil
425 }
426 if err != nil {
427 return false, err
428 }
429 deletedPath, err := deps.RepoFS.DeletedRepoPath(ownerSlug, p.Name, deleted.ID)
430 if err != nil {
431 return false, err
432 }
433 if err := deps.RepoFS.Move(diskPath, deletedPath); err != nil {
434 if errors.Is(err, os.ErrNotExist) {
435 return false, nil
436 }
437 return false, err
438 }
439 return true, nil
440 }
441
442 func softDeletedRepoForCreate(ctx context.Context, q *reposdb.Queries, db reposdb.DBTX, p Params) (reposdb.Repo, error) {
443 if p.OwnerUserID != 0 {
444 return q.GetSoftDeletedRepoByOwnerUserAndName(ctx, db, reposdb.GetSoftDeletedRepoByOwnerUserAndNameParams{
445 OwnerUserID: pgtype.Int8{Int64: p.OwnerUserID, Valid: true},
446 Name: p.Name,
447 })
448 }
449 return q.GetSoftDeletedRepoByOwnerOrgAndName(ctx, db, reposdb.GetSoftDeletedRepoByOwnerOrgAndNameParams{
450 OwnerOrgID: pgtype.Int8{Int64: p.OwnerOrgID, Valid: true},
451 Name: p.Name,
452 })
453 }
454
455 // isUniqueViolation matches Postgres SQLSTATE 23505. Used to surface
456 // the friendly "name taken" error from the unique-by-owner-and-name
457 // indexes when the pre-check raced.
458 func isUniqueViolation(err error) bool {
459 var pgErr *pgconn.PgError
460 if errors.As(err, &pgErr) {
461 return pgErr.Code == "23505"
462 }
463 return false
464 }
465