Go · 15095 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 package repos
4
5 import (
6 "context"
7 "errors"
8 "fmt"
9 "log/slog"
10 "os"
11 "strings"
12 "time"
13
14 "github.com/jackc/pgx/v5"
15 "github.com/jackc/pgx/v5/pgconn"
16 "github.com/jackc/pgx/v5/pgtype"
17 "github.com/jackc/pgx/v5/pgxpool"
18
19 "github.com/tenseleyFlow/shithub/internal/auth/audit"
20 "github.com/tenseleyFlow/shithub/internal/auth/throttle"
21 "github.com/tenseleyFlow/shithub/internal/git/hooks"
22 "github.com/tenseleyFlow/shithub/internal/infra/storage"
23 "github.com/tenseleyFlow/shithub/internal/issues"
24 issuesdb "github.com/tenseleyFlow/shithub/internal/issues/sqlc"
25 "github.com/tenseleyFlow/shithub/internal/notif"
26 repogit "github.com/tenseleyFlow/shithub/internal/repos/git"
27 reposdb "github.com/tenseleyFlow/shithub/internal/repos/sqlc"
28 "github.com/tenseleyFlow/shithub/internal/repos/templates"
29 usersdb "github.com/tenseleyFlow/shithub/internal/users/sqlc"
30 )
31
// CreateRateLimitMax and CreateRateLimitWindow cap how many repos one
// actor can create: at most CreateRateLimitMax creates per
// CreateRateLimitWindow. 20/hour leaves headroom for bulk-migrating an
// existing org without being a license to spam. Create skips the cap
// entirely when Params.ActorIsSiteAdmin or Params.BypassCreateRateLimit
// is set.
const (
	CreateRateLimitMax    = 20
	CreateRateLimitWindow = time.Hour
)
39
// Deps wires the repo orchestrator. Inject from the web layer; no
// global state.
//
// Create requires Pool, RepoFS, Audit, and Limiter to be non-nil and
// returns an error otherwise. Logger, Now, and ShithubdPath are optional.
type Deps struct {
	// Pool is the Postgres pool. Create uses it to open the create
	// transaction and passes it to the limiter, the author lookup, the
	// notification emit, and the audit record.
	Pool *pgxpool.Pool
	// RepoFS resolves on-disk repo paths, initializes the bare repo, and
	// moves a leftover directory out of the way when reclaiming a path.
	RepoFS *storage.RepoFS
	// Audit records the repo-created audit event after commit.
	Audit *audit.Recorder
	// Limiter enforces the per-actor create rate limit.
	Limiter *throttle.Limiter
	// Logger receives warnings for post-commit notification/audit
	// failures. Nil disables that logging.
	Logger *slog.Logger
	// Now supplies the current time for the initial commit timestamp.
	// Nil falls back to time.Now.
	Now func() time.Time
	// ShithubdPath is the absolute path to the running shithubd binary,
	// baked into the hook shims so push -> hook -> shithubd round-trip
	// works in dev and prod. Empty disables hook installation (tests
	// that don't care about hooks; the full E2E happy path provides it).
	ShithubdPath string
}
55
// Params describes one repo-create request as it arrives from the
// handler, normalized but not yet validated against the DB.
//
// Owner is XOR (S30): either OwnerUserID set OR OwnerOrgID set; Create
// rejects requests where both or neither are non-zero.
// OwnerUsername / OwnerSlug carry the slug for path generation
// (the FS layer's per-owner directory uses it). ActorUserID is who
// initiated the create — defaults to OwnerUserID for personal repos
// and is required for org-owned creates.
type Params struct {
	// OwnerUserID / OwnerUsername identify a personal-repo owner.
	// OwnerUsername is the per-owner directory name used when
	// OwnerOrgID is zero.
	OwnerUserID   int64
	OwnerUsername string
	// OwnerOrgID / OwnerSlug identify an org owner. OwnerSlug is the
	// per-owner directory name used when OwnerOrgID is non-zero.
	OwnerOrgID int64
	OwnerSlug  string

	// ActorUserID is the user performing the create. Used for
	// audit-log + rate-limiting + initial-commit author. Defaults to
	// OwnerUserID for personal repos when zero.
	ActorUserID int64

	// ActorIsSiteAdmin, when true, bypasses the per-actor create
	// rate-limit. Site admins are trusted operators (bulk migration,
	// fixture seeding) and the cap exists to deter abuse from regular
	// accounts, not to throttle staff.
	ActorIsSiteAdmin bool

	// BypassCreateRateLimit lets trusted server-side bulk operations
	// create many repos for the same actor without tripping the browser
	// anti-abuse throttle. Keep false for direct user submits.
	BypassCreateRateLimit bool

	Name        string // already lowercased + trimmed
	Description string
	Visibility  string // "public" | "private"; any other value is rejected by Create

	// InitReadme, LicenseKey, and GitignoreKey select the files of the
	// initial commit. When all three are unset no initial commit is built.
	InitReadme   bool
	LicenseKey   string // "" = none
	GitignoreKey string // "" = none

	// Optional override for the initial commit timestamp; tests pin this
	// for determinism. Production callers leave it zero and let
	// orchestrator default to deps.Now().
	InitialCommitWhen time.Time
}
99
// Result is what Create returns on success. On error Create returns the
// zero Result.
type Result struct {
	// Repo is the row returned by the repo insert.
	Repo             reposdb.Repo
	InitialCommitOID string // "" when InitReadme/License/Gitignore were all unset
	DiskPath         string // bare-repo on-disk path
}
106
107 // Create validates, rate-limits, inserts the DB row, initializes the
108 // bare repo on disk, optionally builds the initial commit, audit-logs,
109 // and returns. On post-DB failure the tx rolls back and the partial
110 // repo dir is best-effort removed.
111 func Create(ctx context.Context, deps Deps, p Params) (Result, error) {
112 if deps.Pool == nil || deps.RepoFS == nil || deps.Audit == nil || deps.Limiter == nil {
113 return Result{}, errors.New("repos: Deps missing required field")
114 }
115 now := deps.Now
116 if now == nil {
117 now = time.Now
118 }
119
120 if err := ValidateName(p.Name); err != nil {
121 return Result{}, err
122 }
123 if err := ValidateDescription(p.Description); err != nil {
124 return Result{}, err
125 }
126 if p.Visibility != "public" && p.Visibility != "private" {
127 return Result{}, fmt.Errorf("repos: visibility must be public or private (got %q)", p.Visibility)
128 }
129 if p.LicenseKey != "" && !templates.HasLicense(p.LicenseKey) {
130 return Result{}, fmt.Errorf("%w: %s", ErrUnknownLicense, p.LicenseKey)
131 }
132 if p.GitignoreKey != "" && !templates.HasGitignore(p.GitignoreKey) {
133 return Result{}, fmt.Errorf("%w: %s", ErrUnknownGitignore, p.GitignoreKey)
134 }
135
136 // Owner XOR — exactly one kind. Org-owner path: actor must be set
137 // (so we know who initiated for audit + initial commit).
138 switch {
139 case p.OwnerUserID != 0 && p.OwnerOrgID == 0:
140 if p.ActorUserID == 0 {
141 p.ActorUserID = p.OwnerUserID
142 }
143 case p.OwnerOrgID != 0 && p.OwnerUserID == 0:
144 if p.ActorUserID == 0 {
145 return Result{}, errors.New("repos: ActorUserID required for org-owned create")
146 }
147 default:
148 return Result{}, errors.New("repos: owner is XOR — set OwnerUserID OR OwnerOrgID, not both")
149 }
150
151 // Rate-limit per actor (NOT per owner) so a user can't bypass the
152 // per-account cap by spreading creates across orgs they manage.
153 // Site admins skip the cap entirely.
154 if !p.ActorIsSiteAdmin && !p.BypassCreateRateLimit {
155 if err := deps.Limiter.Hit(ctx, deps.Pool, throttle.Limit{
156 Scope: "repo_create",
157 Identifier: fmt.Sprintf("user:%d", p.ActorUserID),
158 Max: CreateRateLimitMax,
159 Window: CreateRateLimitWindow,
160 }); err != nil {
161 return Result{}, err
162 }
163 }
164
165 // Resolve author identity for the initial commit. The actor (the
166 // human who clicked "create") is the author — even on org repos,
167 // the seed commit attributes to them.
168 authorName, authorEmail, err := resolveAuthor(ctx, deps.Pool, p.ActorUserID)
169 wantInit := p.InitReadme || p.LicenseKey != "" || p.GitignoreKey != ""
170 if wantInit && err != nil {
171 return Result{}, err
172 }
173
174 // Pre-compute disk path from RepoFS. Doing this before the tx avoids
175 // inserting a DB row for a name that fails the path-validation
176 // whitelist (which mostly mirrors our own ValidateName, but
177 // defense-in-depth never hurts). Org-owned repos use the org slug
178 // as the per-owner directory — same shape as user-owned, no
179 // `org/` prefix on disk (matches the GitHub URL layout).
180 ownerSlug := p.OwnerUsername
181 if p.OwnerOrgID != 0 {
182 ownerSlug = p.OwnerSlug
183 }
184 diskPath, err := deps.RepoFS.RepoPath(ownerSlug, p.Name)
185 if err != nil {
186 return Result{}, fmt.Errorf("%w: %v", ErrInvalidName, err)
187 }
188
189 tx, err := deps.Pool.Begin(ctx)
190 if err != nil {
191 return Result{}, fmt.Errorf("repos: begin tx: %w", err)
192 }
193 committed := false
194 defer func() {
195 if !committed {
196 _ = tx.Rollback(ctx)
197 }
198 }()
199
200 q := reposdb.New()
201 lockKey, err := createRepoNameLockKey(p)
202 if err != nil {
203 return Result{}, err
204 }
205 if err := q.LockRepoOwnerName(ctx, tx, lockKey); err != nil {
206 return Result{}, fmt.Errorf("repos: lock owner/name: %w", err)
207 }
208 row, err := q.CreateRepo(ctx, tx, reposdb.CreateRepoParams{
209 OwnerUserID: pgtype.Int8{Int64: p.OwnerUserID, Valid: p.OwnerUserID != 0},
210 OwnerOrgID: pgtype.Int8{Int64: p.OwnerOrgID, Valid: p.OwnerOrgID != 0},
211 Name: p.Name,
212 Description: p.Description,
213 Visibility: reposdb.RepoVisibility(p.Visibility),
214 DefaultBranch: "trunk",
215 LicenseKey: pgtype.Text{String: p.LicenseKey, Valid: p.LicenseKey != ""},
216 PrimaryLanguage: pgtype.Text{Valid: false},
217 })
218 if err != nil {
219 if isUniqueViolation(err) {
220 return Result{}, ErrTaken
221 }
222 return Result{}, fmt.Errorf("repos: insert: %w", err)
223 }
224
225 // FS init AFTER DB insert. If this fails the deferred Rollback
226 // reverses the row; we also best-effort RemoveAll the directory in
227 // case it got partially created.
228 if err := deps.RepoFS.InitBare(ctx, diskPath); err != nil {
229 if errors.Is(err, storage.ErrAlreadyExists) {
230 displaced, displaceErr := displaceDeletedRepoPath(ctx, deps, q, tx, p, ownerSlug, diskPath)
231 if displaceErr != nil {
232 return Result{}, fmt.Errorf("repos: reclaim deleted repo path: %w", displaceErr)
233 }
234 if displaced {
235 err = deps.RepoFS.InitBare(ctx, diskPath)
236 }
237 }
238 if err != nil {
239 if !errors.Is(err, storage.ErrAlreadyExists) {
240 _ = os.RemoveAll(diskPath)
241 }
242 return Result{}, fmt.Errorf("repos: init bare: %w", err)
243 }
244 }
245
246 // Install push-pipeline hooks. Skipped when ShithubdPath is empty
247 // (test fixtures that exercise repo creation without the hook
248 // stack). The plumbing-driven initial commit doesn't fire hooks —
249 // hooks only run on user-driven pushes — so this is the right
250 // boundary.
251 if deps.ShithubdPath != "" {
252 if err := hooks.Install(diskPath, deps.ShithubdPath); err != nil {
253 _ = os.RemoveAll(diskPath)
254 return Result{}, fmt.Errorf("repos: install hooks: %w", err)
255 }
256 }
257
258 // Seed the issue subsystem state for the new repo: counter row +
259 // default label set. Runs inside the create tx so a failed seed
260 // rolls the whole repo back. Cheap (10 inserts), and folding it in
261 // here keeps the "fresh repo is fully usable" invariant. Issues
262 // orchestrator's SeedDefaultLabels swallows unique-violations so a
263 // re-run is a no-op (defensive against partially-seeded migrations).
264 iq := issuesdb.New()
265 if err := iq.EnsureRepoIssueCounter(ctx, tx, row.ID); err != nil {
266 return Result{}, fmt.Errorf("repos: issue counter: %w", err)
267 }
268 if err := issues.SeedDefaultLabels(ctx, tx, row.ID); err != nil {
269 return Result{}, fmt.Errorf("repos: seed labels: %w", err)
270 }
271
272 var commitOID string
273 if wantInit {
274 commitWhen := p.InitialCommitWhen
275 if commitWhen.IsZero() {
276 commitWhen = now()
277 }
278 oid, err := buildInitialCommit(ctx, repogit.InitialCommit{
279 GitDir: diskPath,
280 AuthorName: authorName,
281 AuthorEmail: authorEmail,
282 Branch: "trunk",
283 When: commitWhen,
284 Files: initFiles(p, authorName, commitWhen.Year()),
285 })
286 if err != nil {
287 _ = os.RemoveAll(diskPath)
288 return Result{}, fmt.Errorf("repos: initial commit: %w", err)
289 }
290 commitOID = oid
291 }
292
293 if err := tx.Commit(ctx); err != nil {
294 _ = os.RemoveAll(diskPath)
295 return Result{}, fmt.Errorf("repos: commit tx: %w", err)
296 }
297 committed = true
298
299 if err := notif.Emit(ctx, deps.Pool, notif.Event{
300 ActorUserID: p.ActorUserID,
301 Kind: "repo_created",
302 RepoID: row.ID,
303 SourceKind: "repo",
304 SourceID: row.ID,
305 Public: p.Visibility == "public",
306 Extra: map[string]any{
307 "repo_name": p.Name,
308 },
309 }); err != nil && deps.Logger != nil {
310 deps.Logger.WarnContext(ctx, "repos: emit repo_created", "repo_id", row.ID, "error", err)
311 }
312
313 if err := deps.Audit.Record(ctx, deps.Pool, p.ActorUserID,
314 audit.ActionRepoCreated, audit.TargetRepo, row.ID, map[string]any{
315 "name": p.Name,
316 "visibility": p.Visibility,
317 "init": wantInit,
318 "license": p.LicenseKey,
319 "gitignore": p.GitignoreKey,
320 }); err != nil {
321 if deps.Logger != nil {
322 deps.Logger.WarnContext(ctx, "repos: audit", "error", err)
323 }
324 }
325
326 return Result{Repo: row, InitialCommitOID: commitOID, DiskPath: diskPath}, nil
327 }
328
329 // initFiles assembles the FileEntry slice for the initial commit based
330 // on which init checkboxes the user ticked.
331 func initFiles(p Params, author string, year int) []repogit.FileEntry {
332 var files []repogit.FileEntry
333 if p.InitReadme {
334 files = append(files, repogit.FileEntry{
335 Path: "README.md",
336 Body: []byte(templates.ReadmeText(p.Name, p.Description)),
337 })
338 }
339 if p.LicenseKey != "" {
340 body, err := templates.LicenseText(p.LicenseKey, year, author)
341 if err == nil {
342 files = append(files, repogit.FileEntry{
343 Path: "LICENSE",
344 Body: []byte(body),
345 })
346 }
347 }
348 if p.GitignoreKey != "" {
349 body, err := templates.GitignoreText(p.GitignoreKey)
350 if err == nil {
351 files = append(files, repogit.FileEntry{
352 Path: ".gitignore",
353 Body: []byte(body),
354 })
355 }
356 }
357 return files
358 }
359
360 // buildInitialCommit is a thin pass-through so tests can swap it (post-MVP).
361 var buildInitialCommit = func(ctx context.Context, ic repogit.InitialCommit) (string, error) {
362 return ic.Build(ctx)
363 }
364
365 // resolveAuthor reads the user's display name + verified primary email.
366 // Returns ErrNoVerifiedEmail if the user has no primary email or the
367 // primary isn't verified.
368 func resolveAuthor(ctx context.Context, pool *pgxpool.Pool, userID int64) (name, addr string, err error) {
369 uq := usersdb.New()
370 user, err := uq.GetUserByID(ctx, pool, userID)
371 if err != nil {
372 return "", "", fmt.Errorf("repos: load user: %w", err)
373 }
374 if !user.PrimaryEmailID.Valid {
375 return "", "", ErrNoVerifiedEmail
376 }
377 em, err := uq.GetUserEmailByID(ctx, pool, user.PrimaryEmailID.Int64)
378 if err != nil {
379 return "", "", fmt.Errorf("repos: load primary email: %w", err)
380 }
381 if !em.Verified {
382 return "", "", ErrNoVerifiedEmail
383 }
384 display := strings.TrimSpace(user.DisplayName)
385 if display == "" {
386 display = user.Username
387 }
388 return display, string(em.Email), nil
389 }
390
391 func createRepoNameLockKey(p Params) (string, error) {
392 name := strings.ToLower(p.Name)
393 switch {
394 case p.OwnerUserID != 0 && p.OwnerOrgID == 0:
395 return fmt.Sprintf("repo-name:user:%d:%s", p.OwnerUserID, name), nil
396 case p.OwnerOrgID != 0 && p.OwnerUserID == 0:
397 return fmt.Sprintf("repo-name:org:%d:%s", p.OwnerOrgID, name), nil
398 default:
399 return "", errors.New("repos: owner is XOR — set OwnerUserID OR OwnerOrgID, not both")
400 }
401 }
402
403 func displaceDeletedRepoPath(
404 ctx context.Context,
405 deps Deps,
406 q *reposdb.Queries,
407 db reposdb.DBTX,
408 p Params,
409 ownerSlug string,
410 diskPath string,
411 ) (bool, error) {
412 deleted, err := softDeletedRepoForCreate(ctx, q, db, p)
413 if errors.Is(err, pgx.ErrNoRows) {
414 return false, nil
415 }
416 if err != nil {
417 return false, err
418 }
419 deletedPath, err := deps.RepoFS.DeletedRepoPath(ownerSlug, p.Name, deleted.ID)
420 if err != nil {
421 return false, err
422 }
423 if err := deps.RepoFS.Move(diskPath, deletedPath); err != nil {
424 if errors.Is(err, os.ErrNotExist) {
425 return false, nil
426 }
427 return false, err
428 }
429 return true, nil
430 }
431
432 func softDeletedRepoForCreate(ctx context.Context, q *reposdb.Queries, db reposdb.DBTX, p Params) (reposdb.Repo, error) {
433 if p.OwnerUserID != 0 {
434 return q.GetSoftDeletedRepoByOwnerUserAndName(ctx, db, reposdb.GetSoftDeletedRepoByOwnerUserAndNameParams{
435 OwnerUserID: pgtype.Int8{Int64: p.OwnerUserID, Valid: true},
436 Name: p.Name,
437 })
438 }
439 return q.GetSoftDeletedRepoByOwnerOrgAndName(ctx, db, reposdb.GetSoftDeletedRepoByOwnerOrgAndNameParams{
440 OwnerOrgID: pgtype.Int8{Int64: p.OwnerOrgID, Valid: true},
441 Name: p.Name,
442 })
443 }
444
445 // isUniqueViolation matches Postgres SQLSTATE 23505. Used to surface
446 // the friendly "name taken" error from the unique-by-owner-and-name
447 // indexes when the pre-check raced.
448 func isUniqueViolation(err error) bool {
449 var pgErr *pgconn.PgError
450 if errors.As(err, &pgErr) {
451 return pgErr.Code == "23505"
452 }
453 return false
454 }
455