| 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | |
| 3 | package repos |
| 4 | |
| 5 | import ( |
| 6 | "context" |
| 7 | "errors" |
| 8 | "fmt" |
| 9 | "log/slog" |
| 10 | "os" |
| 11 | "strings" |
| 12 | "time" |
| 13 | |
| 14 | "github.com/jackc/pgx/v5" |
| 15 | "github.com/jackc/pgx/v5/pgconn" |
| 16 | "github.com/jackc/pgx/v5/pgtype" |
| 17 | "github.com/jackc/pgx/v5/pgxpool" |
| 18 | |
| 19 | "github.com/tenseleyFlow/shithub/internal/auth/audit" |
| 20 | "github.com/tenseleyFlow/shithub/internal/auth/throttle" |
| 21 | "github.com/tenseleyFlow/shithub/internal/git/hooks" |
| 22 | "github.com/tenseleyFlow/shithub/internal/infra/storage" |
| 23 | "github.com/tenseleyFlow/shithub/internal/issues" |
| 24 | issuesdb "github.com/tenseleyFlow/shithub/internal/issues/sqlc" |
| 25 | "github.com/tenseleyFlow/shithub/internal/notif" |
| 26 | repogit "github.com/tenseleyFlow/shithub/internal/repos/git" |
| 27 | reposdb "github.com/tenseleyFlow/shithub/internal/repos/sqlc" |
| 28 | "github.com/tenseleyFlow/shithub/internal/repos/templates" |
| 29 | usersdb "github.com/tenseleyFlow/shithub/internal/users/sqlc" |
| 30 | ) |
| 31 | |
// CreateRateLimitMax and CreateRateLimitWindow cap how many repos one
// actor can create: at most CreateRateLimitMax creates per
// CreateRateLimitWindow (20 per hour). 20/hour leaves headroom for
// bulk-migrating an existing org without being a license to spam; site
// admins bypass the cap entirely.
const (
	// CreateRateLimitMax is the number of creates allowed per window.
	CreateRateLimitMax = 20
	// CreateRateLimitWindow is the length of the rate-limit window.
	CreateRateLimitWindow = time.Hour
)
| 39 | |
// Deps wires the repo orchestrator. Inject from the web layer; no
// global state.
//
// Pool, RepoFS, Audit and Limiter are required: Create returns an error
// when any of them is nil. Logger, Now and ShithubdPath are optional.
type Deps struct {
	// Pool is the Postgres pool used for the create transaction, the
	// rate-limit check, notifications and audit records.
	Pool *pgxpool.Pool
	// RepoFS resolves on-disk repo paths and initializes bare repos.
	RepoFS *storage.RepoFS
	// Audit records the repo-created audit entry after commit.
	Audit *audit.Recorder
	// Limiter enforces the per-actor create rate limit.
	Limiter *throttle.Limiter
	// Logger receives warnings for post-commit failures (notification,
	// audit). Nil silences them.
	Logger *slog.Logger
	// Now supplies the current time; Create falls back to time.Now
	// when it is nil.
	Now func() time.Time
	// ShithubdPath is the absolute path to the running shithubd binary,
	// baked into the hook shims so push -> hook -> shithubd round-trip
	// works in dev and prod. Empty disables hook installation (tests
	// that don't care about hooks; the full E2E happy path provides it).
	ShithubdPath string
}
| 55 | |
// Params describes one repo-create request as it arrives from the
// handler, normalized but not yet validated against the DB.
//
// Owner is XOR (S30): exactly one of OwnerUserID or OwnerOrgID must be
// set; Create rejects requests that set both or neither.
// OwnerUsername / OwnerSlug carry the slug for path generation
// (the FS layer's per-owner directory uses it): OwnerUsername for
// user-owned repos, OwnerSlug for org-owned repos. ActorUserID is who
// initiated the create — it defaults to OwnerUserID for personal repos
// and is required for org-owned creates.
type Params struct {
	OwnerUserID   int64
	OwnerUsername string
	OwnerOrgID    int64
	OwnerSlug     string

	// ActorUserID is the user performing the create. Used for
	// audit-log + rate-limiting + initial-commit author. Defaults to
	// OwnerUserID for personal repos when zero.
	ActorUserID int64

	// ActorIsSiteAdmin, when true, bypasses the per-actor create
	// rate-limit. Site admins are trusted operators (bulk migration,
	// fixture seeding) and the cap exists to deter abuse from regular
	// accounts, not to throttle staff.
	ActorIsSiteAdmin bool

	// BypassCreateRateLimit lets trusted server-side bulk operations
	// create many repos for the same actor without tripping the browser
	// anti-abuse throttle. Keep false for direct user submits.
	BypassCreateRateLimit bool

	Name        string // already lowercased + trimmed; checked by ValidateName
	Description string // checked by ValidateDescription
	Visibility  string // "public" | "private"; anything else is rejected

	// InitReadme, LicenseKey and GitignoreKey select the files of the
	// initial commit. When all three are unset no initial commit is
	// built. Non-empty keys must be known to the templates package.
	InitReadme   bool
	LicenseKey   string // "" = none
	GitignoreKey string // "" = none

	// Optional override for the initial commit timestamp; tests pin this
	// for determinism. Production callers leave it zero and let the
	// orchestrator default to deps.Now() (or time.Now when that is nil).
	InitialCommitWhen time.Time
}
| 99 | |
// Result is what Create returns on success. On error Create returns
// the zero Result.
type Result struct {
	// Repo is the row inserted by the create transaction.
	Repo             reposdb.Repo
	InitialCommitOID string // "" when InitReadme/License/Gitignore were all unset
	DiskPath         string // bare-repo on-disk path
}
| 106 | |
| 107 | // Create validates, rate-limits, inserts the DB row, initializes the |
| 108 | // bare repo on disk, optionally builds the initial commit, audit-logs, |
| 109 | // and returns. On post-DB failure the tx rolls back and the partial |
| 110 | // repo dir is best-effort removed. |
| 111 | func Create(ctx context.Context, deps Deps, p Params) (Result, error) { |
| 112 | if deps.Pool == nil || deps.RepoFS == nil || deps.Audit == nil || deps.Limiter == nil { |
| 113 | return Result{}, errors.New("repos: Deps missing required field") |
| 114 | } |
| 115 | now := deps.Now |
| 116 | if now == nil { |
| 117 | now = time.Now |
| 118 | } |
| 119 | |
| 120 | if err := ValidateName(p.Name); err != nil { |
| 121 | return Result{}, err |
| 122 | } |
| 123 | if err := ValidateDescription(p.Description); err != nil { |
| 124 | return Result{}, err |
| 125 | } |
| 126 | if p.Visibility != "public" && p.Visibility != "private" { |
| 127 | return Result{}, fmt.Errorf("repos: visibility must be public or private (got %q)", p.Visibility) |
| 128 | } |
| 129 | if p.LicenseKey != "" && !templates.HasLicense(p.LicenseKey) { |
| 130 | return Result{}, fmt.Errorf("%w: %s", ErrUnknownLicense, p.LicenseKey) |
| 131 | } |
| 132 | if p.GitignoreKey != "" && !templates.HasGitignore(p.GitignoreKey) { |
| 133 | return Result{}, fmt.Errorf("%w: %s", ErrUnknownGitignore, p.GitignoreKey) |
| 134 | } |
| 135 | |
| 136 | // Owner XOR — exactly one kind. Org-owner path: actor must be set |
| 137 | // (so we know who initiated for audit + initial commit). |
| 138 | switch { |
| 139 | case p.OwnerUserID != 0 && p.OwnerOrgID == 0: |
| 140 | if p.ActorUserID == 0 { |
| 141 | p.ActorUserID = p.OwnerUserID |
| 142 | } |
| 143 | case p.OwnerOrgID != 0 && p.OwnerUserID == 0: |
| 144 | if p.ActorUserID == 0 { |
| 145 | return Result{}, errors.New("repos: ActorUserID required for org-owned create") |
| 146 | } |
| 147 | default: |
| 148 | return Result{}, errors.New("repos: owner is XOR — set OwnerUserID OR OwnerOrgID, not both") |
| 149 | } |
| 150 | |
| 151 | // Rate-limit per actor (NOT per owner) so a user can't bypass the |
| 152 | // per-account cap by spreading creates across orgs they manage. |
| 153 | // Site admins skip the cap entirely. |
| 154 | if !p.ActorIsSiteAdmin && !p.BypassCreateRateLimit { |
| 155 | if err := deps.Limiter.Hit(ctx, deps.Pool, throttle.Limit{ |
| 156 | Scope: "repo_create", |
| 157 | Identifier: fmt.Sprintf("user:%d", p.ActorUserID), |
| 158 | Max: CreateRateLimitMax, |
| 159 | Window: CreateRateLimitWindow, |
| 160 | }); err != nil { |
| 161 | return Result{}, err |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | // Resolve author identity for the initial commit. The actor (the |
| 166 | // human who clicked "create") is the author — even on org repos, |
| 167 | // the seed commit attributes to them. |
| 168 | authorName, authorEmail, err := resolveAuthor(ctx, deps.Pool, p.ActorUserID) |
| 169 | wantInit := p.InitReadme || p.LicenseKey != "" || p.GitignoreKey != "" |
| 170 | if wantInit && err != nil { |
| 171 | return Result{}, err |
| 172 | } |
| 173 | |
| 174 | // Pre-compute disk path from RepoFS. Doing this before the tx avoids |
| 175 | // inserting a DB row for a name that fails the path-validation |
| 176 | // whitelist (which mostly mirrors our own ValidateName, but |
| 177 | // defense-in-depth never hurts). Org-owned repos use the org slug |
| 178 | // as the per-owner directory — same shape as user-owned, no |
| 179 | // `org/` prefix on disk (matches the GitHub URL layout). |
| 180 | ownerSlug := p.OwnerUsername |
| 181 | if p.OwnerOrgID != 0 { |
| 182 | ownerSlug = p.OwnerSlug |
| 183 | } |
| 184 | diskPath, err := deps.RepoFS.RepoPath(ownerSlug, p.Name) |
| 185 | if err != nil { |
| 186 | return Result{}, fmt.Errorf("%w: %v", ErrInvalidName, err) |
| 187 | } |
| 188 | |
| 189 | tx, err := deps.Pool.Begin(ctx) |
| 190 | if err != nil { |
| 191 | return Result{}, fmt.Errorf("repos: begin tx: %w", err) |
| 192 | } |
| 193 | committed := false |
| 194 | defer func() { |
| 195 | if !committed { |
| 196 | _ = tx.Rollback(ctx) |
| 197 | } |
| 198 | }() |
| 199 | |
| 200 | q := reposdb.New() |
| 201 | lockKey, err := createRepoNameLockKey(p) |
| 202 | if err != nil { |
| 203 | return Result{}, err |
| 204 | } |
| 205 | if err := q.LockRepoOwnerName(ctx, tx, lockKey); err != nil { |
| 206 | return Result{}, fmt.Errorf("repos: lock owner/name: %w", err) |
| 207 | } |
| 208 | row, err := q.CreateRepo(ctx, tx, reposdb.CreateRepoParams{ |
| 209 | OwnerUserID: pgtype.Int8{Int64: p.OwnerUserID, Valid: p.OwnerUserID != 0}, |
| 210 | OwnerOrgID: pgtype.Int8{Int64: p.OwnerOrgID, Valid: p.OwnerOrgID != 0}, |
| 211 | Name: p.Name, |
| 212 | Description: p.Description, |
| 213 | Visibility: reposdb.RepoVisibility(p.Visibility), |
| 214 | DefaultBranch: "trunk", |
| 215 | LicenseKey: pgtype.Text{String: p.LicenseKey, Valid: p.LicenseKey != ""}, |
| 216 | PrimaryLanguage: pgtype.Text{Valid: false}, |
| 217 | }) |
| 218 | if err != nil { |
| 219 | if isUniqueViolation(err) { |
| 220 | return Result{}, ErrTaken |
| 221 | } |
| 222 | return Result{}, fmt.Errorf("repos: insert: %w", err) |
| 223 | } |
| 224 | |
| 225 | // FS init AFTER DB insert. If this fails the deferred Rollback |
| 226 | // reverses the row; we also best-effort RemoveAll the directory in |
| 227 | // case it got partially created. |
| 228 | if err := deps.RepoFS.InitBare(ctx, diskPath); err != nil { |
| 229 | if errors.Is(err, storage.ErrAlreadyExists) { |
| 230 | displaced, displaceErr := displaceDeletedRepoPath(ctx, deps, q, tx, p, ownerSlug, diskPath) |
| 231 | if displaceErr != nil { |
| 232 | return Result{}, fmt.Errorf("repos: reclaim deleted repo path: %w", displaceErr) |
| 233 | } |
| 234 | if displaced { |
| 235 | err = deps.RepoFS.InitBare(ctx, diskPath) |
| 236 | } |
| 237 | } |
| 238 | if err != nil { |
| 239 | if !errors.Is(err, storage.ErrAlreadyExists) { |
| 240 | _ = os.RemoveAll(diskPath) |
| 241 | } |
| 242 | return Result{}, fmt.Errorf("repos: init bare: %w", err) |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | // Install push-pipeline hooks. Skipped when ShithubdPath is empty |
| 247 | // (test fixtures that exercise repo creation without the hook |
| 248 | // stack). The plumbing-driven initial commit doesn't fire hooks — |
| 249 | // hooks only run on user-driven pushes — so this is the right |
| 250 | // boundary. |
| 251 | if deps.ShithubdPath != "" { |
| 252 | if err := hooks.Install(diskPath, deps.ShithubdPath); err != nil { |
| 253 | _ = os.RemoveAll(diskPath) |
| 254 | return Result{}, fmt.Errorf("repos: install hooks: %w", err) |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | // Seed the issue subsystem state for the new repo: counter row + |
| 259 | // default label set. Runs inside the create tx so a failed seed |
| 260 | // rolls the whole repo back. Cheap (10 inserts), and folding it in |
| 261 | // here keeps the "fresh repo is fully usable" invariant. Issues |
| 262 | // orchestrator's SeedDefaultLabels swallows unique-violations so a |
| 263 | // re-run is a no-op (defensive against partially-seeded migrations). |
| 264 | iq := issuesdb.New() |
| 265 | if err := iq.EnsureRepoIssueCounter(ctx, tx, row.ID); err != nil { |
| 266 | return Result{}, fmt.Errorf("repos: issue counter: %w", err) |
| 267 | } |
| 268 | if err := issues.SeedDefaultLabels(ctx, tx, row.ID); err != nil { |
| 269 | return Result{}, fmt.Errorf("repos: seed labels: %w", err) |
| 270 | } |
| 271 | |
| 272 | var commitOID string |
| 273 | if wantInit { |
| 274 | commitWhen := p.InitialCommitWhen |
| 275 | if commitWhen.IsZero() { |
| 276 | commitWhen = now() |
| 277 | } |
| 278 | oid, err := buildInitialCommit(ctx, repogit.InitialCommit{ |
| 279 | GitDir: diskPath, |
| 280 | AuthorName: authorName, |
| 281 | AuthorEmail: authorEmail, |
| 282 | Branch: "trunk", |
| 283 | When: commitWhen, |
| 284 | Files: initFiles(p, authorName, commitWhen.Year()), |
| 285 | }) |
| 286 | if err != nil { |
| 287 | _ = os.RemoveAll(diskPath) |
| 288 | return Result{}, fmt.Errorf("repos: initial commit: %w", err) |
| 289 | } |
| 290 | commitOID = oid |
| 291 | } |
| 292 | |
| 293 | if err := tx.Commit(ctx); err != nil { |
| 294 | _ = os.RemoveAll(diskPath) |
| 295 | return Result{}, fmt.Errorf("repos: commit tx: %w", err) |
| 296 | } |
| 297 | committed = true |
| 298 | |
| 299 | if err := notif.Emit(ctx, deps.Pool, notif.Event{ |
| 300 | ActorUserID: p.ActorUserID, |
| 301 | Kind: "repo_created", |
| 302 | RepoID: row.ID, |
| 303 | SourceKind: "repo", |
| 304 | SourceID: row.ID, |
| 305 | Public: p.Visibility == "public", |
| 306 | Extra: map[string]any{ |
| 307 | "repo_name": p.Name, |
| 308 | }, |
| 309 | }); err != nil && deps.Logger != nil { |
| 310 | deps.Logger.WarnContext(ctx, "repos: emit repo_created", "repo_id", row.ID, "error", err) |
| 311 | } |
| 312 | |
| 313 | if err := deps.Audit.Record(ctx, deps.Pool, p.ActorUserID, |
| 314 | audit.ActionRepoCreated, audit.TargetRepo, row.ID, map[string]any{ |
| 315 | "name": p.Name, |
| 316 | "visibility": p.Visibility, |
| 317 | "init": wantInit, |
| 318 | "license": p.LicenseKey, |
| 319 | "gitignore": p.GitignoreKey, |
| 320 | }); err != nil { |
| 321 | if deps.Logger != nil { |
| 322 | deps.Logger.WarnContext(ctx, "repos: audit", "error", err) |
| 323 | } |
| 324 | } |
| 325 | |
| 326 | return Result{Repo: row, InitialCommitOID: commitOID, DiskPath: diskPath}, nil |
| 327 | } |
| 328 | |
| 329 | // initFiles assembles the FileEntry slice for the initial commit based |
| 330 | // on which init checkboxes the user ticked. |
| 331 | func initFiles(p Params, author string, year int) []repogit.FileEntry { |
| 332 | var files []repogit.FileEntry |
| 333 | if p.InitReadme { |
| 334 | files = append(files, repogit.FileEntry{ |
| 335 | Path: "README.md", |
| 336 | Body: []byte(templates.ReadmeText(p.Name, p.Description)), |
| 337 | }) |
| 338 | } |
| 339 | if p.LicenseKey != "" { |
| 340 | body, err := templates.LicenseText(p.LicenseKey, year, author) |
| 341 | if err == nil { |
| 342 | files = append(files, repogit.FileEntry{ |
| 343 | Path: "LICENSE", |
| 344 | Body: []byte(body), |
| 345 | }) |
| 346 | } |
| 347 | } |
| 348 | if p.GitignoreKey != "" { |
| 349 | body, err := templates.GitignoreText(p.GitignoreKey) |
| 350 | if err == nil { |
| 351 | files = append(files, repogit.FileEntry{ |
| 352 | Path: ".gitignore", |
| 353 | Body: []byte(body), |
| 354 | }) |
| 355 | } |
| 356 | } |
| 357 | return files |
| 358 | } |
| 359 | |
| 360 | // buildInitialCommit is a thin pass-through so tests can swap it (post-MVP). |
| 361 | var buildInitialCommit = func(ctx context.Context, ic repogit.InitialCommit) (string, error) { |
| 362 | return ic.Build(ctx) |
| 363 | } |
| 364 | |
| 365 | // resolveAuthor reads the user's display name + verified primary email. |
| 366 | // Returns ErrNoVerifiedEmail if the user has no primary email or the |
| 367 | // primary isn't verified. |
| 368 | func resolveAuthor(ctx context.Context, pool *pgxpool.Pool, userID int64) (name, addr string, err error) { |
| 369 | uq := usersdb.New() |
| 370 | user, err := uq.GetUserByID(ctx, pool, userID) |
| 371 | if err != nil { |
| 372 | return "", "", fmt.Errorf("repos: load user: %w", err) |
| 373 | } |
| 374 | if !user.PrimaryEmailID.Valid { |
| 375 | return "", "", ErrNoVerifiedEmail |
| 376 | } |
| 377 | em, err := uq.GetUserEmailByID(ctx, pool, user.PrimaryEmailID.Int64) |
| 378 | if err != nil { |
| 379 | return "", "", fmt.Errorf("repos: load primary email: %w", err) |
| 380 | } |
| 381 | if !em.Verified { |
| 382 | return "", "", ErrNoVerifiedEmail |
| 383 | } |
| 384 | display := strings.TrimSpace(user.DisplayName) |
| 385 | if display == "" { |
| 386 | display = user.Username |
| 387 | } |
| 388 | return display, string(em.Email), nil |
| 389 | } |
| 390 | |
| 391 | func createRepoNameLockKey(p Params) (string, error) { |
| 392 | name := strings.ToLower(p.Name) |
| 393 | switch { |
| 394 | case p.OwnerUserID != 0 && p.OwnerOrgID == 0: |
| 395 | return fmt.Sprintf("repo-name:user:%d:%s", p.OwnerUserID, name), nil |
| 396 | case p.OwnerOrgID != 0 && p.OwnerUserID == 0: |
| 397 | return fmt.Sprintf("repo-name:org:%d:%s", p.OwnerOrgID, name), nil |
| 398 | default: |
| 399 | return "", errors.New("repos: owner is XOR — set OwnerUserID OR OwnerOrgID, not both") |
| 400 | } |
| 401 | } |
| 402 | |
| 403 | func displaceDeletedRepoPath( |
| 404 | ctx context.Context, |
| 405 | deps Deps, |
| 406 | q *reposdb.Queries, |
| 407 | db reposdb.DBTX, |
| 408 | p Params, |
| 409 | ownerSlug string, |
| 410 | diskPath string, |
| 411 | ) (bool, error) { |
| 412 | deleted, err := softDeletedRepoForCreate(ctx, q, db, p) |
| 413 | if errors.Is(err, pgx.ErrNoRows) { |
| 414 | return false, nil |
| 415 | } |
| 416 | if err != nil { |
| 417 | return false, err |
| 418 | } |
| 419 | deletedPath, err := deps.RepoFS.DeletedRepoPath(ownerSlug, p.Name, deleted.ID) |
| 420 | if err != nil { |
| 421 | return false, err |
| 422 | } |
| 423 | if err := deps.RepoFS.Move(diskPath, deletedPath); err != nil { |
| 424 | if errors.Is(err, os.ErrNotExist) { |
| 425 | return false, nil |
| 426 | } |
| 427 | return false, err |
| 428 | } |
| 429 | return true, nil |
| 430 | } |
| 431 | |
| 432 | func softDeletedRepoForCreate(ctx context.Context, q *reposdb.Queries, db reposdb.DBTX, p Params) (reposdb.Repo, error) { |
| 433 | if p.OwnerUserID != 0 { |
| 434 | return q.GetSoftDeletedRepoByOwnerUserAndName(ctx, db, reposdb.GetSoftDeletedRepoByOwnerUserAndNameParams{ |
| 435 | OwnerUserID: pgtype.Int8{Int64: p.OwnerUserID, Valid: true}, |
| 436 | Name: p.Name, |
| 437 | }) |
| 438 | } |
| 439 | return q.GetSoftDeletedRepoByOwnerOrgAndName(ctx, db, reposdb.GetSoftDeletedRepoByOwnerOrgAndNameParams{ |
| 440 | OwnerOrgID: pgtype.Int8{Int64: p.OwnerOrgID, Valid: true}, |
| 441 | Name: p.Name, |
| 442 | }) |
| 443 | } |
| 444 | |
| 445 | // isUniqueViolation matches Postgres SQLSTATE 23505. Used to surface |
| 446 | // the friendly "name taken" error from the unique-by-owner-and-name |
| 447 | // indexes when the pre-check raced. |
| 448 | func isUniqueViolation(err error) bool { |
| 449 | var pgErr *pgconn.PgError |
| 450 | if errors.As(err, &pgErr) { |
| 451 | return pgErr.Code == "23505" |
| 452 | } |
| 453 | return false |
| 454 | } |
| 455 |