| 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | |
| 3 | package jobs |
| 4 | |
| 5 | import ( |
| 6 | "context" |
| 7 | "encoding/json" |
| 8 | "errors" |
| 9 | "fmt" |
| 10 | "io/fs" |
| 11 | "log/slog" |
| 12 | "path/filepath" |
| 13 | |
| 14 | "github.com/jackc/pgx/v5" |
| 15 | "github.com/jackc/pgx/v5/pgxpool" |
| 16 | |
| 17 | "github.com/tenseleyFlow/shithub/internal/infra/storage" |
| 18 | reposdb "github.com/tenseleyFlow/shithub/internal/repos/sqlc" |
| 19 | "github.com/tenseleyFlow/shithub/internal/worker" |
| 20 | ) |
| 21 | |
| 22 | // RepoSizeRecalcDeps wires the size-recalc handler. |
| 23 | type RepoSizeRecalcDeps struct { |
| 24 | Pool *pgxpool.Pool |
| 25 | RepoFS *storage.RepoFS |
| 26 | Logger *slog.Logger |
| 27 | } |
| 28 | |
// RepoSizeRecalcPayload is the JSON body of a size-recalc job:
//
//	{ "repo_id": <int> }
type RepoSizeRecalcPayload struct {
	// RepoID identifies the repo to measure. The handler rejects a
	// zero value as "missing repo_id".
	RepoID int64 `json:"repo_id"`
}
| 33 | |
| 34 | // RepoSizeRecalc walks the bare-repo tree and updates |
| 35 | // repos.disk_used_bytes. Walked in pure Go (no shelling out to du) so |
| 36 | // we get a portable sum and don't have to wrangle stderr from a |
| 37 | // blocked subprocess. |
| 38 | // |
| 39 | // Concurrent runs may compute slightly different sizes if a push lands |
| 40 | // mid-walk; that's acceptable — the *last* one wins, and quotas (post- |
| 41 | // MVP) tolerate small drift. |
| 42 | func RepoSizeRecalc(deps RepoSizeRecalcDeps) worker.Handler { |
| 43 | return func(ctx context.Context, raw json.RawMessage) error { |
| 44 | var p RepoSizeRecalcPayload |
| 45 | if err := json.Unmarshal(raw, &p); err != nil { |
| 46 | return worker.PoisonError(fmt.Errorf("bad payload: %w", err)) |
| 47 | } |
| 48 | if p.RepoID == 0 { |
| 49 | return worker.PoisonError(errors.New("missing repo_id")) |
| 50 | } |
| 51 | |
| 52 | rq := reposdb.New() |
| 53 | ownerRow, err := rq.GetRepoOwnerUsernameByID(ctx, deps.Pool, p.RepoID) |
| 54 | if err != nil { |
| 55 | if errors.Is(err, pgx.ErrNoRows) { |
| 56 | return worker.PoisonError(fmt.Errorf("repo %d not found", p.RepoID)) |
| 57 | } |
| 58 | return fmt.Errorf("load repo: %w", err) |
| 59 | } |
| 60 | |
| 61 | ownerSlug, err := ownerSlugString(ownerRow.OwnerUsername) |
| 62 | if err != nil { |
| 63 | return worker.PoisonError(fmt.Errorf("repo owner slug: %w", err)) |
| 64 | } |
| 65 | gitDir, err := deps.RepoFS.RepoPath(ownerSlug, ownerRow.RepoName) |
| 66 | if err != nil { |
| 67 | return worker.PoisonError(fmt.Errorf("repo path: %w", err)) |
| 68 | } |
| 69 | size, err := walkSize(ctx, gitDir) |
| 70 | if err != nil { |
| 71 | return fmt.Errorf("walk size: %w", err) |
| 72 | } |
| 73 | if err := rq.UpdateRepoDiskUsed(ctx, deps.Pool, reposdb.UpdateRepoDiskUsedParams{ |
| 74 | ID: p.RepoID, |
| 75 | DiskUsedBytes: size, |
| 76 | }); err != nil { |
| 77 | return fmt.Errorf("update disk_used: %w", err) |
| 78 | } |
| 79 | return nil |
| 80 | } |
| 81 | } |
| 82 | |
// walkSize returns the total size in bytes of every regular file under
// root. It walks the tree once and does not follow symlinks (we never
// create any inside a bare repo); non-regular entries contribute
// nothing to the sum.
//
// ctx is checked at every visited entry so a long-running walk on a
// giant repo can be cancelled by graceful shutdown; the context's error
// is returned in that case.
//
// The tree may change while it is being walked (a push or repack can
// delete files between the directory listing and the visit). Entries
// below root that have vanished are skipped rather than failing the
// whole walk. A missing root is still reported as an error.
//
// On any error the returned size is 0; a partial sum is never exposed.
func walkSize(ctx context.Context, root string) (int64, error) {
	var total int64
	err := filepath.WalkDir(root, func(path string, d fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			// A subdirectory removed after it was listed surfaces here
			// as a failed ReadDir; skip it. The root itself must exist.
			if path != root && errors.Is(walkErr, fs.ErrNotExist) {
				return nil
			}
			return walkErr
		}
		if err := ctx.Err(); err != nil {
			return err
		}
		if d.IsDir() {
			return nil
		}
		info, err := d.Info()
		if err != nil {
			// Info may stat lazily, so a file deleted since the
			// directory read reports ErrNotExist; it no longer
			// occupies space, so leave it out of the sum.
			if errors.Is(err, fs.ErrNotExist) {
				return nil
			}
			return err
		}
		if info.Mode().IsRegular() {
			total += info.Size()
		}
		return nil
	})
	if err != nil {
		return 0, err
	}
	return total, nil
}
| 110 |