Go · 3033 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 package jobs
4
5 import (
6 "context"
7 "encoding/json"
8 "errors"
9 "fmt"
10 "io/fs"
11 "log/slog"
12 "path/filepath"
13
14 "github.com/jackc/pgx/v5"
15 "github.com/jackc/pgx/v5/pgxpool"
16
17 "github.com/tenseleyFlow/shithub/internal/infra/storage"
18 reposdb "github.com/tenseleyFlow/shithub/internal/repos/sqlc"
19 "github.com/tenseleyFlow/shithub/internal/worker"
20 )
21
22 // RepoSizeRecalcDeps wires the size-recalc handler.
23 type RepoSizeRecalcDeps struct {
24 Pool *pgxpool.Pool
25 RepoFS *storage.RepoFS
26 Logger *slog.Logger
27 }
28
// RepoSizeRecalcPayload is the JSON body of a size-recalc job, shaped as
// { "repo_id": <int> }.
type RepoSizeRecalcPayload struct {
	// RepoID names the repository whose disk usage is recomputed. The
	// handler treats the zero value as a missing repo_id.
	RepoID int64 `json:"repo_id"`
}
33
34 // RepoSizeRecalc walks the bare-repo tree and updates
35 // repos.disk_used_bytes. Walked in pure Go (no shelling out to du) so
36 // we get a portable sum and don't have to wrangle stderr from a
37 // blocked subprocess.
38 //
39 // Concurrent runs may compute slightly different sizes if a push lands
40 // mid-walk; that's acceptable — the *last* one wins, and quotas (post-
41 // MVP) tolerate small drift.
42 func RepoSizeRecalc(deps RepoSizeRecalcDeps) worker.Handler {
43 return func(ctx context.Context, raw json.RawMessage) error {
44 var p RepoSizeRecalcPayload
45 if err := json.Unmarshal(raw, &p); err != nil {
46 return worker.PoisonError(fmt.Errorf("bad payload: %w", err))
47 }
48 if p.RepoID == 0 {
49 return worker.PoisonError(errors.New("missing repo_id"))
50 }
51
52 rq := reposdb.New()
53 ownerRow, err := rq.GetRepoOwnerUsernameByID(ctx, deps.Pool, p.RepoID)
54 if err != nil {
55 if errors.Is(err, pgx.ErrNoRows) {
56 return worker.PoisonError(fmt.Errorf("repo %d not found", p.RepoID))
57 }
58 return fmt.Errorf("load repo: %w", err)
59 }
60
61 ownerSlug, err := ownerSlugString(ownerRow.OwnerUsername)
62 if err != nil {
63 return worker.PoisonError(fmt.Errorf("repo owner slug: %w", err))
64 }
65 gitDir, err := deps.RepoFS.RepoPath(ownerSlug, ownerRow.RepoName)
66 if err != nil {
67 return worker.PoisonError(fmt.Errorf("repo path: %w", err))
68 }
69 size, err := walkSize(ctx, gitDir)
70 if err != nil {
71 return fmt.Errorf("walk size: %w", err)
72 }
73 if err := rq.UpdateRepoDiskUsed(ctx, deps.Pool, reposdb.UpdateRepoDiskUsedParams{
74 ID: p.RepoID,
75 DiskUsedBytes: size,
76 }); err != nil {
77 return fmt.Errorf("update disk_used: %w", err)
78 }
79 return nil
80 }
81 }
82
// walkSize sums the byte size of every regular file under root. It walks
// the tree once and doesn't follow symlinks (we never create any inside
// a bare repo); non-regular entries contribute nothing to the sum. ctx
// is checked on every visited entry so a long-running walk on a giant
// repo can be cancelled by graceful shutdown.
//
// Entries that vanish between being listed and being inspected — which
// happens when the repository is modified while the walk is in progress
// — are skipped rather than failing the whole walk. A root that does not
// exist is still reported as an error.
//
// On error the returned size is 0; a partial sum is never returned.
func walkSize(ctx context.Context, root string) (int64, error) {
	var total int64
	err := filepath.WalkDir(root, func(path string, d fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			// A subdirectory removed after its parent was listed is a
			// benign race; only a missing root is a real failure.
			if path != root && errors.Is(walkErr, fs.ErrNotExist) {
				return nil
			}
			return walkErr
		}
		if err := ctx.Err(); err != nil {
			return err
		}
		if d.IsDir() {
			return nil
		}
		info, err := d.Info()
		if err != nil {
			// Info may report ErrNotExist when the file was removed or
			// renamed after the directory read; treat it as size 0.
			if errors.Is(err, fs.ErrNotExist) {
				return nil
			}
			return err
		}
		if info.Mode().IsRegular() {
			total += info.Size()
		}
		return nil
	})
	if err != nil {
		return 0, err
	}
	return total, nil
}
110