Go · 2895 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 package jobs
4
5 import (
6 "context"
7 "encoding/json"
8 "errors"
9 "fmt"
10 "io/fs"
11 "log/slog"
12 "path/filepath"
13
14 "github.com/jackc/pgx/v5"
15 "github.com/jackc/pgx/v5/pgxpool"
16
17 "github.com/tenseleyFlow/shithub/internal/infra/storage"
18 reposdb "github.com/tenseleyFlow/shithub/internal/repos/sqlc"
19 "github.com/tenseleyFlow/shithub/internal/worker"
20 )
21
22 // RepoSizeRecalcDeps wires the size-recalc handler.
23 type RepoSizeRecalcDeps struct {
24 Pool *pgxpool.Pool
25 RepoFS *storage.RepoFS
26 Logger *slog.Logger
27 }
28
// RepoSizeRecalcPayload is the JSON body of a size-recalc job:
// {"repo_id": <int>}.
type RepoSizeRecalcPayload struct {
	// RepoID is the id of the repository whose disk usage is recomputed.
	RepoID int64 `json:"repo_id"`
}
33
34 // RepoSizeRecalc walks the bare-repo tree and updates
35 // repos.disk_used_bytes. Walked in pure Go (no shelling out to du) so
36 // we get a portable sum and don't have to wrangle stderr from a
37 // blocked subprocess.
38 //
39 // Concurrent runs may compute slightly different sizes if a push lands
40 // mid-walk; that's acceptable — the *last* one wins, and quotas (post-
41 // MVP) tolerate small drift.
42 func RepoSizeRecalc(deps RepoSizeRecalcDeps) worker.Handler {
43 return func(ctx context.Context, raw json.RawMessage) error {
44 var p RepoSizeRecalcPayload
45 if err := json.Unmarshal(raw, &p); err != nil {
46 return worker.PoisonError(fmt.Errorf("bad payload: %w", err))
47 }
48 if p.RepoID == 0 {
49 return worker.PoisonError(errors.New("missing repo_id"))
50 }
51
52 rq := reposdb.New()
53 ownerRow, err := rq.GetRepoOwnerUsernameByID(ctx, deps.Pool, p.RepoID)
54 if err != nil {
55 if errors.Is(err, pgx.ErrNoRows) {
56 return worker.PoisonError(fmt.Errorf("repo %d not found", p.RepoID))
57 }
58 return fmt.Errorf("load repo: %w", err)
59 }
60
61 gitDir, err := deps.RepoFS.RepoPath(ownerRow.OwnerUsername, ownerRow.RepoName)
62 if err != nil {
63 return worker.PoisonError(fmt.Errorf("repo path: %w", err))
64 }
65 size, err := walkSize(ctx, gitDir)
66 if err != nil {
67 return fmt.Errorf("walk size: %w", err)
68 }
69 if err := rq.UpdateRepoDiskUsed(ctx, deps.Pool, reposdb.UpdateRepoDiskUsedParams{
70 ID: p.RepoID,
71 DiskUsedBytes: size,
72 }); err != nil {
73 return fmt.Errorf("update disk_used: %w", err)
74 }
75 return nil
76 }
77 }
78
// walkSize returns the combined size in bytes of every regular file
// under root. The tree is walked once with filepath.WalkDir, which does
// not follow symlinks (we never create any inside a bare repo), and
// non-regular entries contribute nothing to the total.
//
// ctx is checked on every visited entry, so a long walk over a giant
// repo can be cancelled by graceful shutdown; the context's error is
// returned in that case.
//
// Entries below root that disappear between the directory read and the
// stat (git removes temp, lock and loose-object files while a push or
// gc is running) are skipped rather than failing the whole walk. A
// missing or unreadable root is still reported as an error.
//
// On error the returned size is 0.
func walkSize(ctx context.Context, root string) (int64, error) {
	var total int64
	err := filepath.WalkDir(root, func(path string, d fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			// A sub-directory that vanished mid-walk is expected churn;
			// anything else, or a problem with root itself, is fatal.
			if path != root && errors.Is(walkErr, fs.ErrNotExist) {
				return nil
			}
			return walkErr
		}
		if err := ctx.Err(); err != nil {
			return err
		}
		if d.IsDir() {
			return nil
		}
		info, err := d.Info()
		if err != nil {
			// Info reports ErrNotExist when the file was removed or
			// renamed after the directory was read; skip it.
			if errors.Is(err, fs.ErrNotExist) {
				return nil
			}
			return err
		}
		if info.Mode().IsRegular() {
			total += info.Size()
		}
		return nil
	})
	if err != nil {
		return 0, err
	}
	return total, nil
}
106