@@ -0,0 +1,220 @@ |
| 1 | +// SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | + |
| 3 | +package jobs |
| 4 | + |
| 5 | +import ( |
| 6 | + "bufio" |
| 7 | + "bytes" |
| 8 | + "context" |
| 9 | + "encoding/json" |
| 10 | + "errors" |
| 11 | + "fmt" |
| 12 | + "log/slog" |
| 13 | + "os/exec" |
| 14 | + "strings" |
| 15 | + "time" |
| 16 | + |
| 17 | + "github.com/jackc/pgx/v5" |
| 18 | + "github.com/jackc/pgx/v5/pgxpool" |
| 19 | + |
| 20 | + "github.com/tenseleyFlow/shithub/internal/infra/storage" |
| 21 | + "github.com/tenseleyFlow/shithub/internal/repos/sigverify" |
| 22 | + reposdb "github.com/tenseleyFlow/shithub/internal/repos/sqlc" |
| 23 | + "github.com/tenseleyFlow/shithub/internal/worker" |
| 24 | +) |
| 25 | + |
| 26 | +// GPGBackfillDeps wires the gpg:backfill handler. |
| 27 | +type GPGBackfillDeps struct { |
| 28 | + Pool *pgxpool.Pool |
| 29 | + RepoFS *storage.RepoFS |
| 30 | + Logger *slog.Logger |
| 31 | +} |
| 32 | + |
| 33 | +// GPGBackfillPayload mirrors sigverify.BackfillPayload — duplicated |
| 34 | +// here so the jobs package doesn't pull sigverify in just for the |
| 35 | +// type definition. JSON wire shape MUST stay identical to |
| 36 | +// sigverify.BackfillPayload; both forms unmarshal the same bytes. |
| 37 | +type GPGBackfillPayload struct { |
| 38 | + RepoID int64 `json:"repo_id"` |
| 39 | +} |
| 40 | + |
| 41 | +// perCommitTimeout bounds a single object's verification. A |
| 42 | +// pathological commit object (huge gpgsig with deep continuation |
| 43 | +// lines) shouldn't stall the whole queue. |
| 44 | +const perCommitTimeout = 5 * time.Second |
| 45 | + |
| 46 | +// GPGBackfill is the worker.Handler for KindGPGBackfill. One job per |
| 47 | +// repo; the handler enumerates every commit on the default branch |
| 48 | +// and every annotated tag, then runs sigverify.Verify / VerifyTag |
| 49 | +// and writes the result to commit_verification_cache. |
| 50 | +// |
| 51 | +// The handler is idempotent thanks to UpsertCommitVerification's |
| 52 | +// ON CONFLICT clause — re-running this job is safe and is in fact |
| 53 | +// the documented recovery path for a partially-completed backfill. |
| 54 | +// |
| 55 | +// Failure semantics: any per-commit cat-file failure is logged and |
| 56 | +// SKIPPED (not retried at the job level) so one corrupted object |
| 57 | +// doesn't poison the whole repo's backfill. The job itself returns |
| 58 | +// nil unless the repo lookup or git env is unreachable; those |
| 59 | +// surface as retryable errors so the worker pool's backoff kicks |
| 60 | +// in. |
| 61 | +func GPGBackfill(deps GPGBackfillDeps) worker.Handler { |
| 62 | + return func(ctx context.Context, raw json.RawMessage) error { |
| 63 | + var p GPGBackfillPayload |
| 64 | + if err := json.Unmarshal(raw, &p); err != nil { |
| 65 | + return worker.PoisonError(fmt.Errorf("bad payload: %w", err)) |
| 66 | + } |
| 67 | + if p.RepoID == 0 { |
| 68 | + return worker.PoisonError(errors.New("missing repo_id")) |
| 69 | + } |
| 70 | + |
| 71 | + rq := reposdb.New() |
| 72 | + row, err := rq.GetRepoForBackfill(ctx, deps.Pool, p.RepoID) |
| 73 | + if err != nil { |
| 74 | + if errors.Is(err, pgx.ErrNoRows) { |
| 75 | + // Repo was deleted between enqueue and dispatch. |
| 76 | + // Poison so we don't retry a deleted target. |
| 77 | + return worker.PoisonError(fmt.Errorf("repo %d not found", p.RepoID)) |
| 78 | + } |
| 79 | + return fmt.Errorf("load repo: %w", err) |
| 80 | + } |
| 81 | + |
| 82 | + gitDir, err := deps.RepoFS.RepoPath(row.Owner, row.Name) |
| 83 | + if err != nil { |
| 84 | + return worker.PoisonError(fmt.Errorf("repo path: %w", err)) |
| 85 | + } |
| 86 | + |
| 87 | + lookups := sigverify.NewSQLCLookups(deps.Pool) |
| 88 | + |
| 89 | + commitsProcessed, err := backfillCommits(ctx, deps, gitDir, p.RepoID, row.DefaultBranch, lookups) |
| 90 | + if err != nil { |
| 91 | + return fmt.Errorf("backfill commits: %w", err) |
| 92 | + } |
| 93 | + tagsProcessed, err := backfillTags(ctx, deps, gitDir, p.RepoID, lookups) |
| 94 | + if err != nil { |
| 95 | + return fmt.Errorf("backfill tags: %w", err) |
| 96 | + } |
| 97 | + |
| 98 | + deps.Logger.InfoContext(ctx, "gpg backfill completed", |
| 99 | + "repo_id", p.RepoID, |
| 100 | + "commits", commitsProcessed, |
| 101 | + "tags", tagsProcessed, |
| 102 | + ) |
| 103 | + return nil |
| 104 | + } |
| 105 | +} |
| 106 | + |
| 107 | +// backfillCommits walks every commit on the default branch and |
| 108 | +// verifies each. Returns the number processed (signed + unsigned; |
| 109 | +// the cache stamps both so future "is this verified" reads don't |
| 110 | +// re-walk). |
| 111 | +func backfillCommits( |
| 112 | + ctx context.Context, |
| 113 | + deps GPGBackfillDeps, |
| 114 | + gitDir string, |
| 115 | + repoID int64, |
| 116 | + defaultBranch string, |
| 117 | + lookups sigverify.Lookups, |
| 118 | +) (int, error) { |
| 119 | + // Empty default branch (uninitialized repo) → nothing to walk. |
| 120 | + if defaultBranch == "" { |
| 121 | + return 0, nil |
| 122 | + } |
| 123 | + |
| 124 | + cmd := exec.CommandContext(ctx, "git", "-C", gitDir, "rev-list", defaultBranch) |
| 125 | + stdout, err := cmd.StdoutPipe() |
| 126 | + if err != nil { |
| 127 | + return 0, fmt.Errorf("rev-list pipe: %w", err) |
| 128 | + } |
| 129 | + var stderr bytes.Buffer |
| 130 | + cmd.Stderr = &stderr |
| 131 | + if err := cmd.Start(); err != nil { |
| 132 | + return 0, fmt.Errorf("rev-list start: %w", err) |
| 133 | + } |
| 134 | + |
| 135 | + rq := reposdb.New() |
| 136 | + scanner := bufio.NewScanner(stdout) |
| 137 | + count := 0 |
| 138 | + for scanner.Scan() { |
| 139 | + oid := strings.TrimSpace(scanner.Text()) |
| 140 | + if len(oid) != 40 { |
| 141 | + continue |
| 142 | + } |
| 143 | + if err := ctx.Err(); err != nil { |
| 144 | + return count, err |
| 145 | + } |
| 146 | + verifyCtx, cancel := context.WithTimeout(ctx, perCommitTimeout) |
| 147 | + result, vErr := sigverify.Verify(verifyCtx, gitDir, oid, lookups) |
| 148 | + cancel() |
| 149 | + if vErr != nil { |
| 150 | + deps.Logger.WarnContext(ctx, "verify commit failed; skipping", |
| 151 | + "oid", oid, "err", vErr) |
| 152 | + continue |
| 153 | + } |
| 154 | + if wErr := sigverify.WriteResult(ctx, rq, deps.Pool, repoID, oid, sigverify.KindCommit, result); wErr != nil { |
| 155 | + deps.Logger.WarnContext(ctx, "cache write failed; skipping", |
| 156 | + "oid", oid, "err", wErr) |
| 157 | + continue |
| 158 | + } |
| 159 | + count++ |
| 160 | + } |
| 161 | + if err := scanner.Err(); err != nil { |
| 162 | + return count, fmt.Errorf("scan rev-list: %w", err) |
| 163 | + } |
| 164 | + if err := cmd.Wait(); err != nil { |
| 165 | + return count, fmt.Errorf("rev-list: %w: %s", err, stderr.String()) |
| 166 | + } |
| 167 | + return count, nil |
| 168 | +} |
| 169 | + |
| 170 | +// backfillTags walks every annotated tag in the repo and verifies |
| 171 | +// each. Lightweight tags (which carry no signature) are skipped. |
| 172 | +// Returns the number processed. |
| 173 | +func backfillTags( |
| 174 | + ctx context.Context, |
| 175 | + deps GPGBackfillDeps, |
| 176 | + gitDir string, |
| 177 | + repoID int64, |
| 178 | + lookups sigverify.Lookups, |
| 179 | +) (int, error) { |
| 180 | + // for-each-ref filters to refs/tags and emits 'oid type'; we |
| 181 | + // only want annotated tags (type=tag). |
| 182 | + cmd := exec.CommandContext(ctx, "git", "-C", gitDir, |
| 183 | + "for-each-ref", "--format=%(objectname) %(objecttype)", "refs/tags", |
| 184 | + ) |
| 185 | + out, err := cmd.Output() |
| 186 | + if err != nil { |
| 187 | + return 0, fmt.Errorf("for-each-ref tags: %w", err) |
| 188 | + } |
| 189 | + |
| 190 | + rq := reposdb.New() |
| 191 | + count := 0 |
| 192 | + for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { |
| 193 | + if line == "" { |
| 194 | + continue |
| 195 | + } |
| 196 | + fields := strings.Fields(line) |
| 197 | + if len(fields) != 2 || fields[1] != "tag" { |
| 198 | + continue |
| 199 | + } |
| 200 | + oid := fields[0] |
| 201 | + if err := ctx.Err(); err != nil { |
| 202 | + return count, err |
| 203 | + } |
| 204 | + verifyCtx, cancel := context.WithTimeout(ctx, perCommitTimeout) |
| 205 | + result, vErr := sigverify.VerifyTag(verifyCtx, gitDir, oid, lookups) |
| 206 | + cancel() |
| 207 | + if vErr != nil { |
| 208 | + deps.Logger.WarnContext(ctx, "verify tag failed; skipping", |
| 209 | + "oid", oid, "err", vErr) |
| 210 | + continue |
| 211 | + } |
| 212 | + if wErr := sigverify.WriteResult(ctx, rq, deps.Pool, repoID, oid, sigverify.KindTag, result); wErr != nil { |
| 213 | + deps.Logger.WarnContext(ctx, "cache write failed; skipping", |
| 214 | + "oid", oid, "err", wErr) |
| 215 | + continue |
| 216 | + } |
| 217 | + count++ |
| 218 | + } |
| 219 | + return count, nil |
| 220 | +} |