tenseleyflow/shithub / 5d8e910

Browse files

S22: git helpers for mergeability probe + worktree merge (merge/squash/rebase)

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
5d8e91020c8d243c71e441fa3ceca79cc6829f48
Parents
41c768b
Tree
c31f80f

1 changed file

Status | File | + | -
A | internal/repos/git/mergeops.go | 376 | 0
internal/repos/git/mergeops.go (added)
@@ -0,0 +1,376 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package git
4
+
5
+import (
6
+	"bytes"
7
+	"context"
8
+	"errors"
9
+	"fmt"
10
+	"os"
11
+	"os/exec"
12
+	"path/filepath"
13
+	"strconv"
14
+	"strings"
15
+	"time"
16
+)
17
+
18
+// ResolveRefOID returns the full SHA for `ref` via `git rev-parse`.
19
+// Returns ErrRefNotFound when git can't resolve.
20
+func ResolveRefOID(ctx context.Context, gitDir, ref string) (string, error) {
21
+	cmd := exec.CommandContext(ctx, "git", "-C", gitDir, "rev-parse", "--verify", ref+"^{commit}")
22
+	out, err := cmd.Output()
23
+	if err != nil {
24
+		var ee *exec.ExitError
25
+		if errors.As(err, &ee) {
26
+			return "", ErrRefNotFound
27
+		}
28
+		return "", wrapExecErr(err)
29
+	}
30
+	return strings.TrimSpace(string(out)), nil
31
+}
32
+
33
// MergeTreeResult is the parsed outcome of a `git merge-tree
// --write-tree` mergeability probe. Git ≥ 2.38 required.
type MergeTreeResult struct {
	// TreeOID is the first line of merge-tree's output: the OID of the
	// tree git wrote for the merge.
	TreeOID string

	// ConflictPaths holds the entries git listed after the tree OID
	// for a conflicted merge. It is empty when the merge is clean.
	ConflictPaths []string

	// HasConflict reports whether the probe ended with conflicts.
	HasConflict bool
}
41
+
42
+// ProbeMerge runs `git merge-tree --write-tree --no-messages <base>
43
+// <head>` and lets git auto-compute the merge base. Exit 0 = clean
44
+// merge (TreeOID set); exit 1 = conflicts (ConflictPaths populated).
45
+// Anything else is wrapped.
46
+func ProbeMerge(ctx context.Context, gitDir, baseOID, headOID string) (MergeTreeResult, error) {
47
+	cmd := exec.CommandContext(ctx, "git", "-C", gitDir,
48
+		"merge-tree", "--write-tree", "--no-messages",
49
+		baseOID, headOID)
50
+	var stdout, stderr bytes.Buffer
51
+	cmd.Stdout = &stdout
52
+	cmd.Stderr = &stderr
53
+	err := cmd.Run()
54
+	out := strings.TrimRight(stdout.String(), "\n")
55
+	if err == nil {
56
+		return MergeTreeResult{TreeOID: out}, nil
57
+	}
58
+	var ee *exec.ExitError
59
+	if errors.As(err, &ee) && ee.ExitCode() == 1 {
60
+		// First line is the tree OID even on conflict; subsequent lines
61
+		// list conflicting paths (one per line) when --no-messages is set.
62
+		lines := strings.Split(out, "\n")
63
+		res := MergeTreeResult{HasConflict: true}
64
+		if len(lines) > 0 {
65
+			res.TreeOID = lines[0]
66
+		}
67
+		for _, l := range lines[1:] {
68
+			if l = strings.TrimSpace(l); l != "" {
69
+				res.ConflictPaths = append(res.ConflictPaths, l)
70
+			}
71
+		}
72
+		return res, nil
73
+	}
74
+	return MergeTreeResult{}, fmt.Errorf("merge-tree: %w (%s)", err, stderr.String())
75
+}
76
+
77
+// CommitsBetweenDetail returns commits unique to head, with author +
78
+// committer + body, suitable for refreshing pull_request_commits. The
79
+// result preserves head-side oldest-first ordering by inverting log's
80
+// default newest-first via --reverse.
81
+func CommitsBetweenDetail(ctx context.Context, gitDir, baseOID, headOID string, max int) ([]CommitDetail, error) {
82
+	if max <= 0 {
83
+		max = 250
84
+	}
85
+	const sep = "\x1f"
86
+	const recordEnd = "\x1e"
87
+	format := strings.Join([]string{
88
+		"%H", "%h",
89
+		"%an", "%ae", "%at",
90
+		"%cn", "%ce", "%ct",
91
+		"%s",
92
+	}, sep) + sep + "%b" + recordEnd
93
+	cmd := exec.CommandContext(ctx, "git", "-C", gitDir,
94
+		"log", "--reverse",
95
+		"--max-count="+strconv.Itoa(max),
96
+		"--format="+format,
97
+		baseOID+".."+headOID,
98
+	)
99
+	out, err := cmd.Output()
100
+	if err != nil {
101
+		var ee *exec.ExitError
102
+		if errors.As(err, &ee) && strings.Contains(string(ee.Stderr), "unknown revision") {
103
+			return nil, ErrRefNotFound
104
+		}
105
+		return nil, wrapExecErr(err)
106
+	}
107
+	return parseCommitDetail(out), nil
108
+}
109
+
110
+func parseCommitDetail(out []byte) []CommitDetail {
111
+	const sep = "\x1f"
112
+	const recordEnd = "\x1e"
113
+	body := bytes.TrimRight(out, "\n")
114
+	records := bytes.Split(body, []byte(recordEnd))
115
+	cs := make([]CommitDetail, 0, len(records))
116
+	for _, rec := range records {
117
+		rec = bytes.TrimLeft(rec, "\n")
118
+		if len(rec) == 0 {
119
+			continue
120
+		}
121
+		parts := strings.SplitN(string(rec), sep, 10)
122
+		if len(parts) < 10 {
123
+			continue
124
+		}
125
+		ats, _ := strconv.ParseInt(parts[4], 10, 64)
126
+		cts, _ := strconv.ParseInt(parts[7], 10, 64)
127
+		cs = append(cs, CommitDetail{
128
+			Commit: Commit{
129
+				OID:         parts[0],
130
+				ShortOID:    parts[1],
131
+				AuthorName:  parts[2],
132
+				AuthorEmail: parts[3],
133
+				AuthorWhen:  time.Unix(ats, 0).UTC(),
134
+				Subject:     parts[8],
135
+				Body:        strings.TrimSpace(parts[9]),
136
+			},
137
+			CommitterName:  parts[5],
138
+			CommitterEmail: parts[6],
139
+			CommitterWhen:  time.Unix(cts, 0).UTC(),
140
+		})
141
+	}
142
+	return cs
143
+}
144
+
145
+// FilesChangedBetween returns the change set head-side, computed as
146
+// `git diff --name-status --numstat <base>...<head>` (three-dot:
147
+// changes from merge-base to head). Status is git's letter code,
148
+// renames carry the old path as the second column.
149
+func FilesChangedBetween(ctx context.Context, gitDir, baseOID, headOID string) ([]PRFileChange, error) {
150
+	cmd := exec.CommandContext(ctx, "git", "-C", gitDir,
151
+		"diff", "--name-status", "-M", "-C",
152
+		baseOID+"..."+headOID,
153
+	)
154
+	statusOut, err := cmd.Output()
155
+	if err != nil {
156
+		var ee *exec.ExitError
157
+		if errors.As(err, &ee) && strings.Contains(string(ee.Stderr), "unknown revision") {
158
+			return nil, ErrRefNotFound
159
+		}
160
+		return nil, wrapExecErr(err)
161
+	}
162
+	cmd = exec.CommandContext(ctx, "git", "-C", gitDir,
163
+		"diff", "--numstat", "-M", "-C",
164
+		baseOID+"..."+headOID,
165
+	)
166
+	numOut, err := cmd.Output()
167
+	if err != nil {
168
+		return nil, wrapExecErr(err)
169
+	}
170
+	return parseFilesChanged(statusOut, numOut), nil
171
+}
172
+
173
// PRFileChange is a per-file row for pull_request_files. Status mirrors
// the migration enum; OldPath is non-empty for renames + copies.
type PRFileChange struct {
	Path      string // head-side path (the destination for renames and copies)
	OldPath   string // source path; set only for renames and copies
	Status    string // "added" | "modified" | "deleted" | "renamed" | "copied"
	Additions int    // added lines from numstat; 0 when numstat prints "-" (binary)
	Deletions int    // deleted lines from numstat; 0 when numstat prints "-" (binary)
}

// parseFilesChanged merges the output of `git diff --name-status` and
// `git diff --numstat` (both produced with rename/copy detection and
// without -z) into one row per changed file, in name-status order.
//
// Status letters map as A→"added", D→"deleted", M→"modified",
// R*→"renamed", C*→"copied"; any other letter is reported as
// "modified". Line counts are looked up by the file's new path, and a
// file with no numstat entry gets 0/0. The returned slice is never nil.
//
// With rename detection numstat does not print the bare new path: it
// prints "old => new" or "prefix/{old => new}/suffix". Each numstat
// entry is therefore indexed both under its raw path field and under
// the destination path decoded from that notation, so renamed and
// copied files receive their real counts.
func parseFilesChanged(statusOut, numOut []byte) []PRFileChange {
	type stats struct{ adds, dels int }
	numByPath := map[string]stats{}
	for _, line := range strings.Split(string(numOut), "\n") {
		fields := strings.Split(strings.TrimSpace(line), "\t")
		if len(fields) < 3 {
			continue
		}
		// "-" appears for binary files; Atoi fails on it and the count
		// is deliberately left at 0.
		a, _ := strconv.Atoi(fields[0])
		d, _ := strconv.Atoi(fields[1])
		s := stats{a, d}

		raw := fields[len(fields)-1]
		numByPath[raw] = s
		// Also index by the decoded destination path. A literal path
		// always wins over a decoded one, so a file whose name really
		// contains " => " keeps its own counts.
		if dest := numstatNewPath(raw); dest != raw {
			if _, taken := numByPath[dest]; !taken {
				numByPath[dest] = s
			}
		}
	}

	out := []PRFileChange{}
	for _, line := range strings.Split(string(statusOut), "\n") {
		fields := strings.Split(strings.TrimSpace(line), "\t")
		if len(fields) < 2 {
			continue
		}
		statusLetter := fields[0]
		var fc PRFileChange
		switch {
		case strings.HasPrefix(statusLetter, "R") && len(fields) >= 3:
			// R<score>\told\tnew
			fc.Status = "renamed"
			fc.OldPath = fields[1]
			fc.Path = fields[2]
		case strings.HasPrefix(statusLetter, "C") && len(fields) >= 3:
			// C<score>\tsource\tcopy
			fc.Status = "copied"
			fc.OldPath = fields[1]
			fc.Path = fields[2]
		case statusLetter == "A":
			fc.Status = "added"
			fc.Path = fields[1]
		case statusLetter == "D":
			fc.Status = "deleted"
			fc.Path = fields[1]
		case statusLetter == "M":
			fc.Status = "modified"
			fc.Path = fields[1]
		default:
			// Type changes, unmerged entries and anything unexpected
			// are reported as plain modifications.
			fc.Status = "modified"
			fc.Path = fields[len(fields)-1]
		}
		s := numByPath[fc.Path]
		fc.Additions = s.adds
		fc.Deletions = s.dels
		out = append(out, fc)
	}
	return out
}

// numstatNewPath decodes the destination path from a numstat path
// field. It understands "prefix{old => new}suffix" and "old => new";
// any other input is returned unchanged.
func numstatNewPath(field string) string {
	const arrow = " => "

	if open := strings.Index(field, "{"); open >= 0 {
		if rel := strings.Index(field[open:], arrow); rel >= 0 {
			arrowAt := open + rel
			if end := strings.Index(field[arrowAt:], "}"); end >= 0 {
				closeAt := arrowAt + end
				prefix := field[:open]
				mid := field[arrowAt+len(arrow) : closeAt]
				suffix := field[closeAt+1:]
				// "a/{b => }/c" denotes a/b/c → a/c: when the new
				// middle part is empty the two slashes collapse.
				if mid == "" && strings.HasSuffix(prefix, "/") && strings.HasPrefix(suffix, "/") {
					suffix = suffix[1:]
				}
				return prefix + mid + suffix
			}
		}
	}
	if i := strings.Index(field, arrow); i >= 0 {
		return field[i+len(arrow):]
	}
	return field
}
236
+
237
// MergeOptions configures a worktree-based merge.
type MergeOptions struct {
	// GitDir is the repository the git commands are run against.
	GitDir string

	// BaseRef is the ref to move on success, e.g. "refs/heads/trunk".
	BaseRef string

	// BaseOID is the commit the temporary worktree starts from.
	BaseOID string

	// HeadOID is the commit being merged into the base.
	HeadOID string

	// Method selects the strategy: "merge" | "squash" | "rebase".
	Method string

	// AuthorName and AuthorEmail are exported to git as
	// GIT_AUTHOR_NAME / GIT_AUTHOR_EMAIL.
	AuthorName  string
	AuthorEmail string

	// CommitterName and CommitterEmail are exported to git as
	// GIT_COMMITTER_NAME / GIT_COMMITTER_EMAIL.
	CommitterName  string
	CommitterEmail string

	// When, if non-zero, is used as both the author and committer date.
	When time.Time

	// Subject and Body form the commit message for the "merge" and
	// "squash" methods.
	Subject string
	Body    string

	// WorktreesDir is the parent dir for the temp worktree (must share
	// volume with GitDir).
	WorktreesDir string
}
253
+
254
// MergeResult is what PerformMerge returns on success.
type MergeResult struct {
	// NewBaseOID is the new tip of base_ref after the merge.
	NewBaseOID string

	// MergedOID is the merge commit for the "merge" method; for
	// "squash" and "rebase" it is the same as NewBaseOID.
	MergedOID string
}
259
+
260
+// PerformMerge executes the requested merge strategy in a temp worktree
261
+// rooted at WorktreesDir. The worktree is removed on every exit path
262
+// (success or failure). Returns the new base-ref tip.
263
+func PerformMerge(ctx context.Context, opts MergeOptions) (MergeResult, error) {
264
+	if opts.WorktreesDir == "" {
265
+		opts.WorktreesDir = filepath.Join(filepath.Dir(opts.GitDir), ".tmp-worktrees")
266
+	}
267
+	if err := os.MkdirAll(opts.WorktreesDir, 0o750); err != nil {
268
+		return MergeResult{}, fmt.Errorf("worktrees dir: %w", err)
269
+	}
270
+	wt, err := os.MkdirTemp(opts.WorktreesDir, "merge-*")
271
+	if err != nil {
272
+		return MergeResult{}, fmt.Errorf("mktemp worktree: %w", err)
273
+	}
274
+	cleanup := func() {
275
+		// `git worktree remove --force` ignores stale entries; --force
276
+		// also drops the directory contents so a leftover after a panic
277
+		// gets reaped on the next attempt.
278
+		_ = exec.Command("git", "-C", opts.GitDir, "worktree", "remove", "--force", wt).Run()
279
+		_ = os.RemoveAll(wt)
280
+	}
281
+	defer cleanup()
282
+
283
+	// Set up the worktree at base_oid (detached). Using detached HEAD
284
+	// keeps the worktree from polluting the bare repo's branch refs;
285
+	// we only push the resulting commit back to base_ref at the end.
286
+	addCmd := exec.CommandContext(ctx, "git", "-C", opts.GitDir,
287
+		"worktree", "add", "--detach", wt, opts.BaseOID)
288
+	if out, err := addCmd.CombinedOutput(); err != nil {
289
+		return MergeResult{}, fmt.Errorf("worktree add: %w (%s)", err, out)
290
+	}
291
+
292
+	// Identity for the merge commit. `--no-edit` + a baked subject
293
+	// keeps the merge non-interactive.
294
+	envBase := append(os.Environ(),
295
+		"GIT_AUTHOR_NAME="+opts.AuthorName,
296
+		"GIT_AUTHOR_EMAIL="+opts.AuthorEmail,
297
+		"GIT_COMMITTER_NAME="+opts.CommitterName,
298
+		"GIT_COMMITTER_EMAIL="+opts.CommitterEmail,
299
+	)
300
+	if !opts.When.IsZero() {
301
+		envBase = append(envBase,
302
+			"GIT_AUTHOR_DATE="+opts.When.Format(time.RFC3339),
303
+			"GIT_COMMITTER_DATE="+opts.When.Format(time.RFC3339),
304
+		)
305
+	}
306
+
307
+	switch opts.Method {
308
+	case "merge":
309
+		// Non-fast-forward merge so we always get a merge commit, even
310
+		// when the head is strictly ahead of base.
311
+		msg := strings.TrimSpace(opts.Subject)
312
+		if opts.Body != "" {
313
+			msg += "\n\n" + opts.Body
314
+		}
315
+		mergeCmd := exec.CommandContext(ctx, "git", "-C", wt,
316
+			"merge", "--no-ff", "--no-edit", "-m", msg, opts.HeadOID)
317
+		mergeCmd.Env = envBase
318
+		if out, err := mergeCmd.CombinedOutput(); err != nil {
319
+			return MergeResult{}, fmt.Errorf("merge --no-ff: %w (%s)", err, out)
320
+		}
321
+	case "squash":
322
+		// `git merge --squash` stages the squashed change without
323
+		// committing; `git commit` makes the squash commit with a
324
+		// single author/committer pair.
325
+		squashCmd := exec.CommandContext(ctx, "git", "-C", wt,
326
+			"merge", "--squash", opts.HeadOID)
327
+		squashCmd.Env = envBase
328
+		if out, err := squashCmd.CombinedOutput(); err != nil {
329
+			return MergeResult{}, fmt.Errorf("merge --squash: %w (%s)", err, out)
330
+		}
331
+		msg := strings.TrimSpace(opts.Subject)
332
+		if opts.Body != "" {
333
+			msg += "\n\n" + opts.Body
334
+		}
335
+		commitCmd := exec.CommandContext(ctx, "git", "-C", wt,
336
+			"commit", "-m", msg)
337
+		commitCmd.Env = envBase
338
+		if out, err := commitCmd.CombinedOutput(); err != nil {
339
+			return MergeResult{}, fmt.Errorf("squash commit: %w (%s)", err, out)
340
+		}
341
+	case "rebase":
342
+		// Replay head_oid onto base_oid. --rebase-merges off means we
343
+		// flatten merge commits into linear history; this matches the
344
+		// standard "rebase merge" UX.
345
+		rebaseCmd := exec.CommandContext(ctx, "git", "-C", wt,
346
+			"rebase", "--onto", opts.BaseOID, opts.BaseOID, opts.HeadOID)
347
+		rebaseCmd.Env = envBase
348
+		if out, err := rebaseCmd.CombinedOutput(); err != nil {
349
+			// Best-effort abort so the worktree is reusable for the
350
+			// cleanup step (cleanup deletes anyway, but keeps logs sane).
351
+			_ = exec.Command("git", "-C", wt, "rebase", "--abort").Run()
352
+			return MergeResult{}, fmt.Errorf("rebase --onto: %w (%s)", err, out)
353
+		}
354
+	default:
355
+		return MergeResult{}, fmt.Errorf("unknown merge method %q", opts.Method)
356
+	}
357
+
358
+	// Capture the resulting tip of HEAD in the worktree.
359
+	revOut, err := exec.CommandContext(ctx, "git", "-C", wt, "rev-parse", "HEAD").Output()
360
+	if err != nil {
361
+		return MergeResult{}, fmt.Errorf("rev-parse HEAD: %w", err)
362
+	}
363
+	newOID := strings.TrimSpace(string(revOut))
364
+
365
+	// Update base_ref atomically via update-ref, gated on the expected
366
+	// old OID to defend against concurrent pushes during the merge.
367
+	updateCmd := exec.CommandContext(ctx, "git", "-C", opts.GitDir,
368
+		"update-ref", opts.BaseRef, newOID, opts.BaseOID)
369
+	if out, err := updateCmd.CombinedOutput(); err != nil {
370
+		return MergeResult{}, fmt.Errorf("update-ref %s: %w (%s)", opts.BaseRef, err, out)
371
+	}
372
+
373
+	// For "merge" method the merge commit is HEAD; for squash and
374
+	// rebase it's the same as the new base tip.
375
+	return MergeResult{NewBaseOID: newOID, MergedOID: newOID}, nil
376
+}