Go · 4280 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 // Package source produces raw git-diff patch bytes for the parser.
4 // Three flavors:
5 //
6 // - FromCommit — commit-vs-parent (for the single-commit page)
7 // - FromRange — base..head two-dot (for compare-style diffs)
8 // - FromMergeBase — base...head three-dot (for PR / compare against
9 // the merge-base; matches GitHub's PR file-list behavior)
10 //
11 // Each function returns the unified-diff bytes; the parser package
12 // consumes them from there. Whitespace-insensitive output is requested via Options.
13 package source
14
15 import (
16 "bytes"
17 "context"
18 "errors"
19 "fmt"
20 "os/exec"
21 )
22
// Options tunes a diff source. Each field maps to extra flags that
// gitFlags appends to the git command line.
type Options struct {
	// IgnoreWhitespace passes -w to git, so whitespace differences are
	// ignored when lines are compared and whitespace-only changes drop
	// out of the patch. Note that -w also hides indentation-only
	// changes, which can be significant in indentation-sensitive
	// languages such as Python.
	IgnoreWhitespace bool
	// FindRenames passes -M and -C to git, enabling rename detection
	// and copy detection. The zero value leaves both off, so a moved
	// file shows up as a delete plus an add.
	FindRenames bool
}
35
36 // FromCommit returns the diff between sha and its first parent. For a
37 // root commit (no parent) we diff against the empty tree by using
38 // `git diff-tree -p -r --root`.
39 func FromCommit(ctx context.Context, gitDir, sha string, opts Options) ([]byte, error) {
40 args := []string{
41 "-C", gitDir,
42 "diff-tree", "-p", "-r", "--root",
43 "--no-color", "--no-ext-diff",
44 "--full-index",
45 }
46 args = append(args, opts.gitFlags()...)
47 args = append(args, sha)
48 out, err := exec.CommandContext(ctx, "git", args...).Output()
49 if err != nil {
50 return nil, fmtErr("FromCommit", err)
51 }
52 return stripFirstHeader(out), nil
53 }
54
55 // FromRange returns the two-dot diff base..head. Use for "show me
56 // every change between these two refs", regardless of merge graph.
57 func FromRange(ctx context.Context, gitDir, base, head string, opts Options) ([]byte, error) {
58 args := []string{
59 "-C", gitDir,
60 "diff", "--patch",
61 "--no-color", "--no-ext-diff",
62 "--full-index",
63 }
64 args = append(args, opts.gitFlags()...)
65 args = append(args, base+".."+head, "--")
66 out, err := exec.CommandContext(ctx, "git", args...).Output()
67 if err != nil {
68 return nil, fmtErr("FromRange", err)
69 }
70 return out, nil
71 }
72
73 // FromMergeBase returns the three-dot diff base...head. Equivalent to
74 // `git diff $(git merge-base base head)..head`. Used by PR / compare
75 // pages — shows only what `head` adds over the common ancestor.
76 func FromMergeBase(ctx context.Context, gitDir, base, head string, opts Options) ([]byte, error) {
77 args := []string{
78 "-C", gitDir,
79 "diff", "--patch",
80 "--no-color", "--no-ext-diff",
81 "--full-index",
82 }
83 args = append(args, opts.gitFlags()...)
84 args = append(args, base+"..."+head, "--")
85 out, err := exec.CommandContext(ctx, "git", args...).Output()
86 if err != nil {
87 return nil, fmtErr("FromMergeBase", err)
88 }
89 return out, nil
90 }
91
92 // gitFlags translates Options to the git-cli flags that the three
93 // helpers share.
94 func (o Options) gitFlags() []string {
95 var flags []string
96 if o.IgnoreWhitespace {
97 flags = append(flags, "-w")
98 }
99 if o.FindRenames {
100 flags = append(flags, "-M", "-C")
101 }
102 return flags
103 }
104
// stripFirstHeader removes the leading "<object-id>\n" line that
// `git diff-tree` emits before the first patch hunk. The parser
// expects to start at "diff --git ..."; the leading ID line confuses
// it (or at minimum produces a spurious empty file entry).
//
// Both object-ID widths are recognized: 40 hex digits (SHA-1) and 64
// hex digits (SHA-256 repositories). Input whose first line is
// anything else, or that contains no newline at all, is returned
// unchanged. The result aliases b; no copy is made.
func stripFirstHeader(b []byte) []byte {
	const (
		sha1HexLen   = 40
		sha256HexLen = 64
	)
	idx := bytes.IndexByte(b, '\n')
	if idx < 0 {
		// Empty input or a single unterminated line: nothing to strip.
		return b
	}
	first := b[:idx]
	if (len(first) == sha1HexLen || len(first) == sha256HexLen) && allHex(first) {
		return b[idx+1:]
	}
	return b
}

// allHex reports whether every byte of b is an ASCII hexadecimal
// digit, in either case. It returns true for an empty slice.
func allHex(b []byte) bool {
	for _, c := range b {
		isDigit := c >= '0' && c <= '9'
		isLower := c >= 'a' && c <= 'f'
		isUpper := c >= 'A' && c <= 'F'
		if !isDigit && !isLower && !isUpper {
			return false
		}
	}
	return true
}
135
// fmtErr prefixes err with the operation name op. When err is (or
// wraps) an *exec.ExitError that carries stderr output, that output
// is appended with surrounding whitespace trimmed, so callers see
// git's own diagnostic. The original error stays reachable through
// errors.Is / errors.As because it is wrapped with %w.
func fmtErr(op string, err error) error {
	var exitErr *exec.ExitError
	if errors.As(err, &exitErr) {
		// git ends its messages with a newline; trimming keeps the
		// wrapped error from ending in stray whitespace, and treats
		// whitespace-only stderr the same as no stderr at all.
		if msg := bytes.TrimSpace(exitErr.Stderr); len(msg) > 0 {
			return fmt.Errorf("%s: %w: %s", op, err, msg)
		}
	}
	return fmt.Errorf("%s: %w", op, err)
}
144