| 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | |
| 3 | // Package source produces raw git-diff patch bytes for the parser. |
| 4 | // Three flavors: |
| 5 | // |
| 6 | // - FromCommit — commit-vs-parent (for the single-commit page) |
| 7 | // - FromRange — base..head two-dot (for compare-style diffs) |
| 8 | // - FromMergeBase — base...head three-dot (for PR / compare against |
| 9 | // the merge-base; matches GitHub's PR file-list behavior) |
| 10 | // |
// Each function returns the unified-diff bytes; the parser package
// consumes them from there. Whitespace-insensitive diffs are requested
// via Options.
| 13 | package source |
| 14 | |
| 15 | import ( |
| 16 | "bytes" |
| 17 | "context" |
| 18 | "errors" |
| 19 | "fmt" |
| 20 | "os/exec" |
| 21 | ) |
| 22 | |
// Options tunes a diff source. The zero value adds no extra flags to
// the git invocation.
type Options struct {
	// IgnoreWhitespace passes -w to git, so whitespace differences are
	// dropped by git itself rather than filtered parser-side. Note that
	// -w (--ignore-all-space) also hides indentation-only changes, which
	// can be significant in languages such as Python.
	IgnoreWhitespace bool
	// FindRenames turns on git's similarity detection. gitFlags
	// translates it to both -M (--find-renames) and -C (--find-copies),
	// so copies are reported as well as renames. Default off matches
	// historical git behavior; rename detection is what users expect on
	// a code-review page, so rendered surfaces are expected to enable it.
	FindRenames bool
}
| 35 | |
| 36 | // FromCommit returns the diff between sha and its first parent. For a |
| 37 | // root commit (no parent) we diff against the empty tree by using |
| 38 | // `git diff-tree -p -r --root`. |
| 39 | func FromCommit(ctx context.Context, gitDir, sha string, opts Options) ([]byte, error) { |
| 40 | args := []string{ |
| 41 | "-C", gitDir, |
| 42 | "diff-tree", "-p", "-r", "--root", |
| 43 | "--no-color", "--no-ext-diff", |
| 44 | "--full-index", |
| 45 | } |
| 46 | args = append(args, opts.gitFlags()...) |
| 47 | args = append(args, sha) |
| 48 | out, err := exec.CommandContext(ctx, "git", args...).Output() |
| 49 | if err != nil { |
| 50 | return nil, fmtErr("FromCommit", err) |
| 51 | } |
| 52 | return stripFirstHeader(out), nil |
| 53 | } |
| 54 | |
| 55 | // FromRange returns the two-dot diff base..head. Use for "show me |
| 56 | // every change between these two refs", regardless of merge graph. |
| 57 | func FromRange(ctx context.Context, gitDir, base, head string, opts Options) ([]byte, error) { |
| 58 | args := []string{ |
| 59 | "-C", gitDir, |
| 60 | "diff", "--patch", |
| 61 | "--no-color", "--no-ext-diff", |
| 62 | "--full-index", |
| 63 | } |
| 64 | args = append(args, opts.gitFlags()...) |
| 65 | args = append(args, base+".."+head, "--") |
| 66 | out, err := exec.CommandContext(ctx, "git", args...).Output() |
| 67 | if err != nil { |
| 68 | return nil, fmtErr("FromRange", err) |
| 69 | } |
| 70 | return out, nil |
| 71 | } |
| 72 | |
| 73 | // FromMergeBase returns the three-dot diff base...head. Equivalent to |
| 74 | // `git diff $(git merge-base base head)..head`. Used by PR / compare |
| 75 | // pages — shows only what `head` adds over the common ancestor. |
| 76 | func FromMergeBase(ctx context.Context, gitDir, base, head string, opts Options) ([]byte, error) { |
| 77 | args := []string{ |
| 78 | "-C", gitDir, |
| 79 | "diff", "--patch", |
| 80 | "--no-color", "--no-ext-diff", |
| 81 | "--full-index", |
| 82 | } |
| 83 | args = append(args, opts.gitFlags()...) |
| 84 | args = append(args, base+"..."+head, "--") |
| 85 | out, err := exec.CommandContext(ctx, "git", args...).Output() |
| 86 | if err != nil { |
| 87 | return nil, fmtErr("FromMergeBase", err) |
| 88 | } |
| 89 | return out, nil |
| 90 | } |
| 91 | |
| 92 | // gitFlags translates Options to the git-cli flags that the three |
| 93 | // helpers share. |
| 94 | func (o Options) gitFlags() []string { |
| 95 | var flags []string |
| 96 | if o.IgnoreWhitespace { |
| 97 | flags = append(flags, "-w") |
| 98 | } |
| 99 | if o.FindRenames { |
| 100 | flags = append(flags, "-M", "-C") |
| 101 | } |
| 102 | return flags |
| 103 | } |
| 104 | |
// stripFirstHeader removes the leading "<commit-id>\n" line that
// `git diff-tree` emits before the first patch. The parser expects to
// start at "diff --git ..."; the leading id line confuses it (or at
// minimum produces a spurious empty file entry).
//
// The first line is dropped only when it consists solely of hex digits
// and has the length of a full object id: 40 characters for SHA-1
// repositories or 64 for SHA-256 ones. Any other input — including
// empty input and input without a newline — is returned unchanged. The
// result aliases b; no copy is made.
func stripFirstHeader(b []byte) []byte {
	end := bytes.IndexByte(b, '\n')
	if end < 0 {
		// Empty, or a single unterminated line: nothing to strip.
		return b
	}
	switch end {
	case 40, 64: // hex length of a SHA-1 / SHA-256 object id
		if allHex(b[:end]) {
			return b[end+1:]
		}
	}
	return b
}

// allHex reports whether every byte of b is an ASCII hexadecimal digit
// (0-9, a-f, A-F). It returns true for an empty slice.
func allHex(b []byte) bool {
	for _, c := range b {
		isDigit := c >= '0' && c <= '9'
		isLower := c >= 'a' && c <= 'f'
		isUpper := c >= 'A' && c <= 'F'
		if !isDigit && !isLower && !isUpper {
			return false
		}
	}
	return true
}
| 135 | |
// fmtErr wraps err with the operation name op. When err is (or wraps)
// an *exec.ExitError that carries captured stderr, the stderr text is
// appended after the wrapped error so the git diagnostic reaches the
// caller. The result always unwraps to err.
//
// Surrounding whitespace is trimmed from stderr first: git terminates
// its messages with a newline, which would otherwise end up inside the
// error string, and whitespace-only stderr adds nothing worth showing.
func fmtErr(op string, err error) error {
	var ee *exec.ExitError
	if errors.As(err, &ee) {
		if msg := bytes.TrimSpace(ee.Stderr); len(msg) > 0 {
			return fmt.Errorf("%s: %w: %s", op, err, msg)
		}
	}
	return fmt.Errorf("%s: %w", op, err)
}
| 144 |