| 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | |
| 3 | // Package parse wraps go-gitdiff. We expose a minimal struct surface |
| 4 | // (`Diff` / `File` / `Hunk` / `Line`) so the renderer doesn't depend |
| 5 | // on the upstream type names — keeps the option to swap libraries |
| 6 | // later if go-gitdiff evolves in an uncomfortable direction. |
| 7 | package parse |
| 8 | |
| 9 | import ( |
| 10 | "bytes" |
| 11 | "errors" |
| 12 | "fmt" |
| 13 | "io" |
| 14 | |
| 15 | "github.com/bluekeyes/go-gitdiff/gitdiff" |
| 16 | ) |
| 17 | |
// Diff is the top-level result of parsing a unified-diff stream.
type Diff struct {
	// Files holds one entry per file reported by the parser, in the
	// order the parser returned them.
	Files []File
}
| 22 | |
| 23 | // File is one changed path. OldPath / NewPath differ on rename/copy; |
| 24 | // they're equal on a normal modify. IsBinary, IsRename, IsCopy, |
| 25 | // IsNew, IsDelete summarize the extended-header flags so renderers |
| 26 | // don't reach into the upstream types. |
| 27 | type File struct { |
| 28 | OldPath string |
| 29 | NewPath string |
| 30 | OldMode string |
| 31 | NewMode string |
| 32 | IsBinary bool |
| 33 | IsNew bool |
| 34 | IsDelete bool |
| 35 | IsRename bool |
| 36 | IsCopy bool |
| 37 | Hunks []Hunk |
| 38 | // Score is the rename/copy similarity (0–100) when IsRename or IsCopy. |
| 39 | Score int |
| 40 | // IsTooLarge marks files past the per-file render threshold; the |
| 41 | // renderer emits a "too large" placeholder + expander link rather |
| 42 | // than the full hunks. |
| 43 | IsTooLarge bool |
| 44 | // SizeBytes is the rough cost of this file's hunks (sum of line |
| 45 | // content). Used to drive truncation decisions. |
| 46 | SizeBytes int |
| 47 | } |
| 48 | |
| 49 | // Hunk is one `@@ -X,Y +A,B @@` block. |
| 50 | type Hunk struct { |
| 51 | OldStart int |
| 52 | OldLines int |
| 53 | NewStart int |
| 54 | NewLines int |
| 55 | Section string // text after the closing `@@` (function name etc.) |
| 56 | Lines []Line |
| 57 | } |
| 58 | |
// LineKind classifies a single row of a hunk.
type LineKind int

// The LineKind values. LineContext is the zero value.
const (
	// LineContext is an unchanged row present on both sides.
	LineContext LineKind = iota

	// LineAdd is a row that exists only on the new side.
	LineAdd

	// LineDelete is a row that exists only on the old side.
	LineDelete

	// LineNoNewline stands for a "\ No newline at end of file" row.
	LineNoNewline
)
| 68 | |
| 69 | // Line is one rendered row in a hunk. |
| 70 | type Line struct { |
| 71 | Kind LineKind |
| 72 | OldLineNo int // 0 when the line is a pure addition |
| 73 | NewLineNo int // 0 when the line is a pure deletion |
| 74 | Content string // without the leading +/-/space marker, no trailing newline |
| 75 | } |
| 76 | |
| 77 | // Parse consumes a unified diff stream into the typed shape. The |
| 78 | // readSrc may be a bytes.Reader or any other io.Reader; we don't |
| 79 | // enforce a max-size cap here — caller decides. |
| 80 | func Parse(r io.Reader) (*Diff, error) { |
| 81 | files, _, err := gitdiff.Parse(r) |
| 82 | if err != nil && !errors.Is(err, io.EOF) { |
| 83 | return nil, fmt.Errorf("parse: %w", err) |
| 84 | } |
| 85 | out := &Diff{Files: make([]File, 0, len(files))} |
| 86 | for _, f := range files { |
| 87 | out.Files = append(out.Files, fromGitdiff(f)) |
| 88 | } |
| 89 | return out, nil |
| 90 | } |
| 91 | |
| 92 | // ParseBytes is a convenience for the common case of having the patch |
| 93 | // as a single buffer. |
| 94 | func ParseBytes(b []byte) (*Diff, error) { |
| 95 | return Parse(bytes.NewReader(b)) |
| 96 | } |
| 97 | |
| 98 | // fromGitdiff translates the upstream type to ours. Hunks are walked |
| 99 | // in-place; we don't retain the upstream pointer so the renderer can |
| 100 | // be tested without importing gitdiff. |
| 101 | func fromGitdiff(f *gitdiff.File) File { |
| 102 | out := File{ |
| 103 | OldPath: f.OldName, |
| 104 | NewPath: f.NewName, |
| 105 | IsBinary: f.IsBinary, |
| 106 | IsNew: f.IsNew, |
| 107 | IsDelete: f.IsDelete, |
| 108 | IsRename: f.IsRename, |
| 109 | IsCopy: f.IsCopy, |
| 110 | Score: f.Score, |
| 111 | } |
| 112 | if f.OldMode != 0 { |
| 113 | out.OldMode = fmt.Sprintf("%06o", f.OldMode) |
| 114 | } |
| 115 | if f.NewMode != 0 { |
| 116 | out.NewMode = fmt.Sprintf("%06o", f.NewMode) |
| 117 | } |
| 118 | if f.IsBinary { |
| 119 | return out // hunks are absent for binary |
| 120 | } |
| 121 | |
| 122 | hunks := make([]Hunk, 0, len(f.TextFragments)) |
| 123 | var totalBytes int |
| 124 | for _, frag := range f.TextFragments { |
| 125 | h := Hunk{ |
| 126 | OldStart: int(frag.OldPosition), |
| 127 | OldLines: int(frag.OldLines), |
| 128 | NewStart: int(frag.NewPosition), |
| 129 | NewLines: int(frag.NewLines), |
| 130 | Section: frag.Comment, |
| 131 | } |
| 132 | oldNo := h.OldStart |
| 133 | newNo := h.NewStart |
| 134 | for _, gl := range frag.Lines { |
| 135 | line := Line{Content: trimNewline(gl.Line)} |
| 136 | totalBytes += len(line.Content) |
| 137 | switch gl.Op { |
| 138 | case gitdiff.OpContext: |
| 139 | line.Kind = LineContext |
| 140 | line.OldLineNo = oldNo |
| 141 | line.NewLineNo = newNo |
| 142 | oldNo++ |
| 143 | newNo++ |
| 144 | case gitdiff.OpAdd: |
| 145 | line.Kind = LineAdd |
| 146 | line.NewLineNo = newNo |
| 147 | newNo++ |
| 148 | case gitdiff.OpDelete: |
| 149 | line.Kind = LineDelete |
| 150 | line.OldLineNo = oldNo |
| 151 | oldNo++ |
| 152 | } |
| 153 | h.Lines = append(h.Lines, line) |
| 154 | } |
| 155 | hunks = append(hunks, h) |
| 156 | } |
| 157 | out.Hunks = hunks |
| 158 | out.SizeBytes = totalBytes |
| 159 | return out |
| 160 | } |
| 161 | |
// trimNewline returns s without its final byte when that byte is `\n`,
// and s unchanged otherwise. At most one newline is removed, and a
// preceding `\r` is left in place. The function only looks at the last
// byte; it does nothing special for a "\ No newline at end of file"
// marker.
func trimNewline(s string) string {
	last := len(s) - 1
	if last < 0 || s[last] != '\n' {
		return s
	}
	return s[:last]
}
| 173 |