Go · 4617 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 // Package parse wraps go-gitdiff. We expose a minimal struct surface
4 // (`Diff` / `File` / `Hunk` / `Line`) so the renderer doesn't depend
5 // on the upstream type names — keeps the option to swap libraries
6 // later if go-gitdiff evolves in an uncomfortable direction.
7 package parse
8
9 import (
10 "bytes"
11 "errors"
12 "fmt"
13 "io"
14
15 "github.com/bluekeyes/go-gitdiff/gitdiff"
16 )
17
18 // Diff is the top-level result of parsing a unified-diff stream.
19 type Diff struct {
20 Files []File
21 }
22
23 // File is one changed path. OldPath / NewPath differ on rename/copy;
24 // they're equal on a normal modify. IsBinary, IsRename, IsCopy,
25 // IsNew, IsDelete summarize the extended-header flags so renderers
26 // don't reach into the upstream types.
27 type File struct {
28 OldPath string
29 NewPath string
30 OldMode string
31 NewMode string
32 IsBinary bool
33 IsNew bool
34 IsDelete bool
35 IsRename bool
36 IsCopy bool
37 Hunks []Hunk
38 // Score is the rename/copy similarity (0–100) when IsRename or IsCopy.
39 Score int
40 // IsTooLarge marks files past the per-file render threshold; the
41 // renderer emits a "too large" placeholder + expander link rather
42 // than the full hunks.
43 IsTooLarge bool
44 // SizeBytes is the rough cost of this file's hunks (sum of line
45 // content). Used to drive truncation decisions.
46 SizeBytes int
47 }
48
49 // Hunk is one `@@ -X,Y +A,B @@` block.
50 type Hunk struct {
51 OldStart int
52 OldLines int
53 NewStart int
54 NewLines int
55 Section string // text after the closing `@@` (function name etc.)
56 Lines []Line
57 }
58
// LineKind tells a renderer how a Line relates to the two sides of
// the diff.
type LineKind int

const (
	// LineContext is an unchanged line that exists on both sides.
	LineContext LineKind = iota

	// LineAdd is a line that exists only on the new side.
	LineAdd

	// LineDelete is a line that exists only on the old side.
	LineDelete

	// LineNoNewline stands for the "\ No newline at end of file"
	// marker.
	LineNoNewline
)
68
69 // Line is one rendered row in a hunk.
70 type Line struct {
71 Kind LineKind
72 OldLineNo int // 0 when the line is a pure addition
73 NewLineNo int // 0 when the line is a pure deletion
74 Content string // without the leading +/-/space marker, no trailing newline
75 }
76
77 // Parse consumes a unified diff stream into the typed shape. The
78 // readSrc may be a bytes.Reader or any other io.Reader; we don't
79 // enforce a max-size cap here — caller decides.
80 func Parse(r io.Reader) (*Diff, error) {
81 files, _, err := gitdiff.Parse(r)
82 if err != nil && !errors.Is(err, io.EOF) {
83 return nil, fmt.Errorf("parse: %w", err)
84 }
85 out := &Diff{Files: make([]File, 0, len(files))}
86 for _, f := range files {
87 out.Files = append(out.Files, fromGitdiff(f))
88 }
89 return out, nil
90 }
91
92 // ParseBytes is a convenience for the common case of having the patch
93 // as a single buffer.
94 func ParseBytes(b []byte) (*Diff, error) {
95 return Parse(bytes.NewReader(b))
96 }
97
98 // fromGitdiff translates the upstream type to ours. Hunks are walked
99 // in-place; we don't retain the upstream pointer so the renderer can
100 // be tested without importing gitdiff.
101 func fromGitdiff(f *gitdiff.File) File {
102 out := File{
103 OldPath: f.OldName,
104 NewPath: f.NewName,
105 IsBinary: f.IsBinary,
106 IsNew: f.IsNew,
107 IsDelete: f.IsDelete,
108 IsRename: f.IsRename,
109 IsCopy: f.IsCopy,
110 Score: f.Score,
111 }
112 if f.OldMode != 0 {
113 out.OldMode = fmt.Sprintf("%06o", f.OldMode)
114 }
115 if f.NewMode != 0 {
116 out.NewMode = fmt.Sprintf("%06o", f.NewMode)
117 }
118 if f.IsBinary {
119 return out // hunks are absent for binary
120 }
121
122 hunks := make([]Hunk, 0, len(f.TextFragments))
123 var totalBytes int
124 for _, frag := range f.TextFragments {
125 h := Hunk{
126 OldStart: int(frag.OldPosition),
127 OldLines: int(frag.OldLines),
128 NewStart: int(frag.NewPosition),
129 NewLines: int(frag.NewLines),
130 Section: frag.Comment,
131 }
132 oldNo := h.OldStart
133 newNo := h.NewStart
134 for _, gl := range frag.Lines {
135 line := Line{Content: trimNewline(gl.Line)}
136 totalBytes += len(line.Content)
137 switch gl.Op {
138 case gitdiff.OpContext:
139 line.Kind = LineContext
140 line.OldLineNo = oldNo
141 line.NewLineNo = newNo
142 oldNo++
143 newNo++
144 case gitdiff.OpAdd:
145 line.Kind = LineAdd
146 line.NewLineNo = newNo
147 newNo++
148 case gitdiff.OpDelete:
149 line.Kind = LineDelete
150 line.OldLineNo = oldNo
151 oldNo++
152 }
153 h.Lines = append(h.Lines, line)
154 }
155 hunks = append(hunks, h)
156 }
157 out.Hunks = hunks
158 out.SizeBytes = totalBytes
159 return out
160 }
161
// trimNewline returns s without its final "\n" when it has one. At
// most one newline is removed and nothing else is touched, so a
// preceding "\r" is kept. A string that does not end in "\n"
// (including the empty string) is returned unchanged.
func trimNewline(s string) string {
	last := len(s) - 1
	if last < 0 || s[last] != '\n' {
		return s
	}
	return s[:last]
}
173