Go · 11139 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 package git
4
5 import (
6 "bytes"
7 "context"
8 "errors"
9 "fmt"
10 "os/exec"
11 "strconv"
12 "strings"
13 "time"
14 )
15
// Commit describes a single entry of the commits list. List queries fill
// in only what `git log --format=...` can produce cheaply; per-file
// statistics are carried by CommitDetail instead.
type Commit struct {
	// OID is the full 40-char SHA; ShortOID is git's --abbrev result,
	// typically 7 chars.
	OID, ShortOID string
	// Author identity and author timestamp. The parsers in this file
	// convert the timestamp to UTC.
	AuthorName, AuthorEmail string
	AuthorWhen              time.Time
	// Subject is the commit title. Body is empty for the list view and
	// populated by GetCommit.
	Subject, Body string
}
28
// LogOptions configures the commits-list query run by Log. The zero value
// of every tuning field is usable:
//   - MaxCount 0 → 30
//   - Skip 0 → 0
//   - Path "" → log over all paths
//   - Author "" → no author filter
//   - Since/Until zero → no date filter
type LogOptions struct {
	// Ref is the revision handed to git log.
	Ref string
	// MaxCount is the page size and Skip the number of leading commits
	// to pass over.
	MaxCount, Skip int
	// Path limits the log to one path; Author is passed as --author.
	Path, Author string
	// Since and Until bound the log by date when non-zero.
	Since, Until time.Time
	// Follow requests --follow; only meaningful when Path is set.
	Follow bool
}
45
46 // Log returns Commits for the requested page. The format string packs
47 // every field we render onto one line per commit, separated by ASCII
48 // unit-separators (\x1f), with body terminated by ASCII record-separator
49 // (\x1e) so newlines inside the body don't break parsing.
50 func Log(ctx context.Context, gitDir string, o LogOptions) ([]Commit, error) {
51 if o.MaxCount <= 0 {
52 o.MaxCount = 30
53 }
54 const sep = "\x1f"
55 const recordEnd = "\x1e"
56 // %H %h %an %ae %at %s %b — body last so embedded \x1f in body
57 // don't confuse SplitN.
58 format := strings.Join([]string{"%H", "%h", "%an", "%ae", "%at", "%s"}, sep) + sep + "%b" + recordEnd
59
60 args := []string{
61 "-C", gitDir, "log",
62 "--max-count=" + strconv.Itoa(o.MaxCount),
63 "--skip=" + strconv.Itoa(o.Skip),
64 "--format=" + format,
65 }
66 if o.Author != "" {
67 args = append(args, "--author="+o.Author)
68 }
69 if !o.Since.IsZero() {
70 args = append(args, "--since="+o.Since.UTC().Format(time.RFC3339))
71 }
72 if !o.Until.IsZero() {
73 args = append(args, "--until="+o.Until.UTC().Format(time.RFC3339))
74 }
75 args = append(args, o.Ref)
76 if o.Path != "" {
77 if o.Follow {
78 args = append(args, "--follow")
79 }
80 args = append(args, "--", o.Path)
81 }
82
83 cmd := exec.CommandContext(ctx, "git", args...)
84 out, err := cmd.Output()
85 if err != nil {
86 return nil, wrapExecErr(err)
87 }
88 return parseLogOutput(out)
89 }
90
91 // CountCommits returns the number of commits reachable from ref.
92 func CountCommits(ctx context.Context, gitDir, ref string) (int, error) {
93 cmd := exec.CommandContext(ctx, "git", "-C", gitDir, "rev-list", "--count", ref)
94 out, err := cmd.Output()
95 if err != nil {
96 return 0, wrapExecErr(err)
97 }
98 count, err := strconv.Atoi(strings.TrimSpace(string(out)))
99 if err != nil {
100 return 0, fmt.Errorf("git rev-list count: %w", err)
101 }
102 return count, nil
103 }
104
105 // CommitExists reports whether sha resolves to a commit in this repository.
106 func CommitExists(ctx context.Context, gitDir, sha string) (bool, error) {
107 cmd := exec.CommandContext(ctx, "git", "-C", gitDir, "cat-file", "-e", sha+"^{commit}")
108 if _, err := cmd.Output(); err != nil {
109 var ee *exec.ExitError
110 if errors.As(err, &ee) && isMissingGitObjectError(ee.Stderr) {
111 return false, nil
112 }
113 return false, wrapExecErr(err)
114 }
115 return true, nil
116 }
117
118 // WeeklyCommitActivity counts commits by UTC week, oldest bucket first.
119 // It is intentionally small and read-only so list surfaces can render
120 // GitHub-style activity sparklines without parsing full commit rows.
121 func WeeklyCommitActivity(ctx context.Context, gitDir, ref string, bucketCount int, now time.Time) ([]int, error) {
122 if bucketCount <= 0 {
123 bucketCount = 52
124 }
125 if ref == "" {
126 ref = "HEAD"
127 }
128 if now.IsZero() {
129 now = time.Now()
130 }
131 weekStart := startOfUTCWeek(now.UTC())
132 start := weekStart.AddDate(0, 0, -7*(bucketCount-1))
133 end := weekStart.AddDate(0, 0, 7)
134
135 //nolint:gosec // G204: gitDir is constrained by RepoFS path validation; ref is an argv value.
136 cmd := exec.CommandContext(ctx, "git", "-C", gitDir, "log",
137 "--format=%ct",
138 "--since="+start.Format(time.RFC3339),
139 "--until="+end.Format(time.RFC3339),
140 ref,
141 "--",
142 )
143 out, err := cmd.Output()
144 if err != nil {
145 return nil, wrapExecErr(err)
146 }
147
148 buckets := make([]int, bucketCount)
149 for _, raw := range strings.Fields(string(out)) {
150 ts, err := strconv.ParseInt(raw, 10, 64)
151 if err != nil {
152 continue
153 }
154 when := time.Unix(ts, 0).UTC()
155 idx := int(when.Sub(start) / (7 * 24 * time.Hour))
156 if idx >= 0 && idx < bucketCount {
157 buckets[idx]++
158 }
159 }
160 return buckets, nil
161 }
162
// startOfUTCWeek returns 00:00:00 UTC on the Monday of the UTC week that
// contains t. t is converted to UTC first, so an instant expressed in
// another zone is assigned to the week of its UTC calendar date rather
// than its local one.
func startOfUTCWeek(t time.Time) time.Time {
	t = t.UTC()
	midnight := time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, time.UTC)
	// Weekday numbers Sunday as 0; shift so Monday is 0 and Sunday is 6.
	daysSinceMonday := (int(midnight.Weekday()) + 6) % 7
	return midnight.AddDate(0, 0, -daysSinceMonday)
}
168
169 // parseLogOutput unpacks the format above into Commits. Stable: the
170 // recordEnd lets us split records first, then unpack each.
171 func parseLogOutput(out []byte) ([]Commit, error) {
172 const sep = "\x1f"
173 const recordEnd = "\x1e"
174 body := bytes.TrimRight(out, "\n")
175 records := bytes.Split(body, []byte(recordEnd))
176 commits := make([]Commit, 0, len(records))
177 for _, rec := range records {
178 rec = bytes.TrimLeft(rec, "\n")
179 if len(rec) == 0 {
180 continue
181 }
182 parts := strings.SplitN(string(rec), sep, 7)
183 if len(parts) < 7 {
184 continue
185 }
186 ts, _ := strconv.ParseInt(parts[4], 10, 64)
187 commits = append(commits, Commit{
188 OID: parts[0],
189 ShortOID: parts[1],
190 AuthorName: parts[2],
191 AuthorEmail: parts[3],
192 AuthorWhen: time.Unix(ts, 0).UTC(),
193 Subject: parts[5],
194 Body: strings.TrimSpace(parts[6]),
195 })
196 }
197 return commits, nil
198 }
199
200 // CommitDetail holds the single-commit-view data: the Commit plus
201 // committer fields, parents, tree OID, and the per-file change list.
202 type CommitDetail struct {
203 Commit
204 CommitterName string
205 CommitterEmail string
206 CommitterWhen time.Time
207 Parents []string // each is a full OID
208 TreeOID string
209 Files []FileChange
210 }
211
// FileChange describes one changed file of a commit, combining what
// `git diff-tree -r --no-commit-id` reports via --name-status and
// --numstat.
type FileChange struct {
	// Status is git's letter code: A added, M modified, D deleted,
	// R renamed, C copied, T type-changed. Path is the file's path and
	// OldPath its previous path, populated for R/C only.
	Status, Path, OldPath string
	// Insert and Delete are the added and removed line counts.
	Insert, Delete int
	// Binary marks files for which git reports no line counts.
	Binary bool
}
223
224 // GetCommit returns the full detail for one SHA. Two commands: one for
225 // the commit metadata + parents (cheap, single-line), one for the file
226 // stats. Combining them in one --pretty walk would save a fork but
227 // complicate parsing.
228 func GetCommit(ctx context.Context, gitDir, sha string) (CommitDetail, error) {
229 const sep = "\x1f"
230 // %H %h %an %ae %at %cn %ce %ct %P %T %s\n%b — single record.
231 format := strings.Join([]string{
232 "%H", "%h", "%an", "%ae", "%at",
233 "%cn", "%ce", "%ct", "%P", "%T", "%s",
234 }, sep) + sep + "%B"
235
236 cmd := exec.CommandContext(ctx, "git", "-C", gitDir,
237 "log", "-1", "--format="+format, sha, "--")
238 out, err := cmd.Output()
239 if err != nil {
240 var ee *exec.ExitError
241 if errors.As(err, &ee) && isMissingGitObjectError(ee.Stderr) {
242 return CommitDetail{}, ErrCommitNotFound
243 }
244 return CommitDetail{}, wrapExecErr(err)
245 }
246 parts := strings.SplitN(string(bytes.TrimRight(out, "\n")), sep, 12)
247 if len(parts) < 12 {
248 return CommitDetail{}, fmt.Errorf("git log: malformed output: %q", string(out))
249 }
250 at, _ := strconv.ParseInt(parts[4], 10, 64)
251 ct, _ := strconv.ParseInt(parts[7], 10, 64)
252 parentList := []string{}
253 if pp := strings.TrimSpace(parts[8]); pp != "" {
254 parentList = strings.Fields(pp)
255 }
256
257 // %B is "subject + blank + body"; we already have Subject from %s.
258 rawMessage := parts[11]
259 body := strings.TrimSpace(strings.TrimPrefix(rawMessage, parts[10]))
260
261 cd := CommitDetail{
262 Commit: Commit{
263 OID: parts[0],
264 ShortOID: parts[1],
265 AuthorName: parts[2],
266 AuthorEmail: parts[3],
267 AuthorWhen: time.Unix(at, 0).UTC(),
268 Subject: parts[10],
269 Body: body,
270 },
271 CommitterName: parts[5],
272 CommitterEmail: parts[6],
273 CommitterWhen: time.Unix(ct, 0).UTC(),
274 Parents: parentList,
275 TreeOID: parts[9],
276 }
277
278 files, err := DiffStat(ctx, gitDir, sha)
279 if err != nil {
280 return cd, fmt.Errorf("diff-stat: %w", err)
281 }
282 cd.Files = files
283 return cd, nil
284 }
285
var (
	// ErrCommitNotFound is the sentinel GetCommit returns when the given
	// SHA does not resolve to a commit in the repository. Match it with
	// errors.Is.
	ErrCommitNotFound = errors.New("git: commit not found")
)
289
// isMissingGitObjectError reports whether git's stderr says that a
// revision or object name could not be resolved, as opposed to some other
// failure. Matching is by substring on git's messages:
//   - "unknown revision": ambiguous-argument failure when no "--" follows
//   - "bad revision": unresolvable revision when "--" follows it
//   - "Not a valid object name": name resolution failure (e.g. cat-file)
//   - "bad object": well-formed object ID that is not in the repository
func isMissingGitObjectError(stderr []byte) bool {
	markers := [...]string{
		"unknown revision",
		"bad revision",
		"Not a valid object name",
		"bad object",
	}
	for _, m := range markers {
		if bytes.Contains(stderr, []byte(m)) {
			return true
		}
	}
	return false
}
295
296 // DiffStat returns the per-file change list for a SHA. We run two
297 // commands: --name-status for the letter and rename pairs, --numstat
298 // for +/- counts. Two passes is two forks, but the parsing stays
299 // simple; combining via --raw is harder to read.
300 func DiffStat(ctx context.Context, gitDir, sha string) ([]FileChange, error) {
301 // --name-status produces: "M\tpath" or "R100\told\tnew" etc.
302 // `--root` makes the initial (parentless) commit show its files
303 // against the empty tree; without it diff-tree emits nothing for
304 // root commits.
305 nsOut, err := exec.CommandContext(ctx, "git", "-C", gitDir,
306 "diff-tree", "-r", "--root", "--name-status", "--no-commit-id", "-M", "-C", sha).Output()
307 if err != nil {
308 return nil, wrapExecErr(err)
309 }
310 // --numstat: "<add>\t<del>\t<path>" or "-\t-\t<path>" for binary.
311 numOut, err := exec.CommandContext(ctx, "git", "-C", gitDir,
312 "diff-tree", "-r", "--root", "--numstat", "--no-commit-id", "-M", "-C", sha).Output()
313 if err != nil {
314 return nil, wrapExecErr(err)
315 }
316
317 type ns struct {
318 status, oldPath, path string
319 }
320 var nsRows []ns
321 for _, line := range strings.Split(strings.TrimRight(string(nsOut), "\n"), "\n") {
322 if line == "" {
323 continue
324 }
325 fields := strings.Split(line, "\t")
326 if len(fields) < 2 {
327 continue
328 }
329 row := ns{status: fields[0]}
330 // Rename/copy: status starts with R or C followed by a similarity
331 // number; field layout is "Rxxx\tOLD\tNEW" or "Cxxx\tOLD\tNEW".
332 if (strings.HasPrefix(row.status, "R") || strings.HasPrefix(row.status, "C")) && len(fields) == 3 {
333 row.oldPath = fields[1]
334 row.path = fields[2]
335 row.status = string(row.status[0])
336 } else {
337 row.path = fields[len(fields)-1]
338 }
339 nsRows = append(nsRows, row)
340 }
341
342 type stat struct {
343 ins, del int
344 binary bool
345 }
346 statByPath := make(map[string]stat, len(nsRows))
347 for _, line := range strings.Split(strings.TrimRight(string(numOut), "\n"), "\n") {
348 if line == "" {
349 continue
350 }
351 fields := strings.Split(line, "\t")
352 if len(fields) < 3 {
353 continue
354 }
355 path := fields[len(fields)-1]
356 if fields[0] == "-" {
357 statByPath[path] = stat{binary: true}
358 continue
359 }
360 ins, _ := strconv.Atoi(fields[0])
361 del, _ := strconv.Atoi(fields[1])
362 statByPath[path] = stat{ins: ins, del: del}
363 }
364
365 out := make([]FileChange, 0, len(nsRows))
366 for _, n := range nsRows {
367 s := statByPath[n.path]
368 out = append(out, FileChange{
369 Status: n.status, Path: n.path, OldPath: n.oldPath,
370 Insert: s.ins, Delete: s.del, Binary: s.binary,
371 })
372 }
373 return out, nil
374 }
375