tenseleyflow/shithub / ddcd3b5

Browse files

Backfill GitHub submodule targets

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
ddcd3b5e284ab0b9737dfd96ef267bc3425de0f8
Parents
44daa8e
Tree
1dd374c

6 changed files

Status File + -
M docs/internal/code-tab.md 11 4
A internal/repos/git/remotes.go 38 0
A internal/repos/git/remotes_test.go 90 0
M internal/web/handlers/repo/code_tree_rows_test.go 60 0
M internal/web/handlers/repo/repo.go 8 6
M internal/web/handlers/repo/submodule_links.go 137 0
docs/internal/code-tab.md (modified)
@@ -73,10 +73,17 @@ on the rendered ref, the Code tab parses it once, matches entries by
7373
 submodule path, and links GitHub or configured shithub clone remotes to
7474
 the local `/{owner}/{repo}/tree/{gitlink-oid}` route when the target
7575
 repo has that commit. If the target repo exists locally but does not
76
-have the pinned commit object, the row links to the target repo's
77
-default Code tab so independently-created mirrors don't produce dead
78
-links. Unknown, external, absent, or malformed remotes stay as plain
79
-`name @ shortsha` rows.
76
+have the pinned commit object, and `.gitmodules` points at GitHub, the
77
+handler performs a bounded, non-forced fetch of heads/tags from that
78
+GitHub remote, re-checks the object, and then links to the exact
79
+detached-commit tree if the commit arrived. Successful backfills update the
80
+target repo's default-branch OID when that ref moved, then enqueue the
81
+same code-index and size-recalc maintenance used after pushes. Diverged
82
+local refs are never force-updated; on fetch failure or still-missing
83
+objects, the row links to the target repo's default Code tab so
84
+independently-created mirrors don't produce dead links. Unknown,
85
+external, absent, or malformed remotes stay as plain `name @ shortsha`
86
+rows.
8087
 
8188
 The S17 ship excludes the htmx-driven "last commit per entry" column
8289
 that the spec describes — an extra round-trip we can add later without
internal/repos/git/remotes.go (added)
@@ -0,0 +1,38 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package git
4
+
5
+import (
6
+	"context"
7
+	"errors"
8
+	"fmt"
9
+	"os/exec"
10
+	"strings"
11
+)
12
+
13
+// FetchRemoteHeadsAndTags imports public heads and tags from remoteURL into
14
+// gitDir without forcing local refs. It is intended for mirror/backfill flows:
15
+// if a local branch or tag has diverged, git rejects the update instead of
16
+// overwriting local history.
17
+func FetchRemoteHeadsAndTags(ctx context.Context, gitDir, remoteURL string) error {
18
+	if gitDir == "" {
19
+		return errors.New("git fetch: gitDir is required")
20
+	}
21
+	if strings.TrimSpace(remoteURL) == "" {
22
+		return errors.New("git fetch: remoteURL is required")
23
+	}
24
+	//nolint:gosec // G204: gitDir is RepoFS-derived at call sites; remoteURL is caller-allowlisted and passed as argv, not shell.
25
+	cmd := exec.CommandContext(ctx, "git", "-C", gitDir,
26
+		"fetch",
27
+		"--quiet",
28
+		"--no-recurse-submodules",
29
+		remoteURL,
30
+		"refs/heads/*:refs/heads/*",
31
+		"refs/tags/*:refs/tags/*",
32
+	)
33
+	out, err := cmd.CombinedOutput()
34
+	if err != nil {
35
+		return fmt.Errorf("git fetch remote refs: %w (%s)", err, strings.TrimSpace(string(out)))
36
+	}
37
+	return nil
38
+}
internal/repos/git/remotes_test.go (added)
@@ -0,0 +1,90 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package git_test
4
+
5
+import (
6
+	"context"
7
+	"strings"
8
+	"testing"
9
+	"time"
10
+
11
+	repogit "github.com/tenseleyFlow/shithub/internal/repos/git"
12
+)
13
+
14
+func TestFetchRemoteHeadsAndTags_ImportsReachableCommit(t *testing.T) {
15
+	t.Parallel()
16
+	ctx := context.Background()
17
+	source := initBare(t)
18
+	commit, err := repogit.InitialCommit{
19
+		GitDir:      source,
20
+		AuthorName:  "Alice Anderson",
21
+		AuthorEmail: "alice@example.com",
22
+		Message:     "source commit",
23
+		Branch:      "trunk",
24
+		When:        time.Date(2026, 5, 10, 12, 0, 0, 0, time.UTC),
25
+		Files:       []repogit.FileEntry{{Path: "README.md", Body: []byte("# source\n")}},
26
+	}.Build(ctx)
27
+	if err != nil {
28
+		t.Fatalf("Build source: %v", err)
29
+	}
30
+	dst := initBare(t)
31
+
32
+	if err := repogit.FetchRemoteHeadsAndTags(ctx, dst, source); err != nil {
33
+		t.Fatalf("FetchRemoteHeadsAndTags: %v", err)
34
+	}
35
+	exists, err := repogit.CommitExists(ctx, dst, commit)
36
+	if err != nil {
37
+		t.Fatalf("CommitExists: %v", err)
38
+	}
39
+	if !exists {
40
+		t.Fatalf("fetched repo is missing commit %s", commit)
41
+	}
42
+	out, err := gitCmd("-C", dst, "rev-parse", "refs/heads/trunk").CombinedOutput()
43
+	if err != nil {
44
+		t.Fatalf("rev-parse dst trunk: %v\n%s", err, out)
45
+	}
46
+	if got := strings.TrimSpace(string(out)); got != commit {
47
+		t.Fatalf("dst trunk = %q, want %q", got, commit)
48
+	}
49
+}
50
+
51
+func TestFetchRemoteHeadsAndTags_DoesNotForceDivergedBranch(t *testing.T) {
52
+	t.Parallel()
53
+	ctx := context.Background()
54
+	source := initBare(t)
55
+	if _, err := (repogit.InitialCommit{
56
+		GitDir:      source,
57
+		AuthorName:  "Alice Anderson",
58
+		AuthorEmail: "alice@example.com",
59
+		Message:     "source commit",
60
+		Branch:      "trunk",
61
+		When:        time.Date(2026, 5, 10, 12, 0, 0, 0, time.UTC),
62
+		Files:       []repogit.FileEntry{{Path: "README.md", Body: []byte("# source\n")}},
63
+	}).Build(ctx); err != nil {
64
+		t.Fatalf("Build source: %v", err)
65
+	}
66
+	dst := initBare(t)
67
+	dstCommit, err := repogit.InitialCommit{
68
+		GitDir:      dst,
69
+		AuthorName:  "Bob Brown",
70
+		AuthorEmail: "bob@example.com",
71
+		Message:     "local commit",
72
+		Branch:      "trunk",
73
+		When:        time.Date(2026, 5, 10, 12, 1, 0, 0, time.UTC),
74
+		Files:       []repogit.FileEntry{{Path: "README.md", Body: []byte("# local\n")}},
75
+	}.Build(ctx)
76
+	if err != nil {
77
+		t.Fatalf("Build dst: %v", err)
78
+	}
79
+
80
+	if err := repogit.FetchRemoteHeadsAndTags(ctx, dst, source); err == nil {
81
+		t.Fatal("FetchRemoteHeadsAndTags succeeded on a diverged branch; want rejection")
82
+	}
83
+	out, err := gitCmd("-C", dst, "rev-parse", "refs/heads/trunk").CombinedOutput()
84
+	if err != nil {
85
+		t.Fatalf("rev-parse dst trunk: %v\n%s", err, out)
86
+	}
87
+	if got := strings.TrimSpace(string(out)); got != dstCommit {
88
+		t.Fatalf("dst trunk changed to %q, want original %q", got, dstCommit)
89
+	}
90
+}
internal/web/handlers/repo/code_tree_rows_test.go (modified)
@@ -124,3 +124,63 @@ func TestSubmoduleRouteURL_UnsupportedRemotesStayPlain(t *testing.T) {
124124
 		})
125125
 	}
126126
 }
127
+
128
+func TestGitHubSubmoduleFetchURL_CanonicalizesSupportedRemotes(t *testing.T) {
129
+	t.Parallel()
130
+
131
+	for _, tt := range []struct {
132
+		name   string
133
+		remote string
134
+		want   string
135
+	}{
136
+		{
137
+			name:   "scp",
138
+			remote: "git@github.com:FortranGoingOnForty/afs-as.git",
139
+			want:   "https://github.com/FortranGoingOnForty/afs-as.git",
140
+		},
141
+		{
142
+			name:   "https",
143
+			remote: "https://github.com/tenseleyFlow/bencch.git",
144
+			want:   "https://github.com/tenseleyFlow/bencch.git",
145
+		},
146
+		{
147
+			name:   "ssh url",
148
+			remote: "ssh://git@github.com/FortranGoingOnForty/afs-ld.git",
149
+			want:   "https://github.com/FortranGoingOnForty/afs-ld.git",
150
+		},
151
+	} {
152
+		tt := tt
153
+		t.Run(tt.name, func(t *testing.T) {
154
+			t.Parallel()
155
+			got, ok := githubSubmoduleFetchURL(tt.remote)
156
+			if !ok {
157
+				t.Fatalf("githubSubmoduleFetchURL(%q) ok = false", tt.remote)
158
+			}
159
+			if got != tt.want {
160
+				t.Fatalf("githubSubmoduleFetchURL(%q) = %q, want %q", tt.remote, got, tt.want)
161
+			}
162
+		})
163
+	}
164
+}
165
+
166
+func TestGitHubSubmoduleFetchURL_RejectsUnsupportedRemotes(t *testing.T) {
167
+	t.Parallel()
168
+
169
+	for _, remote := range []string{
170
+		"https://shithub.sh/tenseleyFlow/bencch.git",
171
+		"../afs-ld.git",
172
+		"https://example.com/octo/lib.git",
173
+		"https://github.com/octo/nested/lib.git",
174
+		"https://github.com/%2F/lib.git",
175
+		"javascript:alert(1)",
176
+		"",
177
+	} {
178
+		remote := remote
179
+		t.Run(remote, func(t *testing.T) {
180
+			t.Parallel()
181
+			if got, ok := githubSubmoduleFetchURL(remote); ok || got != "" {
182
+				t.Fatalf("githubSubmoduleFetchURL(%q) = %q, %v; want empty, false", remote, got, ok)
183
+			}
184
+		})
185
+	}
186
+}
internal/web/handlers/repo/repo.go (modified)
@@ -17,6 +17,7 @@ import (
1717
 	"github.com/jackc/pgx/v5"
1818
 	"github.com/jackc/pgx/v5/pgtype"
1919
 	"github.com/jackc/pgx/v5/pgxpool"
20
+	"golang.org/x/sync/singleflight"
2021
 
2122
 	"github.com/tenseleyFlow/shithub/internal/auth/audit"
2223
 	"github.com/tenseleyFlow/shithub/internal/auth/policy"
@@ -78,12 +79,13 @@ type Deps struct {
7879
 
7980
 // Handlers is the registered handler set. Construct via New.
8081
 type Handlers struct {
81
-	d  Deps
82
-	rq *reposdb.Queries
83
-	uq *usersdb.Queries
84
-	iq *issuesdb.Queries
85
-	pq *pullsdb.Queries
86
-	cq *checksdb.Queries
82
+	d                  Deps
83
+	rq                 *reposdb.Queries
84
+	uq                 *usersdb.Queries
85
+	iq                 *issuesdb.Queries
86
+	pq                 *pullsdb.Queries
87
+	cq                 *checksdb.Queries
88
+	submoduleBackfills singleflight.Group
8789
 }
8890
 
8991
 // New constructs the handler set, validating Deps.