tenseleyflow/shithub / 2e3ba81

Browse files

S12: smart-HTTP route handlers (info/refs + upload-pack + receive-pack); inline owner-only authz

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
2e3ba810c3d6440c823681b33dcdb0bf9bd20d0b
Parents
cd1c278
Tree
1af45b9

1 changed file

StatusFile+-
A internal/web/handlers/githttp/handler.go 260 0
internal/web/handlers/githttp/handler.goadded
@@ -0,0 +1,260 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package githttp
4
+
5
import (
	"compress/gzip"
	"io"
	"net"
	"net/http"
	"os"
	"strconv"
	"strings"

	"github.com/go-chi/chi/v5"
	"github.com/jackc/pgx/v5/pgtype"

	"github.com/tenseleyFlow/shithub/internal/git/protocol"
	reposdb "github.com/tenseleyFlow/shithub/internal/repos/sqlc"
	"github.com/tenseleyFlow/shithub/internal/web/middleware"
)
19
+
20
+// MountSmartHTTP registers the smart-HTTP routes on r. Caller is
21
+// responsible for placing this group ahead of any conflicting routes
22
+// (specifically, register before the /{owner}/{repo} two-segment route
23
+// from S11 — `.git` URLs would otherwise be eaten by it).
24
+//
25
+// Caller is also responsible for stripping CSRF, response-compression,
26
+// and request-timeout middleware from this group; see web/server.go.
27
+func (h *Handlers) MountSmartHTTP(r chi.Router) {
28
+	r.Get("/{owner}/{repo}.git/info/refs", h.infoRefs)
29
+	r.Post("/{owner}/{repo}.git/git-upload-pack", h.uploadPack)
30
+	r.Post("/{owner}/{repo}.git/git-receive-pack", h.receivePack)
31
+}
32
+
33
+// infoRefs handles GET /{owner}/{repo}.git/info/refs?service=git-... .
34
+//
35
+// Per the smart-HTTP protocol the response body has shape:
36
+//
37
+//	001e# service=git-<svc>\n
38
+//	0000
39
+//	<advertise-refs output from git>
40
+//
41
+// We stream the trailing advertise-refs body straight from the
42
+// subprocess so a huge ref set doesn't buffer in memory.
43
+func (h *Handlers) infoRefs(w http.ResponseWriter, r *http.Request) {
44
+	svc, ok := serviceFromQuery(r.URL.Query().Get("service"))
45
+	if !ok {
46
+		http.Error(w, "service query parameter required", http.StatusBadRequest)
47
+		return
48
+	}
49
+
50
+	row, allow := h.authorizeForService(w, r, svc)
51
+	if !allow {
52
+		return
53
+	}
54
+
55
+	gitDir, err := h.d.RepoFS.RepoPath(chi.URLParam(r, "owner"), row.Name)
56
+	if err != nil {
57
+		h.d.Logger.ErrorContext(r.Context(), "githttp: path", "error", err)
58
+		http.Error(w, "not found", http.StatusNotFound)
59
+		return
60
+	}
61
+
62
+	w.Header().Set("Content-Type", "application/x-"+string(svc)+"-advertisement")
63
+	setNoCacheHeaders(w)
64
+
65
+	if err := protocol.WriteServiceAdvertisement(w, string(svc)); err != nil {
66
+		h.d.Logger.ErrorContext(r.Context(), "githttp: pkt", "error", err)
67
+		return
68
+	}
69
+	cmd := protocol.Cmd(r.Context(), svc, gitDir, true, nil)
70
+	cmd.Stdout = w
71
+	stderr := protocol.DrainStderr(cmd)
72
+	if err := cmd.Run(); err != nil {
73
+		h.d.Logger.ErrorContext(r.Context(), "githttp: info/refs",
74
+			"error", err, "service", svc, "stderr", string(stderr()))
75
+	}
76
+}
77
+
78
+// uploadPack handles POST /{owner}/{repo}.git/git-upload-pack.
79
+// Streams: req.Body → git-upload-pack stdin; git-upload-pack stdout → w.
80
+func (h *Handlers) uploadPack(w http.ResponseWriter, r *http.Request) {
81
+	h.runPack(w, r, protocol.UploadPack)
82
+}
83
+
84
+// receivePack handles POST /{owner}/{repo}.git/git-receive-pack.
85
+// Same streaming shape as uploadPack but with the SHITHUB_* env vars
86
+// set so the post-receive hook (S14) can identify the actor.
87
+func (h *Handlers) receivePack(w http.ResponseWriter, r *http.Request) {
88
+	h.runPack(w, r, protocol.ReceivePack)
89
+}
90
+
91
+// runPack is the shared body for both POST endpoints.
92
+func (h *Handlers) runPack(w http.ResponseWriter, r *http.Request, svc protocol.Service) {
93
+	row, allow := h.authorizeForService(w, r, svc)
94
+	if !allow {
95
+		return
96
+	}
97
+
98
+	gitDir, err := h.d.RepoFS.RepoPath(chi.URLParam(r, "owner"), row.Name)
99
+	if err != nil {
100
+		h.d.Logger.ErrorContext(r.Context(), "githttp: path", "error", err)
101
+		http.Error(w, "not found", http.StatusNotFound)
102
+		return
103
+	}
104
+
105
+	// Hard cap on the request body. A 2 GiB+1 push reads up to the cap
106
+	// from req.Body and then errors; we surface the read error from
107
+	// the subprocess as a 413 IF we haven't started writing yet.
108
+	body := http.MaxBytesReader(w, r.Body, h.d.MaxPushBytes)
109
+	defer func() { _ = body.Close() }()
110
+
111
+	w.Header().Set("Content-Type", "application/x-"+string(svc)+"-result")
112
+	setNoCacheHeaders(w)
113
+
114
+	var env []string
115
+	if svc == protocol.ReceivePack {
116
+		env = h.hookEnv(r, row)
117
+	}
118
+	cmd := protocol.Cmd(r.Context(), svc, gitDir, false, env)
119
+	cmd.Stdin = body
120
+	cmd.Stdout = w
121
+	stderr := protocol.DrainStderr(cmd)
122
+	if err := cmd.Run(); err != nil {
123
+		// At this point we may have already written headers + bytes;
124
+		// surfacing 413/500 cleanly is best-effort.
125
+		h.d.Logger.ErrorContext(r.Context(), "githttp: pack",
126
+			"error", err, "service", svc, "stderr", string(stderr()))
127
+	}
128
+}
129
+
130
+// authorizeForService resolves the repo + checks visibility/permission.
131
+// Returns the repo row + allow=true on success. On any failure writes
132
+// the appropriate response (404, 401, 403, 410) and returns allow=false.
133
+func (h *Handlers) authorizeForService(w http.ResponseWriter, r *http.Request, svc protocol.Service) (reposdb.Repo, bool) {
134
+	ownerName := chi.URLParam(r, "owner")
135
+	repoName := strings.TrimSuffix(chi.URLParam(r, "repo"), ".git")
136
+	// chi already strips .git for the URL pattern but we defensively
137
+	// trim again — the route is /{owner}/{repo}.git/info/refs, where
138
+	// chi captures `{repo}` WITHOUT the `.git` suffix.
139
+
140
+	owner, err := h.uq.GetUserByUsername(r.Context(), h.d.Pool, ownerName)
141
+	if err != nil {
142
+		http.Error(w, "not found", http.StatusNotFound)
143
+		return reposdb.Repo{}, false
144
+	}
145
+	row, err := h.rq.GetRepoByOwnerUserAndName(r.Context(), h.d.Pool, reposdb.GetRepoByOwnerUserAndNameParams{
146
+		OwnerUserID: pgtype.Int8{Int64: owner.ID, Valid: true},
147
+		Name:        repoName,
148
+	})
149
+	if err != nil {
150
+		http.Error(w, "not found", http.StatusNotFound)
151
+		return reposdb.Repo{}, false
152
+	}
153
+
154
+	auth, authErr := h.resolveBasicAuth(r.Context(), r.Header.Get("Authorization"))
155
+	requireAuth := svc == protocol.ReceivePack || row.Visibility == reposdb.RepoVisibilityPrivate
156
+	if authErr != nil || (requireAuth && auth.Anonymous) {
157
+		writeChallenge(w)
158
+		return reposdb.Repo{}, false
159
+	}
160
+
161
+	// Permission check — inline; S15 replaces this with the policy package.
162
+	// V1: only the owner can read a private repo or write to any repo.
163
+	if !auth.Anonymous && auth.UserID != owner.ID {
164
+		http.Error(w, "forbidden", http.StatusForbidden)
165
+		return reposdb.Repo{}, false
166
+	}
167
+	if svc == protocol.ReceivePack {
168
+		if row.IsArchived {
169
+			writeGitErrorMessage(w, http.StatusForbidden,
170
+				"repository is archived; pushes are disabled")
171
+			return reposdb.Repo{}, false
172
+		}
173
+		if row.DeletedAt.Valid {
174
+			http.Error(w, "gone", http.StatusGone)
175
+			return reposdb.Repo{}, false
176
+		}
177
+	}
178
+	return row, true
179
+}
180
+
181
+// hookEnv assembles the SHITHUB_* env vars to thread through git-
182
+// receive-pack into S14's hooks.
183
+func (h *Handlers) hookEnv(r *http.Request, row reposdb.Repo) []string {
184
+	auth, _ := h.resolveBasicAuth(r.Context(), r.Header.Get("Authorization"))
185
+	owner, err := h.uq.GetUserByID(r.Context(), h.d.Pool, ownerIDFromRow(row))
186
+	ownerName := ""
187
+	if err == nil {
188
+		ownerName = owner.Username
189
+	}
190
+	return []string{
191
+		"SHITHUB_USER_ID=" + strconv.FormatInt(auth.UserID, 10),
192
+		"SHITHUB_USERNAME=" + auth.Username,
193
+		"SHITHUB_REPO_ID=" + strconv.FormatInt(row.ID, 10),
194
+		"SHITHUB_REPO_FULL_NAME=" + ownerName + "/" + row.Name,
195
+		"SHITHUB_PROTOCOL=http",
196
+		"SHITHUB_REMOTE_IP=" + clientIP(r),
197
+		"SHITHUB_REQUEST_ID=" + middleware.RequestIDFromContext(r.Context()),
198
+		// PATH must be inherited so the subprocess can find git's helpers.
199
+		"PATH=" + os.Getenv("PATH"),
200
+	}
201
+}
202
+
203
+// ownerIDFromRow extracts the user-owner ID; orgs come in S31. Until
204
+// then we trust the XOR check in the migration.
205
+func ownerIDFromRow(row reposdb.Repo) int64 {
206
+	if row.OwnerUserID.Valid {
207
+		return row.OwnerUserID.Int64
208
+	}
209
+	return 0
210
+}
211
+
212
+// serviceFromQuery maps the ?service=... value to our typed enum.
213
+func serviceFromQuery(s string) (protocol.Service, bool) {
214
+	switch s {
215
+	case string(protocol.UploadPack):
216
+		return protocol.UploadPack, true
217
+	case string(protocol.ReceivePack):
218
+		return protocol.ReceivePack, true
219
+	}
220
+	return "", false
221
+}
222
+
223
// writeChallenge answers with 401 Unauthorized, a Basic
// WWW-Authenticate challenge for realm "shithub", and a short
// plain-text body.
func writeChallenge(w http.ResponseWriter) {
	const body = "authentication required\n"

	hdr := w.Header()
	hdr.Set("Content-Type", "text/plain; charset=utf-8")
	hdr.Set("WWW-Authenticate", `Basic realm="shithub"`)

	w.WriteHeader(http.StatusUnauthorized)
	// Nothing useful can be done if the client has already gone away.
	_, _ = w.Write([]byte(body))
}
230
+
231
// writeGitErrorMessage sends status with msg plus a trailing newline as
// a plain-text body. It is meant for responses sent before any pack
// data has been streamed; the intent is that the git client shows the
// text to the user (NOTE(review): exact client rendering not verified
// here).
func writeGitErrorMessage(w http.ResponseWriter, status int, msg string) {
	line := msg + "\n"

	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	w.WriteHeader(status)
	// Nothing useful can be done if the client has already gone away.
	_, _ = io.WriteString(w, line)
}
239
+
240
// setNoCacheHeaders marks the response as uncacheable by setting
// Expires, Pragma and Cache-Control, mirroring what the canonical git
// http-backend sends. Git responses are generated per request, so
// intermediaries must not serve stored copies.
func setNoCacheHeaders(w http.ResponseWriter) {
	noCache := [...][2]string{
		{"Expires", "Fri, 01 Jan 1980 00:00:00 GMT"},
		{"Pragma", "no-cache"},
		{"Cache-Control", "no-cache, max-age=0, must-revalidate"},
	}

	hdr := w.Header()
	for _, kv := range noCache {
		hdr.Set(kv[0], kv[1])
	}
}
248
+
249
+// clientIP returns the request's source address. RealIP middleware in
250
+// the global stack populates X-Real-IP-style headers; we read those
251
+// first, then fall back to RemoteAddr's host part.
252
+func clientIP(r *http.Request) string {
253
+	if ip := middleware.RealIPFromContext(r.Context(), r); ip != "" {
254
+		return ip
255
+	}
256
+	if i := strings.LastIndex(r.RemoteAddr, ":"); i > 0 {
257
+		return r.RemoteAddr[:i]
258
+	}
259
+	return r.RemoteAddr
260
+}