Go · 11479 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 package githttp
4
import (
	"context"
	"errors"
	"io"
	"net"
	"net/http"
	"os"
	"strconv"
	"strings"

	"github.com/go-chi/chi/v5"
	"github.com/jackc/pgx/v5/pgtype"

	"github.com/tenseleyFlow/shithub/internal/auth/policy"
	"github.com/tenseleyFlow/shithub/internal/git/protocol"
	"github.com/tenseleyFlow/shithub/internal/orgs"
	reposdb "github.com/tenseleyFlow/shithub/internal/repos/sqlc"
	"github.com/tenseleyFlow/shithub/internal/web/middleware"
)
23
24 // MountSmartHTTP registers the smart-HTTP routes on r. Caller is
25 // responsible for placing this group ahead of any conflicting routes
26 // (specifically, register before the /{owner}/{repo} two-segment route
27 // from S11 — `.git` URLs would otherwise be eaten by it).
28 //
29 // Caller is also responsible for stripping CSRF, response-compression,
30 // and request-timeout middleware from this group; see web/server.go.
31 func (h *Handlers) MountSmartHTTP(r chi.Router) {
32 r.Get("/{owner}/{repo}.git/info/refs", h.infoRefs)
33 r.Post("/{owner}/{repo}.git/git-upload-pack", h.uploadPack)
34 r.Post("/{owner}/{repo}.git/git-receive-pack", h.receivePack)
35 }
36
37 // infoRefs handles GET /{owner}/{repo}.git/info/refs?service=git-... .
38 //
39 // Per the smart-HTTP protocol the response body has shape:
40 //
41 // 001e# service=git-<svc>\n
42 // 0000
43 // <advertise-refs output from git>
44 //
45 // We stream the trailing advertise-refs body straight from the
46 // subprocess so a huge ref set doesn't buffer in memory.
47 func (h *Handlers) infoRefs(w http.ResponseWriter, r *http.Request) {
48 svc, ok := serviceFromQuery(r.URL.Query().Get("service"))
49 if !ok {
50 http.Error(w, "service query parameter required", http.StatusBadRequest)
51 return
52 }
53
54 row, allow := h.authorizeForService(w, r, svc)
55 if !allow {
56 return
57 }
58
59 gitDir, err := h.d.RepoFS.RepoPath(chi.URLParam(r, "owner"), row.Name)
60 if err != nil {
61 h.d.Logger.ErrorContext(r.Context(), "githttp: path", "error", err)
62 http.Error(w, "not found", http.StatusNotFound)
63 return
64 }
65
66 w.Header().Set("Content-Type", "application/x-"+string(svc)+"-advertisement")
67 setNoCacheHeaders(w)
68
69 if err := protocol.WriteServiceAdvertisement(w, string(svc)); err != nil {
70 h.d.Logger.ErrorContext(r.Context(), "githttp: pkt", "error", err)
71 return
72 }
73 cmd := protocol.Cmd(r.Context(), svc, gitDir, true, nil)
74 cmd.Stdout = w
75 stderr := protocol.DrainStderr(cmd)
76 if err := cmd.Run(); err != nil {
77 h.d.Logger.ErrorContext(r.Context(), "githttp: info/refs",
78 "error", err, "service", svc, "stderr", string(stderr()))
79 }
80 }
81
82 // uploadPack handles POST /{owner}/{repo}.git/git-upload-pack.
83 // Streams: req.Body → git-upload-pack stdin; git-upload-pack stdout → w.
84 func (h *Handlers) uploadPack(w http.ResponseWriter, r *http.Request) {
85 h.runPack(w, r, protocol.UploadPack)
86 }
87
88 // receivePack handles POST /{owner}/{repo}.git/git-receive-pack.
89 // Same streaming shape as uploadPack but with the SHITHUB_* env vars
90 // set so the post-receive hook (S14) can identify the actor.
91 func (h *Handlers) receivePack(w http.ResponseWriter, r *http.Request) {
92 h.runPack(w, r, protocol.ReceivePack)
93 }
94
95 // runPack is the shared body for both POST endpoints.
96 func (h *Handlers) runPack(w http.ResponseWriter, r *http.Request, svc protocol.Service) {
97 row, allow := h.authorizeForService(w, r, svc)
98 if !allow {
99 return
100 }
101
102 gitDir, err := h.d.RepoFS.RepoPath(chi.URLParam(r, "owner"), row.Name)
103 if err != nil {
104 h.d.Logger.ErrorContext(r.Context(), "githttp: path", "error", err)
105 http.Error(w, "not found", http.StatusNotFound)
106 return
107 }
108
109 // Hard cap on the request body. A 2 GiB+1 push reads up to the cap
110 // from req.Body and then errors; we surface the read error from
111 // the subprocess as a 413 IF we haven't started writing yet.
112 body := http.MaxBytesReader(w, r.Body, h.d.MaxPushBytes)
113 defer func() { _ = body.Close() }()
114
115 w.Header().Set("Content-Type", "application/x-"+string(svc)+"-result")
116 setNoCacheHeaders(w)
117
118 var env []string
119 if svc == protocol.ReceivePack {
120 env = h.hookEnv(r, row)
121 }
122 cmd := protocol.Cmd(r.Context(), svc, gitDir, false, env)
123 cmd.Stdin = body
124 cmd.Stdout = w
125 stderr := protocol.DrainStderr(cmd)
126 if err := cmd.Run(); err != nil {
127 // At this point we may have already written headers + bytes;
128 // surfacing 413/500 cleanly is best-effort.
129 h.d.Logger.ErrorContext(r.Context(), "githttp: pack",
130 "error", err, "service", svc, "stderr", string(stderr()))
131 }
132 }
133
134 // authorizeForService resolves the repo + checks visibility/permission.
135 // Returns the repo row + allow=true on success. On any failure writes
136 // the appropriate response (404, 401, 403, 410) and returns allow=false.
137 func (h *Handlers) authorizeForService(w http.ResponseWriter, r *http.Request, svc protocol.Service) (reposdb.Repo, bool) {
138 ownerName := chi.URLParam(r, "owner")
139 repoName := strings.TrimSuffix(chi.URLParam(r, "repo"), ".git")
140 // chi already strips .git for the URL pattern but we defensively
141 // trim again — the route is /{owner}/{repo}.git/info/refs, where
142 // chi captures `{repo}` WITHOUT the `.git` suffix.
143
144 // Owner can be a user OR an org; orgs.Resolve hits the principals
145 // table to dispatch on kind. Mirrors the same lookup the HTML repo
146 // handler uses (web/handlers/repo/repo.go::lookupRepoForViewer).
147 row, err := h.lookupRepo(r.Context(), ownerName, repoName)
148 if err != nil {
149 http.Error(w, "not found", http.StatusNotFound)
150 return reposdb.Repo{}, false
151 }
152
153 auth, authErr := h.resolveBasicAuth(r.Context(), r.Header.Get("Authorization"))
154 repoRef := policy.NewRepoRefFromRepo(row)
155 requireAuth := svc == protocol.ReceivePack || repoRef.IsPrivate()
156 if authErr != nil || (requireAuth && auth.Anonymous) {
157 writeChallenge(w)
158 return reposdb.Repo{}, false
159 }
160 if auth.ViaRunnerCheckout {
161 if svc != protocol.UploadPack {
162 writeGitErrorMessage(w, http.StatusForbidden,
163 "shithub Actions checkout credentials are read-only")
164 return reposdb.Repo{}, false
165 }
166 if auth.RunnerCheckoutRepo != row.ID {
167 http.Error(w, "not found", http.StatusNotFound)
168 return reposdb.Repo{}, false
169 }
170 return row, true
171 }
172
173 // Build the policy actor and ask Can(). Owner identity, collab role,
174 // archived/deleted gates all live in the policy package now.
175 var actor policy.Actor
176 if auth.Anonymous {
177 actor = policy.AnonymousActor()
178 } else {
179 actor = policy.UserActor(auth.UserID, auth.Username, false, false)
180 }
181 action := policy.ActionRepoRead
182 if svc == protocol.ReceivePack {
183 action = policy.ActionRepoWrite
184 }
185 decision := policy.Can(r.Context(), policy.Deps{Pool: h.d.Pool}, actor, action, repoRef)
186 if !decision.Allow {
187 switch decision.Code {
188 case policy.DenyRepoDeleted:
189 http.Error(w, "gone", http.StatusGone)
190 case policy.DenyArchived:
191 // User sees this directly in their git client.
192 writeGitErrorMessage(w, http.StatusForbidden,
193 "repository is archived; pushes are disabled")
194 case policy.DenyVisibility:
195 http.Error(w, "not found", http.StatusNotFound)
196 default:
197 http.Error(w, "forbidden", http.StatusForbidden)
198 }
199 return reposdb.Repo{}, false
200 }
201 return row, true
202 }
203
204 // hookEnv assembles the SHITHUB_* env vars to thread through git-
205 // receive-pack into S14's hooks.
206 func (h *Handlers) hookEnv(r *http.Request, row reposdb.Repo) []string {
207 auth, _ := h.resolveBasicAuth(r.Context(), r.Header.Get("Authorization"))
208 ownerName := h.ownerName(r.Context(), row)
209 return []string{
210 "SHITHUB_USER_ID=" + strconv.FormatInt(auth.UserID, 10),
211 "SHITHUB_USERNAME=" + auth.Username,
212 "SHITHUB_REPO_ID=" + strconv.FormatInt(row.ID, 10),
213 "SHITHUB_REPO_FULL_NAME=" + ownerName + "/" + row.Name,
214 "SHITHUB_PROTOCOL=http",
215 "SHITHUB_REMOTE_IP=" + clientIP(r),
216 "SHITHUB_REQUEST_ID=" + middleware.RequestIDFromContext(r.Context()),
217 // PATH must be inherited so the subprocess can find git's helpers.
218 "PATH=" + os.Getenv("PATH"),
219 }
220 }
221
222 // lookupRepo resolves a repo by owner-slug + name, dispatching on
223 // whether the owner-slug names a user or an org. Returns the same
224 // row shape regardless of owner kind; pgx.ErrNoRows-equivalent on
225 // any failure (so the caller writes a 404 without leaking which
226 // half failed — slug missing vs. repo missing).
227 func (h *Handlers) lookupRepo(ctx context.Context, ownerName, repoName string) (reposdb.Repo, error) {
228 principal, err := orgs.Resolve(ctx, h.d.Pool, ownerName)
229 if err != nil {
230 return reposdb.Repo{}, err
231 }
232 switch principal.Kind {
233 case orgs.PrincipalUser:
234 return h.rq.GetRepoByOwnerUserAndName(ctx, h.d.Pool, reposdb.GetRepoByOwnerUserAndNameParams{
235 OwnerUserID: pgtype.Int8{Int64: principal.ID, Valid: true},
236 Name: repoName,
237 })
238 case orgs.PrincipalOrg:
239 return h.rq.GetRepoByOwnerOrgAndName(ctx, h.d.Pool, reposdb.GetRepoByOwnerOrgAndNameParams{
240 OwnerOrgID: pgtype.Int8{Int64: principal.ID, Valid: true},
241 Name: repoName,
242 })
243 default:
244 return reposdb.Repo{}, errOwnerKindUnknown
245 }
246 }
247
248 // ownerName returns the owner's display slug (username for users,
249 // slug for orgs). Used to compose SHITHUB_REPO_FULL_NAME for hooks.
250 // Returns "" on any lookup failure — callers tolerate the empty
251 // string rather than failing the push because of a metadata gap.
252 func (h *Handlers) ownerName(ctx context.Context, row reposdb.Repo) string {
253 switch {
254 case row.OwnerUserID.Valid:
255 u, err := h.uq.GetUserByID(ctx, h.d.Pool, row.OwnerUserID.Int64)
256 if err != nil {
257 return ""
258 }
259 return u.Username
260 case row.OwnerOrgID.Valid:
261 o, err := h.oq.GetOrgByID(ctx, h.d.Pool, row.OwnerOrgID.Int64)
262 if err != nil {
263 return ""
264 }
265 return o.Slug
266 }
267 return ""
268 }
269
// errOwnerKindUnknown is returned by lookupRepo when the resolved owner
// principal is neither a user nor an org.
var errOwnerKindUnknown = errors.New("githttp: owner principal kind not user/org")
271
272 // serviceFromQuery maps the ?service=... value to our typed enum.
273 func serviceFromQuery(s string) (protocol.Service, bool) {
274 switch s {
275 case string(protocol.UploadPack):
276 return protocol.UploadPack, true
277 case string(protocol.ReceivePack):
278 return protocol.ReceivePack, true
279 }
280 return "", false
281 }
282
// writeChallenge answers with 401 Unauthorized and a Basic
// WWW-Authenticate challenge for the "shithub" realm, followed by a
// short plain-text body.
func writeChallenge(w http.ResponseWriter) {
	const body = "authentication required\n"

	headers := w.Header()
	headers.Set("Content-Type", "text/plain; charset=utf-8")
	headers.Set("WWW-Authenticate", `Basic realm="shithub"`)
	w.WriteHeader(http.StatusUnauthorized)

	// The write error is ignored: nothing useful can be done once the
	// status line is out.
	_, _ = w.Write([]byte(body))
}
290
// writeGitErrorMessage sends status with msg, newline-terminated, as a
// plain-text body. It is meant for responses where the pack stream has
// not started yet; in that case the text shows up verbatim in
// `git push`'s stderr.
func writeGitErrorMessage(w http.ResponseWriter, status int, msg string) {
	headers := w.Header()
	headers.Set("Content-Type", "text/plain; charset=utf-8")
	w.WriteHeader(status)

	line := msg + "\n"
	// The write error is ignored: the status is already committed.
	_, _ = io.WriteString(w, line)
}
299
// setNoCacheHeaders marks the response as uncacheable, using the same
// Expires / Pragma / Cache-Control values the canonical git
// http-backend emits. Git endpoints must not be cached: the stream is
// dynamic, and intermediaries should not second-guess it.
func setNoCacheHeaders(w http.ResponseWriter) {
	headers := w.Header()
	for _, kv := range [...][2]string{
		{"Expires", "Fri, 01 Jan 1980 00:00:00 GMT"},
		{"Pragma", "no-cache"},
		{"Cache-Control", "no-cache, max-age=0, must-revalidate"},
	} {
		headers.Set(kv[0], kv[1])
	}
}
308
309 // clientIP returns the request's source address. RealIP middleware in
310 // the global stack populates X-Real-IP-style headers; we read those
311 // first, then fall back to RemoteAddr's host part.
312 func clientIP(r *http.Request) string {
313 if ip := middleware.RealIPFromContext(r.Context(), r); ip != "" {
314 return ip
315 }
316 if i := strings.LastIndex(r.RemoteAddr, ":"); i > 0 {
317 return r.RemoteAddr[:i]
318 }
319 return r.RemoteAddr
320 }
321