Go · 14746 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 package repo
4
5 import (
6 "bytes"
7 "errors"
8 "fmt"
9 "html/template"
10 "net/http"
11 "path"
12 "strings"
13
14 "github.com/go-chi/chi/v5"
15 "github.com/jackc/pgx/v5/pgtype"
16
17 "github.com/tenseleyFlow/shithub/internal/auth/policy"
18 mdrender "github.com/tenseleyFlow/shithub/internal/markdown"
19 "github.com/tenseleyFlow/shithub/internal/repos/finder"
20 repogit "github.com/tenseleyFlow/shithub/internal/repos/git"
21 "github.com/tenseleyFlow/shithub/internal/repos/highlight"
22 reposdb "github.com/tenseleyFlow/shithub/internal/repos/sqlc"
23 "github.com/tenseleyFlow/shithub/internal/web/middleware"
24 )
25
26 // MountCode registers the code-tab routes:
27 //
28 // GET /{owner}/{repo}/tree/*
29 // GET /{owner}/{repo}/blob/*
30 // GET /{owner}/{repo}/raw/*
31 // GET /{owner}/{repo}/find/*
32 //
33 // The leading {ref} segment is variable-length (refs may contain `/`).
34 // chi's `*` wildcard captures the rest; we resolve ref + path inside
35 // the handler against the repo's known ref list.
36 func (h *Handlers) MountCode(r chi.Router) {
37 r.Get("/{owner}/{repo}/tree/*", h.codeTree)
38 r.Get("/{owner}/{repo}/blob/*", h.codeBlob)
39 r.Get("/{owner}/{repo}/raw/*", h.codeRaw)
40 r.Get("/{owner}/{repo}/find/*", h.codeFinder)
41 }
42
43 // codeContext bundles the per-request data the code-tab handlers
44 // derive once at the top. Owner+repo come from chi; ref+path come from
45 // the wildcard, resolved against the repo's ref list.
46 type codeContext struct {
47 owner string
48 row reposdb.Repo
49 gitDir string
50 refs repogit.RefListing
51 allRefs []string
52 ref string // matched ref name (or 40-hex sha)
53 subpath string // path inside the ref, no leading slash
54 }
55
56 // loadCodeContext does the resolve dance for tree/blob/raw/find. On
57 // any failure it writes the response and returns ok=false.
58 func (h *Handlers) loadCodeContext(w http.ResponseWriter, r *http.Request) (*codeContext, bool) {
59 row, owner, ok := h.loadRepoAndAuthorize(w, r, policy.ActionRepoRead)
60 if !ok {
61 return nil, false
62 }
63 gitDir, err := h.d.RepoFS.RepoPath(owner.Username, row.Name)
64 if err != nil {
65 h.d.Render.HTTPError(w, r, http.StatusNotFound, "")
66 return nil, false
67 }
68 refs, err := repogit.ListRefs(r.Context(), gitDir)
69 if err != nil {
70 h.d.Logger.WarnContext(r.Context(), "code: ListRefs", "error", err)
71 }
72 allNames := refNames(refs)
73
74 rest := chi.URLParam(r, "*")
75 rest = strings.Trim(rest, "/")
76 segs := []string{}
77 if rest != "" {
78 segs = strings.Split(rest, "/")
79 }
80 if len(segs) == 0 {
81 // no ref → default branch root tree
82 ref := row.DefaultBranch
83 return &codeContext{
84 owner: owner.Username, row: row, gitDir: gitDir,
85 refs: refs, allRefs: allNames, ref: ref, subpath: "",
86 }, true
87 }
88 ref, sub, ok2 := repogit.ResolveRef(allNames, segs)
89 if !ok2 {
90 // Fallback: if the first segment looks like a hex sha, accept it.
91 if len(segs[0]) == 40 && isHex(segs[0]) {
92 ref = segs[0]
93 sub = strings.Join(segs[1:], "/")
94 } else {
95 h.d.Render.HTTPError(w, r, http.StatusNotFound, "")
96 return nil, false
97 }
98 }
99 if !validateSubpath(sub) {
100 h.d.Render.HTTPError(w, r, http.StatusBadRequest, "")
101 return nil, false
102 }
103 return &codeContext{
104 owner: owner.Username, row: row, gitDir: gitDir,
105 refs: refs, allRefs: allNames, ref: ref, subpath: sub,
106 }, true
107 }
108
109 // codeTree renders the directory listing at <ref>:<subpath>. If the
110 // path turns out to be a blob, redirects to /blob/. README rendering
111 // for tree-roots is appended below the listing.
112 func (h *Handlers) codeTree(w http.ResponseWriter, r *http.Request) {
113 cc, ok := h.loadCodeContext(w, r)
114 if !ok {
115 return
116 }
117 h.renderRepoTree(w, r, cc)
118 }
119
120 func (h *Handlers) renderRepoTree(w http.ResponseWriter, r *http.Request, cc *codeContext) {
121 kind, _, _, err := repogit.StatPath(r.Context(), cc.gitDir, cc.ref, cc.subpath)
122 if err != nil {
123 if errors.Is(err, repogit.ErrPathNotFound) {
124 h.d.Render.HTTPError(w, r, http.StatusNotFound, "")
125 return
126 }
127 h.d.Logger.WarnContext(r.Context(), "code: StatPath", "error", err)
128 h.d.Render.HTTPError(w, r, http.StatusInternalServerError, "")
129 return
130 }
131 if kind == repogit.EntryBlob {
132 http.Redirect(w, r, "/"+cc.owner+"/"+cc.row.Name+"/blob/"+cc.ref+"/"+cc.subpath, http.StatusSeeOther)
133 return
134 }
135 entries, err := repogit.LsTree(r.Context(), cc.gitDir, cc.ref, cc.subpath)
136 if err != nil {
137 if errors.Is(err, repogit.ErrNotATree) {
138 h.d.Render.HTTPError(w, r, http.StatusNotFound, "")
139 return
140 }
141 h.d.Logger.WarnContext(r.Context(), "code: LsTree", "error", err)
142 h.d.Render.HTTPError(w, r, http.StatusInternalServerError, "")
143 return
144 }
145 // README detection on the requested directory only.
146 readmeHTML := h.findAndRenderREADME(r, cc, entries)
147 head, headFound, headErr := repogit.HeadOf(r.Context(), cc.gitDir, cc.ref)
148 if headErr != nil {
149 h.d.Logger.WarnContext(r.Context(), "code: HeadOf", "error", headErr)
150 }
151 topics, _ := h.rq.ListRepoTopics(r.Context(), h.d.Pool, cc.row.ID)
152
153 h.d.Render.RenderPage(w, r, "repo/tree", map[string]any{
154 "Title": cc.row.Name + " · " + cc.owner,
155 "CSRFToken": middleware.CSRFTokenForRequest(r),
156 "Owner": cc.owner,
157 "Repo": cc.row,
158 "Ref": cc.ref,
159 "Path": cc.subpath,
160 "Crumbs": breadcrumbs(cc.owner, cc.row.Name, cc.ref, cc.subpath),
161 "Entries": entries,
162 "Branches": cc.refs.Branches,
163 "Tags": cc.refs.Tags,
164 "Head": head,
165 "HeadFound": headFound,
166 "README": template.HTML(readmeHTML), //nolint:gosec // sanitized by mdrender
167 "HTTPSCloneURL": h.cloneHTTPS(cc.owner, cc.row.Name),
168 "SSHEnabled": h.d.CloneURLs.SSHEnabled,
169 "SSHCloneURL": h.cloneSSH(cc.owner, cc.row.Name),
170 "RepoTopics": topics,
171 "RepoActions": h.repoActions(r, cc.row.ID),
172 "RepoCounts": h.subnavCounts(r.Context(), cc.row.ID, cc.row.ForkCount),
173 "CanSettings": h.canViewSettings(middleware.CurrentUserFromContext(r.Context())),
174 "ActiveSubnav": "code",
175 })
176 }
177
178 func refNames(refs repogit.RefListing) []string {
179 allNames := make([]string, 0, len(refs.Branches)+len(refs.Tags))
180 for _, b := range refs.Branches {
181 allNames = append(allNames, b.Name)
182 }
183 for _, t := range refs.Tags {
184 allNames = append(allNames, t.Name)
185 }
186 return allNames
187 }
188
189 // findAndRenderREADME looks for README* in the supplied entries (case-
190 // insensitive). Returns rendered HTML for markdown sources; returns a
191 // `<pre>`-wrapped escaped string for non-markdown text. Empty when
192 // no README is present.
193 func (h *Handlers) findAndRenderREADME(r *http.Request, cc *codeContext, entries []repogit.TreeEntry) string {
194 const maxREADMEBytes = 1 * 1024 * 1024 // 1 MiB cap
195 for _, e := range entries {
196 if e.Kind != repogit.EntryBlob {
197 continue
198 }
199 lower := strings.ToLower(e.Name)
200 if !strings.HasPrefix(lower, "readme") {
201 continue
202 }
203 full := joinPath(cc.subpath, e.Name)
204 body, err := repogit.ReadBlobBytes(r.Context(), cc.gitDir, cc.ref, full, maxREADMEBytes)
205 if err != nil && !errors.Is(err, repogit.ErrBlobTooLarge) {
206 return ""
207 }
208 // Markdown: render via Goldmark + sanitizer.
209 if hasExt(lower, []string{".md", ".markdown"}) {
210 out, mderr := mdrender.RenderHTML(body)
211 if mderr == nil {
212 return out
213 }
214 }
215 // Non-markdown plain text: escape + <pre>.
216 return "<pre class=\"shithub-readme-plain\">" + template.HTMLEscapeString(string(body)) + "</pre>"
217 }
218 return ""
219 }
220
// hasExt reports whether filename ends with any of the suffixes in exts.
// The comparison is case-sensitive, so callers that want a
// case-insensitive match pass an already lower-cased filename.
func hasExt(filename string, exts []string) bool {
	for i := 0; i < len(exts); i++ {
		if strings.HasSuffix(filename, exts[i]) {
			return true
		}
	}
	return false
}
229
230 // codeBlob renders the file viewer.
231 func (h *Handlers) codeBlob(w http.ResponseWriter, r *http.Request) {
232 cc, ok := h.loadCodeContext(w, r)
233 if !ok {
234 return
235 }
236 kind, _, size, err := repogit.StatPath(r.Context(), cc.gitDir, cc.ref, cc.subpath)
237 if err != nil || kind != repogit.EntryBlob {
238 h.d.Render.HTTPError(w, r, http.StatusNotFound, "")
239 return
240 }
241 const largeFileThreshold = 1 * 1024 * 1024 // 1 MiB
242 const maxReadBytes = 4 * 1024 * 1024 // never read more than 4 MiB even for highlighting
243
244 data := map[string]any{
245 "Title": cc.subpath + " · " + cc.row.Name,
246 "CSRFToken": middleware.CSRFTokenForRequest(r),
247 "Owner": cc.owner,
248 "Repo": cc.row,
249 "Ref": cc.ref,
250 "Path": cc.subpath,
251 "Crumbs": breadcrumbs(cc.owner, cc.row.Name, cc.ref, cc.subpath),
252 "Branches": cc.refs.Branches,
253 "Tags": cc.refs.Tags,
254 "Size": size,
255 "IsLarge": size > largeFileThreshold,
256 "IsBinary": false,
257 "IsImage": false,
258 "IsMarkdown": false,
259 "Language": highlight.LanguageGuess(cc.subpath),
260 "RepoCounts": h.subnavCounts(r.Context(), cc.row.ID, cc.row.ForkCount),
261 "CanSettings": h.canViewSettings(middleware.CurrentUserFromContext(r.Context())),
262 "ActiveSubnav": "code",
263 }
264 if size > largeFileThreshold {
265 h.d.Render.RenderPage(w, r, "repo/blob", data)
266 return
267 }
268 body, err := repogit.ReadBlobBytes(r.Context(), cc.gitDir, cc.ref, cc.subpath, maxReadBytes)
269 if err != nil && !errors.Is(err, repogit.ErrBlobTooLarge) {
270 h.d.Render.HTTPError(w, r, http.StatusInternalServerError, "")
271 return
272 }
273 if isBinary(body) {
274 data["IsBinary"] = true
275 if isImageExt(cc.subpath) && size <= 5*1024*1024 {
276 data["IsImage"] = true
277 }
278 h.d.Render.RenderPage(w, r, "repo/blob", data)
279 return
280 }
281 // Text path: highlight or markdown-render.
282 if hasExt(strings.ToLower(cc.subpath), []string{".md", ".markdown"}) {
283 data["IsMarkdown"] = true
284 rendered, mderr := mdrender.RenderHTML(body)
285 if mderr == nil {
286 data["MarkdownHTML"] = template.HTML(rendered) //nolint:gosec // sanitized
287 }
288 data["RawSource"] = string(body)
289 }
290 // Per-line highlighted fragments. The template composes the row
291 // table; chroma only colors the tokens inside each line.
292 data["Lines"] = highlight.RenderLines(cc.subpath, string(body))
293 h.d.Render.RenderPage(w, r, "repo/blob", data)
294 }
295
296 // codeRaw streams the raw bytes. Force `attachment` for executable
297 // content types (HTML/SVG/JS/etc.) since shithub doesn't have a
298 // separate raw host.
299 func (h *Handlers) codeRaw(w http.ResponseWriter, r *http.Request) {
300 cc, ok := h.loadCodeContext(w, r)
301 if !ok {
302 return
303 }
304 kind, _, size, err := repogit.StatPath(r.Context(), cc.gitDir, cc.ref, cc.subpath)
305 if err != nil || kind != repogit.EntryBlob {
306 h.d.Render.HTTPError(w, r, http.StatusNotFound, "")
307 return
308 }
309 contentType, forceAttachment := rawContentType(cc.subpath)
310 w.Header().Set("Content-Type", contentType)
311 w.Header().Set("Cache-Control", "private, max-age=0, must-revalidate")
312 w.Header().Set("X-Content-Type-Options", "nosniff")
313 w.Header().Set("Content-Security-Policy", "default-src 'none'; sandbox")
314 if forceAttachment {
315 w.Header().Set("Content-Disposition", `attachment; filename="`+path.Base(cc.subpath)+`"`)
316 }
317 if size > 0 {
318 w.Header().Set("Content-Length", fmt.Sprintf("%d", size))
319 }
320 if err := repogit.StreamBlob(r.Context(), cc.gitDir, cc.ref, cc.subpath, w); err != nil {
321 h.d.Logger.WarnContext(r.Context(), "code: stream raw", "error", err)
322 }
323 }
324
325 // codeFinder serves /find/{ref} — full list pre-filtered by `q`.
326 func (h *Handlers) codeFinder(w http.ResponseWriter, r *http.Request) {
327 cc, ok := h.loadCodeContext(w, r)
328 if !ok {
329 return
330 }
331 paths, err := repogit.ListAllPaths(r.Context(), cc.gitDir, cc.ref)
332 if err != nil {
333 h.d.Logger.WarnContext(r.Context(), "code: ListAllPaths", "error", err)
334 h.d.Render.HTTPError(w, r, http.StatusInternalServerError, "")
335 return
336 }
337 q := r.URL.Query().Get("q")
338 matches := finder.Filter(paths, q, 200)
339 h.d.Render.RenderPage(w, r, "repo/finder", map[string]any{
340 "Title": "Find file · " + cc.row.Name,
341 "CSRFToken": middleware.CSRFTokenForRequest(r),
342 "Owner": cc.owner,
343 "Repo": cc.row,
344 "Ref": cc.ref,
345 "Query": q,
346 "Matches": matches,
347 "Branches": cc.refs.Branches,
348 "Tags": cc.refs.Tags,
349 })
350 }
351
// Breadcrumb is one segment of the tree/blob header trail: Name is the
// segment text and URL is the tree page that segment links to.
type Breadcrumb struct {
	Name string
	URL  string
}

// breadcrumbs builds the click-each-segment trail for the tree/blob
// header. The first crumb is the repository itself, linking to the root
// tree of ref. Each slash-separated segment of subpath then adds one
// crumb whose URL is the tree URL of the path accumulated so far. An
// empty subpath yields only the repository crumb.
func breadcrumbs(owner, repoName, ref, subpath string) []Breadcrumb {
	root := fmt.Sprintf("/%s/%s/tree/%s", owner, repoName, ref)
	trail := []Breadcrumb{{Name: repoName, URL: root}}
	if subpath == "" {
		return trail
	}
	href := root
	for _, segment := range strings.Split(subpath, "/") {
		href += "/" + segment
		trail = append(trail, Breadcrumb{Name: segment, URL: href})
	}
	return trail
}
375
// validateSubpath is the path-traversal guard for the path part of a
// code URL. The empty path (tree root) is valid. Anything else is
// rejected when it starts with `/`, contains `\` anywhere, has an empty
// or `..` segment, or contains an ASCII control character (below 0x20,
// or 0x7f).
func validateSubpath(p string) bool {
	if p == "" {
		return true
	}
	if p[0] == '/' || strings.ContainsRune(p, '\\') {
		return false
	}
	isControl := func(c rune) bool { return c < 0x20 || c == 0x7f }
	for _, segment := range strings.Split(p, "/") {
		switch segment {
		case "", "..":
			return false
		}
		if strings.IndexFunc(segment, isControl) >= 0 {
			return false
		}
	}
	return true
}
397
// isHex reports whether s consists solely of hexadecimal digits, in
// either case. The empty string has no offending character and so
// counts as hex; callers check the length separately.
func isHex(s string) bool {
	for i := 0; i < len(s); i++ {
		b := s[i]
		digit := '0' <= b && b <= '9'
		lower := 'a' <= b && b <= 'f'
		upper := 'A' <= b && b <= 'F'
		if !digit && !lower && !upper {
			return false
		}
	}
	return true
}
408
// rawContentType picks the response headers for a raw download from the
// (case-insensitive) extension of p. It returns the Content-Type and
// whether the response must be forced to `attachment`.
//
// Executable content types are served as plain text AND forced to
// download, to defeat XSS via the raw view (no separate raw.host yet).
// A handful of image, document and data formats get their real type.
// Everything else falls back to text/plain: sniffing the body would be
// ideal, but the content is streamed, so plain text is the safe default.
func rawContentType(p string) (string, bool) {
	const plainText = "text/plain; charset=utf-8"

	switch strings.ToLower(path.Ext(p)) {
	case ".html", ".htm", ".xhtml", ".svg", ".js", ".mjs", ".wasm":
		return plainText, true
	case ".png":
		return "image/png", false
	case ".jpg", ".jpeg":
		return "image/jpeg", false
	case ".gif":
		return "image/gif", false
	case ".webp":
		return "image/webp", false
	case ".pdf":
		return "application/pdf", false
	case ".css":
		return "text/css; charset=utf-8", false
	case ".json":
		return "application/json; charset=utf-8", false
	case ".txt", ".md", ".markdown", ".yml", ".yaml", ".toml", ".log":
		// Known text formats; listed for documentation, same as default.
		return plainText, false
	}
	return plainText, false
}
439
// isImageExt reports whether p has one of the image extensions the blob
// viewer can show inline (.png, .jpg, .jpeg, .gif, .webp), compared
// case-insensitively.
func isImageExt(p string) bool {
	ext := strings.ToLower(path.Ext(p))
	return ext == ".png" || ext == ".jpg" || ext == ".jpeg" || ext == ".gif" || ext == ".webp"
}
447
// isBinary reports whether b looks like binary content: it is binary
// when a NUL byte occurs within its first 8 KiB. Bytes beyond that
// window are not inspected.
func isBinary(b []byte) bool {
	const sniffWindow = 8192

	limit := len(b)
	if limit > sniffWindow {
		limit = sniffWindow
	}
	return bytes.IndexByte(b[:limit], 0) != -1
}
456
// joinPath appends child to parent with a single `/` between them. An
// empty parent is skipped, so the result never gains a leading slash.
// No other cleaning or normalisation is performed.
func joinPath(parent, child string) string {
	if parent != "" {
		return parent + "/" + child
	}
	return child
}
464
465 // silence pgtype unused-import warning when the loadRepoAndAuthorize
466 // helper is in this file's package but defined elsewhere.
467 var _ = pgtype.Int8{}
468