Go · 14698 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 package repo
4
import (
	"bytes"
	"errors"
	"fmt"
	"html/template"
	"mime"
	"net/http"
	"net/url"
	"path"
	"strings"

	"github.com/go-chi/chi/v5"
	"github.com/jackc/pgx/v5/pgtype"

	"github.com/tenseleyFlow/shithub/internal/auth/policy"
	mdrender "github.com/tenseleyFlow/shithub/internal/markdown"
	"github.com/tenseleyFlow/shithub/internal/repos/finder"
	repogit "github.com/tenseleyFlow/shithub/internal/repos/git"
	"github.com/tenseleyFlow/shithub/internal/repos/highlight"
	reposdb "github.com/tenseleyFlow/shithub/internal/repos/sqlc"
	"github.com/tenseleyFlow/shithub/internal/web/middleware"
)
25
26 // MountCode registers the code-tab routes:
27 //
28 // GET /{owner}/{repo}/tree/*
29 // GET /{owner}/{repo}/blob/*
30 // GET /{owner}/{repo}/raw/*
31 // GET /{owner}/{repo}/find/*
32 //
33 // The leading {ref} segment is variable-length (refs may contain `/`).
34 // chi's `*` wildcard captures the rest; we resolve ref + path inside
35 // the handler against the repo's known ref list.
36 func (h *Handlers) MountCode(r chi.Router) {
37 r.Get("/{owner}/{repo}/tree/*", h.codeTree)
38 r.Get("/{owner}/{repo}/blob/*", h.codeBlob)
39 r.Get("/{owner}/{repo}/raw/*", h.codeRaw)
40 r.Get("/{owner}/{repo}/find/*", h.codeFinder)
41 }
42
// codeContext bundles the per-request data the code-tab handlers
// derive once at the top. Owner+repo come from chi; ref+path come from
// the wildcard, resolved against the repo's ref list.
//
// Values are produced by loadCodeContext; handlers treat them as
// read-only for the rest of the request.
type codeContext struct {
	// owner is the username of the repo owner returned by
	// loadRepoAndAuthorize; it is used to build URLs and page titles.
	owner string
	// row is the repository record returned by loadRepoAndAuthorize.
	row reposdb.Repo
	// gitDir is the repository path obtained from RepoFS.RepoPath and
	// passed to every repogit call.
	gitDir string
	// refs is the listing returned by repogit.ListRefs. It may be the
	// zero value: a ListRefs failure is logged, not fatal.
	refs repogit.RefListing
	// allRefs is the flattened name list (branches first, then tags)
	// that the wildcard is resolved against.
	allRefs []string
	ref     string // matched ref name (or 40-hex sha)
	subpath string // path inside the ref, no leading slash
}
55
56 // loadCodeContext does the resolve dance for tree/blob/raw/find. On
57 // any failure it writes the response and returns ok=false.
58 func (h *Handlers) loadCodeContext(w http.ResponseWriter, r *http.Request) (*codeContext, bool) {
59 row, owner, ok := h.loadRepoAndAuthorize(w, r, policy.ActionRepoRead)
60 if !ok {
61 return nil, false
62 }
63 gitDir, err := h.d.RepoFS.RepoPath(owner.Username, row.Name)
64 if err != nil {
65 h.d.Render.HTTPError(w, r, http.StatusNotFound, "")
66 return nil, false
67 }
68 refs, err := repogit.ListRefs(r.Context(), gitDir)
69 if err != nil {
70 h.d.Logger.WarnContext(r.Context(), "code: ListRefs", "error", err)
71 }
72 allNames := refNames(refs)
73
74 rest := chi.URLParam(r, "*")
75 rest = strings.Trim(rest, "/")
76 segs := []string{}
77 if rest != "" {
78 segs = strings.Split(rest, "/")
79 }
80 if len(segs) == 0 {
81 // no ref → default branch root tree
82 ref := row.DefaultBranch
83 return &codeContext{
84 owner: owner.Username, row: row, gitDir: gitDir,
85 refs: refs, allRefs: allNames, ref: ref, subpath: "",
86 }, true
87 }
88 ref, sub, ok2 := repogit.ResolveRef(allNames, segs)
89 if !ok2 {
90 // Fallback: if the first segment looks like a hex sha, accept it.
91 if len(segs[0]) == 40 && isHex(segs[0]) {
92 ref = segs[0]
93 sub = strings.Join(segs[1:], "/")
94 } else {
95 h.d.Render.HTTPError(w, r, http.StatusNotFound, "")
96 return nil, false
97 }
98 }
99 if !validateSubpath(sub) {
100 h.d.Render.HTTPError(w, r, http.StatusBadRequest, "")
101 return nil, false
102 }
103 return &codeContext{
104 owner: owner.Username, row: row, gitDir: gitDir,
105 refs: refs, allRefs: allNames, ref: ref, subpath: sub,
106 }, true
107 }
108
109 // codeTree renders the directory listing at <ref>:<subpath>. If the
110 // path turns out to be a blob, redirects to /blob/. README rendering
111 // for tree-roots is appended below the listing.
112 func (h *Handlers) codeTree(w http.ResponseWriter, r *http.Request) {
113 cc, ok := h.loadCodeContext(w, r)
114 if !ok {
115 return
116 }
117 h.renderRepoTree(w, r, cc)
118 }
119
120 func (h *Handlers) renderRepoTree(w http.ResponseWriter, r *http.Request, cc *codeContext) {
121 kind, _, _, err := repogit.StatPath(r.Context(), cc.gitDir, cc.ref, cc.subpath)
122 if err != nil {
123 if errors.Is(err, repogit.ErrPathNotFound) {
124 h.d.Render.HTTPError(w, r, http.StatusNotFound, "")
125 return
126 }
127 h.d.Logger.WarnContext(r.Context(), "code: StatPath", "error", err)
128 h.d.Render.HTTPError(w, r, http.StatusInternalServerError, "")
129 return
130 }
131 if kind == repogit.EntryBlob {
132 http.Redirect(w, r, "/"+cc.owner+"/"+cc.row.Name+"/blob/"+cc.ref+"/"+cc.subpath, http.StatusSeeOther)
133 return
134 }
135 entries, err := repogit.LsTree(r.Context(), cc.gitDir, cc.ref, cc.subpath)
136 if err != nil {
137 if errors.Is(err, repogit.ErrNotATree) {
138 h.d.Render.HTTPError(w, r, http.StatusNotFound, "")
139 return
140 }
141 h.d.Logger.WarnContext(r.Context(), "code: LsTree", "error", err)
142 h.d.Render.HTTPError(w, r, http.StatusInternalServerError, "")
143 return
144 }
145 // README detection on the requested directory only.
146 readmeHTML := h.findAndRenderREADME(r, cc, entries)
147 head, headFound, headErr := repogit.HeadOf(r.Context(), cc.gitDir, cc.ref)
148 if headErr != nil {
149 h.d.Logger.WarnContext(r.Context(), "code: HeadOf", "error", headErr)
150 }
151 topics, _ := h.rq.ListRepoTopics(r.Context(), h.d.Pool, cc.row.ID)
152
153 h.d.Render.RenderPage(w, r, "repo/tree", map[string]any{
154 "Title": cc.row.Name + " · " + cc.owner,
155 "CSRFToken": middleware.CSRFTokenForRequest(r),
156 "Owner": cc.owner,
157 "Repo": cc.row,
158 "Ref": cc.ref,
159 "Path": cc.subpath,
160 "Crumbs": breadcrumbs(cc.owner, cc.row.Name, cc.ref, cc.subpath),
161 "Entries": entries,
162 "Branches": cc.refs.Branches,
163 "Tags": cc.refs.Tags,
164 "Head": head,
165 "HeadFound": headFound,
166 "README": template.HTML(readmeHTML), //nolint:gosec // sanitized by mdrender
167 "HTTPSCloneURL": h.cloneHTTPS(cc.owner, cc.row.Name),
168 "SSHEnabled": h.d.CloneURLs.SSHEnabled,
169 "SSHCloneURL": h.cloneSSH(cc.owner, cc.row.Name),
170 "RepoTopics": topics,
171 "RepoCounts": h.subnavCounts(r.Context(), cc.row.ID, cc.row.ForkCount),
172 "CanSettings": h.canViewSettings(middleware.CurrentUserFromContext(r.Context())),
173 "ActiveSubnav": "code",
174 })
175 }
176
177 func refNames(refs repogit.RefListing) []string {
178 allNames := make([]string, 0, len(refs.Branches)+len(refs.Tags))
179 for _, b := range refs.Branches {
180 allNames = append(allNames, b.Name)
181 }
182 for _, t := range refs.Tags {
183 allNames = append(allNames, t.Name)
184 }
185 return allNames
186 }
187
188 // findAndRenderREADME looks for README* in the supplied entries (case-
189 // insensitive). Returns rendered HTML for markdown sources; returns a
190 // `<pre>`-wrapped escaped string for non-markdown text. Empty when
191 // no README is present.
192 func (h *Handlers) findAndRenderREADME(r *http.Request, cc *codeContext, entries []repogit.TreeEntry) string {
193 const maxREADMEBytes = 1 * 1024 * 1024 // 1 MiB cap
194 for _, e := range entries {
195 if e.Kind != repogit.EntryBlob {
196 continue
197 }
198 lower := strings.ToLower(e.Name)
199 if !strings.HasPrefix(lower, "readme") {
200 continue
201 }
202 full := joinPath(cc.subpath, e.Name)
203 body, err := repogit.ReadBlobBytes(r.Context(), cc.gitDir, cc.ref, full, maxREADMEBytes)
204 if err != nil && !errors.Is(err, repogit.ErrBlobTooLarge) {
205 return ""
206 }
207 // Markdown: render via Goldmark + sanitizer.
208 if hasExt(lower, []string{".md", ".markdown"}) {
209 out, mderr := mdrender.RenderHTML(body)
210 if mderr == nil {
211 return out
212 }
213 }
214 // Non-markdown plain text: escape + <pre>.
215 return "<pre class=\"shithub-readme-plain\">" + template.HTMLEscapeString(string(body)) + "</pre>"
216 }
217 return ""
218 }
219
// hasExt reports whether filename ends with any of the suffixes in
// exts. The comparison is case-sensitive, so callers lower-case the
// name first when they want a case-insensitive match.
func hasExt(filename string, exts []string) bool {
	for i := range exts {
		if strings.HasSuffix(filename, exts[i]) {
			return true
		}
	}
	return false
}
228
229 // codeBlob renders the file viewer.
230 func (h *Handlers) codeBlob(w http.ResponseWriter, r *http.Request) {
231 cc, ok := h.loadCodeContext(w, r)
232 if !ok {
233 return
234 }
235 kind, _, size, err := repogit.StatPath(r.Context(), cc.gitDir, cc.ref, cc.subpath)
236 if err != nil || kind != repogit.EntryBlob {
237 h.d.Render.HTTPError(w, r, http.StatusNotFound, "")
238 return
239 }
240 const largeFileThreshold = 1 * 1024 * 1024 // 1 MiB
241 const maxReadBytes = 4 * 1024 * 1024 // never read more than 4 MiB even for highlighting
242
243 data := map[string]any{
244 "Title": cc.subpath + " · " + cc.row.Name,
245 "CSRFToken": middleware.CSRFTokenForRequest(r),
246 "Owner": cc.owner,
247 "Repo": cc.row,
248 "Ref": cc.ref,
249 "Path": cc.subpath,
250 "Crumbs": breadcrumbs(cc.owner, cc.row.Name, cc.ref, cc.subpath),
251 "Branches": cc.refs.Branches,
252 "Tags": cc.refs.Tags,
253 "Size": size,
254 "IsLarge": size > largeFileThreshold,
255 "IsBinary": false,
256 "IsImage": false,
257 "IsMarkdown": false,
258 "Language": highlight.LanguageGuess(cc.subpath),
259 "RepoCounts": h.subnavCounts(r.Context(), cc.row.ID, cc.row.ForkCount),
260 "CanSettings": h.canViewSettings(middleware.CurrentUserFromContext(r.Context())),
261 "ActiveSubnav": "code",
262 }
263 if size > largeFileThreshold {
264 h.d.Render.RenderPage(w, r, "repo/blob", data)
265 return
266 }
267 body, err := repogit.ReadBlobBytes(r.Context(), cc.gitDir, cc.ref, cc.subpath, maxReadBytes)
268 if err != nil && !errors.Is(err, repogit.ErrBlobTooLarge) {
269 h.d.Render.HTTPError(w, r, http.StatusInternalServerError, "")
270 return
271 }
272 if isBinary(body) {
273 data["IsBinary"] = true
274 if isImageExt(cc.subpath) && size <= 5*1024*1024 {
275 data["IsImage"] = true
276 }
277 h.d.Render.RenderPage(w, r, "repo/blob", data)
278 return
279 }
280 // Text path: highlight or markdown-render.
281 if hasExt(strings.ToLower(cc.subpath), []string{".md", ".markdown"}) {
282 data["IsMarkdown"] = true
283 rendered, mderr := mdrender.RenderHTML(body)
284 if mderr == nil {
285 data["MarkdownHTML"] = template.HTML(rendered) //nolint:gosec // sanitized
286 }
287 data["RawSource"] = string(body)
288 }
289 // Per-line highlighted fragments. The template composes the row
290 // table; chroma only colors the tokens inside each line.
291 data["Lines"] = highlight.RenderLines(cc.subpath, string(body))
292 h.d.Render.RenderPage(w, r, "repo/blob", data)
293 }
294
295 // codeRaw streams the raw bytes. Force `attachment` for executable
296 // content types (HTML/SVG/JS/etc.) since shithub doesn't have a
297 // separate raw host.
298 func (h *Handlers) codeRaw(w http.ResponseWriter, r *http.Request) {
299 cc, ok := h.loadCodeContext(w, r)
300 if !ok {
301 return
302 }
303 kind, _, size, err := repogit.StatPath(r.Context(), cc.gitDir, cc.ref, cc.subpath)
304 if err != nil || kind != repogit.EntryBlob {
305 h.d.Render.HTTPError(w, r, http.StatusNotFound, "")
306 return
307 }
308 contentType, forceAttachment := rawContentType(cc.subpath)
309 w.Header().Set("Content-Type", contentType)
310 w.Header().Set("Cache-Control", "private, max-age=0, must-revalidate")
311 w.Header().Set("X-Content-Type-Options", "nosniff")
312 w.Header().Set("Content-Security-Policy", "default-src 'none'; sandbox")
313 if forceAttachment {
314 w.Header().Set("Content-Disposition", `attachment; filename="`+path.Base(cc.subpath)+`"`)
315 }
316 if size > 0 {
317 w.Header().Set("Content-Length", fmt.Sprintf("%d", size))
318 }
319 if err := repogit.StreamBlob(r.Context(), cc.gitDir, cc.ref, cc.subpath, w); err != nil {
320 h.d.Logger.WarnContext(r.Context(), "code: stream raw", "error", err)
321 }
322 }
323
324 // codeFinder serves /find/{ref} — full list pre-filtered by `q`.
325 func (h *Handlers) codeFinder(w http.ResponseWriter, r *http.Request) {
326 cc, ok := h.loadCodeContext(w, r)
327 if !ok {
328 return
329 }
330 paths, err := repogit.ListAllPaths(r.Context(), cc.gitDir, cc.ref)
331 if err != nil {
332 h.d.Logger.WarnContext(r.Context(), "code: ListAllPaths", "error", err)
333 h.d.Render.HTTPError(w, r, http.StatusInternalServerError, "")
334 return
335 }
336 q := r.URL.Query().Get("q")
337 matches := finder.Filter(paths, q, 200)
338 h.d.Render.RenderPage(w, r, "repo/finder", map[string]any{
339 "Title": "Find file · " + cc.row.Name,
340 "CSRFToken": middleware.CSRFTokenForRequest(r),
341 "Owner": cc.owner,
342 "Repo": cc.row,
343 "Ref": cc.ref,
344 "Query": q,
345 "Matches": matches,
346 "Branches": cc.refs.Branches,
347 "Tags": cc.refs.Tags,
348 })
349 }
350
// Breadcrumb is one clickable segment of the tree/blob header trail.
type Breadcrumb struct {
	Name string // display text: the raw, unescaped segment
	URL  string // percent-encoded link to the tree view of this segment
}

// breadcrumbs returns the click-each-segment slice for the tree/blob
// header. The first crumb is the repo root at ref; each following crumb
// extends the path by one segment of subpath. An empty subpath yields
// only the root crumb.
//
// URLs are percent-encoded as paths, so segments containing characters
// such as spaces, `#`, `?` or `%` still link to the intended entry.
// `/` is left alone, which keeps refs like "feature/x" intact.
func breadcrumbs(owner, repoName, ref, subpath string) []Breadcrumb {
	escape := func(p string) string { return (&url.URL{Path: p}).EscapedPath() }

	prefix := fmt.Sprintf("/%s/%s/tree/%s", owner, repoName, ref)
	if subpath == "" {
		return []Breadcrumb{{Name: repoName, URL: escape(prefix)}}
	}

	parts := strings.Split(subpath, "/")
	out := make([]Breadcrumb, 0, len(parts)+1)
	out = append(out, Breadcrumb{Name: repoName, URL: escape(prefix)})
	for _, p := range parts {
		// Grow the unescaped prefix one segment at a time and escape
		// the whole path, rather than re-joining on every iteration.
		prefix += "/" + p
		out = append(out, Breadcrumb{Name: p, URL: escape(prefix)})
	}
	return out
}
374
// validateSubpath is the path-traversal guard for the in-repo part of
// a URL. The empty path (repo root) is valid. Otherwise the path is
// rejected when it starts with `/`, contains `\`, or has any segment
// that is empty, equal to `..`, or contains an ASCII control character
// (below 0x20, or DEL 0x7f).
func validateSubpath(p string) bool {
	switch {
	case p == "":
		return true
	case p[0] == '/', strings.ContainsRune(p, '\\'):
		return false
	}
	isControl := func(c rune) bool { return c < 0x20 || c == 0x7f }
	for _, seg := range strings.Split(p, "/") {
		if seg == "" || seg == ".." || strings.IndexFunc(seg, isControl) >= 0 {
			return false
		}
	}
	return true
}
396
// isHex reports whether s consists solely of ASCII hexadecimal digits
// (0-9, a-f, A-F). The empty string vacuously qualifies, so callers
// check the length separately.
func isHex(s string) bool {
	for i := 0; i < len(s); i++ {
		c := s[i]
		digit := '0' <= c && c <= '9'
		lower := 'a' <= c && c <= 'f'
		upper := 'A' <= c && c <= 'F'
		if !(digit || lower || upper) {
			return false
		}
	}
	return true
}
407
// rawContentType picks the response Content-Type for the raw view from
// the (case-insensitive) extension of p, and reports whether the
// response must be sent as a download.
//
// Executable content types are served as text/plain AND forced to
// `attachment`, to defeat XSS via the raw view (no separate raw.host
// yet). A few image/document/text types get their real media type.
// Everything else, including the common plain-text extensions, is
// served as text/plain: the body is streamed, so it cannot be sniffed
// first.
func rawContentType(p string) (string, bool) {
	const plainText = "text/plain; charset=utf-8"

	contentType, attachment := plainText, false
	switch strings.ToLower(path.Ext(p)) {
	case ".html", ".htm", ".xhtml", ".svg", ".js", ".mjs", ".wasm":
		attachment = true
	case ".png":
		contentType = "image/png"
	case ".jpg", ".jpeg":
		contentType = "image/jpeg"
	case ".gif":
		contentType = "image/gif"
	case ".webp":
		contentType = "image/webp"
	case ".pdf":
		contentType = "application/pdf"
	case ".css":
		contentType = "text/css; charset=utf-8"
	case ".json":
		contentType = "application/json; charset=utf-8"
	}
	return contentType, attachment
}
438
// isImageExt reports whether p's extension (case-insensitive) is one of
// the image types the blob viewer can show inline: png, jpg, jpeg, gif
// or webp.
func isImageExt(p string) bool {
	ext := strings.ToLower(path.Ext(p))
	for _, known := range [...]string{".png", ".jpg", ".jpeg", ".gif", ".webp"} {
		if ext == known {
			return true
		}
	}
	return false
}
446
// isBinary reports whether b looks like binary data: it returns true
// when a NUL byte occurs within the first 8 KiB. Bytes past that window
// are never inspected.
func isBinary(b []byte) bool {
	const window = 8192
	limit := len(b)
	if limit > window {
		limit = window
	}
	return bytes.IndexByte(b[:limit], 0x00) != -1
}
455
// joinPath appends child to parent with a single `/` between them. An
// empty parent (the repo root) yields child unchanged, so the result
// never gains a leading slash.
func joinPath(parent, child string) string {
	if parent != "" {
		return parent + "/" + child
	}
	return child
}
463
// Keep the pgtype import referenced: no other declaration visible in
// this file uses it, and Go rejects unused imports. The original note
// ties this to loadRepoAndAuthorize living elsewhere in the package.
// NOTE(review): confirm whether this file still needs the import at
// all; if not, drop the import and this line together.
var _ = pgtype.Int8{}
467