@@ -6,15 +6,18 @@ |
| 6 | 6 | package search |
| 7 | 7 | |
| 8 | 8 | import ( |
| 9 | + "context" |
| 9 | 10 | "errors" |
| 10 | 11 | "log/slog" |
| 11 | 12 | "net/http" |
| 12 | 13 | "net/url" |
| 14 | + "time" |
| 13 | 15 | |
| 14 | 16 | "github.com/go-chi/chi/v5" |
| 15 | 17 | "github.com/jackc/pgx/v5/pgxpool" |
| 16 | 18 | |
| 17 | 19 | "github.com/tenseleyFlow/shithub/internal/auth/policy" |
| 20 | + "github.com/tenseleyFlow/shithub/internal/ratelimit" |
| 18 | 21 | srch "github.com/tenseleyFlow/shithub/internal/search" |
| 19 | 22 | "github.com/tenseleyFlow/shithub/internal/web/middleware" |
| 20 | 23 | "github.com/tenseleyFlow/shithub/internal/web/render" |
@@ -25,11 +28,17 @@ type Deps struct { |
| 25 | 28 | Logger *slog.Logger |
| 26 | 29 | Render *render.Renderer |
| 27 | 30 | Pool *pgxpool.Pool |
| 31 | + // Limiter, when non-nil, gates /search per-(viewer or IP). Audit |
| 32 | + // 2026-05-10 H4: search renders amplify FTS cost 5×–6× per |
| 33 | + // request, so without a limiter a single client can hammer the |
| 34 | + // DB. Optional in tests; required in production wiring. |
| 35 | + Limiter *ratelimit.Limiter |
| 28 | 36 | } |
| 29 | 37 | |
| 30 | 38 | // Handlers is the registered handler set. Construct via New. |
| 31 | 39 | type Handlers struct { |
| 32 | | - d Deps |
| 40 | + d Deps |
| 41 | + tabsCache *tabsCache // nil-safe — Mount constructs it |
| 33 | 42 | } |
| 34 | 43 | |
| 35 | 44 | // New constructs the handler set, validating Deps. |
@@ -40,15 +49,53 @@ func New(d Deps) (*Handlers, error) { |
| 40 | 49 | if d.Pool == nil { |
| 41 | 50 | return nil, errors.New("search: nil Pool") |
| 42 | 51 | } |
| 43 | | - return &Handlers{d: d}, nil |
| 52 | + return &Handlers{d: d, tabsCache: newTabsCache()}, nil |
| 44 | 53 | } |
| 45 | 54 | |
| 46 | | -// Mount registers /search and /search/quick. |
| 55 | +// SearchRateLimitPolicy is the per-(viewer or IP) limit applied to |
| 56 | +// /search and /search/quick. 60/min is generous for human use |
| 57 | +// (typical browse rate is well under this) but cheap to defeat any |
| 58 | +// query-rotation attack that bypasses the tab-count cache (audit |
| 59 | +// 2026-05-10 H4+H5). Surfaced as a var so tests can tighten it. |
| 60 | +var SearchRateLimitPolicy = ratelimit.Policy{ |
| 61 | + Scope: "search", |
| 62 | + Max: 60, |
| 63 | + Window: 1 * time.Minute, |
| 64 | +} |
| 65 | + |
| 66 | +// Mount registers /search and /search/quick. When d.Limiter is set, |
| 67 | +// both routes go through the rate-limit middleware before reaching |
| 68 | +// the handlers — protects the FTS path from query-rotation attacks |
| 69 | +// that the tab-counts cache alone can't absorb. |
| 47 | 70 | func (h *Handlers) Mount(r chi.Router) { |
| 71 | + if h.d.Limiter != nil { |
| 72 | + r.Group(func(r chi.Router) { |
| 73 | + r.Use(h.d.Limiter.Middleware(SearchRateLimitPolicy, searchRateLimitKey)) |
| 74 | + r.Get("/search", h.results) |
| 75 | + r.Get("/search/quick", h.quick) |
| 76 | + }) |
| 77 | + return |
| 78 | + } |
| 48 | 79 | r.Get("/search", h.results) |
| 49 | 80 | r.Get("/search/quick", h.quick) |
| 50 | 81 | } |
| 51 | 82 | |
| 83 | +// searchRateLimitKey picks the per-request key. Authed users key |
| 84 | +// on user_id (so an attacker can't bypass by hopping accounts they |
| 85 | +// don't have); anonymous users key on the trusted client IP. We |
| 86 | +// trust X-Forwarded-For only when middleware.RealIP has already |
| 87 | +// vetted it, which it does at the global stack level. |
| 88 | +func searchRateLimitKey(r *http.Request) string { |
| 89 | + viewer := middleware.CurrentUserFromContext(r.Context()) |
| 90 | + if !viewer.IsAnonymous() { |
| 91 | + return "u:" + intString(int(viewer.ID)) |
| 92 | + } |
| 93 | + if ip, ok := ratelimit.ClientIP(r, true); ok { |
| 94 | + return "ip:" + ip.String() |
| 95 | + } |
| 96 | + return "" |
| 97 | +} |
| 98 | + |
| 52 | 99 | func (h *Handlers) deps() srch.Deps { |
| 53 | 100 | return srch.Deps{Pool: h.d.Pool, Logger: h.d.Logger} |
| 54 | 101 | } |
@@ -194,29 +241,81 @@ func (h *Handlers) searchTabs(r *http.Request, actor policy.Actor, parsed srch.P |
| 194 | 241 | return tabs |
| 195 | 242 | } |
| 196 | 243 | |
| 197 | | - deps := h.deps() |
| 244 | + // Counts are cached per-(query, viewer) for tabsCacheTTL. The |
| 245 | + // active-tab's actual result rows are NOT cached here — only the |
| 246 | + // 5 count-only badge calls that pre-fix were the dominant cost |
| 247 | + // (audit 2026-05-10 H5). Single-flighted via lru.Group so a |
| 248 | + // thundering-herd on the same key doesn't spawn N waves. |
| 249 | + key := tabsCacheKey{q: canonicalizeQuery(parsed), userID: actorUserID(actor)} |
| 250 | + cached, err := h.tabsCache.g.Do(r.Context(), key, func(ctx context.Context) ([]searchTab, error) { |
| 251 | + return h.computeTabCounts(ctx, actor, parsed), nil |
| 252 | + }) |
| 253 | + if err != nil { |
| 254 | + // Group.Do never caches errors and our fetch returns nil; this |
| 255 | + // path is unreachable today but kept for defensiveness. |
| 256 | + h.d.Logger.ErrorContext(r.Context(), "search tabs cache", "error", err) |
| 257 | + cached = h.computeTabCounts(r.Context(), actor, parsed) |
| 258 | + } |
| 259 | + // Merge cached counts into the freshly-built (Selected/Href-aware) |
| 260 | + // tabs slice. The cached value carries Counts and the same Key |
| 261 | + // ordering; everything else is per-request and not cached. |
| 198 | 262 | for i := range tabs { |
| 263 | + for j := range cached { |
| 264 | + if cached[j].Key == tabs[i].Key { |
| 265 | + tabs[i].Count = cached[j].Count |
| 266 | + break |
| 267 | + } |
| 268 | + } |
| 269 | + } |
| 270 | + return tabs |
| 271 | +} |
| 272 | + |
| 273 | +// computeTabCounts is the cache miss path: 5 FTS count-only queries. |
| 274 | +// Returned slice carries (Key, Count) only — Selected/Href/Label/ |
| 275 | +// Icon are per-request and applied by the caller. |
| 276 | +func (h *Handlers) computeTabCounts(ctx context.Context, actor policy.Actor, parsed srch.ParsedQuery) []searchTab { |
| 277 | + deps := h.deps() |
| 278 | + out := []searchTab{ |
| 279 | + {Key: "code"}, |
| 280 | + {Key: "repositories"}, |
| 281 | + {Key: "issues"}, |
| 282 | + {Key: "pullrequests"}, |
| 283 | + {Key: "users"}, |
| 284 | + } |
| 285 | + for i := range out { |
| 199 | 286 | var total int64 |
| 200 | 287 | var err error |
| 201 | | - switch tabs[i].Key { |
| 288 | + switch out[i].Key { |
| 202 | 289 | case "repositories": |
| 203 | | - _, total, err = srch.SearchRepos(r.Context(), deps, actor, parsed, 0, 0) |
| 290 | + _, total, err = srch.SearchRepos(ctx, deps, actor, parsed, 0, 0) |
| 204 | 291 | case "code": |
| 205 | | - _, total, err = srch.SearchCode(r.Context(), deps, actor, parsed, 0, 0) |
| 292 | + _, total, err = srch.SearchCode(ctx, deps, actor, parsed, 0, 0) |
| 206 | 293 | case "issues": |
| 207 | | - _, total, err = srch.SearchIssues(r.Context(), deps, actor, parsed, "issue", 0, 0) |
| 294 | + _, total, err = srch.SearchIssues(ctx, deps, actor, parsed, "issue", 0, 0) |
| 208 | 295 | case "pullrequests": |
| 209 | | - _, total, err = srch.SearchIssues(r.Context(), deps, actor, parsed, "pr", 0, 0) |
| 296 | + _, total, err = srch.SearchIssues(ctx, deps, actor, parsed, "pr", 0, 0) |
| 210 | 297 | case "users": |
| 211 | | - _, total, err = srch.SearchUsers(r.Context(), deps, parsed, 0, 0) |
| 298 | + _, total, err = srch.SearchUsers(ctx, deps, parsed, 0, 0) |
| 212 | 299 | } |
| 213 | 300 | if err != nil && !errors.Is(err, srch.ErrEmptyQuery) { |
| 214 | | - h.d.Logger.ErrorContext(r.Context(), "search tab count", "tab", tabs[i].Key, "error", err) |
| 301 | + h.d.Logger.ErrorContext(ctx, "search tab count", "tab", out[i].Key, "error", err) |
| 215 | 302 | continue |
| 216 | 303 | } |
| 217 | | - tabs[i].Count = total |
| 304 | + out[i].Count = total |
| 218 | 305 | } |
| 219 | | - return tabs |
| 306 | + return out |
| 307 | +} |
| 308 | + |
| 309 | +// actorUserID returns 0 for anonymous, the user_id otherwise. Used |
| 310 | +// as the (anon vs each-authed-user) discriminant in the tabs cache |
| 311 | +// key — anonymous viewers all see the same public-only result set |
| 312 | +// so they share a slot; authed viewers see private results based |
| 313 | +// on their collab roles, so each gets their own. |
| 314 | +func actorUserID(a policy.Actor) int64 { |
| 315 | + if a.IsAnonymous { |
| 316 | + return 0 |
| 317 | + } |
| 318 | + return a.UserID |
| 220 | 319 | } |
| 221 | 320 | |
| 222 | 321 | func searchHref(q, tab string, page int) string { |