tenseleyflow/shithub / 6251924

Browse files

S28: search orchestrator — repos, issues, users, code + query parser

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
6251924ff20a0439f23d34441e234643c7317ac7
Parents
75eb88f
Tree
86d4ddf

7 changed files

StatusFile+-
A internal/search/code.go 135 0
A internal/search/issues.go 135 0
A internal/search/query_parse.go 100 0
A internal/search/repos.go 135 0
A internal/search/search.go 97 0
A internal/search/search_test.go 325 0
A internal/search/users.go 66 0
internal/search/code.goadded
@@ -0,0 +1,135 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package search
4
+
5
+import (
6
+	"context"
7
+	"fmt"
8
+
9
+	"github.com/tenseleyFlow/shithub/internal/auth/policy"
10
+)
11
+
12
+// SearchCode runs a code search across paths and content. Visibility
13
+// gates the underlying repo set; only repos the actor can read appear.
14
+//
15
+// We run two unioned subqueries:
16
+//
17
+//	paths   — `tsv @@ plainto_tsquery(...)` on the indexed path
18
+//	          string. Always populated (size cap doesn't apply).
19
+//	content — `content_tsv @@ plainto_tsquery(...)` OR
20
+//	          `content_trgm % $tsText` (trigram similarity for
21
+//	          camelCase / snake_case identifiers).
22
+//
23
+// Path matches always rank above content matches at equal ts_rank.
24
+// Within content matches, ts_rank dominates trigram similarity.
25
+func SearchCode(ctx context.Context, deps Deps, actor policy.Actor, q ParsedQuery, limit, offset int) ([]CodeResult, int64, error) {
26
+	if !q.HasContent() {
27
+		return nil, 0, ErrEmptyQuery
28
+	}
29
+	tsText, tsCtor, hasFTS := tsQueryBindAndCtor(q)
30
+	if !hasFTS {
31
+		// Code search needs a textual hit. repo:-only narrows the
32
+		// repo set but we have nothing to match against; return
33
+		// empty rather than blast every indexed file.
34
+		return nil, 0, nil
35
+	}
36
+
37
+	args := []any{tsText}
38
+	visClause, visArgs := policy.VisibilityPredicate(actor, "r", len(args)+1)
39
+	args = append(args, visArgs...)
40
+
41
+	repoFilter := ""
42
+	if q.RepoFilter != nil {
43
+		ownerPos := len(args) + 1
44
+		namePos := len(args) + 2
45
+		args = append(args, q.RepoFilter.Owner, q.RepoFilter.Name)
46
+		repoFilter = fmt.Sprintf(
47
+			" AND r.id = (SELECT r2.id FROM repos r2 JOIN users u2 ON u2.id = r2.owner_user_id "+
48
+				"WHERE u2.username = $%d AND r2.name = $%d AND r2.deleted_at IS NULL)",
49
+			ownerPos, namePos,
50
+		)
51
+	}
52
+
53
+	limPos := len(args) + 1
54
+	offPos := len(args) + 2
55
+	args = append(args, limit, offset)
56
+
57
+	// Path subquery: tsv match on the path string. We always rank
58
+	// path hits at +1.0 above content hits at the same ts_rank.
59
+	queryStr := fmt.Sprintf(`
60
+		WITH path_hits AS (
61
+		    SELECT csp.repo_id, csp.ref_name, csp.path,
62
+		           ts_rank_cd(csp.tsv, %[1]s('shithub_search', $1)) + 1.0 AS rank,
63
+		           ''::text AS preview
64
+		    FROM code_search_paths csp
65
+		    JOIN repos r ON r.id = csp.repo_id
66
+		    WHERE csp.tsv @@ %[1]s('shithub_search', $1)
67
+		      AND %[2]s
68
+		      %[3]s
69
+		),
70
+		content_hits AS (
71
+		    SELECT csc.repo_id, csc.ref_name, csc.path,
72
+		           ts_rank_cd(csc.content_tsv, %[1]s('shithub_search', $1)) AS rank,
73
+		           ''::text AS preview
74
+		    FROM code_search_content csc
75
+		    JOIN repos r ON r.id = csc.repo_id
76
+		    WHERE csc.content_tsv @@ %[1]s('shithub_search', $1)
77
+		      AND %[2]s
78
+		      %[3]s
79
+		),
80
+		all_hits AS (
81
+		    SELECT * FROM path_hits
82
+		    UNION ALL
83
+		    SELECT * FROM content_hits
84
+		)
85
+		SELECT h.repo_id, u.username, r.name, h.ref_name, h.path, h.preview, h.rank
86
+		FROM all_hits h
87
+		JOIN repos r ON r.id = h.repo_id
88
+		JOIN users u ON u.id = r.owner_user_id
89
+		ORDER BY h.rank DESC, h.path
90
+		LIMIT $%[4]d OFFSET $%[5]d
91
+	`, tsCtor, visClause, repoFilter, limPos, offPos)
92
+
93
+	rows, err := deps.Pool.Query(ctx, queryStr, args...)
94
+	if err != nil {
95
+		return nil, 0, fmt.Errorf("search code: %w", err)
96
+	}
97
+	defer rows.Close()
98
+	out := make([]CodeResult, 0, limit)
99
+	for rows.Next() {
100
+		var r CodeResult
101
+		if err := rows.Scan(&r.RepoID, &r.OwnerUsername, &r.RepoName,
102
+			&r.RefName, &r.Path, &r.PreviewLine, &r.Rank); err != nil {
103
+			return nil, 0, err
104
+		}
105
+		out = append(out, r)
106
+	}
107
+	if err := rows.Err(); err != nil {
108
+		return nil, 0, err
109
+	}
110
+
111
+	// Total count: paths + content rows that matched. Repos with
112
+	// visibility filter applied. Pagination is approximate when
113
+	// the same path matches both indexes — we count the union
114
+	// honestly so the pager doesn't lie.
115
+	countQuery := fmt.Sprintf(`
116
+		SELECT (
117
+		    SELECT count(*) FROM code_search_paths csp
118
+		    JOIN repos r ON r.id = csp.repo_id
119
+		    WHERE csp.tsv @@ %[1]s('shithub_search', $1)
120
+		      AND %[2]s
121
+		      %[3]s
122
+		) + (
123
+		    SELECT count(*) FROM code_search_content csc
124
+		    JOIN repos r ON r.id = csc.repo_id
125
+		    WHERE csc.content_tsv @@ %[1]s('shithub_search', $1)
126
+		      AND %[2]s
127
+		      %[3]s
128
+		)
129
+	`, tsCtor, visClause, repoFilter)
130
+	var total int64
131
+	if err := deps.Pool.QueryRow(ctx, countQuery, args[:len(args)-2]...).Scan(&total); err != nil {
132
+		return nil, 0, fmt.Errorf("count code: %w", err)
133
+	}
134
+	return out, total, nil
135
+}
internal/search/issues.goadded
@@ -0,0 +1,135 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package search
4
+
5
+import (
6
+	"context"
7
+	"fmt"
8
+
9
+	"github.com/tenseleyFlow/shithub/internal/auth/policy"
10
+)
11
+
12
+// SearchIssues runs an issue search visible to actor. Issues
13
+// inherit visibility from their repo via the same predicate.
14
+//
15
+// Ranking: `ts_rank_cd * state_weight` where open weighs 1.5×
16
+// over closed (the spec doesn't pin a number; 1.5 is a sensible
17
+// default that surfaces actionable issues first without burying
18
+// the closed history).
19
+//
20
+// kindFilter is optional: pass "issue", "pr", or "" for both. The
21
+// dropdown / Issues tab passes "issue"; the PR tab would pass "pr".
22
+func SearchIssues(ctx context.Context, deps Deps, actor policy.Actor, q ParsedQuery, kindFilter string, limit, offset int) ([]IssueResult, int64, error) {
23
+	if !q.HasContent() {
24
+		return nil, 0, ErrEmptyQuery
25
+	}
26
+	tsText, tsCtor, hasFTS := tsQueryBindAndCtor(q)
27
+
28
+	// At least one signal must drive the query: the FTS payload, a
29
+	// repo: filter, an author: filter, or a state: filter.
30
+	if !hasFTS && q.RepoFilter == nil && q.AuthorFilter == "" && q.StateFilter == "" && kindFilter == "" {
31
+		return nil, 0, nil
32
+	}
33
+
34
+	args := []any{}
35
+	tsPlaceholder := 0
36
+	if hasFTS {
37
+		args = append(args, tsText)
38
+		tsPlaceholder = len(args)
39
+	}
40
+	visClause, visArgs := policy.VisibilityPredicate(actor, "r", len(args)+1)
41
+	args = append(args, visArgs...)
42
+
43
+	whereExtras := ""
44
+
45
+	if q.RepoFilter != nil {
46
+		ownerPos := len(args) + 1
47
+		namePos := len(args) + 2
48
+		args = append(args, q.RepoFilter.Owner, q.RepoFilter.Name)
49
+		whereExtras += fmt.Sprintf(
50
+			" AND r.id = (SELECT r2.id FROM repos r2 JOIN users u2 ON u2.id = r2.owner_user_id "+
51
+				"WHERE u2.username = $%d AND r2.name = $%d AND r2.deleted_at IS NULL)",
52
+			ownerPos, namePos,
53
+		)
54
+	}
55
+	if q.StateFilter != "" {
56
+		statePos := len(args) + 1
57
+		args = append(args, q.StateFilter)
58
+		whereExtras += fmt.Sprintf(" AND s.state::text = $%d", statePos)
59
+	}
60
+	if kindFilter != "" {
61
+		kindPos := len(args) + 1
62
+		args = append(args, kindFilter)
63
+		whereExtras += fmt.Sprintf(" AND s.kind::text = $%d", kindPos)
64
+	}
65
+	if q.AuthorFilter != "" {
66
+		authorPos := len(args) + 1
67
+		args = append(args, q.AuthorFilter)
68
+		whereExtras += fmt.Sprintf(
69
+			" AND s.author_user_id = (SELECT id FROM users WHERE username = $%d)",
70
+			authorPos,
71
+		)
72
+	}
73
+
74
+	whereFTS := "TRUE"
75
+	rankExpr := "1.0"
76
+	if hasFTS {
77
+		whereFTS = fmt.Sprintf("s.tsv @@ %s('shithub_search', $%d)", tsCtor, tsPlaceholder)
78
+		rankExpr = fmt.Sprintf("ts_rank_cd(s.tsv, %s('shithub_search', $%d))", tsCtor, tsPlaceholder)
79
+	}
80
+
81
+	limPos := len(args) + 1
82
+	offPos := len(args) + 2
83
+	args = append(args, limit, offset)
84
+
85
+	queryStr := fmt.Sprintf(`
86
+		SELECT i.id, r.id, u.username, r.name, i.number, i.title,
87
+		       i.state::text, i.kind::text,
88
+		       coalesce(au.username, '') AS author_name,
89
+		       i.updated_at,
90
+		       %[1]s * CASE WHEN s.state = 'open' THEN 1.5 ELSE 1.0 END AS rank
91
+		FROM issues_search s
92
+		JOIN issues i  ON i.id = s.issue_id
93
+		JOIN repos r   ON r.id = s.repo_id
94
+		JOIN users u   ON u.id = r.owner_user_id
95
+		LEFT JOIN users au ON au.id = s.author_user_id
96
+		WHERE %[2]s
97
+		  AND %[3]s
98
+		  %[4]s
99
+		ORDER BY rank DESC, i.updated_at DESC
100
+		LIMIT $%[5]d OFFSET $%[6]d
101
+	`, rankExpr, whereFTS, visClause, whereExtras, limPos, offPos)
102
+
103
+	rows, err := deps.Pool.Query(ctx, queryStr, args...)
104
+	if err != nil {
105
+		return nil, 0, fmt.Errorf("search issues: %w", err)
106
+	}
107
+	defer rows.Close()
108
+	out := make([]IssueResult, 0, limit)
109
+	for rows.Next() {
110
+		var r IssueResult
111
+		if err := rows.Scan(&r.ID, &r.RepoID, &r.OwnerUsername, &r.RepoName,
112
+			&r.Number, &r.Title, &r.State, &r.Kind, &r.AuthorName,
113
+			&r.UpdatedAt, &r.Rank); err != nil {
114
+			return nil, 0, err
115
+		}
116
+		out = append(out, r)
117
+	}
118
+	if err := rows.Err(); err != nil {
119
+		return nil, 0, err
120
+	}
121
+
122
+	countQuery := fmt.Sprintf(`
123
+		SELECT count(*)
124
+		FROM issues_search s
125
+		JOIN issues i  ON i.id = s.issue_id
126
+		JOIN repos r   ON r.id = s.repo_id
127
+		JOIN users u   ON u.id = r.owner_user_id
128
+		WHERE %[1]s AND %[2]s %[3]s
129
+	`, whereFTS, visClause, whereExtras)
130
+	var total int64
131
+	if err := deps.Pool.QueryRow(ctx, countQuery, args[:len(args)-2]...).Scan(&total); err != nil {
132
+		return nil, 0, fmt.Errorf("count issues: %w", err)
133
+	}
134
+	return out, total, nil
135
+}
internal/search/query_parse.goadded
@@ -0,0 +1,100 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package search
4
+
5
+import "strings"
6
+
7
// ParsedQuery is the result of running a raw user-typed query
// through ParseQuery. The free-text portion is what flows into
// `plainto_tsquery` / `phraseto_tsquery`; the operator fields are
// what compose the SQL `WHERE` filters.
//
// When both Phrase and Text are set, tsQueryBindAndCtor uses the
// phrase and ignores the free text.
//
// `RepoFilter` carries the `owner/name` pair when the user typed
// `repo:owner/name`; both halves must be present for the filter
// to take effect (a bare `repo:foo` without slash is treated as
// free text).
type ParsedQuery struct {
	Text         string  // free-text query (what tsvector matches against)
	Phrase       string  // when a quoted phrase was supplied; empty when not
	RepoFilter   *RepoFilter // nil when no well-formed repo: operator was supplied
	StateFilter  string // "open" | "closed" | ""
	AuthorFilter string // username or empty
}
23
+
24
// RepoFilter splits the `repo:owner/name` operator value. ParseQuery
// only builds one when both halves are non-empty.
type RepoFilter struct {
	Owner string // text before the first '/'
	Name  string // text after the first '/'
}
29
+
30
+// ParseQuery splits a raw query string into the free-text portion +
31
+// recognised operators. v1 supports `repo:`, `is:`, `state:`,
32
+// `author:`. `is:` and `state:` are aliases.
33
+//
34
+// A quoted run of tokens becomes the Phrase field (one quoted span
35
+// per query in v1). The Text field excludes quoted phrases and
36
+// operator tokens.
37
+//
38
+// Operators and free-text tokens are space-delimited; the parser is
39
+// intentionally tolerant — unknown prefixes (e.g. `language:Go`)
40
+// fall through as free text so future operator additions don't
41
+// break old queries.
42
+func ParseQuery(raw string) ParsedQuery {
43
+	out := ParsedQuery{}
44
+	if raw == "" {
45
+		return out
46
+	}
47
+	if len(raw) > MaxQueryBytes {
48
+		raw = raw[:MaxQueryBytes]
49
+	}
50
+
51
+	// Pull quoted phrases out first. Single-pass: find the first
52
+	// pair of "..." and treat that as the phrase. Anything else
53
+	// quoted is treated as free text.
54
+	if start := strings.IndexByte(raw, '"'); start >= 0 {
55
+		end := strings.IndexByte(raw[start+1:], '"')
56
+		if end > 0 {
57
+			out.Phrase = strings.TrimSpace(raw[start+1 : start+1+end])
58
+			raw = raw[:start] + " " + raw[start+1+end+1:]
59
+		}
60
+	}
61
+
62
+	var freeText []string
63
+	for _, tok := range strings.Fields(raw) {
64
+		switch {
65
+		case strings.HasPrefix(tok, "repo:"):
66
+			val := strings.TrimPrefix(tok, "repo:")
67
+			if i := strings.IndexByte(val, '/'); i > 0 && i < len(val)-1 {
68
+				out.RepoFilter = &RepoFilter{Owner: val[:i], Name: val[i+1:]}
69
+			} else {
70
+				freeText = append(freeText, tok) // not owner/name shape — fall through
71
+			}
72
+		case strings.HasPrefix(tok, "is:"), strings.HasPrefix(tok, "state:"):
73
+			val := strings.TrimPrefix(tok, "is:")
74
+			val = strings.TrimPrefix(val, "state:")
75
+			if val == "open" || val == "closed" {
76
+				out.StateFilter = val
77
+			} else {
78
+				freeText = append(freeText, tok)
79
+			}
80
+		case strings.HasPrefix(tok, "author:"):
81
+			val := strings.TrimPrefix(tok, "author:")
82
+			if val != "" {
83
+				out.AuthorFilter = val
84
+			} else {
85
+				freeText = append(freeText, tok)
86
+			}
87
+		default:
88
+			freeText = append(freeText, tok)
89
+		}
90
+	}
91
+	out.Text = strings.TrimSpace(strings.Join(freeText, " "))
92
+	return out
93
+}
94
+
95
+// HasContent reports whether the parsed query contains anything
96
+// searchable (free text, phrase, or any operator).
97
+func (p ParsedQuery) HasContent() bool {
98
+	return p.Text != "" || p.Phrase != "" || p.RepoFilter != nil ||
99
+		p.StateFilter != "" || p.AuthorFilter != ""
100
+}
internal/search/repos.goadded
@@ -0,0 +1,135 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package search
4
+
5
+import (
6
+	"context"
7
+	"fmt"
8
+
9
+	"github.com/tenseleyFlow/shithub/internal/auth/policy"
10
+)
11
+
12
+// SearchRepos runs a repo search visible to actor. limit / offset
13
+// drive paging.
14
+//
15
+// Ranking: `ts_rank_cd * (1 + ln(1 + star_count)) * recency_decay`
16
+// where recency_decay is `1 / (1 + days_since_update / 30)`. The
17
+// whole rank computation lives in SQL so Postgres can short-circuit
18
+// on the GIN index.
19
+func SearchRepos(ctx context.Context, deps Deps, actor policy.Actor, q ParsedQuery, limit, offset int) ([]RepoResult, int64, error) {
20
+	if !q.HasContent() {
21
+		return nil, 0, ErrEmptyQuery
22
+	}
23
+
24
+	tsText, tsCtor, hasFTS := tsQueryBindAndCtor(q)
25
+
26
+	// repo:owner/name AND no free-text → list that one repo.
27
+	if !hasFTS && q.RepoFilter == nil {
28
+		return nil, 0, nil
29
+	}
30
+
31
+	// $1 is the tsquery text payload (only when hasFTS); the
32
+	// visibility predicate gets the next placeholders.
33
+	args := []any{}
34
+	tsPlaceholder := 0
35
+	if hasFTS {
36
+		args = append(args, tsText)
37
+		tsPlaceholder = len(args)
38
+	}
39
+	visClause, visArgs := policy.VisibilityPredicate(actor, "r", len(args)+1)
40
+	args = append(args, visArgs...)
41
+
42
+	repoFilter := ""
43
+	if q.RepoFilter != nil {
44
+		ownerPos := len(args) + 1
45
+		namePos := len(args) + 2
46
+		args = append(args, q.RepoFilter.Owner, q.RepoFilter.Name)
47
+		repoFilter = fmt.Sprintf(
48
+			" AND r.id = (SELECT r2.id FROM repos r2 JOIN users u2 ON u2.id = r2.owner_user_id "+
49
+				"WHERE u2.username = $%d AND r2.name = $%d AND r2.deleted_at IS NULL)",
50
+			ownerPos, namePos,
51
+		)
52
+	}
53
+
54
+	whereFTS := "TRUE"
55
+	rankExpr := "1.0"
56
+	if hasFTS {
57
+		whereFTS = fmt.Sprintf("rs.tsv @@ %s('shithub_search', $%d)", tsCtor, tsPlaceholder)
58
+		rankExpr = fmt.Sprintf("ts_rank_cd(rs.tsv, %s('shithub_search', $%d))", tsCtor, tsPlaceholder)
59
+	}
60
+
61
+	limPos := len(args) + 1
62
+	offPos := len(args) + 2
63
+	args = append(args, limit, offset)
64
+
65
+	queryStr := fmt.Sprintf(`
66
+		SELECT r.id, u.username, r.name, r.description, r.visibility::text,
67
+		       r.star_count, r.updated_at,
68
+		       %[1]s
69
+		           * (1.0 + ln(1.0 + r.star_count))
70
+		           * (1.0 / (1.0 + EXTRACT(EPOCH FROM (now() - r.updated_at)) / 86400.0 / 30.0))
71
+		       AS rank
72
+		FROM repos_search rs
73
+		JOIN repos r  ON r.id = rs.repo_id
74
+		JOIN users u  ON u.id = r.owner_user_id
75
+		WHERE %[2]s
76
+		  AND %[3]s
77
+		  %[4]s
78
+		ORDER BY rank DESC, r.updated_at DESC
79
+		LIMIT $%[5]d OFFSET $%[6]d
80
+	`, rankExpr, whereFTS, visClause, repoFilter, limPos, offPos)
81
+
82
+	rows, err := deps.Pool.Query(ctx, queryStr, args...)
83
+	if err != nil {
84
+		return nil, 0, fmt.Errorf("search repos: %w", err)
85
+	}
86
+	defer rows.Close()
87
+	out := make([]RepoResult, 0, limit)
88
+	for rows.Next() {
89
+		var r RepoResult
90
+		if err := rows.Scan(&r.ID, &r.OwnerUsername, &r.Name, &r.Description,
91
+			&r.Visibility, &r.StarCount, &r.UpdatedAt, &r.Rank); err != nil {
92
+			return nil, 0, err
93
+		}
94
+		out = append(out, r)
95
+	}
96
+	if err := rows.Err(); err != nil {
97
+		return nil, 0, err
98
+	}
99
+
100
+	// Total count for pagination — re-runs the WHERE without the
101
+	// LIMIT/OFFSET tail.
102
+	countQuery := fmt.Sprintf(`
103
+		SELECT count(*)
104
+		FROM repos_search rs
105
+		JOIN repos r  ON r.id = rs.repo_id
106
+		JOIN users u  ON u.id = r.owner_user_id
107
+		WHERE %[1]s AND %[2]s %[3]s
108
+	`, whereFTS, visClause, repoFilter)
109
+	var total int64
110
+	if err := deps.Pool.QueryRow(ctx, countQuery, args[:len(args)-2]...).Scan(&total); err != nil {
111
+		return nil, 0, fmt.Errorf("count repos: %w", err)
112
+	}
113
+	return out, total, nil
114
+}
115
+
116
+// tsQueryBindAndCtor returns the tsquery payload + the SQL
117
+// constructor function name, plus a flag indicating whether there's
118
+// any FTS payload to bind. Phrase wins over free text when supplied.
119
+//
120
+// The SQL constructor is one of:
121
+//
122
+//	plainto_tsquery('shithub_search', $N)
123
+//	phraseto_tsquery('shithub_search', $N)
124
+//
125
+// Both are user-input safe — they accept arbitrary text without
126
+// rejecting malformed boolean syntax (unlike `to_tsquery`).
127
+func tsQueryBindAndCtor(q ParsedQuery) (text, ctor string, hasFTS bool) {
128
+	if q.Phrase != "" {
129
+		return q.Phrase, "phraseto_tsquery", true
130
+	}
131
+	if q.Text != "" {
132
+		return q.Text, "plainto_tsquery", true
133
+	}
134
+	return "", "", false
135
+}
internal/search/search.goadded
@@ -0,0 +1,97 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+// Package search owns S28's search surface. Postgres FTS
4
+// (`tsvector` + `pg_trgm`) backs everything; no external search
5
+// engine. Visibility scoping flows through `policy.VisibilityPredicate`
6
+// so every query is gated by the same rule the rest of the runtime
7
+// uses.
8
+//
9
+// Entry points are:
10
+//
11
+//	SearchRepos / SearchIssues / SearchUsers / SearchCode — per-type
12
+//	    queries returning slices of result rows + the total count.
13
+//	ParseQuery — splits a user query string into the FTS query plus
14
+//	    operator filters (repo:, is:, author:, state:).
15
+package search
16
+
17
+import (
18
+	"errors"
19
+	"log/slog"
20
+	"time"
21
+
22
+	"github.com/jackc/pgx/v5/pgxpool"
23
+)
24
+
25
// Deps wires the package against the rest of the runtime.
type Deps struct {
	// Pool is the connection pool every Search* function runs its
	// result and count queries on.
	Pool   *pgxpool.Pool
	// Logger is carried for callers' convenience; the search functions
	// shown in this package do not log through it.
	Logger *slog.Logger
}
30
+
31
// Errors surfaced to handlers.
var (
	// ErrEmptyQuery is returned by the Search* functions when the
	// parsed query has no text, phrase, or operator at all.
	ErrEmptyQuery = errors.New("search: query is empty")
)

// PageSize is the per-type result count for the full results page.
// Quick-dropdown uses a smaller cap (see QuickResultsLimit).
const PageSize = 20

// QuickResultsLimit is the per-type cap for the top-bar quick
// dropdown. Same shape as GitHub's: tight for keystroke speed.
const QuickResultsLimit = 5

// MaxQueryBytes caps incoming query strings. Tighter than the
// markdown 1 MiB ceiling because no legitimate search ever needs
// even 1 KiB. ParseQuery enforces it by truncating longer input.
const MaxQueryBytes = 256
48
+
49
// RepoResult is one row from SearchRepos.
type RepoResult struct {
	ID            int64
	OwnerUsername string // username of the repo's owning user
	Name          string
	Description   string
	Visibility    string // repo visibility rendered as text
	StarCount     int64
	UpdatedAt     time.Time
	// Rank is the SQL-computed score: text rank scaled by star count
	// and recency. Higher sorts first.
	Rank          float64
}
60
+
61
// IssueResult is one row from SearchIssues.
type IssueResult struct {
	ID            int64
	RepoID        int64
	OwnerUsername string // username of the owning repo's owner
	RepoName      string
	Number        int64
	Title         string
	State         string // issue state rendered as text
	Kind          string // "issue" | "pr"
	AuthorName    string // empty when the author row is missing
	UpdatedAt     time.Time
	// Rank is the text rank multiplied by the state weight (1.5 for
	// open, 1.0 otherwise). Higher sorts first.
	Rank          float64
}
75
+
76
// UserResult is one row from SearchUsers.
type UserResult struct {
	ID          int64
	Username    string
	DisplayName string
	Bio         string  // empty when the user has no bio
	Rank        float64 // ts_rank_cd score; higher sorts first
}
84
+
85
// CodeResult is one row from SearchCode. Path is always populated,
// whether the row came from the path subquery or the content
// subquery; a file matching both yields two rows.
type CodeResult struct {
	RepoID        int64
	OwnerUsername string
	RepoName      string
	RefName       string
	Path          string
	// PreviewLine is a single line of content extracted near the
	// match, when content matched. Empty for path-only hits.
	// NOTE(review): SearchCode currently selects an empty preview for
	// content hits as well, so this is always empty for now.
	PreviewLine string
	// Rank is the ts_rank_cd score, with +1.0 added for path hits.
	Rank        float64
}
internal/search/search_test.goadded
@@ -0,0 +1,325 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package search_test
4
+
5
+import (
6
+	"context"
7
+	"errors"
8
+	"io"
9
+	"log/slog"
10
+	"strings"
11
+	"testing"
12
+
13
+	"github.com/jackc/pgx/v5/pgtype"
14
+
15
+	"github.com/tenseleyFlow/shithub/internal/auth/policy"
16
+	policydb "github.com/tenseleyFlow/shithub/internal/auth/policy/sqlc"
17
+	"github.com/tenseleyFlow/shithub/internal/issues"
18
+	issuesdb "github.com/tenseleyFlow/shithub/internal/issues/sqlc"
19
+	reposdb "github.com/tenseleyFlow/shithub/internal/repos/sqlc"
20
+	"github.com/tenseleyFlow/shithub/internal/search"
21
+	"github.com/tenseleyFlow/shithub/internal/testing/dbtest"
22
+	usersdb "github.com/tenseleyFlow/shithub/internal/users/sqlc"
23
+)
24
+
25
// fixtureHash is a placeholder string in argon2id encoded-hash layout,
// used as the PasswordHash of every fixture user. No test here logs in
// with it.
const fixtureHash = "$argon2id$v=19$m=16384,t=1,p=1$" +
	"AAAAAAAAAAAAAAAA$" +
	"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
28
+
29
+// TestParseQuery covers the operator parser end-to-end.
30
+func TestParseQuery(t *testing.T) {
31
+	t.Parallel()
32
+	cases := []struct {
33
+		in   string
34
+		want search.ParsedQuery
35
+	}{
36
+		{"", search.ParsedQuery{}},
37
+		{"hello world", search.ParsedQuery{Text: "hello world"}},
38
+		{`"quoted phrase"`, search.ParsedQuery{Phrase: "quoted phrase"}},
39
+		{"repo:alice/demo bug", search.ParsedQuery{Text: "bug",
40
+			RepoFilter: &search.RepoFilter{Owner: "alice", Name: "demo"}}},
41
+		{"repo:noslash bug", search.ParsedQuery{Text: "repo:noslash bug"}},
42
+		{"is:open broken", search.ParsedQuery{Text: "broken", StateFilter: "open"}},
43
+		{"state:closed bug", search.ParsedQuery{Text: "bug", StateFilter: "closed"}},
44
+		{"author:bob fix", search.ParsedQuery{Text: "fix", AuthorFilter: "bob"}},
45
+		{"language:Go x", search.ParsedQuery{Text: "language:Go x"}},
46
+	}
47
+	for _, c := range cases {
48
+		got := search.ParseQuery(c.in)
49
+		if got.Text != c.want.Text || got.Phrase != c.want.Phrase ||
50
+			got.StateFilter != c.want.StateFilter || got.AuthorFilter != c.want.AuthorFilter {
51
+			t.Errorf("ParseQuery(%q):\n  got  %+v\n  want %+v", c.in, got, c.want)
52
+			continue
53
+		}
54
+		if (got.RepoFilter == nil) != (c.want.RepoFilter == nil) {
55
+			t.Errorf("ParseQuery(%q): repo-filter presence mismatch", c.in)
56
+			continue
57
+		}
58
+		if got.RepoFilter != nil && (*got.RepoFilter != *c.want.RepoFilter) {
59
+			t.Errorf("ParseQuery(%q): repo-filter %+v, want %+v",
60
+				c.in, *got.RepoFilter, *c.want.RepoFilter)
61
+		}
62
+	}
63
+}
64
+
65
+// TestParseQuery_TruncatesOverlong ensures the input cap fires.
66
+func TestParseQuery_TruncatesOverlong(t *testing.T) {
67
+	t.Parallel()
68
+	long := strings.Repeat("x", search.MaxQueryBytes+50)
69
+	got := search.ParseQuery(long)
70
+	if len(got.Text) > search.MaxQueryBytes {
71
+		t.Errorf("Text len = %d, want ≤ %d", len(got.Text), search.MaxQueryBytes)
72
+	}
73
+}
74
+
75
// fxs is a fixture for visibility tests: alice owns one public + one
// private repo, each with one issue. bob is a separate user, no
// access to the private side.
type fxs struct {
	deps    search.Deps  // search dependencies bound to the per-test database
	alice   usersdb.User // owner of both repos and author of both issues
	bob     usersdb.User // unrelated user with no collaborator role by default
	pubRepo reposdb.Repo // alice/publicrepo
	prvRepo reposdb.Repo // alice/privaterepo
}
85
+
86
// setup builds the shared fixture on a fresh test database: users
// alice and bob, a public and a private repo owned by alice, and one
// issue in each repo authored by alice. Any failure aborts the calling
// test via t.Fatalf.
func setup(t *testing.T) fxs {
	t.Helper()
	pool := dbtest.NewTestDB(t)
	ctx := context.Background()

	// Users come first: repos and issues reference their IDs.
	uq := usersdb.New()
	alice, err := uq.CreateUser(ctx, pool, usersdb.CreateUserParams{
		Username: "alice", DisplayName: "Alice", PasswordHash: fixtureHash,
	})
	if err != nil {
		t.Fatalf("CreateUser alice: %v", err)
	}
	bob, err := uq.CreateUser(ctx, pool, usersdb.CreateUserParams{
		Username: "bob", DisplayName: "Bob", PasswordHash: fixtureHash,
	})
	if err != nil {
		t.Fatalf("CreateUser bob: %v", err)
	}

	// The descriptions are the words the repo tests search for
	// ("secrets" only appears on the private repo).
	rq := reposdb.New()
	pubRepo, err := rq.CreateRepo(ctx, pool, reposdb.CreateRepoParams{
		OwnerUserID:   pgtype.Int8{Int64: alice.ID, Valid: true},
		Name:          "publicrepo",
		Description:   "a public sample",
		DefaultBranch: "trunk",
		Visibility:    reposdb.RepoVisibilityPublic,
	})
	if err != nil {
		t.Fatalf("CreateRepo public: %v", err)
	}
	prvRepo, err := rq.CreateRepo(ctx, pool, reposdb.CreateRepoParams{
		OwnerUserID:   pgtype.Int8{Int64: alice.ID, Valid: true},
		Name:          "privaterepo",
		Description:   "secrets here",
		DefaultBranch: "trunk",
		Visibility:    reposdb.RepoVisibilityPrivate,
	})
	if err != nil {
		t.Fatalf("CreateRepo private: %v", err)
	}

	// The per-repo issue counter is ensured before any issue is
	// created (presumably issues.Create draws issue numbers from it —
	// confirm against the issues package).
	iq := issuesdb.New()
	for _, r := range []reposdb.Repo{pubRepo, prvRepo} {
		if err := iq.EnsureRepoIssueCounter(ctx, pool, r.ID); err != nil {
			t.Fatalf("EnsureRepoIssueCounter: %v", err)
		}
	}
	idep := issues.Deps{Pool: pool, Logger: slog.New(slog.NewTextHandler(io.Discard, nil))}
	if _, err := issues.Create(ctx, idep, issues.CreateParams{
		RepoID: pubRepo.ID, AuthorUserID: alice.ID,
		Title: "public bug report", Body: "nothing secret",
	}); err != nil {
		t.Fatalf("Create issue pub: %v", err)
	}
	if _, err := issues.Create(ctx, idep, issues.CreateParams{
		RepoID: prvRepo.ID, AuthorUserID: alice.ID,
		Title: "private secret design", Body: "internal only",
	}); err != nil {
		t.Fatalf("Create issue prv: %v", err)
	}

	return fxs{
		deps: search.Deps{
			Pool:   pool,
			Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
		},
		alice: alice, bob: bob, pubRepo: pubRepo, prvRepo: prvRepo,
	}
}
155
+
156
+// TestSearchRepos_AnonymousSeesOnlyPublic guards the visibility
157
+// boundary — the highest-stakes assertion in the search surface.
158
+func TestSearchRepos_AnonymousSeesOnlyPublic(t *testing.T) {
159
+	f := setup(t)
160
+	got, _, err := search.SearchRepos(context.Background(), f.deps,
161
+		policy.AnonymousActor(),
162
+		search.ParseQuery("repo"),
163
+		20, 0)
164
+	if err != nil {
165
+		t.Fatalf("SearchRepos: %v", err)
166
+	}
167
+	for _, r := range got {
168
+		if r.Visibility == "private" {
169
+			t.Errorf("anonymous saw private repo %q — visibility leak!", r.Name)
170
+		}
171
+	}
172
+	// Sanity: public repo is in the results.
173
+	found := false
174
+	for _, r := range got {
175
+		if r.Name == "publicrepo" {
176
+			found = true
177
+		}
178
+	}
179
+	if !found {
180
+		t.Errorf("expected publicrepo in anon results, got %d rows", len(got))
181
+	}
182
+}
183
+
184
+// TestSearchRepos_NonCollabOnPrivate matches the spec's private-
185
+// content-stays-private contract.
186
+func TestSearchRepos_NonCollabOnPrivate(t *testing.T) {
187
+	f := setup(t)
188
+	bobActor := policy.UserActor(f.bob.ID, f.bob.Username, false, false)
189
+	got, _, err := search.SearchRepos(context.Background(), f.deps, bobActor,
190
+		search.ParseQuery("secrets"), 20, 0)
191
+	if err != nil {
192
+		t.Fatalf("SearchRepos: %v", err)
193
+	}
194
+	if len(got) != 0 {
195
+		t.Errorf("non-collab bob saw %d results for 'secrets', want 0", len(got))
196
+	}
197
+}
198
+
199
+// TestSearchRepos_OwnerSeesPrivate confirms the predicate's owner
200
+// branch.
201
+func TestSearchRepos_OwnerSeesPrivate(t *testing.T) {
202
+	f := setup(t)
203
+	alice := policy.UserActor(f.alice.ID, f.alice.Username, false, false)
204
+	got, _, err := search.SearchRepos(context.Background(), f.deps, alice,
205
+		search.ParseQuery("secrets"), 20, 0)
206
+	if err != nil {
207
+		t.Fatalf("SearchRepos: %v", err)
208
+	}
209
+	if len(got) == 0 {
210
+		t.Fatalf("owner alice should see her private repo for 'secrets'")
211
+	}
212
+}
213
+
214
+// TestSearchRepos_CollabSeesPrivate exercises the collaborator
215
+// branch of the visibility predicate.
216
+func TestSearchRepos_CollabSeesPrivate(t *testing.T) {
217
+	f := setup(t)
218
+	ctx := context.Background()
219
+	pq := policydb.New()
220
+	if err := pq.UpsertCollabRole(ctx, f.deps.Pool, policydb.UpsertCollabRoleParams{
221
+		RepoID: f.prvRepo.ID, UserID: f.bob.ID, Role: policydb.CollabRoleRead,
222
+	}); err != nil {
223
+		t.Fatalf("UpsertCollabRole: %v", err)
224
+	}
225
+	bobActor := policy.UserActor(f.bob.ID, f.bob.Username, false, false)
226
+	got, _, err := search.SearchRepos(ctx, f.deps, bobActor,
227
+		search.ParseQuery("secrets"), 20, 0)
228
+	if err != nil {
229
+		t.Fatalf("SearchRepos: %v", err)
230
+	}
231
+	if len(got) == 0 {
232
+		t.Errorf("collab bob should see private repo via 'secrets'")
233
+	}
234
+}
235
+
236
+// TestSearchIssues_AnonymousSeesOnlyPublic mirrors the repo test
237
+// for the issue surface — issues inherit visibility from their repo.
238
+func TestSearchIssues_AnonymousSeesOnlyPublic(t *testing.T) {
239
+	f := setup(t)
240
+	got, _, err := search.SearchIssues(context.Background(), f.deps,
241
+		policy.AnonymousActor(),
242
+		search.ParseQuery("secret"),
243
+		"issue", 20, 0)
244
+	if err != nil {
245
+		t.Fatalf("SearchIssues: %v", err)
246
+	}
247
+	if len(got) != 0 {
248
+		t.Errorf("anonymous saw %d issues for 'secret', want 0 (private leak)", len(got))
249
+	}
250
+}
251
+
252
+func TestSearchIssues_StateFilter(t *testing.T) {
253
+	f := setup(t)
254
+	ctx := context.Background()
255
+	alice := policy.UserActor(f.alice.ID, f.alice.Username, false, false)
256
+
257
+	// Open a second issue and close it.
258
+	idep := issues.Deps{Pool: f.deps.Pool, Logger: slog.New(slog.NewTextHandler(io.Discard, nil))}
259
+	closed, _ := issues.Create(ctx, idep, issues.CreateParams{
260
+		RepoID: f.pubRepo.ID, AuthorUserID: f.alice.ID,
261
+		Title: "closed bug", Body: "fixed",
262
+	})
263
+	if err := issues.SetState(ctx, idep, f.alice.ID, closed.ID, "closed", "completed"); err != nil {
264
+		t.Fatalf("SetState: %v", err)
265
+	}
266
+
267
+	openHits, _, _ := search.SearchIssues(ctx, f.deps, alice,
268
+		search.ParseQuery("is:open bug"), "", 20, 0)
269
+	for _, h := range openHits {
270
+		if h.State != "open" {
271
+			t.Errorf("is:open: got state=%s", h.State)
272
+		}
273
+	}
274
+	closedHits, _, _ := search.SearchIssues(ctx, f.deps, alice,
275
+		search.ParseQuery("is:closed bug"), "", 20, 0)
276
+	for _, h := range closedHits {
277
+		if h.State != "closed" {
278
+			t.Errorf("is:closed: got state=%s", h.State)
279
+		}
280
+	}
281
+}
282
+
283
+func TestSearchIssues_RepoFilter(t *testing.T) {
284
+	f := setup(t)
285
+	alice := policy.UserActor(f.alice.ID, f.alice.Username, false, false)
286
+	got, _, err := search.SearchIssues(context.Background(), f.deps, alice,
287
+		search.ParseQuery("repo:alice/publicrepo bug"), "", 20, 0)
288
+	if err != nil {
289
+		t.Fatalf("SearchIssues: %v", err)
290
+	}
291
+	for _, h := range got {
292
+		if h.OwnerUsername != "alice" || h.RepoName != "publicrepo" {
293
+			t.Errorf("repo: filter let through %s/%s", h.OwnerUsername, h.RepoName)
294
+		}
295
+	}
296
+}
297
+
298
+func TestSearchUsers_ExcludesSuspended(t *testing.T) {
299
+	f := setup(t)
300
+	ctx := context.Background()
301
+	if _, err := f.deps.Pool.Exec(ctx,
302
+		"UPDATE users SET suspended_at = now() WHERE id = $1", f.bob.ID); err != nil {
303
+		t.Fatalf("suspend: %v", err)
304
+	}
305
+	got, _, err := search.SearchUsers(ctx, f.deps, search.ParseQuery("bob"), 20, 0)
306
+	if err != nil {
307
+		t.Fatalf("SearchUsers: %v", err)
308
+	}
309
+	for _, u := range got {
310
+		if u.Username == "bob" {
311
+			t.Errorf("suspended bob in user search results")
312
+		}
313
+	}
314
+}
315
+
316
+// TestSearchRepos_EmptyQuery surfaces the typed error so handlers
317
+// can render a friendly empty state rather than a SQL error.
318
+func TestSearchRepos_EmptyQuery(t *testing.T) {
319
+	f := setup(t)
320
+	_, _, err := search.SearchRepos(context.Background(), f.deps,
321
+		policy.AnonymousActor(), search.ParsedQuery{}, 20, 0)
322
+	if !errors.Is(err, search.ErrEmptyQuery) {
323
+		t.Errorf("expected ErrEmptyQuery, got %v", err)
324
+	}
325
+}
internal/search/users.goadded
@@ -0,0 +1,66 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package search
4
+
5
+import (
6
+	"context"
7
+	"fmt"
8
+)
9
+
10
+// SearchUsers runs a user search. No visibility predicate — user
11
+// profiles are public by definition; suspended/deleted accounts are
12
+// excluded so they don't taint search results (matches the spec
13
+// pitfall: "search on suspended user content").
14
+func SearchUsers(ctx context.Context, deps Deps, q ParsedQuery, limit, offset int) ([]UserResult, int64, error) {
15
+	if !q.HasContent() {
16
+		return nil, 0, ErrEmptyQuery
17
+	}
18
+	tsText, tsCtor, hasFTS := tsQueryBindAndCtor(q)
19
+	if !hasFTS {
20
+		// Operators don't apply to user search; with no FTS payload
21
+		// there's nothing to match.
22
+		return nil, 0, nil
23
+	}
24
+
25
+	args := []any{tsText, limit, offset}
26
+	queryStr := fmt.Sprintf(`
27
+		SELECT u.id, u.username::text, u.display_name, coalesce(u.bio, ''),
28
+		       ts_rank_cd(s.tsv, %[1]s('shithub_search', $1)) AS rank
29
+		FROM users_search s
30
+		JOIN users u ON u.id = s.user_id
31
+		WHERE s.tsv @@ %[1]s('shithub_search', $1)
32
+		  AND u.suspended_at IS NULL
33
+		  AND u.deleted_at IS NULL
34
+		ORDER BY rank DESC, u.username
35
+		LIMIT $2 OFFSET $3
36
+	`, tsCtor)
37
+	rows, err := deps.Pool.Query(ctx, queryStr, args...)
38
+	if err != nil {
39
+		return nil, 0, fmt.Errorf("search users: %w", err)
40
+	}
41
+	defer rows.Close()
42
+	out := make([]UserResult, 0, limit)
43
+	for rows.Next() {
44
+		var r UserResult
45
+		if err := rows.Scan(&r.ID, &r.Username, &r.DisplayName, &r.Bio, &r.Rank); err != nil {
46
+			return nil, 0, err
47
+		}
48
+		out = append(out, r)
49
+	}
50
+	if err := rows.Err(); err != nil {
51
+		return nil, 0, err
52
+	}
53
+
54
+	countQuery := fmt.Sprintf(`
55
+		SELECT count(*) FROM users_search s
56
+		JOIN users u ON u.id = s.user_id
57
+		WHERE s.tsv @@ %[1]s('shithub_search', $1)
58
+		  AND u.suspended_at IS NULL
59
+		  AND u.deleted_at IS NULL
60
+	`, tsCtor)
61
+	var total int64
62
+	if err := deps.Pool.QueryRow(ctx, countQuery, tsText).Scan(&total); err != nil {
63
+		return nil, 0, fmt.Errorf("count users: %w", err)
64
+	}
65
+	return out, total, nil
66
+}