| 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | |
| 3 | package search |
| 4 | |
| 5 | import ( |
| 6 | "context" |
| 7 | "fmt" |
| 8 | |
| 9 | "github.com/tenseleyFlow/shithub/internal/auth/policy" |
| 10 | ) |
| 11 | |
| 12 | // SearchRepos runs a repo search visible to actor. limit / offset |
| 13 | // drive paging. |
| 14 | // |
| 15 | // Ranking: `ts_rank_cd * (1 + ln(1 + star_count)) * recency_decay` |
| 16 | // where recency_decay is `1 / (1 + days_since_update / 30)`. The |
| 17 | // whole rank computation lives in SQL so Postgres can short-circuit |
| 18 | // on the GIN index. |
| 19 | func SearchRepos(ctx context.Context, deps Deps, actor policy.Actor, q ParsedQuery, limit, offset int) ([]RepoResult, int64, error) { |
| 20 | if !q.HasContent() { |
| 21 | return nil, 0, ErrEmptyQuery |
| 22 | } |
| 23 | |
| 24 | tsText, tsCtor, hasFTS := tsQueryBindAndCtor(q) |
| 25 | |
| 26 | // repo:owner/name AND no free-text → list that one repo. |
| 27 | if !hasFTS && q.RepoFilter == nil { |
| 28 | return nil, 0, nil |
| 29 | } |
| 30 | |
| 31 | // $1 is the tsquery text payload (only when hasFTS); the |
| 32 | // visibility predicate gets the next placeholders. |
| 33 | args := []any{} |
| 34 | tsPlaceholder := 0 |
| 35 | if hasFTS { |
| 36 | args = append(args, tsText) |
| 37 | tsPlaceholder = len(args) |
| 38 | } |
| 39 | visClause, visArgs := policy.VisibilityPredicate(actor, "r", len(args)+1) |
| 40 | args = append(args, visArgs...) |
| 41 | |
| 42 | repoFilter := "" |
| 43 | if q.RepoFilter != nil { |
| 44 | ownerPos := len(args) + 1 |
| 45 | namePos := len(args) + 2 |
| 46 | args = append(args, q.RepoFilter.Owner, q.RepoFilter.Name) |
| 47 | repoFilter = repoFilterByOwnerName("r", ownerPos, namePos) |
| 48 | } |
| 49 | |
| 50 | whereFTS := "TRUE" |
| 51 | rankExpr := "1.0" |
| 52 | if hasFTS { |
| 53 | whereFTS = fmt.Sprintf("rs.tsv @@ %s('shithub_search', $%d)", tsCtor, tsPlaceholder) |
| 54 | rankExpr = fmt.Sprintf("ts_rank_cd(rs.tsv, %s('shithub_search', $%d))", tsCtor, tsPlaceholder) |
| 55 | } |
| 56 | |
| 57 | limPos := len(args) + 1 |
| 58 | offPos := len(args) + 2 |
| 59 | args = append(args, limit, offset) |
| 60 | |
| 61 | queryStr := fmt.Sprintf(` |
| 62 | SELECT r.id, %[7]s, r.name, r.description, r.visibility::text, |
| 63 | r.star_count, r.updated_at, |
| 64 | %[1]s |
| 65 | * (1.0 + ln(1.0 + r.star_count)) |
| 66 | * (1.0 / (1.0 + EXTRACT(EPOCH FROM (now() - r.updated_at)) / 86400.0 / 30.0)) |
| 67 | AS rank |
| 68 | FROM repos_search rs |
| 69 | JOIN repos r ON r.id = rs.repo_id |
| 70 | %[8]s |
| 71 | WHERE %[2]s |
| 72 | AND %[3]s |
| 73 | %[4]s |
| 74 | ORDER BY rank DESC, r.updated_at DESC |
| 75 | LIMIT $%[5]d OFFSET $%[6]d |
| 76 | `, rankExpr, whereFTS, visClause, repoFilter, limPos, offPos, repoOwnerNameExpr("u", "o"), repoOwnerJoin("r", "u", "o")) |
| 77 | |
| 78 | rows, err := deps.Pool.Query(ctx, queryStr, args...) |
| 79 | if err != nil { |
| 80 | return nil, 0, fmt.Errorf("search repos: %w", err) |
| 81 | } |
| 82 | defer rows.Close() |
| 83 | out := make([]RepoResult, 0, limit) |
| 84 | for rows.Next() { |
| 85 | var r RepoResult |
| 86 | if err := rows.Scan(&r.ID, &r.OwnerUsername, &r.Name, &r.Description, |
| 87 | &r.Visibility, &r.StarCount, &r.UpdatedAt, &r.Rank); err != nil { |
| 88 | return nil, 0, err |
| 89 | } |
| 90 | out = append(out, r) |
| 91 | } |
| 92 | if err := rows.Err(); err != nil { |
| 93 | return nil, 0, err |
| 94 | } |
| 95 | |
| 96 | // Total count for pagination — re-runs the WHERE without the |
| 97 | // LIMIT/OFFSET tail. |
| 98 | countQuery := fmt.Sprintf(` |
| 99 | SELECT count(*) |
| 100 | FROM repos_search rs |
| 101 | JOIN repos r ON r.id = rs.repo_id |
| 102 | WHERE %[1]s AND %[2]s %[3]s |
| 103 | `, whereFTS, visClause, repoFilter) |
| 104 | var total int64 |
| 105 | if err := deps.Pool.QueryRow(ctx, countQuery, args[:len(args)-2]...).Scan(&total); err != nil { |
| 106 | return nil, 0, fmt.Errorf("count repos: %w", err) |
| 107 | } |
| 108 | return out, total, nil |
| 109 | } |
| 110 | |
| 111 | // tsQueryBindAndCtor returns the tsquery payload + the SQL |
| 112 | // constructor function name, plus a flag indicating whether there's |
| 113 | // any FTS payload to bind. Phrase wins over free text when supplied. |
| 114 | // |
| 115 | // The SQL constructor is one of: |
| 116 | // |
| 117 | // plainto_tsquery('shithub_search', $N) |
| 118 | // phraseto_tsquery('shithub_search', $N) |
| 119 | // |
| 120 | // Both are user-input safe — they accept arbitrary text without |
| 121 | // rejecting malformed boolean syntax (unlike `to_tsquery`). |
| 122 | func tsQueryBindAndCtor(q ParsedQuery) (text, ctor string, hasFTS bool) { |
| 123 | if q.Phrase != "" { |
| 124 | return q.Phrase, "phraseto_tsquery", true |
| 125 | } |
| 126 | if q.Text != "" { |
| 127 | return q.Text, "plainto_tsquery", true |
| 128 | } |
| 129 | return "", "", false |
| 130 | } |
| 131 |