| 1 | // SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | |
| 3 | // Package finder implements the "Go to file" fuzzy match used by the |
| 4 | // /find/{ref} endpoint. The matcher is a simple subsequence scorer |
| 5 | // with bonuses for path-segment boundaries — close enough to feel like |
| 6 | // VS Code's quickopen for a few thousand entries without pulling a |
| 7 | // full fuzzy library. |
| 8 | package finder |
| 9 | |
| 10 | import ( |
| 11 | "sort" |
| 12 | "strings" |
| 13 | "unicode" |
| 14 | ) |
| 15 | |
| 16 | // Match is one row in the finder result list. |
| 17 | type Match struct { |
| 18 | Path string |
| 19 | Score int |
| 20 | } |
| 21 | |
| 22 | // Filter returns the top `limit` matches against query, scored highest |
| 23 | // first. A blank query returns the first `limit` paths in input order. |
| 24 | // |
| 25 | // The matcher is case-insensitive and rewards: |
| 26 | // - consecutive characters (longer runs score higher) |
| 27 | // - matches at path-segment starts (after `/` or at index 0) |
| 28 | // - matches at filename basename |
| 29 | // |
| 30 | // Designed for input sizes up to ~50k paths; past that, restrict the |
| 31 | // callable surface or paginate. |
| 32 | func Filter(paths []string, query string, limit int) []Match { |
| 33 | q := strings.TrimSpace(query) |
| 34 | if q == "" { |
| 35 | out := make([]Match, 0, min(limit, len(paths))) |
| 36 | for i, p := range paths { |
| 37 | if i >= limit { |
| 38 | break |
| 39 | } |
| 40 | out = append(out, Match{Path: p, Score: 0}) |
| 41 | } |
| 42 | return out |
| 43 | } |
| 44 | qLower := []rune(strings.ToLower(q)) |
| 45 | matches := make([]Match, 0, 64) |
| 46 | for _, p := range paths { |
| 47 | if score, ok := score(p, qLower); ok { |
| 48 | matches = append(matches, Match{Path: p, Score: score}) |
| 49 | } |
| 50 | } |
| 51 | sort.SliceStable(matches, func(i, j int) bool { |
| 52 | if matches[i].Score != matches[j].Score { |
| 53 | return matches[i].Score > matches[j].Score |
| 54 | } |
| 55 | // Tiebreaker: shorter paths first (more specific matches). |
| 56 | if len(matches[i].Path) != len(matches[j].Path) { |
| 57 | return len(matches[i].Path) < len(matches[j].Path) |
| 58 | } |
| 59 | return matches[i].Path < matches[j].Path |
| 60 | }) |
| 61 | if len(matches) > limit { |
| 62 | matches = matches[:limit] |
| 63 | } |
| 64 | return matches |
| 65 | } |
| 66 | |
| 67 | // score is a single subsequence-with-bonus pass. Returns (score, true) |
| 68 | // when every query rune is consumed in order; otherwise (0, false). |
| 69 | func score(path string, q []rune) (int, bool) { |
| 70 | if len(q) == 0 { |
| 71 | return 0, true |
| 72 | } |
| 73 | pLower := []rune(strings.ToLower(path)) |
| 74 | score := 0 |
| 75 | qi := 0 |
| 76 | prevMatchedAt := -2 // forces "consecutive" check to fail on first hit |
| 77 | for i := 0; i < len(pLower) && qi < len(q); i++ { |
| 78 | if pLower[i] != q[qi] { |
| 79 | continue |
| 80 | } |
| 81 | // Base hit. |
| 82 | score += 1 |
| 83 | // Consecutive run bonus. |
| 84 | if i == prevMatchedAt+1 { |
| 85 | score += 4 |
| 86 | } |
| 87 | // Boundary bonus: start of path or after `/`. |
| 88 | if i == 0 || pLower[i-1] == '/' { |
| 89 | score += 6 |
| 90 | } |
| 91 | // Camel/kebab boundary: lowercase-after-uppercase is a weaker |
| 92 | // boundary than `/`, but worth a smaller bonus. |
| 93 | if i > 0 && unicode.IsUpper(rune(path[i])) && unicode.IsLower(rune(path[i-1])) { |
| 94 | score += 3 |
| 95 | } |
| 96 | prevMatchedAt = i |
| 97 | qi++ |
| 98 | } |
| 99 | if qi != len(q) { |
| 100 | return 0, false |
| 101 | } |
| 102 | // Filename-basename bonus: query that fully matches the basename |
| 103 | // gets a kicker so `repo.go` ranks above `repo_settings_form.html`. |
| 104 | if base := basename(path); strings.Contains(strings.ToLower(base), string(q)) { |
| 105 | score += 8 |
| 106 | } |
| 107 | return score, true |
| 108 | } |
| 109 | |
| 110 | // basename returns the path's last segment (no leading slash). |
| 111 | func basename(p string) string { |
| 112 | if i := strings.LastIndexByte(p, '/'); i >= 0 { |
| 113 | return p[i+1:] |
| 114 | } |
| 115 | return p |
| 116 | } |
| 117 | |
| 118 | func min(a, b int) int { |
| 119 | if a < b { |
| 120 | return a |
| 121 | } |
| 122 | return b |
| 123 | } |
| 124 |