Go · 3337 bytes Raw Blame History
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2
3 // Package finder implements the "Go to file" fuzzy match used by the
4 // /find/{ref} endpoint. The matcher is a simple subsequence scorer
5 // with bonuses for path-segment boundaries — close enough to feel like
6 // VS Code's quickopen for a few thousand entries without pulling a
7 // full fuzzy library.
8 package finder
9
10 import (
11 "sort"
12 "strings"
13 "unicode"
14 )
15
16 // Match is one row in the finder result list.
17 type Match struct {
18 Path string
19 Score int
20 }
21
22 // Filter returns the top `limit` matches against query, scored highest
23 // first. A blank query returns the first `limit` paths in input order.
24 //
25 // The matcher is case-insensitive and rewards:
26 // - consecutive characters (longer runs score higher)
27 // - matches at path-segment starts (after `/` or at index 0)
28 // - matches at filename basename
29 //
30 // Designed for input sizes up to ~50k paths; past that, restrict the
31 // callable surface or paginate.
32 func Filter(paths []string, query string, limit int) []Match {
33 q := strings.TrimSpace(query)
34 if q == "" {
35 out := make([]Match, 0, min(limit, len(paths)))
36 for i, p := range paths {
37 if i >= limit {
38 break
39 }
40 out = append(out, Match{Path: p, Score: 0})
41 }
42 return out
43 }
44 qLower := []rune(strings.ToLower(q))
45 matches := make([]Match, 0, 64)
46 for _, p := range paths {
47 if score, ok := score(p, qLower); ok {
48 matches = append(matches, Match{Path: p, Score: score})
49 }
50 }
51 sort.SliceStable(matches, func(i, j int) bool {
52 if matches[i].Score != matches[j].Score {
53 return matches[i].Score > matches[j].Score
54 }
55 // Tiebreaker: shorter paths first (more specific matches).
56 if len(matches[i].Path) != len(matches[j].Path) {
57 return len(matches[i].Path) < len(matches[j].Path)
58 }
59 return matches[i].Path < matches[j].Path
60 })
61 if len(matches) > limit {
62 matches = matches[:limit]
63 }
64 return matches
65 }
66
67 // score is a single subsequence-with-bonus pass. Returns (score, true)
68 // when every query rune is consumed in order; otherwise (0, false).
69 func score(path string, q []rune) (int, bool) {
70 if len(q) == 0 {
71 return 0, true
72 }
73 pLower := []rune(strings.ToLower(path))
74 score := 0
75 qi := 0
76 prevMatchedAt := -2 // forces "consecutive" check to fail on first hit
77 for i := 0; i < len(pLower) && qi < len(q); i++ {
78 if pLower[i] != q[qi] {
79 continue
80 }
81 // Base hit.
82 score += 1
83 // Consecutive run bonus.
84 if i == prevMatchedAt+1 {
85 score += 4
86 }
87 // Boundary bonus: start of path or after `/`.
88 if i == 0 || pLower[i-1] == '/' {
89 score += 6
90 }
91 // Camel/kebab boundary: lowercase-after-uppercase is a weaker
92 // boundary than `/`, but worth a smaller bonus.
93 if i > 0 && unicode.IsUpper(rune(path[i])) && unicode.IsLower(rune(path[i-1])) {
94 score += 3
95 }
96 prevMatchedAt = i
97 qi++
98 }
99 if qi != len(q) {
100 return 0, false
101 }
102 // Filename-basename bonus: query that fully matches the basename
103 // gets a kicker so `repo.go` ranks above `repo_settings_form.html`.
104 if base := basename(path); strings.Contains(strings.ToLower(base), string(q)) {
105 score += 8
106 }
107 return score, true
108 }
109
110 // basename returns the path's last segment (no leading slash).
111 func basename(p string) string {
112 if i := strings.LastIndexByte(p, '/'); i >= 0 {
113 return p[i+1:]
114 }
115 return p
116 }
117
118 func min(a, b int) int {
119 if a < b {
120 return a
121 }
122 return b
123 }
124