shithub Public

Watch 1 Fork 0 Star 0

MySQL · 7164 bytes Raw Blame History

  
        1
        -- SPDX-License-Identifier: AGPL-3.0-or-later
      
        2
        --
      
        3
        -- S28 search index tables for repos, issues, users.
      
        4
        --
      
        5
        -- Each table is keyed 1:1 with its source row and holds a tsvector
      
        6
        -- maintained by AFTER triggers on the source. We use a separate
      
        7
        -- table rather than a generated column on the source so:
      
        8
        --   1. Existing query buckets don't have to change shape.
      
        9
        --   2. The tsv column doesn't bloat every row read in unrelated
      
        10
        --      contexts (e.g. the repo list page would otherwise pay the
      
        11
        --      cost of pulling the tsv on every paint).
      
        12
        --
      
        13
        -- The tsv config is `english` everywhere — good enough for v1.
      
        14
        -- Multi-language content is post-MVP (would need per-document
      
        15
        -- language detection and a config switch).
      
        16
        --
      
        17
        -- `unaccent` is composed via the dictionary chain so accent stripping
      
        18
        -- happens before stemming; the search-side tsquery uses the same
      
        19
        -- chain so "café" matches "cafe".
      
        20
        --
      
        21
        -- Code search lives in 0032 — splitting because it's the bulkier
      
        22
        -- of the two and ships with its own worker job.
      
        23
        
        24
        -- +goose Up
      
        25
        
        26
        -- Build a custom dictionary chain that runs `unaccent` first, then
      
        27
        -- english stemming. The same config is used by both index-side and
      
        28
        -- query-side calls so accents normalize consistently on both sides.
      
        29
        -- Lazy: skip if it already exists (e.g. on a re-up after partial
      
        30
        -- failure).
      
        31
        -- +goose StatementBegin
      
        32
        DO $$
      
        33
        BEGIN
      
        34
            IF NOT EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'shithub_search') THEN
      
        35
                CREATE TEXT SEARCH CONFIGURATION shithub_search (COPY = pg_catalog.english);
      
        36
                ALTER TEXT SEARCH CONFIGURATION shithub_search
      
        37
                    ALTER MAPPING FOR hword, hword_part, word
      
        38
                    WITH unaccent, english_stem;
      
        39
            END IF;
      
        40
        END $$;
      
        41
        -- +goose StatementEnd
      
        42
        
        43
        -- ─── repos ─────────────────────────────────────────────────────────
      
        44
        
        45
        CREATE TABLE repos_search (
      
        46
            repo_id   bigint   PRIMARY KEY REFERENCES repos(id) ON DELETE CASCADE,
      
        47
            tsv       tsvector NOT NULL
      
        48
        );
      
        49
        
        50
        CREATE INDEX repos_search_tsv_idx ON repos_search USING GIN (tsv);
      
        51
        
        52
        -- +goose StatementBegin
      
        53
        CREATE OR REPLACE FUNCTION tg_repos_search_upsert() RETURNS trigger
      
        54
            LANGUAGE plpgsql AS $$
      
        55
        BEGIN
      
        56
            INSERT INTO repos_search (repo_id, tsv) VALUES (
      
        57
                NEW.id,
      
        58
                setweight(to_tsvector('shithub_search', coalesce(NEW.name::text, '')), 'A') ||
      
        59
                setweight(to_tsvector('shithub_search', coalesce(NEW.description, '')), 'B')
      
        60
            )
      
        61
            ON CONFLICT (repo_id) DO UPDATE
      
        62
                SET tsv = EXCLUDED.tsv;
      
        63
            RETURN NEW;
      
        64
        END;
      
        65
        $$;
      
        66
        -- +goose StatementEnd
      
        67
        
        68
        CREATE TRIGGER repos_search_upsert
      
        69
            AFTER INSERT OR UPDATE OF name, description ON repos
      
        70
            FOR EACH ROW EXECUTE FUNCTION tg_repos_search_upsert();
      
        71
        
        72
        -- Backfill any existing rows.
      
        73
        INSERT INTO repos_search (repo_id, tsv)
      
        74
        SELECT id,
      
        75
               setweight(to_tsvector('shithub_search', coalesce(name::text, '')), 'A') ||
      
        76
               setweight(to_tsvector('shithub_search', coalesce(description, '')), 'B')
      
        77
        FROM repos
      
        78
        ON CONFLICT (repo_id) DO NOTHING;
      
        79
        
        80
        -- ─── issues ────────────────────────────────────────────────────────
      
        81
        
        82
        CREATE TABLE issues_search (
      
        83
            issue_id        bigint   PRIMARY KEY REFERENCES issues(id) ON DELETE CASCADE,
      
        84
            repo_id         bigint   NOT NULL REFERENCES repos(id) ON DELETE CASCADE,
      
        85
            kind            issue_kind NOT NULL,
      
        86
            state           issue_state NOT NULL,
      
        87
            author_user_id  bigint   REFERENCES users(id) ON DELETE SET NULL,
      
        88
            tsv             tsvector NOT NULL
      
        89
        );
      
        90
        
        91
        CREATE INDEX issues_search_tsv_idx       ON issues_search USING GIN (tsv);
      
        92
        CREATE INDEX issues_search_repo_idx      ON issues_search (repo_id);
      
        93
        CREATE INDEX issues_search_state_idx     ON issues_search (state);
      
        94
        CREATE INDEX issues_search_author_idx    ON issues_search (author_user_id) WHERE author_user_id IS NOT NULL;
      
        95
        
        96
        -- +goose StatementBegin
      
        97
        CREATE OR REPLACE FUNCTION tg_issues_search_upsert() RETURNS trigger
      
        98
            LANGUAGE plpgsql AS $$
      
        99
        BEGIN
      
        100
            INSERT INTO issues_search (issue_id, repo_id, kind, state, author_user_id, tsv) VALUES (
      
        101
                NEW.id, NEW.repo_id, NEW.kind, NEW.state, NEW.author_user_id,
      
        102
                setweight(to_tsvector('shithub_search', coalesce(NEW.title, '')), 'A') ||
      
        103
                setweight(to_tsvector('shithub_search', coalesce(NEW.body, '')), 'B')
      
        104
            )
      
        105
            ON CONFLICT (issue_id) DO UPDATE
      
        106
                SET repo_id        = EXCLUDED.repo_id,
      
        107
                    kind           = EXCLUDED.kind,
      
        108
                    state          = EXCLUDED.state,
      
        109
                    author_user_id = EXCLUDED.author_user_id,
      
        110
                    tsv            = EXCLUDED.tsv;
      
        111
            RETURN NEW;
      
        112
        END;
      
        113
        $$;
      
        114
        -- +goose StatementEnd
      
        115
        
        116
        CREATE TRIGGER issues_search_upsert
      
        117
            AFTER INSERT OR UPDATE OF title, body, state, kind ON issues
      
        118
            FOR EACH ROW EXECUTE FUNCTION tg_issues_search_upsert();
      
        119
        
        120
        INSERT INTO issues_search (issue_id, repo_id, kind, state, author_user_id, tsv)
      
        121
        SELECT id, repo_id, kind, state, author_user_id,
      
        122
               setweight(to_tsvector('shithub_search', coalesce(title, '')), 'A') ||
      
        123
               setweight(to_tsvector('shithub_search', coalesce(body, '')), 'B')
      
        124
        FROM issues
      
        125
        ON CONFLICT (issue_id) DO NOTHING;
      
        126
        
        127
        -- ─── users ─────────────────────────────────────────────────────────
      
        128
        
        129
        CREATE TABLE users_search (
      
        130
            user_id bigint   PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE,
      
        131
            tsv     tsvector NOT NULL
      
        132
        );
      
        133
        
        134
        CREATE INDEX users_search_tsv_idx ON users_search USING GIN (tsv);
      
        135
        
        136
        -- +goose StatementBegin
      
        137
        CREATE OR REPLACE FUNCTION tg_users_search_upsert() RETURNS trigger
      
        138
            LANGUAGE plpgsql AS $$
      
        139
        BEGIN
      
        140
            INSERT INTO users_search (user_id, tsv) VALUES (
      
        141
                NEW.id,
      
        142
                setweight(to_tsvector('shithub_search', coalesce(NEW.username::text, '')), 'A') ||
      
        143
                setweight(to_tsvector('shithub_search', coalesce(NEW.display_name, '')), 'B') ||
      
        144
                setweight(to_tsvector('shithub_search', coalesce(NEW.bio, '')), 'C')
      
        145
            )
      
        146
            ON CONFLICT (user_id) DO UPDATE
      
        147
                SET tsv = EXCLUDED.tsv;
      
        148
            RETURN NEW;
      
        149
        END;
      
        150
        $$;
      
        151
        -- +goose StatementEnd
      
        152
        
        153
        CREATE TRIGGER users_search_upsert
      
        154
            AFTER INSERT OR UPDATE OF username, display_name, bio ON users
      
        155
            FOR EACH ROW EXECUTE FUNCTION tg_users_search_upsert();
      
        156
        
        157
        INSERT INTO users_search (user_id, tsv)
      
        158
        SELECT id,
      
        159
               setweight(to_tsvector('shithub_search', coalesce(username::text, '')), 'A') ||
      
        160
               setweight(to_tsvector('shithub_search', coalesce(display_name, '')), 'B') ||
      
        161
               setweight(to_tsvector('shithub_search', coalesce(bio, '')), 'C')
      
        162
        FROM users
      
        163
        ON CONFLICT (user_id) DO NOTHING;
      
        164
        
        165
        -- +goose Down
      
        166
        DROP TRIGGER IF EXISTS users_search_upsert ON users;
      
        167
        DROP FUNCTION IF EXISTS tg_users_search_upsert();
      
        168
        DROP TABLE IF EXISTS users_search;
      
        169
        
        170
        DROP TRIGGER IF EXISTS issues_search_upsert ON issues;
      
        171
        DROP FUNCTION IF EXISTS tg_issues_search_upsert();
      
        172
        DROP TABLE IF EXISTS issues_search;
      
        173
        
        174
        DROP TRIGGER IF EXISTS repos_search_upsert ON repos;
      
        175
        DROP FUNCTION IF EXISTS tg_repos_search_upsert();
      
        176
        DROP TABLE IF EXISTS repos_search;
      
        177
        
        178
        DROP TEXT SEARCH CONFIGURATION IF EXISTS shithub_search;
      
        179

1	-- SPDX-License-Identifier: AGPL-3.0-or-later
2	--
3	-- S28 search index tables for repos, issues, users.
4	--
5	-- Each table is keyed 1:1 with its source row and holds a tsvector
6	-- maintained by AFTER triggers on the source. We use a separate
7	-- table rather than a generated column on the source so:
8	-- 1. Existing query buckets don't have to change shape.
9	-- 2. The tsv column doesn't bloat every row read in unrelated
10	-- contexts (e.g. the repo list page would otherwise pay the
11	-- cost of pulling the tsv on every paint).
12	--
13	-- The tsv config is `english` everywhere — good enough for v1.
14	-- Multi-language content is post-MVP (would need per-document
15	-- language detection and a config switch).
16	--
17	-- `unaccent` is composed via the dictionary chain so accent stripping
18	-- happens before stemming; the search-side tsquery uses the same
19	-- chain so "café" matches "cafe".
20	--
21	-- Code search lives in 0032 — splitting because it's the bulkier
22	-- of the two and ships with its own worker job.
23
24	-- +goose Up
25
26	-- Build a custom dictionary chain that runs `unaccent` first, then
27	-- english stemming. The same config is used by both index-side and
28	-- query-side calls so accents normalize consistently on both sides.
29	-- Lazy: skip if it already exists (e.g. on a re-up after partial
30	-- failure).
31	-- +goose StatementBegin
32	DO $$
33	BEGIN
34	IF NOT EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'shithub_search') THEN
35	CREATE TEXT SEARCH CONFIGURATION shithub_search (COPY = pg_catalog.english);
36	ALTER TEXT SEARCH CONFIGURATION shithub_search
37	ALTER MAPPING FOR hword, hword_part, word
38	WITH unaccent, english_stem;
39	END IF;
40	END $$;
41	-- +goose StatementEnd
42
43	-- ─── repos ─────────────────────────────────────────────────────────
44
45	CREATE TABLE repos_search (
46	repo_id bigint PRIMARY KEY REFERENCES repos(id) ON DELETE CASCADE,
47	tsv tsvector NOT NULL
48	);
49
50	CREATE INDEX repos_search_tsv_idx ON repos_search USING GIN (tsv);
51
52	-- +goose StatementBegin
53	CREATE OR REPLACE FUNCTION tg_repos_search_upsert() RETURNS trigger
54	LANGUAGE plpgsql AS $$
55	BEGIN
56	INSERT INTO repos_search (repo_id, tsv) VALUES (
57	NEW.id,
58	setweight(to_tsvector('shithub_search', coalesce(NEW.name::text, '')), 'A') \|\|
59	setweight(to_tsvector('shithub_search', coalesce(NEW.description, '')), 'B')
60	)
61	ON CONFLICT (repo_id) DO UPDATE
62	SET tsv = EXCLUDED.tsv;
63	RETURN NEW;
64	END;
65	$$;
66	-- +goose StatementEnd
67
68	CREATE TRIGGER repos_search_upsert
69	AFTER INSERT OR UPDATE OF name, description ON repos
70	FOR EACH ROW EXECUTE FUNCTION tg_repos_search_upsert();
71
72	-- Backfill any existing rows.
73	INSERT INTO repos_search (repo_id, tsv)
74	SELECT id,
75	setweight(to_tsvector('shithub_search', coalesce(name::text, '')), 'A') \|\|
76	setweight(to_tsvector('shithub_search', coalesce(description, '')), 'B')
77	FROM repos
78	ON CONFLICT (repo_id) DO NOTHING;
79
80	-- ─── issues ────────────────────────────────────────────────────────
81
82	CREATE TABLE issues_search (
83	issue_id bigint PRIMARY KEY REFERENCES issues(id) ON DELETE CASCADE,
84	repo_id bigint NOT NULL REFERENCES repos(id) ON DELETE CASCADE,
85	kind issue_kind NOT NULL,
86	state issue_state NOT NULL,
87	author_user_id bigint REFERENCES users(id) ON DELETE SET NULL,
88	tsv tsvector NOT NULL
89	);
90
91	CREATE INDEX issues_search_tsv_idx ON issues_search USING GIN (tsv);
92	CREATE INDEX issues_search_repo_idx ON issues_search (repo_id);
93	CREATE INDEX issues_search_state_idx ON issues_search (state);
94	CREATE INDEX issues_search_author_idx ON issues_search (author_user_id) WHERE author_user_id IS NOT NULL;
95
96	-- +goose StatementBegin
97	CREATE OR REPLACE FUNCTION tg_issues_search_upsert() RETURNS trigger
98	LANGUAGE plpgsql AS $$
99	BEGIN
100	INSERT INTO issues_search (issue_id, repo_id, kind, state, author_user_id, tsv) VALUES (
101	NEW.id, NEW.repo_id, NEW.kind, NEW.state, NEW.author_user_id,
102	setweight(to_tsvector('shithub_search', coalesce(NEW.title, '')), 'A') \|\|
103	setweight(to_tsvector('shithub_search', coalesce(NEW.body, '')), 'B')
104	)
105	ON CONFLICT (issue_id) DO UPDATE
106	SET repo_id = EXCLUDED.repo_id,
107	kind = EXCLUDED.kind,
108	state = EXCLUDED.state,
109	author_user_id = EXCLUDED.author_user_id,
110	tsv = EXCLUDED.tsv;
111	RETURN NEW;
112	END;
113	$$;
114	-- +goose StatementEnd
115
116	CREATE TRIGGER issues_search_upsert
117	AFTER INSERT OR UPDATE OF title, body, state, kind ON issues
118	FOR EACH ROW EXECUTE FUNCTION tg_issues_search_upsert();
119
120	INSERT INTO issues_search (issue_id, repo_id, kind, state, author_user_id, tsv)
121	SELECT id, repo_id, kind, state, author_user_id,
122	setweight(to_tsvector('shithub_search', coalesce(title, '')), 'A') \|\|
123	setweight(to_tsvector('shithub_search', coalesce(body, '')), 'B')
124	FROM issues
125	ON CONFLICT (issue_id) DO NOTHING;
126
127	-- ─── users ─────────────────────────────────────────────────────────
128
129	CREATE TABLE users_search (
130	user_id bigint PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE,
131	tsv tsvector NOT NULL
132	);
133
134	CREATE INDEX users_search_tsv_idx ON users_search USING GIN (tsv);
135
136	-- +goose StatementBegin
137	CREATE OR REPLACE FUNCTION tg_users_search_upsert() RETURNS trigger
138	LANGUAGE plpgsql AS $$
139	BEGIN
140	INSERT INTO users_search (user_id, tsv) VALUES (
141	NEW.id,
142	setweight(to_tsvector('shithub_search', coalesce(NEW.username::text, '')), 'A') \|\|
143	setweight(to_tsvector('shithub_search', coalesce(NEW.display_name, '')), 'B') \|\|
144	setweight(to_tsvector('shithub_search', coalesce(NEW.bio, '')), 'C')
145	)
146	ON CONFLICT (user_id) DO UPDATE
147	SET tsv = EXCLUDED.tsv;
148	RETURN NEW;
149	END;
150	$$;
151	-- +goose StatementEnd
152
153	CREATE TRIGGER users_search_upsert
154	AFTER INSERT OR UPDATE OF username, display_name, bio ON users
155	FOR EACH ROW EXECUTE FUNCTION tg_users_search_upsert();
156
157	INSERT INTO users_search (user_id, tsv)
158	SELECT id,
159	setweight(to_tsvector('shithub_search', coalesce(username::text, '')), 'A') \|\|
160	setweight(to_tsvector('shithub_search', coalesce(display_name, '')), 'B') \|\|
161	setweight(to_tsvector('shithub_search', coalesce(bio, '')), 'C')
162	FROM users
163	ON CONFLICT (user_id) DO NOTHING;
164
165	-- +goose Down
166	DROP TRIGGER IF EXISTS users_search_upsert ON users;
167	DROP FUNCTION IF EXISTS tg_users_search_upsert();
168	DROP TABLE IF EXISTS users_search;
169
170	DROP TRIGGER IF EXISTS issues_search_upsert ON issues;
171	DROP FUNCTION IF EXISTS tg_issues_search_upsert();
172	DROP TABLE IF EXISTS issues_search;
173
174	DROP TRIGGER IF EXISTS repos_search_upsert ON repos;
175	DROP FUNCTION IF EXISTS tg_repos_search_upsert();
176	DROP TABLE IF EXISTS repos_search;
177
178	DROP TEXT SEARCH CONFIGURATION IF EXISTS shithub_search;
179