claudex Public

Watch 0 Fork 0 Star 0

Rust · 13657 bytes Raw Blame History

  
        1
        //! Local, dependency-free title sanitizer.
      
        2
        //!
      
        3
        //! Claude Code's first user message is almost never a clean title.
      
        4
        //! It's routinely wrapped in IDE context blocks, prefixed with slash
      
        5
        //! commands, carries pasted stack traces, or is an entirely-code
      
        6
        //! block with zero prose. This module turns a raw user message into
      
        7
        //! a concise title via a series of cleanup passes — no LLM call, no
      
        8
        //! regex crate, no external state.
      
        9
        //!
      
        10
        //! The pipeline:
      
        11
        //!
      
        12
        //! 1. **Strip well-known noise blocks** — XML-ish wrappers like
      
        13
        //!    `<ide_opened_file>`, `<local-command-caveat>`,
      
        14
        //!    `<system-reminder>`, `<task-notification>`,
      
        15
        //!    and `<observed_from_primary_session>`. The `<command-*>` family
      
        16
        //!    from slash-command plumbing is kept; only its tags go (step 3).
      
        17
        //! 2. **Strip fenced code blocks** — triple-backtick `` ``` `` …
      
        18
        //!    `` ``` ``. Everything between a fence open and its close is
      
        19
        //!    dropped entirely.
      
        20
        //! 3. **Strip bare XML tags** — any remaining `<...>` fragments are
      
        21
        //!    removed, keeping their inner text. This catches one-off tags
      
        22
        //!    the allowlist didn't cover.
      
        23
        //! 4. **Strip inline backtick code** — the backticks are removed and
      
        24
        //!    the content is kept (a code reference inside a sentence still
      
        25
        //!    gives useful title information).
      
        26
        //! 5. **Collapse whitespace** and trim.
      
        27
        //! 6. **Keep slash commands, drop tiny titles** — a title that starts
      
        28
        //!    with `/` is returned as-is (the caller may decorate it); any
      
        29
        //!    other title shorter than four chars is rejected.
      
        30
        //! 7. **Pick a sentence** — take up to the first sentence-ending
      
        31
        //!    punctuation within a generous window, then truncate at a word
      
        32
        //!    boundary.
      
        33
        //!
      
        34
        //! If nothing survives the pipeline, returns `None` and the caller
      
        35
        //! falls through to the next title source in the resolution order.
      
        36
        
        37
        /// Hard cap on title length, counted in `char`s rather than bytes.
        /// Longer titles are truncated at a word boundary with an ellipsis
        /// appended, so a truncated title can be `MAX_TITLE_CHARS + 1` chars.
        const MAX_TITLE_CHARS: usize = 100;
      
        40
        
        41
        /// When looking for a sentence boundary we allow the sanitizer to
        /// scan up to `SENTENCE_SCAN_WINDOW` chars past `MAX_TITLE_CHARS` —
        /// otherwise very long opening sentences would always hit the hard
        /// truncate and lose their punctuation-based shortening.
        ///
        /// NOTE(review): `pick_first_sentence` only accepts a terminator that
        /// has at most `MAX_TITLE_CHARS` chars before it, so most of this extra
        /// window cannot currently yield a boundary — confirm this is intended.
        const SENTENCE_SCAN_WINDOW: usize = 40;
      
        46
        
        47
        /// Names of wrapper tags whose blocks are noise for title purposes.
        ///
        /// Any block whose opening tag matches one of these names (without
        /// the leading `<`) is removed entirely — both the tags and their
        /// contents. The list is intentionally explicit so a wayward future
        /// tag doesn't silently erase user text. `sanitize_title` hands the
        /// entries to `strip_xml_block` one at a time, in the order listed.
        const NOISE_BLOCK_TAGS: &[&str] = &[
            "ide_opened_file",
            "local-command-caveat",
            "local-command-stdout",
            "local-command-stderr",
            "system-reminder",
            "task-notification",
            "observed_from_primary_session",
            "requested_at",
            "ai_commands",
            "file-contents",
            "file-content",
            "attachment",
            // Note: <command-name>, <command-args>, <command-message> are
            // intentionally *not* in this list — their content is usually the
            // most informative text in the whole message (e.g. `/init`), and
            // `strip_bare_tags` handles the tags themselves in pass 3.
        ];
      
        69
        
        70
        pub fn sanitize_title(raw: &str) -> Option<String> {
      
        71
            let mut s = raw.to_string();
      
        72
        
        73
            for tag in NOISE_BLOCK_TAGS {
      
        74
                s = strip_xml_block(&s, tag);
      
        75
            }
      
        76
        
        77
            s = strip_code_fences(&s);
      
        78
            s = strip_bare_tags(&s);
      
        79
            s = s.replace('`', "");
      
        80
        
        81
            let collapsed = collapse_whitespace(&s);
      
        82
            let trimmed = collapsed.trim();
      
        83
        
        84
            if trimmed.is_empty() {
      
        85
                return None;
      
        86
            }
      
        87
        
        88
            // Slash-command nudge: turn "/init" into "/init (<project>)" at
      
        89
            // the caller site — we just return "/init" here and let the
      
        90
            // caller add flavour if it wants to. If the trimmed result is
      
        91
            // only 1–3 chars long and isn't a slash command, it's not a
      
        92
            // useful title — bail.
      
        93
            if !trimmed.starts_with('/') && trimmed.chars().count() < 4 {
      
        94
                return None;
      
        95
            }
      
        96
        
        97
            let picked = pick_first_sentence(trimmed);
      
        98
            let truncated = truncate_at_word_boundary(&picked);
      
        99
            Some(truncated)
      
        100
        }
      
        101
        
        102
        /// Remove every `<tag ...>...</tag>` block — tags and contents — from
        /// `input`.
        ///
        /// * `input` — the text to scan.
        /// * `tag` — the tag name without angle brackets, e.g. `"attachment"`.
        ///
        /// Returns the remaining text. A candidate only counts as the tag when
        /// the name is followed by `>`, `/`, or whitespace, so `<foo` never
        /// matches `<foobar>`. A self-closing `<tag/>` removes just that tag.
        /// An opening tag with no matching close is treated as unrecoverable
        /// noise: everything from it to the end of the input is dropped.
        fn strip_xml_block(input: &str, tag: &str) -> String {
            let open_prefix = format!("<{tag}");
            let close = format!("</{tag}>");
            let mut out = String::with_capacity(input.len());
            let mut rest = input;

            while let Some(open_idx) = rest.find(&open_prefix) {
                out.push_str(&rest[..open_idx]);
                let after_open = &rest[open_idx..];
                let after_name = &after_open[open_prefix.len()..];

                let is_real_tag = matches!(
                    after_name.chars().next(),
                    Some('>' | ' ' | '\t' | '\n' | '\r' | '/')
                );
                if !is_real_tag {
                    // False match on a longer tag name: keep the `<` and
                    // resume scanning right after it.
                    out.push('<');
                    rest = &after_open[1..];
                    continue;
                }

                // A self-closing tag (`<tag/>`, `<tag attr="x" />`) has no
                // separate close tag; skip only the tag itself instead of
                // discarding the rest of the message.
                if let Some(gt) = after_name.find('>') {
                    if after_name[..gt].ends_with('/') {
                        rest = &after_name[gt + 1..];
                        continue;
                    }
                }

                match after_open.find(&close) {
                    Some(close_idx) => rest = &after_open[close_idx + close.len()..],
                    // Unclosed block — drop everything from here.
                    None => return out,
                }
            }

            out.push_str(rest);
            out
        }
      
        146
        
        147
        /// Drop triple-backtick fenced blocks from `input`, line by line.
        ///
        /// Any line whose first non-whitespace characters are three backticks
        /// toggles the "inside a fence" state and is itself discarded; lines
        /// inside a fence are discarded too. Every surviving line is emitted
        /// followed by `\n`. A fence that is never closed therefore swallows
        /// the rest of the input.
        fn strip_code_fences(input: &str) -> String {
            let mut inside_fence = false;
            let surviving = input.lines().filter(|line| {
                let is_delimiter = line.trim_start().starts_with("```");
                if is_delimiter {
                    inside_fence = !inside_fence;
                }
                !is_delimiter && !inside_fence
            });

            let mut kept = String::with_capacity(input.len());
            for line in surviving {
                kept.push_str(line);
                kept.push('\n');
            }
            kept
        }
      
        162
        
        163
        /// Remove `<...>` tag fragments from `input`, keeping the text between
        /// them.
        ///
        /// A `<` only starts a tag when it is followed (after an optional `/`)
        /// by an ASCII letter, `!`, or `?`, and a closing `>` exists later in
        /// the input. Any other `<` — as in `5 < 10` — is ordinary text and is
        /// kept, so a stray comparison operator no longer erases the rest of
        /// the message. A `>` outside a tag is always kept.
        fn strip_bare_tags(input: &str) -> String {
            let mut out = String::with_capacity(input.len());
            let mut rest = input;

            while let Some(lt) = rest.find('<') {
                out.push_str(&rest[..lt]);
                let after_lt = &rest[lt + 1..];
                let name = after_lt.strip_prefix('/').unwrap_or(after_lt);
                let opens_tag = matches!(
                    name.chars().next(),
                    Some(c) if c.is_ascii_alphabetic() || c == '!' || c == '?'
                );

                match after_lt.find('>') {
                    // A well-formed tag: skip it, including the closing `>`.
                    Some(gt) if opens_tag => rest = &after_lt[gt + 1..],
                    // Not a tag (or never closed): keep the `<` as text.
                    _ => {
                        out.push('<');
                        rest = after_lt;
                    }
                }
            }

            out.push_str(rest);
            out
        }
      
        176
        
        177
        /// Squash every run of whitespace in `input` into a single space.
        ///
        /// Leading whitespace is removed outright. Trailing whitespace, when
        /// there is any text before it, is kept as one final space — callers
        /// are expected to trim the result.
        fn collapse_whitespace(input: &str) -> String {
            let mut out = input.split_whitespace().collect::<Vec<_>>().join(" ");
            if !out.is_empty() && input.ends_with(char::is_whitespace) {
                out.push(' ');
            }
            out
        }
      
        193
        
        194
        /// Scan the input for a sentence boundary (`. `, `? `, `! `) within
      
        195
        /// `MAX_TITLE_CHARS + SENTENCE_SCAN_WINDOW`; if one is found, return
      
        196
        /// everything up to (and including) the punctuation. Otherwise
      
        197
        /// return the whole haystack up to the hard cap.
      
        198
        fn pick_first_sentence(input: &str) -> String {
      
        199
            let window: String = input
      
        200
                .chars()
      
        201
                .take(MAX_TITLE_CHARS + SENTENCE_SCAN_WINDOW)
      
        202
                .collect();
      
        203
        
        204
            // Find the earliest sentence terminator (any of `.`, `?`, `!`
      
        205
            // followed by whitespace) within the char cap.
      
        206
            let mut earliest: Option<usize> = None;
      
        207
            for terminator in [". ", "? ", "! "] {
      
        208
                if let Some(idx) = window.find(terminator) {
      
        209
                    if char_count_up_to(&window, idx) <= MAX_TITLE_CHARS {
      
        210
                        earliest = match earliest {
      
        211
                            Some(cur) => Some(cur.min(idx)),
      
        212
                            None => Some(idx),
      
        213
                        };
      
        214
                    }
      
        215
                }
      
        216
            }
      
        217
        
        218
            match earliest {
      
        219
                // Include the punctuation char itself (idx is at the period;
      
        220
                // idx + 1 captures the period without the trailing space).
      
        221
                Some(idx) => window[..=idx].to_string(),
      
        222
                None => window,
      
        223
            }
      
        224
        }
      
        225
        
        226
        /// Count the chars in `s` that lie before byte offset `byte_idx`.
        ///
        /// Panics if `byte_idx` is past the end of `s` or not on a char
        /// boundary.
        fn char_count_up_to(s: &str, byte_idx: usize) -> usize {
            let (head, _) = s.split_at(byte_idx);
            head.chars().count()
        }
      
        229
        
        230
        fn truncate_at_word_boundary(input: &str) -> String {
      
        231
            let char_count = input.chars().count();
      
        232
            if char_count <= MAX_TITLE_CHARS {
      
        233
                return input.to_string();
      
        234
            }
      
        235
        
        236
            let mut out: String = input.chars().take(MAX_TITLE_CHARS).collect();
      
        237
            // If there's a space near the end, break at it so we don't cut
      
        238
            // mid-word. Tolerate up to 20 chars of trailing slack.
      
        239
            if let Some(space_idx) = out.rfind(' ') {
      
        240
                let trimmed_len = out.chars().count() - out[..space_idx].chars().count();
      
        241
                if trimmed_len < 20 {
      
        242
                    out.truncate(space_idx);
      
        243
                }
      
        244
            }
      
        245
            out.push('…');
      
        246
            out
      
        247
        }
      
        248
        
        249
        /// Unit tests for the sanitizer pipeline, driven through
        /// `sanitize_title`.
        #[cfg(test)]
        mod tests {
            use super::*;
            use pretty_assertions::assert_eq;

            #[test]
            fn passthrough_plain_sentence() {
                let t = sanitize_title("Let's plan the thread browser feature.").unwrap();
                assert_eq!(t, "Let's plan the thread browser feature.");
            }

            #[test]
            fn strips_ide_opened_file_wrapper() {
                let raw =
                    "<ide_opened_file>/Users/me/proj/foo.rs</ide_opened_file>\nFix the parser bug.";
                let t = sanitize_title(raw).unwrap();
                assert_eq!(t, "Fix the parser bug.");
            }

            #[test]
            fn strips_local_command_caveat() {
                let raw = "<local-command-caveat>Caveat: the messages below were generated by the user while running local commands. DO NOT respond.</local-command-caveat>\n<command-name>/init</command-name>";
                let t = sanitize_title(raw).unwrap();
                // Nothing left after stripping but the slash command.
                assert!(t.starts_with('/'));
            }

            #[test]
            fn strips_fenced_code_block() {
                let raw = "Here's the error:\n```\nerror[E0308]: mismatched types\n  --> src/foo.rs:42:5\n```\nCan you fix it?";
                let t = sanitize_title(raw).unwrap();
                assert_eq!(t, "Here's the error: Can you fix it?");
            }

            #[test]
            fn removes_inline_backtick_code_preserving_content() {
                let raw = "Refactor `foo()` to use the new `BarStream` API.";
                let t = sanitize_title(raw).unwrap();
                assert_eq!(t, "Refactor foo() to use the new BarStream API.");
            }

            #[test]
            fn strips_bare_tags_keeps_text() {
                let raw = "<some_tag>hello world</some_tag>";
                let t = sanitize_title(raw).unwrap();
                assert_eq!(t, "hello world");
            }

            #[test]
            fn collapses_whitespace() {
                let raw = "hello\n\n\n   world   \n";
                let t = sanitize_title(raw).unwrap();
                assert_eq!(t, "hello world");
            }

            #[test]
            fn picks_first_sentence_when_short() {
                let raw =
                    "Plan the thread browser feature. Next we also need to wire the watcher. And finally add tests.";
                let t = sanitize_title(raw).unwrap();
                assert_eq!(t, "Plan the thread browser feature.");
            }

            #[test]
            fn truncates_long_single_sentence_at_word_boundary() {
                let raw = "This is a really really really really really really really really really really really really really really really really long single sentence with no punctuation inside it and it needs truncation";
                let t = sanitize_title(raw).unwrap();
                assert!(t.ends_with('…'));
                assert!(t.chars().count() <= MAX_TITLE_CHARS + 1);
                // The kept text must be a prefix of the input that stops right
                // before a space, i.e. the cut is not mid-word.
                let before_ellipsis = t.strip_suffix('…').expect("ellipsis checked above");
                assert!(
                    raw.starts_with(before_ellipsis)
                        && raw[before_ellipsis.len()..].starts_with(' '),
                    "expected word-boundary truncation, got: {t:?}"
                );
            }

            #[test]
            fn returns_none_when_only_noise() {
                let raw = "<ide_opened_file>/tmp/empty</ide_opened_file>\n<system-reminder>hi</system-reminder>\n```\ncode only\n```";
                assert!(sanitize_title(raw).is_none());
            }

            #[test]
            fn returns_none_when_too_short() {
                assert!(sanitize_title("hi").is_none());
            }

            #[test]
            fn keeps_slash_commands_even_when_short() {
                let t = sanitize_title("/init").unwrap();
                assert_eq!(t, "/init");
            }

            #[test]
            fn observed_from_primary_session_block_removed() {
                let raw = "<observed_from_primary_session>\n  <user_request>Let's continue with chained access.</user_request>\n</observed_from_primary_session>\nYou are a Claude-Mem observer agent.";
                let t = sanitize_title(raw).unwrap();
                assert!(!t.contains("observed_from_primary"));
                assert!(t.contains("Claude-Mem observer"));
            }

            #[test]
            fn handles_messy_real_first_message() {
                // Modeled on what we actually saw in claudex's first user
                // message: a mix of caveat, code block, and prose.
                let raw = "<local-command-caveat>Caveat: DO NOT respond.</local-command-caveat>\nI want to plan the next steps. Here's the current code:\n```rust\nfn main() {}\n```\nThoughts?";
                let t = sanitize_title(raw).unwrap();
                assert_eq!(t, "I want to plan the next steps.");
            }

            #[test]
            fn does_not_swallow_plain_text_containing_lt() {
                let raw = "Why does 5 < 10 evaluate weirdly here?";
                let t = sanitize_title(raw).unwrap();
                // A lone `<` is not a tag, but tag stripping may still lose
                // the text after it. This test deliberately pins only the
                // minimum: no panic, and *something* is returned.
                assert!(!t.is_empty());
            }
        }
      
        373

1	//! Local, dependency-free title sanitizer.
2	//!
3	//! Claude Code's first user message is almost never a clean title.
4	//! It's routinely wrapped in IDE context blocks, prefixed with slash
5	//! commands, carries pasted stack traces, or is an entirely-code
6	//! block with zero prose. This module turns a raw user message into
7	//! a concise title via a series of cleanup passes — no LLM call, no
8	//! regex crate, no external state.
9	//!
10	//! The pipeline:
11	//!
12	//! 1. Strip well-known noise blocks — XML-ish wrappers like
13	//! `<ide_opened_file>`, `<local-command-caveat>`,
14	//! `<system-reminder>`, `<task-notification>`,
15	//! and `<observed_from_primary_session>`. The `<command-*>` family
16	//! from slash-command plumbing is kept; only its tags go (step 3).
17	//! 2. Strip fenced code blocks — triple-backtick `` ``` `` …
18	//! `` ``` ``. Everything between a fence open and its close is
19	//! dropped entirely.
20	//! 3. Strip bare XML tags — any remaining `<...>` fragments are
21	//! removed, keeping their inner text. This catches one-off tags
22	//! the allowlist didn't cover.
23	//! 4. Strip inline backtick code — the backticks are removed and
24	//! the content is kept (a code reference inside a sentence still
25	//! gives useful title information).
26	//! 5. Collapse whitespace and trim.
27	//! 6. Keep slash commands, drop tiny titles — a title that starts
28	//! with `/` is returned as-is (the caller may decorate it); any
29	//! other title shorter than four chars is rejected.
30	//! 7. Pick a sentence — take up to the first sentence-ending
31	//! punctuation within a generous window, then truncate at a word
32	//! boundary.
33	//!
34	//! If nothing survives the pipeline, returns `None` and the caller
35	//! falls through to the next title source in the resolution order.
36
/// Hard cap on title length, counted in `char`s rather than bytes.
/// Longer titles are truncated at a word boundary with an ellipsis
/// appended, so a truncated title can be `MAX_TITLE_CHARS + 1` chars.
const MAX_TITLE_CHARS: usize = 100;
40
/// When looking for a sentence boundary we allow the sanitizer to
/// scan up to `SENTENCE_SCAN_WINDOW` chars past `MAX_TITLE_CHARS` —
/// otherwise very long opening sentences would always hit the hard
/// truncate and lose their punctuation-based shortening.
///
/// NOTE(review): `pick_first_sentence` only accepts a terminator that
/// has at most `MAX_TITLE_CHARS` chars before it, so most of this extra
/// window cannot currently yield a boundary — confirm this is intended.
const SENTENCE_SCAN_WINDOW: usize = 40;
46
/// Names of wrapper tags whose blocks are noise for title purposes.
///
/// Any block whose opening tag matches one of these names (without
/// the leading `<`) is removed entirely — both the tags and their
/// contents. The list is intentionally explicit so a wayward future
/// tag doesn't silently erase user text. `sanitize_title` hands the
/// entries to `strip_xml_block` one at a time, in the order listed.
const NOISE_BLOCK_TAGS: &[&str] = &[
    "ide_opened_file",
    "local-command-caveat",
    "local-command-stdout",
    "local-command-stderr",
    "system-reminder",
    "task-notification",
    "observed_from_primary_session",
    "requested_at",
    "ai_commands",
    "file-contents",
    "file-content",
    "attachment",
    // Note: <command-name>, <command-args>, <command-message> are
    // intentionally not in this list — their content is usually the
    // most informative text in the whole message (e.g. `/init`), and
    // `strip_bare_tags` handles the tags themselves in pass 3.
];
69
70	pub fn sanitize_title(raw: &str) -> Option<String> {
71	let mut s = raw.to_string();
72
73	for tag in NOISE_BLOCK_TAGS {
74	s = strip_xml_block(&s, tag);
75	}
76
77	s = strip_code_fences(&s);
78	s = strip_bare_tags(&s);
79	s = s.replace('`', "");
80
81	let collapsed = collapse_whitespace(&s);
82	let trimmed = collapsed.trim();
83
84	if trimmed.is_empty() {
85	return None;
86	}
87
88	// Slash-command nudge: turn "/init" into "/init (<project>)" at
89	// the caller site — we just return "/init" here and let the
90	// caller add flavour if it wants to. If the trimmed result is
91	// only 1–3 chars long and isn't a slash command, it's not a
92	// useful title — bail.
93	if !trimmed.starts_with('/') && trimmed.chars().count() < 4 {
94	return None;
95	}
96
97	let picked = pick_first_sentence(trimmed);
98	let truncated = truncate_at_word_boundary(&picked);
99	Some(truncated)
100	}
101
/// Remove every `<tag ...>...</tag>` block — tags and contents — from
/// `input`.
///
/// * `input` — the text to scan.
/// * `tag` — the tag name without angle brackets, e.g. `"attachment"`.
///
/// Returns the remaining text. A candidate only counts as the tag when
/// the name is followed by `>`, `/`, or whitespace, so `<foo` never
/// matches `<foobar>`. A self-closing `<tag/>` removes just that tag.
/// An opening tag with no matching close is treated as unrecoverable
/// noise: everything from it to the end of the input is dropped.
fn strip_xml_block(input: &str, tag: &str) -> String {
    let open_prefix = format!("<{tag}");
    let close = format!("</{tag}>");
    let mut out = String::with_capacity(input.len());
    let mut rest = input;

    while let Some(open_idx) = rest.find(&open_prefix) {
        out.push_str(&rest[..open_idx]);
        let after_open = &rest[open_idx..];
        let after_name = &after_open[open_prefix.len()..];

        let is_real_tag = matches!(
            after_name.chars().next(),
            Some('>' | ' ' | '\t' | '\n' | '\r' | '/')
        );
        if !is_real_tag {
            // False match on a longer tag name: keep the `<` and
            // resume scanning right after it.
            out.push('<');
            rest = &after_open[1..];
            continue;
        }

        // A self-closing tag (`<tag/>`, `<tag attr="x" />`) has no
        // separate close tag; skip only the tag itself instead of
        // discarding the rest of the message.
        if let Some(gt) = after_name.find('>') {
            if after_name[..gt].ends_with('/') {
                rest = &after_name[gt + 1..];
                continue;
            }
        }

        match after_open.find(&close) {
            Some(close_idx) => rest = &after_open[close_idx + close.len()..],
            // Unclosed block — drop everything from here.
            None => return out,
        }
    }

    out.push_str(rest);
    out
}
146
/// Drop triple-backtick fenced blocks from `input`, line by line.
///
/// Any line whose first non-whitespace characters are three backticks
/// toggles the "inside a fence" state and is itself discarded; lines
/// inside a fence are discarded too. Every surviving line is emitted
/// followed by `\n`. A fence that is never closed therefore swallows
/// the rest of the input.
fn strip_code_fences(input: &str) -> String {
    let mut inside_fence = false;
    let surviving = input.lines().filter(|line| {
        let is_delimiter = line.trim_start().starts_with("```");
        if is_delimiter {
            inside_fence = !inside_fence;
        }
        !is_delimiter && !inside_fence
    });

    let mut kept = String::with_capacity(input.len());
    for line in surviving {
        kept.push_str(line);
        kept.push('\n');
    }
    kept
}
162
/// Remove `<...>` tag fragments from `input`, keeping the text between
/// them.
///
/// A `<` only starts a tag when it is followed (after an optional `/`)
/// by an ASCII letter, `!`, or `?`, and a closing `>` exists later in
/// the input. Any other `<` — as in `5 < 10` — is ordinary text and is
/// kept, so a stray comparison operator no longer erases the rest of
/// the message. A `>` outside a tag is always kept.
fn strip_bare_tags(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut rest = input;

    while let Some(lt) = rest.find('<') {
        out.push_str(&rest[..lt]);
        let after_lt = &rest[lt + 1..];
        let name = after_lt.strip_prefix('/').unwrap_or(after_lt);
        let opens_tag = matches!(
            name.chars().next(),
            Some(c) if c.is_ascii_alphabetic() || c == '!' || c == '?'
        );

        match after_lt.find('>') {
            // A well-formed tag: skip it, including the closing `>`.
            Some(gt) if opens_tag => rest = &after_lt[gt + 1..],
            // Not a tag (or never closed): keep the `<` as text.
            _ => {
                out.push('<');
                rest = after_lt;
            }
        }
    }

    out.push_str(rest);
    out
}
176
/// Squash every run of whitespace in `input` into a single space.
///
/// Leading whitespace is removed outright. Trailing whitespace, when
/// there is any text before it, is kept as one final space — callers
/// are expected to trim the result.
fn collapse_whitespace(input: &str) -> String {
    let mut out = input.split_whitespace().collect::<Vec<_>>().join(" ");
    if !out.is_empty() && input.ends_with(char::is_whitespace) {
        out.push(' ');
    }
    out
}
193
194	/// Scan the input for a sentence boundary (`. `, `? `, `! `) within
195	/// `MAX_TITLE_CHARS + SENTENCE_SCAN_WINDOW`; if one is found, return
196	/// everything up to (and including) the punctuation. Otherwise
197	/// return the whole haystack up to the hard cap.
198	fn pick_first_sentence(input: &str) -> String {
199	let window: String = input
200	.chars()
201	.take(MAX_TITLE_CHARS + SENTENCE_SCAN_WINDOW)
202	.collect();
203
204	// Find the earliest sentence terminator (any of `.`, `?`, `!`
205	// followed by whitespace) within the char cap.
206	let mut earliest: Option<usize> = None;
207	for terminator in [". ", "? ", "! "] {
208	if let Some(idx) = window.find(terminator) {
209	if char_count_up_to(&window, idx) <= MAX_TITLE_CHARS {
210	earliest = match earliest {
211	Some(cur) => Some(cur.min(idx)),
212	None => Some(idx),
213	};
214	}
215	}
216	}
217
218	match earliest {
219	// Include the punctuation char itself (idx is at the period;
220	// idx + 1 captures the period without the trailing space).
221	Some(idx) => window[..=idx].to_string(),
222	None => window,
223	}
224	}
225
/// Count the chars in `s` that lie before byte offset `byte_idx`.
///
/// Panics if `byte_idx` is past the end of `s` or not on a char
/// boundary.
fn char_count_up_to(s: &str, byte_idx: usize) -> usize {
    let (head, _) = s.split_at(byte_idx);
    head.chars().count()
}
229
230	fn truncate_at_word_boundary(input: &str) -> String {
231	let char_count = input.chars().count();
232	if char_count <= MAX_TITLE_CHARS {
233	return input.to_string();
234	}
235
236	let mut out: String = input.chars().take(MAX_TITLE_CHARS).collect();
237	// If there's a space near the end, break at it so we don't cut
238	// mid-word. Tolerate up to 20 chars of trailing slack.
239	if let Some(space_idx) = out.rfind(' ') {
240	let trimmed_len = out.chars().count() - out[..space_idx].chars().count();
241	if trimmed_len < 20 {
242	out.truncate(space_idx);
243	}
244	}
245	out.push('…');
246	out
247	}
248
// Unit tests for the title sanitizer. Every test drives the public
// `sanitize_title` entry point with a raw first-message string and checks
// the resulting title (or the absence of one).
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    // A clean single sentence comes back untouched.
    #[test]
    fn passthrough_plain_sentence() {
        let t = sanitize_title("Let's plan the thread browser feature.").unwrap();
        assert_eq!(t, "Let's plan the thread browser feature.");
    }

    // The IDE context wrapper and its contents are removed.
    #[test]
    fn strips_ide_opened_file_wrapper() {
        let raw =
            "<ide_opened_file>/Users/me/proj/foo.rs</ide_opened_file>\nFix the parser bug.";
        let t = sanitize_title(raw).unwrap();
        assert_eq!(t, "Fix the parser bug.");
    }

    #[test]
    fn strips_local_command_caveat() {
        let raw = "<local-command-caveat>Caveat: the messages below were generated by the user while running local commands. DO NOT respond.</local-command-caveat>\n<command-name>/init</command-name>";
        let t = sanitize_title(raw).unwrap();
        // Nothing left after stripping but the slash command.
        assert!(t.starts_with('/'));
    }

    // Fenced code is dropped entirely; the prose on either side is joined.
    #[test]
    fn strips_fenced_code_block() {
        let raw = "Here's the error:\n```\nerror[E0308]: mismatched types\n --> src/foo.rs:42:5\n```\nCan you fix it?";
        let t = sanitize_title(raw).unwrap();
        assert_eq!(t, "Here's the error: Can you fix it?");
    }

    // Inline code loses its backticks but keeps its text.
    #[test]
    fn removes_inline_backtick_code_preserving_content() {
        let raw = "Refactor `foo()` to use the new `BarStream` API.";
        let t = sanitize_title(raw).unwrap();
        assert_eq!(t, "Refactor foo() to use the new BarStream API.");
    }

    // Unknown tags are removed while their inner text survives.
    #[test]
    fn strips_bare_tags_keeps_text() {
        let raw = "<some_tag>hello world</some_tag>";
        let t = sanitize_title(raw).unwrap();
        assert_eq!(t, "hello world");
    }

    #[test]
    fn collapses_whitespace() {
        let raw = "hello\n\n\n world \n";
        let t = sanitize_title(raw).unwrap();
        assert_eq!(t, "hello world");
    }

    // With several short sentences, only the first becomes the title.
    #[test]
    fn picks_first_sentence_when_short() {
        let raw =
            "Plan the thread browser feature. Next we also need to wire the watcher. And finally add tests.";
        let t = sanitize_title(raw).unwrap();
        assert_eq!(t, "Plan the thread browser feature.");
    }

    #[test]
    fn truncates_long_single_sentence_at_word_boundary() {
        let raw = "This is a really really really really really really really really really really really really really really really really long single sentence with no punctuation inside it and it needs truncation";
        let t = sanitize_title(raw).unwrap();
        assert!(t.ends_with('…'));
        assert!(t.chars().count() <= MAX_TITLE_CHARS + 1);
        // The kept text must be a prefix of the input that stops right
        // before a space, i.e. the cut never lands in the middle of a word.
        // (The previous form of this check also required the title to
        // contain no space at all, which made it impossible to fail.)
        let before_ellipsis = t
            .strip_suffix('…')
            .expect("title ends with an ellipsis (asserted above)");
        let rest = raw
            .strip_prefix(before_ellipsis)
            .expect("truncated title should be a prefix of the input");
        assert!(
            rest.starts_with(' ') && !before_ellipsis.ends_with(' '),
            "expected word-boundary truncation, got: {t:?}"
        );
    }

    // Input made up solely of noise blocks and code yields no title.
    #[test]
    fn returns_none_when_only_noise() {
        let raw = "<ide_opened_file>/tmp/empty</ide_opened_file>\n<system-reminder>hi</system-reminder>\n```\ncode only\n```";
        assert!(sanitize_title(raw).is_none());
    }

    #[test]
    fn returns_none_when_too_short() {
        assert!(sanitize_title("hi").is_none());
    }

    // Slash commands are kept even though they are as short as rejected text.
    #[test]
    fn keeps_slash_commands_even_when_short() {
        let t = sanitize_title("/init").unwrap();
        assert_eq!(t, "/init");
    }

    #[test]
    fn observed_from_primary_session_block_removed() {
        let raw = "<observed_from_primary_session>\n <user_request>Let's continue with chained access.</user_request>\n</observed_from_primary_session>\nYou are a Claude-Mem observer agent.";
        let t = sanitize_title(raw).unwrap();
        assert!(!t.contains("observed_from_primary"));
        assert!(t.contains("Claude-Mem observer"));
    }

    #[test]
    fn handles_messy_real_first_message() {
        // Modeled on what we actually saw in claudex's first user
        // message: a mix of caveat, code block, and prose.
        let raw = "<local-command-caveat>Caveat: DO NOT respond.</local-command-caveat>\nI want to plan the next steps. Here's the current code:\n```rust\nfn main() {}\n```\nThoughts?";
        let t = sanitize_title(raw).unwrap();
        assert_eq!(t, "I want to plan the next steps.");
    }

    #[test]
    fn does_not_swallow_plain_text_containing_lt() {
        let raw = "Why does 5 < 10 evaluate weirdly here?";
        let t = sanitize_title(raw).unwrap();
        // `<` without a tag name is left alone by strip_xml_block but
        // strip_bare_tags treats it as the start of a tag and drops
        // everything up to `>`. Document this as acceptable for v0
        // — the <> form is rare in prose titles.
        // We just make sure it doesn't panic and returns something.
        assert!(!t.is_empty());
    }
}
373