Rust · 19880 bytes Raw Blame History
1 //! Project merging / grouping.
2 //!
3 //! Claude Code encodes each working directory into a flat dir name
4 //! under `~/.claude/projects/`, but one logical project routinely ends
5 //! up split across multiple encoded dirs — a user will `cd` into a
6 //! subdirectory mid-session and Claude Code starts writing the
7 //! transcript into a new encoded dir, leaving an orphan state dir
8 //! behind in the original. This module rehydrates the logical
9 //! grouping by:
10 //!
11 //! 1. **Git-root detection**: walk up from a session's real `cwd`
12 //! looking for a `.git` directory. All sessions under that git
13 //! root become one project.
14 //! 2. **Path-prefix fallback**: for projects without a reachable git
15 //! ancestor, collapse keys where one is a strict path prefix of
16 //! another (up to a bounded depth), so `/Users/me/proj` absorbs
17 //! `/Users/me/proj/docs`.
18 //! 3. **Observer detection**: sessions whose cwd lives under
19 //! `.claude-mem/observer-sessions` are marked as the `Observer`
20 //! category and rendered under a collapsed section in the sidebar.
21
22 use std::collections::{BTreeSet, HashMap};
23 use std::path::{Path, PathBuf};
24
25 use chrono::{DateTime, Utc};
26
27 use crate::core::history_log::HistoryLog;
28 use crate::core::schema::{Project, ProjectCategory, SessionSource, SessionSummary};
29 use crate::core::title::sanitize_title;
30
/// Cap on how many directories `detect_git_root` inspects for a `.git`
/// entry: the session's cwd itself plus its successive parents. Keeps
/// the upward walk short on orphan paths.
const GIT_SEARCH_MAX_DEPTH: usize = 24;

/// Minimum number of path components (as counted by `path_depth`, so
/// the root counts as one) a "short" key needs before the path-prefix
/// fallback lets it absorb longer keys. Stops something as shallow as
/// `/Users/me` from swallowing every project under the home dir.
const MIN_PREFIX_KEY_DEPTH: usize = 4;

/// Largest number of extra components a "long" key may have beyond a
/// "short" key while the two are still considered the same logical
/// project.
const MAX_PREFIX_EXTRA_DEPTH: usize = 3;
44
45 /// A summary that has been keyed for grouping.
46 #[derive(Debug, Clone)]
47 pub struct Keyed {
48 pub summary: SessionSummary,
49 pub encoded_dir: String,
50 pub key: String,
51 pub category: ProjectCategory,
52 }
53
54 /// Walk up from `cwd` looking for a `.git` ancestor. Returns the
55 /// deepest directory that contains `.git`. `None` if `cwd` doesn't
56 /// exist on disk, has no git ancestor, or we hit the depth cap.
57 pub fn detect_git_root(cwd: &Path) -> Option<PathBuf> {
58 if !cwd.exists() {
59 return None;
60 }
61 let mut current = cwd.to_path_buf();
62 for _ in 0..GIT_SEARCH_MAX_DEPTH {
63 if current.join(".git").exists() {
64 return Some(current);
65 }
66 if !current.pop() {
67 return None;
68 }
69 }
70 None
71 }
72
/// Report whether `cwd` is the claude-mem observer working directory or
/// a path nested inside it.
pub fn is_observer_cwd(cwd: &str) -> bool {
    let at_observer_root = cwd.ends_with("/.claude-mem/observer-sessions");
    let below_observer_root = cwd.contains("/.claude-mem/observer-sessions/");
    at_observer_root || below_observer_root
}
78
79 /// Derive the initial merge key for a single session.
80 pub fn initial_key(summary: &SessionSummary, encoded_dir: &str) -> String {
81 if let Some(cwd) = summary.cwd.as_deref() {
82 if let Some(git) = detect_git_root(Path::new(cwd)) {
83 return git.to_string_lossy().into_owned();
84 }
85 return cwd.to_string();
86 }
87 encoded_dir.to_string()
88 }
89
90 /// Pick the category for a session.
91 pub fn initial_category(summary: &SessionSummary) -> ProjectCategory {
92 match summary.cwd.as_deref() {
93 Some(cwd) if is_observer_cwd(cwd) => ProjectCategory::Observer,
94 _ => ProjectCategory::Regular,
95 }
96 }
97
98 /// Build ghost [`ProjectCategory::Archive`] projects from
99 /// `~/.claude/history.jsonl` for every project path that has prompt
100 /// history but **no** representation in the disk-backed projects we
101 /// already discovered. Each archive project carries a single synthetic
102 /// [`SessionSummary`] built from its prompt stream.
103 pub fn build_archive_projects(
104 history: &HistoryLog,
105 disk_projects: &[Project],
106 ) -> Vec<Project> {
107 let mut out = Vec::new();
108 for project_path in history.projects() {
109 if is_path_represented_on_disk(project_path, disk_projects) {
110 continue;
111 }
112 let entries = history.entries_for(project_path);
113 if entries.is_empty() {
114 continue;
115 }
116
117 let earliest = entries.first().map(|e| e.timestamp_ms).unwrap_or(0);
118 let latest = entries.last().map(|e| e.timestamp_ms).unwrap_or(0);
119
120 // First non-trivial display entry becomes the title candidate.
121 let title_candidate = entries
122 .iter()
123 .map(|e| e.display.as_str())
124 .find(|d| !d.trim().is_empty() && !d.trim().starts_with('/'))
125 .unwrap_or_default();
126 let title = sanitize_title(title_candidate)
127 .unwrap_or_else(|| format!("{} (archived)", basename_of(project_path)));
128
129 let display_name = basename_of(project_path);
130 let summary = SessionSummary {
131 id: archive_session_id(project_path),
132 project_id: project_path.to_string(),
133 title: title.clone(),
134 started_at: epoch_ms_to_utc(earliest),
135 last_activity_at: epoch_ms_to_utc(latest),
136 model: None,
137 message_count: entries.len() as u32,
138 // Archive entries come from ~/.claude/history.jsonl which
139 // only stores human prompts.
140 prompt_count: entries.len() as u32,
141 git_branch: None,
142 version: None,
143 slug: None,
144 cwd: Some(project_path.to_string()),
145 custom_title: None,
146 entrypoint: Some("archive".to_string()),
147 source: SessionSource::Archive,
148 };
149
150 out.push(Project {
151 id: project_path.to_string(),
152 cwd: project_path.to_string(),
153 display_name,
154 session_count: 1,
155 last_activity: epoch_ms_to_utc(latest),
156 category: ProjectCategory::Archive,
157 source_dirs: Vec::new(),
158 sessions: vec![summary],
159 });
160 }
161 // Newest first within the archive bucket.
162 out.sort_by(|a, b| b.last_activity.cmp(&a.last_activity));
163 out
164 }
165
/// Derive the id of an archive project's synthetic session: the
/// project path prefixed with `archive:`. Distinct project paths give
/// distinct ids, which keeps them unique within one list_projects call.
pub fn archive_session_id(project_path: &str) -> String {
    ["archive:", project_path].concat()
}
171
172 fn is_path_represented_on_disk(archive_path: &str, disk_projects: &[Project]) -> bool {
173 for p in disk_projects {
174 // The merged project's canonical cwd — exact match always wins.
175 if p.cwd == archive_path {
176 return true;
177 }
178 // Subpath matching only if the disk cwd is a real project
179 // path (4+ components). Without this guard, a home-dir
180 // umbrella project (cwd = `/Users/alice` — 3 components)
181 // would swallow every archive path under the home directory.
182 if path_depth(&p.cwd) >= MIN_PREFIX_KEY_DEPTH
183 && archive_path.starts_with(&format!("{}/", p.cwd))
184 {
185 return true;
186 }
187 // Also check individual session cwds in case a project in the
188 // disk set had heterogeneous cwds.
189 for s in &p.sessions {
190 if let Some(scwd) = &s.cwd {
191 if scwd == archive_path {
192 return true;
193 }
194 if path_depth(scwd) >= MIN_PREFIX_KEY_DEPTH
195 && archive_path.starts_with(&format!("{scwd}/"))
196 {
197 return true;
198 }
199 }
200 }
201 }
202 false
203 }
204
/// Final path component of `path` as an owned string. Falls back to
/// the whole input when there is no final component (e.g. `/`, the
/// empty string, or a path ending in `..`) or it is not valid UTF-8.
fn basename_of(path: &str) -> String {
    match Path::new(path).file_name().and_then(|leaf| leaf.to_str()) {
        Some(leaf) => leaf.to_owned(),
        None => path.to_owned(),
    }
}
212
213 fn epoch_ms_to_utc(ms: i64) -> Option<DateTime<Utc>> {
214 DateTime::<Utc>::from_timestamp_millis(ms)
215 }
216
217 /// Build a merged Project list from a flat sequence of (summary,
218 /// encoded_dir) pairs.
219 pub fn build_projects(sessions: Vec<(SessionSummary, String)>) -> Vec<Project> {
220 // Pass 1 — per-session key + category.
221 let mut keyed: Vec<Keyed> = sessions
222 .into_iter()
223 .map(|(summary, encoded_dir)| {
224 let key = initial_key(&summary, &encoded_dir);
225 let category = initial_category(&summary);
226 Keyed {
227 summary,
228 encoded_dir,
229 key,
230 category,
231 }
232 })
233 .collect();
234
235 // Pass 2 — path-prefix fallback collapse. Observers never
236 // participate in prefix merging (they all share one cwd anyway).
237 let collapse_map = compute_prefix_collapse(&keyed);
238 for ks in keyed.iter_mut() {
239 if ks.category != ProjectCategory::Regular {
240 continue;
241 }
242 if let Some(new_key) = collapse_map.get(&ks.key) {
243 ks.key = new_key.clone();
244 }
245 }
246
247 // Pass 3 — group by (key, category) and materialize Project rows.
248 let mut groups: HashMap<(String, ProjectCategory), Vec<Keyed>> = HashMap::new();
249 for ks in keyed {
250 groups
251 .entry((ks.key.clone(), ks.category))
252 .or_default()
253 .push(ks);
254 }
255
256 let mut out: Vec<Project> = groups
257 .into_iter()
258 .map(|((key, category), members)| build_one_project(key, category, members))
259 .collect();
260
261 // Category order: Regular → Observer → Archive. Inside each
262 // category, newest-active first.
263 out.sort_by(|a, b| match category_rank(a.category).cmp(&category_rank(b.category)) {
264 std::cmp::Ordering::Equal => b.last_activity.cmp(&a.last_activity),
265 other => other,
266 });
267 out
268 }
269
270 fn category_rank(c: ProjectCategory) -> u8 {
271 match c {
272 ProjectCategory::Regular => 0,
273 ProjectCategory::Observer => 1,
274 ProjectCategory::Archive => 2,
275 }
276 }
277
278 fn build_one_project(
279 key: String,
280 category: ProjectCategory,
281 members: Vec<Keyed>,
282 ) -> Project {
283 let session_count = members.len() as u32;
284 let last_activity: Option<DateTime<Utc>> = members
285 .iter()
286 .filter_map(|m| m.summary.last_activity_at)
287 .max();
288
289 let mut source_dirs: Vec<String> =
290 members.iter().map(|m| m.encoded_dir.clone()).collect();
291 source_dirs.sort();
292 source_dirs.dedup();
293
294 // The merge key is the canonical cwd when path-like (git root or
295 // verbatim cwd). Use it directly; fall back to the latest session's
296 // cwd only when the key isn't a path (e.g. raw encoded dir name).
297 let (cwd, display_name) = if key.starts_with('/') {
298 let name = Path::new(&key)
299 .file_name()
300 .and_then(|n| n.to_str())
301 .map(str::to_owned)
302 .unwrap_or_else(|| key.clone());
303 (key.clone(), name)
304 } else {
305 let fallback_cwd = members
306 .iter()
307 .max_by_key(|m| m.summary.last_activity_at)
308 .and_then(|m| m.summary.cwd.clone())
309 .unwrap_or_else(|| key.clone());
310 let name = Path::new(&fallback_cwd)
311 .file_name()
312 .and_then(|n| n.to_str())
313 .map(str::to_owned)
314 .unwrap_or_else(|| key.clone());
315 (fallback_cwd, name)
316 };
317
318 // Extract the session summaries and sort newest-first.
319 let mut sessions: Vec<SessionSummary> =
320 members.into_iter().map(|m| m.summary).collect();
321 sessions.sort_by(|a, b| b.last_activity_at.cmp(&a.last_activity_at));
322
323 Project {
324 id: key,
325 cwd,
326 display_name,
327 session_count,
328 last_activity,
329 category,
330 source_dirs,
331 sessions,
332 }
333 }
334
335 /// Compute a map of "long key" -> "short key" where long extends short
336 /// as a strict path prefix. Only applies to path-like keys; non-path
337 /// keys (e.g. raw encoded dir names) pass through untouched.
338 fn compute_prefix_collapse(keyed: &[Keyed]) -> HashMap<String, String> {
339 let unique_keys: BTreeSet<&str> = keyed
340 .iter()
341 .filter(|k| k.category == ProjectCategory::Regular)
342 .map(|k| k.key.as_str())
343 .collect();
344 let keys: Vec<&str> = unique_keys.into_iter().collect();
345
346 // For each "short" candidate, find every longer path that extends it
347 // within bounds and mark the longer for collapse. A single long key
348 // may have multiple short candidates; pick the deepest (most
349 // specific) short so `/A/B` absorbs `/A/B/C` rather than `/A`.
350 let mut out: HashMap<String, String> = HashMap::new();
351 for long in &keys {
352 let mut best: Option<&str> = None;
353 for short in &keys {
354 if short == long {
355 continue;
356 }
357 if !path_is_strict_prefix(short, long) {
358 continue;
359 }
360 let short_depth = path_depth(short);
361 let long_depth = path_depth(long);
362 if short_depth < MIN_PREFIX_KEY_DEPTH {
363 continue;
364 }
365 if long_depth.saturating_sub(short_depth) > MAX_PREFIX_EXTRA_DEPTH {
366 continue;
367 }
368 // Deeper short wins — more specific match.
369 match best {
370 Some(cur) if path_depth(cur) >= short_depth => {}
371 _ => best = Some(short),
372 }
373 }
374 if let Some(short) = best {
375 out.insert((*long).to_string(), short.to_string());
376 }
377 }
378 out
379 }
380
/// True when `long` lies strictly below `short` on a path-component
/// boundary. Both must be absolute (`/`-prefixed) strings; `short` may
/// or may not carry a trailing slash.
fn path_is_strict_prefix(short: &str, long: &str) -> bool {
    let both_absolute = short.starts_with('/') && long.starts_with('/');
    if !both_absolute {
        return false;
    }
    match long.strip_prefix(short) {
        None => false,
        // `short` already ends on a separator: anything left over is
        // below it.
        Some(tail) if short.ends_with('/') => !tail.is_empty(),
        // Otherwise the remainder has to start a new component, so
        // `/a/b` does not match `/a/bc`.
        Some(tail) => tail.starts_with('/'),
    }
}
395
/// Number of components in `p` as split by `std::path::Path`; a
/// leading root counts as one component, so `/Users/alice` has depth 3.
fn path_depth(p: &str) -> usize {
    Path::new(p).components().fold(0, |depth, _| depth + 1)
}
399
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Build a disk-sourced summary; everything except the id, cwd and
    /// last-activity timestamp is filled with a placeholder. `last` is
    /// an RFC 3339 timestamp; an unparsable value becomes `None`.
    fn mk_summary(
        id: &str,
        cwd: Option<&str>,
        last: Option<&str>,
    ) -> SessionSummary {
        let last_activity_at = last
            .and_then(|raw| DateTime::parse_from_rfc3339(raw).ok())
            .map(|stamp| stamp.with_timezone(&Utc));
        SessionSummary {
            id: id.to_string(),
            project_id: "stub".to_string(),
            title: id.to_string(),
            started_at: None,
            last_activity_at,
            model: None,
            message_count: 0,
            prompt_count: 0,
            git_branch: None,
            version: None,
            slug: None,
            cwd: cwd.map(str::to_owned),
            custom_title: None,
            entrypoint: None,
            source: crate::core::schema::SessionSource::Disk,
        }
    }

    #[test]
    fn detects_git_root_on_disk() {
        let scratch = tempdir().unwrap();
        let repo_root = scratch.path().join("proj");
        std::fs::create_dir_all(repo_root.join(".git")).unwrap();
        let nested = repo_root.join("docs/chapter-1");
        std::fs::create_dir_all(&nested).unwrap();

        assert_eq!(detect_git_root(&nested).unwrap(), repo_root);
    }

    #[test]
    fn git_root_missing_returns_none() {
        let scratch = tempdir().unwrap();
        let plain_dir = scratch.path().join("nothing/here");
        std::fs::create_dir_all(&plain_dir).unwrap();

        assert!(detect_git_root(&plain_dir).is_none());
    }

    #[test]
    fn git_root_cwd_does_not_exist_returns_none() {
        let missing = Path::new("/absolutely/not/a/real/path/here");
        assert!(detect_git_root(missing).is_none());
    }

    #[test]
    fn observer_cwd_detection() {
        // The observer dir itself and anything below it qualify.
        assert!(is_observer_cwd("/Users/me/.claude-mem/observer-sessions"));
        assert!(is_observer_cwd(
            "/Users/me/.claude-mem/observer-sessions/subdir"
        ));
        // Ordinary projects and other claude-mem dirs do not.
        assert!(!is_observer_cwd("/Users/me/Documents/project"));
        assert!(!is_observer_cwd("/Users/me/.claude-mem/other"));
    }

    #[test]
    fn merges_sibling_cwds_under_git_root() {
        let scratch = tempdir().unwrap();
        let repo = scratch.path().join("armfortas");
        for sub in [".git", "docs", "afs-as"] {
            std::fs::create_dir_all(repo.join(sub)).unwrap();
        }

        let in_docs = mk_summary(
            "s1",
            Some(repo.join("docs").to_str().unwrap()),
            Some("2026-04-10T00:00:00Z"),
        );
        let in_afs_as = mk_summary(
            "s2",
            Some(repo.join("afs-as").to_str().unwrap()),
            Some("2026-04-11T00:00:00Z"),
        );
        let at_root = mk_summary(
            "s3",
            Some(repo.to_str().unwrap()),
            Some("2026-04-09T00:00:00Z"),
        );

        let projects = build_projects(vec![
            (in_docs, "-encoded-armfortas-docs".into()),
            (in_afs_as, "-encoded-armfortas-afs-as".into()),
            (at_root, "-encoded-armfortas".into()),
        ]);

        assert_eq!(projects.len(), 1, "all three should merge into one");
        let merged = &projects[0];
        assert_eq!(merged.session_count, 3);
        assert_eq!(merged.category, ProjectCategory::Regular);
        assert_eq!(merged.source_dirs.len(), 3);
        assert_eq!(merged.display_name, "armfortas");
    }

    #[test]
    fn observer_sessions_get_their_own_bucket_at_bottom() {
        let scratch = tempdir().unwrap();
        let repo = scratch.path().join("real");
        std::fs::create_dir_all(repo.join(".git")).unwrap();

        let regular = mk_summary(
            "s1",
            Some(repo.to_str().unwrap()),
            Some("2026-04-11T00:00:00Z"),
        );
        let observer = mk_summary(
            "s2",
            Some("/Users/me/.claude-mem/observer-sessions"),
            Some("2026-04-11T12:00:00Z"),
        );

        let projects = build_projects(vec![
            (observer, "-encoded-observer".into()),
            (regular, "-encoded-real".into()),
        ]);

        assert_eq!(projects.len(), 2);
        // Category outranks recency: the regular project leads even
        // though the observer session is the more recent one.
        let categories: Vec<ProjectCategory> =
            projects.iter().map(|p| p.category).collect();
        assert_eq!(categories[0], ProjectCategory::Regular);
        assert_eq!(categories[1], ProjectCategory::Observer);
    }

    #[test]
    fn prefix_fallback_merges_when_no_git_root() {
        // Neither cwd exists on disk, so git detection cannot apply and
        // only the prefix fallback can merge them.
        let parent = mk_summary(
            "s1",
            Some("/imaginary/Users/me/deep/proj"),
            Some("2026-04-10T00:00:00Z"),
        );
        let child = mk_summary(
            "s2",
            Some("/imaginary/Users/me/deep/proj/subdir"),
            Some("2026-04-11T00:00:00Z"),
        );

        let projects = build_projects(vec![
            (parent, "-enc-1".into()),
            (child, "-enc-2".into()),
        ]);

        assert_eq!(projects.len(), 1);
        assert_eq!(projects[0].session_count, 2);
    }

    #[test]
    fn prefix_fallback_does_not_merge_unrelated_siblings() {
        // Siblings share a parent that is not itself a key, so neither
        // one can absorb the other.
        let first = mk_summary(
            "s1",
            Some("/imaginary/Users/me/proj-a"),
            Some("2026-04-10T00:00:00Z"),
        );
        let second = mk_summary(
            "s2",
            Some("/imaginary/Users/me/proj-b"),
            Some("2026-04-11T00:00:00Z"),
        );

        let projects = build_projects(vec![
            (first, "-enc-a".into()),
            (second, "-enc-b".into()),
        ]);

        assert_eq!(projects.len(), 2);
    }

    #[test]
    fn no_cwd_falls_back_to_encoded_dir() {
        let cwd_less = mk_summary("s1", None, Some("2026-04-10T00:00:00Z"));

        let projects = build_projects(vec![(cwd_less, "-enc-fallback".into())]);

        assert_eq!(projects.len(), 1);
        assert_eq!(projects[0].id, "-enc-fallback");
    }
}
585