| 1 | //! Cheap metadata extraction for the TOC sidebar. |
| 2 | //! |
| 3 | //! A session file can be 50+ MB for long agent runs. We never parse the |
| 4 | //! whole thing for summaries. Instead: |
| 5 | //! |
| 6 | //! 1. Open the file once and count lines (no JSON parse) for an |
| 7 | //! approximate `messageCount`. |
| 8 | //! 2. Parse at most `HEAD_SCAN_LINES` lines from the start, shallowly, |
| 9 | //! to extract `startedAt`, `cwd`, `gitBranch`, `version`, `slug`, |
| 10 | //! `customTitle`, the first assistant's `model`, and a fallback |
| 11 | //! title from the first non-meta user message. |
//! 3. Reverse-scan the last `TAIL_SCAN_BYTES` bytes for the final
//!    complete JSON object carrying a `timestamp` field, used as
//!    `lastActivityAt`. Falls back to the last timestamp seen during
//!    the head scan, and finally to the file's mtime.
| 15 | |
| 16 | use std::fs::File; |
| 17 | use std::io::{BufRead, BufReader, Read, Seek, SeekFrom}; |
| 18 | use std::path::Path; |
| 19 | |
| 20 | use chrono::{DateTime, Utc}; |
| 21 | use serde_json::Value; |
| 22 | |
| 23 | use crate::core::error::CoreResult; |
| 24 | use crate::core::reader::{is_timeline_event, is_user_prompt, NON_TIMELINE_KINDS}; |
| 25 | use crate::core::schema::{RawEvent, SessionSummary}; |
| 26 | use crate::core::title::sanitize_title; |
| 27 | |
/// Upper bound on the number of lines, counted from the top of the file,
/// that may receive a full parse while metadata is still being collected.
/// Empirically a real Claude Code session carries customTitle / cwd / the
/// first user message within its first ~20 lines, so this leaves headroom.
const HEAD_SCAN_LINES: usize = 100;

/// Byte size of the tail window used when scanning the end of the file
/// for the latest timestamp (64 KiB).
const TAIL_SCAN_BYTES: u64 = 64 * 1024;
| 36 | |
| 37 | pub fn summarize(session_path: &Path, project_id: &str) -> CoreResult<SessionSummary> { |
| 38 | let id = session_path |
| 39 | .file_stem() |
| 40 | .and_then(|s| s.to_str()) |
| 41 | .map(str::to_owned) |
| 42 | .unwrap_or_default(); |
| 43 | |
| 44 | let file = File::open(session_path)?; |
| 45 | let file_meta = file.metadata()?; |
| 46 | |
| 47 | // Single-pass scan: |
| 48 | // * For each line, cheaply classify it as timeline / prompt |
| 49 | // via prefix + substring checks so that counting 171 MB of |
| 50 | // JSONL doesn't cost a full serde parse per line. |
| 51 | // * For the first HEAD_SCAN_LINES (while head info is still |
| 52 | // unsaturated) also do a full RawEvent parse to pick up |
| 53 | // title / cwd / model / version metadata. |
| 54 | let mut head = HeadInfo::default(); |
| 55 | let mut user_title_fallback: Option<String> = None; |
| 56 | let mut message_count: u32 = 0; |
| 57 | let mut prompt_count: u32 = 0; |
| 58 | |
| 59 | { |
| 60 | let reader = BufReader::new(&file); |
| 61 | for (i, line) in reader.lines().enumerate() { |
| 62 | let line = match line { |
| 63 | Ok(l) => l, |
| 64 | Err(_) => continue, // tolerate partial last line |
| 65 | }; |
| 66 | if line.is_empty() { |
| 67 | continue; |
| 68 | } |
| 69 | |
| 70 | // Head path: full parse, feeds both counters (via the |
| 71 | // shared helpers) and head metadata. |
| 72 | let head_parse = i < HEAD_SCAN_LINES && !head.is_saturated(); |
| 73 | if head_parse { |
| 74 | if let Ok(ev) = serde_json::from_str::<RawEvent>(&line) { |
| 75 | head.absorb(&ev); |
| 76 | if user_title_fallback.is_none() { |
| 77 | if let Some(t) = extract_user_title(&ev) { |
| 78 | user_title_fallback = Some(t); |
| 79 | } |
| 80 | } |
| 81 | if is_timeline_event(&ev) { |
| 82 | message_count += 1; |
| 83 | } |
| 84 | if is_user_prompt(&ev) { |
| 85 | prompt_count += 1; |
| 86 | } |
| 87 | continue; |
| 88 | } |
| 89 | // Fall through to fast path if JSON parse fails. |
| 90 | } |
| 91 | |
| 92 | // Fast path: skip full parse. Claude Code's jsonl |
| 93 | // serializer emits `type` first and field order is |
| 94 | // stable, so these byte-level checks match what the |
| 95 | // full helpers would return without allocating. |
| 96 | if is_sidechain_line(&line) { |
| 97 | continue; |
| 98 | } |
| 99 | let kind = parse_kind_prefix(&line); |
| 100 | match kind { |
| 101 | Some(k) if NON_TIMELINE_KINDS.contains(&k) => continue, |
| 102 | Some(k) => { |
| 103 | message_count += 1; |
| 104 | if k == "user" && !has_tool_result(&line) { |
| 105 | prompt_count += 1; |
| 106 | } |
| 107 | } |
| 108 | None => { |
| 109 | // Unrecognized shape — count as timeline so we |
| 110 | // don't silently lose events on schema drift. |
| 111 | message_count += 1; |
| 112 | } |
| 113 | } |
| 114 | } |
| 115 | } |
| 116 | |
| 117 | // Tail scan for lastActivityAt. |
| 118 | let tail_last_ts = scan_tail_for_last_timestamp(session_path)?; |
| 119 | |
| 120 | let last_activity_at = tail_last_ts |
| 121 | .or(head.last_seen_timestamp) |
| 122 | .or_else(|| { |
| 123 | file_meta |
| 124 | .modified() |
| 125 | .ok() |
| 126 | .map(|st| DateTime::<Utc>::from(st)) |
| 127 | }); |
| 128 | |
| 129 | let title = head |
| 130 | .custom_title |
| 131 | .clone() |
| 132 | .or_else(|| head.ai_title.clone()) |
| 133 | .or(user_title_fallback) |
| 134 | .or(head.slug.clone()) |
| 135 | .unwrap_or_else(|| "(untitled)".to_string()); |
| 136 | |
| 137 | Ok(SessionSummary { |
| 138 | id, |
| 139 | project_id: project_id.to_string(), |
| 140 | title, |
| 141 | started_at: head.started_at, |
| 142 | last_activity_at, |
| 143 | model: head.model, |
| 144 | message_count, |
| 145 | prompt_count, |
| 146 | git_branch: head.git_branch, |
| 147 | version: head.version, |
| 148 | slug: head.slug, |
| 149 | cwd: head.cwd, |
| 150 | custom_title: head.custom_title, |
| 151 | entrypoint: head.entrypoint, |
| 152 | source: crate::core::schema::SessionSource::Disk, |
| 153 | }) |
| 154 | } |
| 155 | |
/// Borrow the value of the leading `type` field from a JSONL line.
///
/// Only lines that begin with exactly `{"type":"` are recognised; the
/// returned slice runs up to (not including) the next `"`. Any other
/// shape — including a line with no closing quote — yields `None`.
/// Nothing is allocated: the result points into `line`.
fn parse_kind_prefix(line: &str) -> Option<&str> {
    const PREFIX: &str = "{\"type\":\"";
    line.strip_prefix(PREFIX)
        .and_then(|after_prefix| after_prefix.split_once('"'))
        .map(|(kind, _rest)| kind)
}
| 164 | |
/// Report whether the raw line contains the exact text
/// `"isSidechain":true`.
///
/// This is a plain substring search, so the key may sit anywhere in the
/// line; no JSON parsing takes place.
fn is_sidechain_line(line: &str) -> bool {
    const SIDECHAIN_MARKER: &str = "\"isSidechain\":true";
    line.contains(SIDECHAIN_MARKER)
}
| 171 | |
/// Report whether the raw line contains the exact text
/// `"type":"tool_result"`.
///
/// The caller uses this to tell a `user` event that merely returns a
/// tool's output apart from a prompt typed by a human. It is a plain
/// substring search; no JSON parsing takes place.
fn has_tool_result(line: &str) -> bool {
    const TOOL_RESULT_MARKER: &str = "\"type\":\"tool_result\"";
    line.contains(TOOL_RESULT_MARKER)
}
| 177 | |
| 178 | /// Scratchpad for everything we're learning from the file's head. |
| 179 | #[derive(Debug, Default)] |
| 180 | struct HeadInfo { |
| 181 | started_at: Option<DateTime<Utc>>, |
| 182 | last_seen_timestamp: Option<DateTime<Utc>>, |
| 183 | cwd: Option<String>, |
| 184 | git_branch: Option<String>, |
| 185 | version: Option<String>, |
| 186 | slug: Option<String>, |
| 187 | custom_title: Option<String>, |
| 188 | ai_title: Option<String>, |
| 189 | model: Option<String>, |
| 190 | entrypoint: Option<String>, |
| 191 | } |
| 192 | |
| 193 | impl HeadInfo { |
| 194 | /// Have we collected every field the summary needs? If so the head |
| 195 | /// loop can short-circuit further parsing. |
| 196 | fn is_saturated(&self) -> bool { |
| 197 | self.started_at.is_some() |
| 198 | && self.cwd.is_some() |
| 199 | && self.git_branch.is_some() |
| 200 | && self.version.is_some() |
| 201 | && self.slug.is_some() |
| 202 | && (self.custom_title.is_some() || self.ai_title.is_some()) |
| 203 | && self.model.is_some() |
| 204 | && self.entrypoint.is_some() |
| 205 | } |
| 206 | |
| 207 | fn absorb(&mut self, ev: &RawEvent) { |
| 208 | if let Some(ts) = ev.timestamp { |
| 209 | if self.started_at.is_none() { |
| 210 | self.started_at = Some(ts); |
| 211 | } |
| 212 | self.last_seen_timestamp = Some(ts); |
| 213 | } |
| 214 | if self.cwd.is_none() { |
| 215 | if let Some(c) = &ev.cwd { |
| 216 | self.cwd = Some(c.clone()); |
| 217 | } |
| 218 | } |
| 219 | if self.git_branch.is_none() { |
| 220 | if let Some(b) = &ev.git_branch { |
| 221 | self.git_branch = Some(b.clone()); |
| 222 | } |
| 223 | } |
| 224 | if self.version.is_none() { |
| 225 | if let Some(v) = &ev.version { |
| 226 | self.version = Some(v.clone()); |
| 227 | } |
| 228 | } |
| 229 | if self.slug.is_none() { |
| 230 | if let Some(s) = &ev.slug { |
| 231 | self.slug = Some(s.clone()); |
| 232 | } |
| 233 | } |
| 234 | if self.custom_title.is_none() { |
| 235 | if let Some(t) = &ev.custom_title { |
| 236 | self.custom_title = Some(t.clone()); |
| 237 | } |
| 238 | } |
| 239 | if self.ai_title.is_none() { |
| 240 | if let Some(t) = &ev.ai_title { |
| 241 | self.ai_title = Some(t.clone()); |
| 242 | } |
| 243 | } |
| 244 | if self.entrypoint.is_none() { |
| 245 | if let Some(e) = &ev.entrypoint { |
| 246 | self.entrypoint = Some(e.clone()); |
| 247 | } |
| 248 | } |
| 249 | if self.model.is_none() && ev.kind == "assistant" { |
| 250 | if let Some(msg) = &ev.message { |
| 251 | if let Some(m) = msg.get("model").and_then(Value::as_str) { |
| 252 | self.model = Some(m.to_string()); |
| 253 | } |
| 254 | } |
| 255 | } |
| 256 | } |
| 257 | } |
| 258 | |
| 259 | /// Pull a short human-readable title from the first non-meta user event, |
| 260 | /// running it through the [`sanitize_title`] pipeline to strip IDE |
| 261 | /// wrappers, code blocks, and other noise. Returns `None` if the event |
| 262 | /// is not a user message or nothing usable survives sanitization. |
| 263 | fn extract_user_title(ev: &RawEvent) -> Option<String> { |
| 264 | if ev.kind != "user" { |
| 265 | return None; |
| 266 | } |
| 267 | if ev.is_meta.unwrap_or(false) { |
| 268 | return None; |
| 269 | } |
| 270 | let msg = ev.message.as_ref()?; |
| 271 | let text = match msg.get("content") { |
| 272 | Some(Value::String(s)) => s.clone(), |
| 273 | Some(Value::Array(blocks)) => blocks |
| 274 | .iter() |
| 275 | .filter_map(|b| b.get("text").and_then(Value::as_str)) |
| 276 | .collect::<Vec<_>>() |
| 277 | .join(" "), |
| 278 | _ => return None, |
| 279 | }; |
| 280 | sanitize_title(&text) |
| 281 | } |
| 282 | |
| 283 | /// Read the last `TAIL_SCAN_BYTES` of the file, find the last complete |
| 284 | /// `{...}\n` line, parse it, and return its `timestamp` if any. |
| 285 | fn scan_tail_for_last_timestamp(path: &Path) -> CoreResult<Option<DateTime<Utc>>> { |
| 286 | let mut file = File::open(path)?; |
| 287 | let len = file.metadata()?.len(); |
| 288 | if len == 0 { |
| 289 | return Ok(None); |
| 290 | } |
| 291 | let read_len = len.min(TAIL_SCAN_BYTES); |
| 292 | let start = len - read_len; |
| 293 | file.seek(SeekFrom::Start(start))?; |
| 294 | |
| 295 | let mut buf = vec![0u8; read_len as usize]; |
| 296 | file.read_exact(&mut buf)?; |
| 297 | |
| 298 | // Walk backward through complete lines (newline-terminated or EOF-terminated). |
| 299 | for line in buf.rsplit(|b| *b == b'\n') { |
| 300 | if line.is_empty() { |
| 301 | continue; |
| 302 | } |
| 303 | let text = match std::str::from_utf8(line) { |
| 304 | Ok(s) => s, |
| 305 | Err(_) => continue, |
| 306 | }; |
| 307 | if let Ok(ev) = serde_json::from_str::<RawEvent>(text) { |
| 308 | if let Some(ts) = ev.timestamp { |
| 309 | return Ok(Some(ts)); |
| 310 | } |
| 311 | } |
| 312 | } |
| 313 | Ok(None) |
| 314 | } |
| 315 | |
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::tempdir;

    /// A small but realistic session head: one metadata line without a
    /// timestamp, followed by four timestamped events.
    const FIXTURE_LINES: &[&str] = &[
        r#"{"type":"permission-mode","permissionMode":"default","sessionId":"abc"}"#,
        r#"{"type":"attachment","uuid":"u1","timestamp":"2026-04-11T00:55:32.249Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","attachment":{"type":"hook_success"}}"#,
        r#"{"type":"user","uuid":"u2","timestamp":"2026-04-11T00:55:35.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"role":"user","content":"plan the thread browser feature"}}"#,
        r#"{"type":"assistant","uuid":"u3","timestamp":"2026-04-11T00:55:40.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"model":"claude-opus-4-6","content":[{"type":"text","text":"Let's plan."}]}}"#,
        r#"{"type":"assistant","uuid":"u4","timestamp":"2026-04-11T01:05:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"model":"claude-opus-4-6","content":[{"type":"text","text":"Done."}]}}"#,
    ];

    /// Write each entry of `lines` to `path`, newline-terminated.
    fn write_fixture(path: &Path, lines: &[&str]) {
        let mut f = File::create(path).unwrap();
        for line in lines {
            writeln!(f, "{line}").unwrap();
        }
    }

    /// Parse an RFC 3339 literal into the timestamp type the summary uses.
    fn ts(raw: &str) -> DateTime<Utc> {
        DateTime::parse_from_rfc3339(raw)
            .expect("test timestamp literal must be valid RFC 3339")
            .with_timezone(&Utc)
    }

    #[test]
    fn extracts_summary_from_realistic_head() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("abc.jsonl");
        write_fixture(&path, FIXTURE_LINES);

        let s = summarize(&path, "-Users-me-repo").unwrap();
        assert_eq!(s.id, "abc");
        assert_eq!(s.project_id, "-Users-me-repo");
        assert_eq!(s.title, "plan the thread browser feature");
        assert_eq!(s.model.as_deref(), Some("claude-opus-4-6"));
        assert_eq!(s.git_branch.as_deref(), Some("main"));
        assert_eq!(s.version.as_deref(), Some("2.1.101"));
        // permission-mode is metadata → not counted in message_count.
        // attachment + user + 2 assistants = 4 timeline events.
        assert_eq!(s.message_count, 4);
        assert_eq!(s.prompt_count, 1);
        // Pin the exact values: `is_some()` alone would also pass via the
        // mtime fallback and therefore prove nothing about the scans.
        assert_eq!(s.started_at, Some(ts("2026-04-11T00:55:32.249Z")));
        assert_eq!(s.last_activity_at, Some(ts("2026-04-11T01:05:00.000Z")));
    }

    #[test]
    fn survives_truncated_last_line() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("abc.jsonl");
        // Write good lines, then append a partial JSON line with no newline.
        let mut f = File::create(&path).unwrap();
        for line in FIXTURE_LINES {
            writeln!(f, "{line}").unwrap();
        }
        f.write_all(br#"{"type":"assistant","timestamp":"2026-04-11T"#)
            .unwrap();
        drop(f);

        let s = summarize(&path, "-Users-me-repo").unwrap();
        assert_eq!(s.title, "plan the thread browser feature");
        // The last activity must come from the preceding good line, not
        // from the truncated one and not from the mtime fallback.
        assert_eq!(s.last_activity_at, Some(ts("2026-04-11T01:05:00.000Z")));
    }

    #[test]
    fn ignores_meta_user_events_for_title() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("abc.jsonl");
        write_fixture(
            &path,
            &[
                r#"{"type":"permission-mode","permissionMode":"default","sessionId":"abc"}"#,
                r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","isMeta":true,"message":{"role":"user","content":"<local-command-caveat>noise</local-command-caveat>"}}"#,
                r#"{"type":"user","uuid":"u2","timestamp":"2026-04-11T00:55:30.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"role":"user","content":"actual first question"}}"#,
            ],
        );

        let s = summarize(&path, "-Users-me-repo").unwrap();
        assert_eq!(s.title, "actual first question");
    }

    #[test]
    fn prefers_custom_title_over_user_message() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("abc.jsonl");
        write_fixture(
            &path,
            &[
                r#"{"type":"custom-title","customTitle":"The Real Title","sessionId":"abc"}"#,
                r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"role":"user","content":"something else"}}"#,
            ],
        );

        let s = summarize(&path, "-Users-me-repo").unwrap();
        assert_eq!(s.title, "The Real Title");
    }

    #[test]
    fn empty_file_returns_fallback_title() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("empty.jsonl");
        File::create(&path).unwrap();

        let s = summarize(&path, "-Users-me-repo").unwrap();
        assert_eq!(s.title, "(untitled)");
        assert_eq!(s.message_count, 0);
        assert_eq!(s.prompt_count, 0);
        // No events → no start timestamp.
        assert!(s.started_at.is_none());
        // last_activity falls back to mtime.
        assert!(s.last_activity_at.is_some());
    }

    #[test]
    fn prompt_count_excludes_tool_result_returns() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("abc.jsonl");
        write_fixture(
            &path,
            &[
                // Real human prompt.
                r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":"run the tests"}}"#,
                // Assistant's tool_use.
                r#"{"type":"assistant","uuid":"u2","timestamp":"2026-04-11T00:55:10.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"model":"claude-opus-4-6","content":[{"type":"tool_use","id":"t1","name":"Bash","input":{"command":"cargo test"}}]}}"#,
                // Tool_result posing as a user event — NOT a prompt.
                r#"{"type":"user","uuid":"u3","timestamp":"2026-04-11T00:55:20.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"t1","content":"ok","is_error":false}]}}"#,
                // Another real human prompt.
                r#"{"type":"user","uuid":"u4","timestamp":"2026-04-11T00:55:30.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":"now commit"}}"#,
            ],
        );

        let s = summarize(&path, "-Users-me-repo").unwrap();
        // All 4 events render in the viewer timeline.
        assert_eq!(s.message_count, 4);
        // Only the two real human prompts count, not the tool_result.
        assert_eq!(s.prompt_count, 2);
    }
}
| 450 |