claudex Public

Watch 0 Fork 0 Star 0

Rust · 17529 bytes Raw Blame History

  
        1
        //! Cheap metadata extraction for the TOC sidebar.
      
        2
        //!
      
        3
        //! A session file can be 50+ MB for long agent runs. We never parse the
      
        4
        //! whole thing for summaries. Instead:
      
        5
        //!
      
        6
        //! 1. Open the file once and classify every line with cheap prefix /
      
        7
        //!    substring checks (no full JSON parse) to get message and prompt counts.
      
        8
        //! 2. Parse at most `HEAD_SCAN_LINES` lines from the start, shallowly,
      
        9
        //!    to extract `startedAt`, `cwd`, `gitBranch`, `version`, `slug`,
      
        10
        //!    `customTitle`, the first assistant's `model`, and a fallback
      
        11
        //!    title from the first non-meta user message.
      
        12
        //! 3. Reverse-scan the last `TAIL_SCAN_BYTES` bytes for the final
      
        13
        //!    complete JSON object carrying a `timestamp` field, used as
      
        14
        //!    `lastActivityAt`. Falls back to the file's mtime.
      
        15
        
        16
        use std::fs::File;
      
        17
        use std::io::{BufRead, BufReader, Read, Seek, SeekFrom};
      
        18
        use std::path::Path;
      
        19
        
        20
        use chrono::{DateTime, Utc};
      
        21
        use serde_json::Value;
      
        22
        
        23
        use crate::core::error::CoreResult;
      
        24
        use crate::core::reader::{is_timeline_event, is_user_prompt, NON_TIMELINE_KINDS};
      
        25
        use crate::core::schema::{RawEvent, SessionSummary};
      
        26
        use crate::core::title::sanitize_title;
      
        27
        
        28
        /// Upper bound on the number of leading lines that get a full JSON
        /// parse while collecting head metadata. Real Claude Code sessions
        /// empirically carry customTitle / cwd / the first user message within
        /// roughly the first 20 lines, so this leaves generous headroom.
        const HEAD_SCAN_LINES: usize = 100;

        /// Size, in bytes, of the window at the end of the file that is
        /// searched for the most recent timestamp.
        const TAIL_SCAN_BYTES: u64 = 64 * 1024;
      
        36
        
        37
        pub fn summarize(session_path: &Path, project_id: &str) -> CoreResult<SessionSummary> {
      
        38
            let id = session_path
      
        39
                .file_stem()
      
        40
                .and_then(|s| s.to_str())
      
        41
                .map(str::to_owned)
      
        42
                .unwrap_or_default();
      
        43
        
        44
            let file = File::open(session_path)?;
      
        45
            let file_meta = file.metadata()?;
      
        46
        
        47
            // Single-pass scan:
      
        48
            //   * For each line, cheaply classify it as timeline / prompt
      
        49
            //     via prefix + substring checks so that counting 171 MB of
      
        50
            //     JSONL doesn't cost a full serde parse per line.
      
        51
            //   * For the first HEAD_SCAN_LINES (while head info is still
      
        52
            //     unsaturated) also do a full RawEvent parse to pick up
      
        53
            //     title / cwd / model / version metadata.
      
        54
            let mut head = HeadInfo::default();
      
        55
            let mut user_title_fallback: Option<String> = None;
      
        56
            let mut message_count: u32 = 0;
      
        57
            let mut prompt_count: u32 = 0;
      
        58
        
        59
            {
      
        60
                let reader = BufReader::new(&file);
      
        61
                for (i, line) in reader.lines().enumerate() {
      
        62
                    let line = match line {
      
        63
                        Ok(l) => l,
      
        64
                        Err(_) => continue, // tolerate partial last line
      
        65
                    };
      
        66
                    if line.is_empty() {
      
        67
                        continue;
      
        68
                    }
      
        69
        
        70
                    // Head path: full parse, feeds both counters (via the
      
        71
                    // shared helpers) and head metadata.
      
        72
                    let head_parse = i < HEAD_SCAN_LINES && !head.is_saturated();
      
        73
                    if head_parse {
      
        74
                        if let Ok(ev) = serde_json::from_str::<RawEvent>(&line) {
      
        75
                            head.absorb(&ev);
      
        76
                            if user_title_fallback.is_none() {
      
        77
                                if let Some(t) = extract_user_title(&ev) {
      
        78
                                    user_title_fallback = Some(t);
      
        79
                                }
      
        80
                            }
      
        81
                            if is_timeline_event(&ev) {
      
        82
                                message_count += 1;
      
        83
                            }
      
        84
                            if is_user_prompt(&ev) {
      
        85
                                prompt_count += 1;
      
        86
                            }
      
        87
                            continue;
      
        88
                        }
      
        89
                        // Fall through to fast path if JSON parse fails.
      
        90
                    }
      
        91
        
        92
                    // Fast path: skip full parse. Claude Code's jsonl
      
        93
                    // serializer emits `type` first and field order is
      
        94
                    // stable, so these byte-level checks match what the
      
        95
                    // full helpers would return without allocating.
      
        96
                    if is_sidechain_line(&line) {
      
        97
                        continue;
      
        98
                    }
      
        99
                    let kind = parse_kind_prefix(&line);
      
        100
                    match kind {
      
        101
                        Some(k) if NON_TIMELINE_KINDS.contains(&k) => continue,
      
        102
                        Some(k) => {
      
        103
                            message_count += 1;
      
        104
                            if k == "user" && !has_tool_result(&line) {
      
        105
                                prompt_count += 1;
      
        106
                            }
      
        107
                        }
      
        108
                        None => {
      
        109
                            // Unrecognized shape — count as timeline so we
      
        110
                            // don't silently lose events on schema drift.
      
        111
                            message_count += 1;
      
        112
                        }
      
        113
                    }
      
        114
                }
      
        115
            }
      
        116
        
        117
            // Tail scan for lastActivityAt.
      
        118
            let tail_last_ts = scan_tail_for_last_timestamp(session_path)?;
      
        119
        
        120
            let last_activity_at = tail_last_ts
      
        121
                .or(head.last_seen_timestamp)
      
        122
                .or_else(|| {
      
        123
                    file_meta
      
        124
                        .modified()
      
        125
                        .ok()
      
        126
                        .map(|st| DateTime::<Utc>::from(st))
      
        127
                });
      
        128
        
        129
            let title = head
      
        130
                .custom_title
      
        131
                .clone()
      
        132
                .or_else(|| head.ai_title.clone())
      
        133
                .or(user_title_fallback)
      
        134
                .or(head.slug.clone())
      
        135
                .unwrap_or_else(|| "(untitled)".to_string());
      
        136
        
        137
            Ok(SessionSummary {
      
        138
                id,
      
        139
                project_id: project_id.to_string(),
      
        140
                title,
      
        141
                started_at: head.started_at,
      
        142
                last_activity_at,
      
        143
                model: head.model,
      
        144
                message_count,
      
        145
                prompt_count,
      
        146
                git_branch: head.git_branch,
      
        147
                version: head.version,
      
        148
                slug: head.slug,
      
        149
                cwd: head.cwd,
      
        150
                custom_title: head.custom_title,
      
        151
                entrypoint: head.entrypoint,
      
        152
                source: crate::core::schema::SessionSource::Disk,
      
        153
            })
      
        154
        }
      
        155
        
        156
        /// Return the value of the leading `type` field of a JSONL line, i.e.
        /// the text between `{"type":"` and the next `"`. Lines that do not
        /// begin with exactly that prefix, or that have no closing quote after
        /// it, yield `None`. The result borrows from `line`; nothing is
        /// allocated.
        fn parse_kind_prefix(line: &str) -> Option<&str> {
            const OPENING: &str = "{\"type\":\"";
            let after_prefix = line.strip_prefix(OPENING)?;
            after_prefix.split_once('"').map(|(kind, _)| kind)
        }
      
        164
        
        165
        /// Report whether the raw line contains the literal
        /// `"isSidechain":true`. The check is a plain substring search, so it
        /// does not depend on where the key sits within the object.
        fn is_sidechain_line(line: &str) -> bool {
            const MARKER: &str = "\"isSidechain\":true";
            line.find(MARKER).is_some()
        }
      
        171
        
        172
        /// Report whether the raw line contains the literal
        /// `"type":"tool_result"` anywhere. Callers use this to tell a `user`
        /// event that carries a tool return apart from a human-written prompt.
        fn has_tool_result(line: &str) -> bool {
            const MARKER: &str = "\"type\":\"tool_result\"";
            line.find(MARKER).is_some()
        }
      
        177
        
        178
        /// Scratchpad for everything we're learning from the file's head.
      
        179
        #[derive(Debug, Default)]
      
        180
        struct HeadInfo {
      
        181
            started_at: Option<DateTime<Utc>>,
      
        182
            last_seen_timestamp: Option<DateTime<Utc>>,
      
        183
            cwd: Option<String>,
      
        184
            git_branch: Option<String>,
      
        185
            version: Option<String>,
      
        186
            slug: Option<String>,
      
        187
            custom_title: Option<String>,
      
        188
            ai_title: Option<String>,
      
        189
            model: Option<String>,
      
        190
            entrypoint: Option<String>,
      
        191
        }
      
        192
        
        193
        impl HeadInfo {
      
        194
            /// Have we collected every field the summary needs? If so the head
      
        195
            /// loop can short-circuit further parsing.
      
        196
            fn is_saturated(&self) -> bool {
      
        197
                self.started_at.is_some()
      
        198
                    && self.cwd.is_some()
      
        199
                    && self.git_branch.is_some()
      
        200
                    && self.version.is_some()
      
        201
                    && self.slug.is_some()
      
        202
                    && (self.custom_title.is_some() || self.ai_title.is_some())
      
        203
                    && self.model.is_some()
      
        204
                    && self.entrypoint.is_some()
      
        205
            }
      
        206
        
        207
            fn absorb(&mut self, ev: &RawEvent) {
      
        208
                if let Some(ts) = ev.timestamp {
      
        209
                    if self.started_at.is_none() {
      
        210
                        self.started_at = Some(ts);
      
        211
                    }
      
        212
                    self.last_seen_timestamp = Some(ts);
      
        213
                }
      
        214
                if self.cwd.is_none() {
      
        215
                    if let Some(c) = &ev.cwd {
      
        216
                        self.cwd = Some(c.clone());
      
        217
                    }
      
        218
                }
      
        219
                if self.git_branch.is_none() {
      
        220
                    if let Some(b) = &ev.git_branch {
      
        221
                        self.git_branch = Some(b.clone());
      
        222
                    }
      
        223
                }
      
        224
                if self.version.is_none() {
      
        225
                    if let Some(v) = &ev.version {
      
        226
                        self.version = Some(v.clone());
      
        227
                    }
      
        228
                }
      
        229
                if self.slug.is_none() {
      
        230
                    if let Some(s) = &ev.slug {
      
        231
                        self.slug = Some(s.clone());
      
        232
                    }
      
        233
                }
      
        234
                if self.custom_title.is_none() {
      
        235
                    if let Some(t) = &ev.custom_title {
      
        236
                        self.custom_title = Some(t.clone());
      
        237
                    }
      
        238
                }
      
        239
                if self.ai_title.is_none() {
      
        240
                    if let Some(t) = &ev.ai_title {
      
        241
                        self.ai_title = Some(t.clone());
      
        242
                    }
      
        243
                }
      
        244
                if self.entrypoint.is_none() {
      
        245
                    if let Some(e) = &ev.entrypoint {
      
        246
                        self.entrypoint = Some(e.clone());
      
        247
                    }
      
        248
                }
      
        249
                if self.model.is_none() && ev.kind == "assistant" {
      
        250
                    if let Some(msg) = &ev.message {
      
        251
                        if let Some(m) = msg.get("model").and_then(Value::as_str) {
      
        252
                            self.model = Some(m.to_string());
      
        253
                        }
      
        254
                    }
      
        255
                }
      
        256
            }
      
        257
        }
      
        258
        
        259
        /// Pull a short human-readable title from the first non-meta user event,
      
        260
        /// running it through the [`sanitize_title`] pipeline to strip IDE
      
        261
        /// wrappers, code blocks, and other noise. Returns `None` if the event
      
        262
        /// is not a user message or nothing usable survives sanitization.
      
        263
        fn extract_user_title(ev: &RawEvent) -> Option<String> {
      
        264
            if ev.kind != "user" {
      
        265
                return None;
      
        266
            }
      
        267
            if ev.is_meta.unwrap_or(false) {
      
        268
                return None;
      
        269
            }
      
        270
            let msg = ev.message.as_ref()?;
      
        271
            let text = match msg.get("content") {
      
        272
                Some(Value::String(s)) => s.clone(),
      
        273
                Some(Value::Array(blocks)) => blocks
      
        274
                    .iter()
      
        275
                    .filter_map(|b| b.get("text").and_then(Value::as_str))
      
        276
                    .collect::<Vec<_>>()
      
        277
                    .join(" "),
      
        278
                _ => return None,
      
        279
            };
      
        280
            sanitize_title(&text)
      
        281
        }
      
        282
        
        283
        /// Read the last `TAIL_SCAN_BYTES` of the file, find the last complete
      
        284
        /// `{...}\n` line, parse it, and return its `timestamp` if any.
      
        285
        fn scan_tail_for_last_timestamp(path: &Path) -> CoreResult<Option<DateTime<Utc>>> {
      
        286
            let mut file = File::open(path)?;
      
        287
            let len = file.metadata()?.len();
      
        288
            if len == 0 {
      
        289
                return Ok(None);
      
        290
            }
      
        291
            let read_len = len.min(TAIL_SCAN_BYTES);
      
        292
            let start = len - read_len;
      
        293
            file.seek(SeekFrom::Start(start))?;
      
        294
        
        295
            let mut buf = vec![0u8; read_len as usize];
      
        296
            file.read_exact(&mut buf)?;
      
        297
        
        298
            // Walk backward through complete lines (newline-terminated or EOF-terminated).
      
        299
            for line in buf.rsplit(|b| *b == b'\n') {
      
        300
                if line.is_empty() {
      
        301
                    continue;
      
        302
                }
      
        303
                let text = match std::str::from_utf8(line) {
      
        304
                    Ok(s) => s,
      
        305
                    Err(_) => continue,
      
        306
                };
      
        307
                if let Ok(ev) = serde_json::from_str::<RawEvent>(text) {
      
        308
                    if let Some(ts) = ev.timestamp {
      
        309
                        return Ok(Some(ts));
      
        310
                    }
      
        311
                }
      
        312
            }
      
        313
            Ok(None)
      
        314
        }
      
        315
        
        316
        #[cfg(test)]
        mod tests {
            use super::*;
            use std::io::Write;
            use tempfile::tempdir;

            /// A small but realistic session head: one metadata line, one
            /// attachment, one user prompt, and two assistant replies.
            const FIXTURE_LINES: &[&str] = &[
                r#"{"type":"permission-mode","permissionMode":"default","sessionId":"abc"}"#,
                r#"{"type":"attachment","uuid":"u1","timestamp":"2026-04-11T00:55:32.249Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","attachment":{"type":"hook_success"}}"#,
                r#"{"type":"user","uuid":"u2","timestamp":"2026-04-11T00:55:35.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"role":"user","content":"plan the thread browser feature"}}"#,
                r#"{"type":"assistant","uuid":"u3","timestamp":"2026-04-11T00:55:40.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"model":"claude-opus-4-6","content":[{"type":"text","text":"Let's plan."}]}}"#,
                r#"{"type":"assistant","uuid":"u4","timestamp":"2026-04-11T01:05:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"model":"claude-opus-4-6","content":[{"type":"text","text":"Done."}]}}"#,
            ];

            /// Create `path` and write each entry of `lines` followed by a newline.
            fn write_fixture(path: &Path, lines: &[&str]) {
                let mut out = File::create(path).unwrap();
                lines
                    .iter()
                    .for_each(|entry| writeln!(out, "{entry}").unwrap());
            }

            #[test]
            fn extracts_summary_from_realistic_head() {
                let dir = tempdir().unwrap();
                let session = dir.path().join("abc.jsonl");
                write_fixture(&session, FIXTURE_LINES);

                let summary = summarize(&session, "-Users-me-repo").unwrap();

                assert_eq!(summary.id, "abc");
                assert_eq!(summary.project_id, "-Users-me-repo");
                assert_eq!(summary.title, "plan the thread browser feature");
                assert_eq!(summary.model.as_deref(), Some("claude-opus-4-6"));
                assert_eq!(summary.git_branch.as_deref(), Some("main"));
                assert_eq!(summary.version.as_deref(), Some("2.1.101"));
                // The permission-mode line is metadata and is not counted;
                // attachment + user + 2 assistants make 4 timeline events.
                assert_eq!(summary.message_count, 4);
                assert_eq!(summary.prompt_count, 1);
                assert!(summary.started_at.is_some());
                assert!(summary.last_activity_at.is_some());
                assert_ne!(summary.started_at, summary.last_activity_at);
            }

            #[test]
            fn survives_truncated_last_line() {
                let dir = tempdir().unwrap();
                let session = dir.path().join("abc.jsonl");

                // Complete lines first, then a dangling partial JSON line with
                // no trailing newline.
                let mut out = File::create(&session).unwrap();
                FIXTURE_LINES
                    .iter()
                    .for_each(|entry| writeln!(out, "{entry}").unwrap());
                out.write_all(br#"{"type":"assistant","timestamp":"2026-04-11T"#)
                    .unwrap();
                drop(out);

                let summary = summarize(&session, "-Users-me-repo").unwrap();

                assert_eq!(summary.title, "plan the thread browser feature");
                // The preceding complete line still supplies a last activity.
                assert!(summary.last_activity_at.is_some());
            }

            #[test]
            fn ignores_meta_user_events_for_title() {
                let dir = tempdir().unwrap();
                let session = dir.path().join("abc.jsonl");
                let lines = [
                    r#"{"type":"permission-mode","permissionMode":"default","sessionId":"abc"}"#,
                    r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","isMeta":true,"message":{"role":"user","content":"<local-command-caveat>noise</local-command-caveat>"}}"#,
                    r#"{"type":"user","uuid":"u2","timestamp":"2026-04-11T00:55:30.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"role":"user","content":"actual first question"}}"#,
                ];
                write_fixture(&session, &lines);

                let summary = summarize(&session, "-Users-me-repo").unwrap();

                assert_eq!(summary.title, "actual first question");
            }

            #[test]
            fn prefers_custom_title_over_user_message() {
                let dir = tempdir().unwrap();
                let session = dir.path().join("abc.jsonl");
                let lines = [
                    r#"{"type":"custom-title","customTitle":"The Real Title","sessionId":"abc"}"#,
                    r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"role":"user","content":"something else"}}"#,
                ];
                write_fixture(&session, &lines);

                let summary = summarize(&session, "-Users-me-repo").unwrap();

                assert_eq!(summary.title, "The Real Title");
            }

            #[test]
            fn empty_file_returns_fallback_title() {
                let dir = tempdir().unwrap();
                let session = dir.path().join("empty.jsonl");
                File::create(&session).unwrap();

                let summary = summarize(&session, "-Users-me-repo").unwrap();

                assert_eq!(summary.title, "(untitled)");
                assert_eq!(summary.message_count, 0);
                assert_eq!(summary.prompt_count, 0);
                // With no events at all, last activity comes from the mtime.
                assert!(summary.last_activity_at.is_some());
            }

            #[test]
            fn prompt_count_excludes_tool_result_returns() {
                let dir = tempdir().unwrap();
                let session = dir.path().join("abc.jsonl");
                let lines = [
                    // A genuine human prompt.
                    r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":"run the tests"}}"#,
                    // The assistant issuing a tool_use.
                    r#"{"type":"assistant","uuid":"u2","timestamp":"2026-04-11T00:55:10.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"model":"claude-opus-4-6","content":[{"type":"tool_use","id":"t1","name":"Bash","input":{"command":"cargo test"}}]}}"#,
                    // A tool_result delivered as a user event; not a prompt.
                    r#"{"type":"user","uuid":"u3","timestamp":"2026-04-11T00:55:20.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"t1","content":"ok","is_error":false}]}}"#,
                    // A second genuine human prompt.
                    r#"{"type":"user","uuid":"u4","timestamp":"2026-04-11T00:55:30.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":"now commit"}}"#,
                ];
                write_fixture(&session, &lines);

                let summary = summarize(&session, "-Users-me-repo").unwrap();

                // All four events appear in the viewer timeline.
                assert_eq!(summary.message_count, 4);
                // Only the two human prompts count; the tool_result does not.
                assert_eq!(summary.prompt_count, 2);
            }
        }
      
        450

1	//! Cheap metadata extraction for the TOC sidebar.
2	//!
3	//! A session file can be 50+ MB for long agent runs. We never parse the
4	//! whole thing for summaries. Instead:
5	//!
6	//! 1. Open the file once and count lines (no JSON parse) for an
7	//! approximate `messageCount`.
8	//! 2. Parse at most `HEAD_SCAN_LINES` lines from the start, shallowly,
9	//! to extract `startedAt`, `cwd`, `gitBranch`, `version`, `slug`,
10	//! `customTitle`, the first assistant's `model`, and a fallback
11	//! title from the first non-meta user message.
12	//! 3. Reverse-scan the last `TAIL_SCAN_BYTES` bytes for the final
13	//! complete JSON object carrying a `timestamp` field, used as
14	//! `lastActivityAt`. Falls back to the file's mtime.
15
16	use std::fs::File;
17	use std::io::{BufRead, BufReader, Read, Seek, SeekFrom};
18	use std::path::Path;
19
20	use chrono::{DateTime, Utc};
21	use serde_json::Value;
22
23	use crate::core::error::CoreResult;
24	use crate::core::reader::{is_timeline_event, is_user_prompt, NON_TIMELINE_KINDS};
25	use crate::core::schema::{RawEvent, SessionSummary};
26	use crate::core::title::sanitize_title;
27
28	/// How many lines from the top of the file we're willing to parse for
29	/// metadata before giving up. Empirically the first ~20 lines of a real
30	/// Claude Code session contain customTitle / cwd / first user message.
31	const HEAD_SCAN_LINES: usize = 100;
32
33	/// How many bytes from the end of the file to tail-scan for the
34	/// latest timestamp.
35	const TAIL_SCAN_BYTES: u64 = 64 * 1024;
36
37	pub fn summarize(session_path: &Path, project_id: &str) -> CoreResult<SessionSummary> {
38	let id = session_path
39	.file_stem()
40	.and_then(\|s\| s.to_str())
41	.map(str::to_owned)
42	.unwrap_or_default();
43
44	let file = File::open(session_path)?;
45	let file_meta = file.metadata()?;
46
47	// Single-pass scan:
48	// * For each line, cheaply classify it as timeline / prompt
49	// via prefix + substring checks so that counting 171 MB of
50	// JSONL doesn't cost a full serde parse per line.
51	// * For the first HEAD_SCAN_LINES (while head info is still
52	// unsaturated) also do a full RawEvent parse to pick up
53	// title / cwd / model / version metadata.
54	let mut head = HeadInfo::default();
55	let mut user_title_fallback: Option<String> = None;
56	let mut message_count: u32 = 0;
57	let mut prompt_count: u32 = 0;
58
59	{
60	let reader = BufReader::new(&file);
61	for (i, line) in reader.lines().enumerate() {
62	let line = match line {
63	Ok(l) => l,
64	Err(_) => continue, // tolerate partial last line
65	};
66	if line.is_empty() {
67	continue;
68	}
69
70	// Head path: full parse, feeds both counters (via the
71	// shared helpers) and head metadata.
72	let head_parse = i < HEAD_SCAN_LINES && !head.is_saturated();
73	if head_parse {
74	if let Ok(ev) = serde_json::from_str::<RawEvent>(&line) {
75	head.absorb(&ev);
76	if user_title_fallback.is_none() {
77	if let Some(t) = extract_user_title(&ev) {
78	user_title_fallback = Some(t);
79	}
80	}
81	if is_timeline_event(&ev) {
82	message_count += 1;
83	}
84	if is_user_prompt(&ev) {
85	prompt_count += 1;
86	}
87	continue;
88	}
89	// Fall through to fast path if JSON parse fails.
90	}
91
92	// Fast path: skip full parse. Claude Code's jsonl
93	// serializer emits `type` first and field order is
94	// stable, so these byte-level checks match what the
95	// full helpers would return without allocating.
96	if is_sidechain_line(&line) {
97	continue;
98	}
99	let kind = parse_kind_prefix(&line);
100	match kind {
101	Some(k) if NON_TIMELINE_KINDS.contains(&k) => continue,
102	Some(k) => {
103	message_count += 1;
104	if k == "user" && !has_tool_result(&line) {
105	prompt_count += 1;
106	}
107	}
108	None => {
109	// Unrecognized shape — count as timeline so we
110	// don't silently lose events on schema drift.
111	message_count += 1;
112	}
113	}
114	}
115	}
116
117	// Tail scan for lastActivityAt.
118	let tail_last_ts = scan_tail_for_last_timestamp(session_path)?;
119
120	let last_activity_at = tail_last_ts
121	.or(head.last_seen_timestamp)
122	.or_else(\|\| {
123	file_meta
124	.modified()
125	.ok()
126	.map(\|st\| DateTime::<Utc>::from(st))
127	});
128
129	let title = head
130	.custom_title
131	.clone()
132	.or_else(\|\| head.ai_title.clone())
133	.or(user_title_fallback)
134	.or(head.slug.clone())
135	.unwrap_or_else(\|\| "(untitled)".to_string());
136
137	Ok(SessionSummary {
138	id,
139	project_id: project_id.to_string(),
140	title,
141	started_at: head.started_at,
142	last_activity_at,
143	model: head.model,
144	message_count,
145	prompt_count,
146	git_branch: head.git_branch,
147	version: head.version,
148	slug: head.slug,
149	cwd: head.cwd,
150	custom_title: head.custom_title,
151	entrypoint: head.entrypoint,
152	source: crate::core::schema::SessionSource::Disk,
153	})
154	}
155
/// Return the value of the leading `type` field of a JSONL line.
///
/// Only lines that begin with the exact bytes `{"type":"` are recognized;
/// the result is everything between that prefix and the next `"`. Any
/// other shape (different first key, missing closing quote) yields `None`.
/// The returned slice borrows from `line`; nothing is allocated.
fn parse_kind_prefix(line: &str) -> Option<&str> {
    let after_key = line.strip_prefix("{\"type\":\"")?;
    after_key.split_once('"').map(|(kind, _)| kind)
}
164
/// Report whether a raw JSONL line contains the exact text
/// `"isSidechain":true`.
///
/// Because this is a plain substring search, the key may sit anywhere in
/// the line. The match requires the compact form with no whitespace
/// between the colon and `true`.
fn is_sidechain_line(line: &str) -> bool {
    const SIDECHAIN_MARKER: &str = "\"isSidechain\":true";
    line.contains(SIDECHAIN_MARKER)
}
171
/// Report whether a raw JSONL line contains the exact text
/// `"type":"tool_result"`.
///
/// The caller uses this to tell a `user` event that merely carries a tool
/// return apart from a prompt typed by a human.
fn has_tool_result(line: &str) -> bool {
    const TOOL_RESULT_MARKER: &str = "\"type\":\"tool_result\"";
    line.contains(TOOL_RESULT_MARKER)
}
177
178	/// Scratchpad for everything we're learning from the file's head.
179	#[derive(Debug, Default)]
180	struct HeadInfo {
181	started_at: Option<DateTime<Utc>>,
182	last_seen_timestamp: Option<DateTime<Utc>>,
183	cwd: Option<String>,
184	git_branch: Option<String>,
185	version: Option<String>,
186	slug: Option<String>,
187	custom_title: Option<String>,
188	ai_title: Option<String>,
189	model: Option<String>,
190	entrypoint: Option<String>,
191	}
192
193	impl HeadInfo {
194	/// Have we collected every field the summary needs? If so the head
195	/// loop can short-circuit further parsing.
196	fn is_saturated(&self) -> bool {
197	self.started_at.is_some()
198	&& self.cwd.is_some()
199	&& self.git_branch.is_some()
200	&& self.version.is_some()
201	&& self.slug.is_some()
202	&& (self.custom_title.is_some() \|\| self.ai_title.is_some())
203	&& self.model.is_some()
204	&& self.entrypoint.is_some()
205	}
206
207	fn absorb(&mut self, ev: &RawEvent) {
208	if let Some(ts) = ev.timestamp {
209	if self.started_at.is_none() {
210	self.started_at = Some(ts);
211	}
212	self.last_seen_timestamp = Some(ts);
213	}
214	if self.cwd.is_none() {
215	if let Some(c) = &ev.cwd {
216	self.cwd = Some(c.clone());
217	}
218	}
219	if self.git_branch.is_none() {
220	if let Some(b) = &ev.git_branch {
221	self.git_branch = Some(b.clone());
222	}
223	}
224	if self.version.is_none() {
225	if let Some(v) = &ev.version {
226	self.version = Some(v.clone());
227	}
228	}
229	if self.slug.is_none() {
230	if let Some(s) = &ev.slug {
231	self.slug = Some(s.clone());
232	}
233	}
234	if self.custom_title.is_none() {
235	if let Some(t) = &ev.custom_title {
236	self.custom_title = Some(t.clone());
237	}
238	}
239	if self.ai_title.is_none() {
240	if let Some(t) = &ev.ai_title {
241	self.ai_title = Some(t.clone());
242	}
243	}
244	if self.entrypoint.is_none() {
245	if let Some(e) = &ev.entrypoint {
246	self.entrypoint = Some(e.clone());
247	}
248	}
249	if self.model.is_none() && ev.kind == "assistant" {
250	if let Some(msg) = &ev.message {
251	if let Some(m) = msg.get("model").and_then(Value::as_str) {
252	self.model = Some(m.to_string());
253	}
254	}
255	}
256	}
257	}
258
259	/// Pull a short human-readable title from the first non-meta user event,
260	/// running it through the [`sanitize_title`] pipeline to strip IDE
261	/// wrappers, code blocks, and other noise. Returns `None` if the event
262	/// is not a user message or nothing usable survives sanitization.
263	fn extract_user_title(ev: &RawEvent) -> Option<String> {
264	if ev.kind != "user" {
265	return None;
266	}
267	if ev.is_meta.unwrap_or(false) {
268	return None;
269	}
270	let msg = ev.message.as_ref()?;
271	let text = match msg.get("content") {
272	Some(Value::String(s)) => s.clone(),
273	Some(Value::Array(blocks)) => blocks
274	.iter()
275	.filter_map(\|b\| b.get("text").and_then(Value::as_str))
276	.collect::<Vec<_>>()
277	.join(" "),
278	_ => return None,
279	};
280	sanitize_title(&text)
281	}
282
283	/// Read the last `TAIL_SCAN_BYTES` of the file, find the last complete
284	/// `{...}\n` line, parse it, and return its `timestamp` if any.
285	fn scan_tail_for_last_timestamp(path: &Path) -> CoreResult<Option<DateTime<Utc>>> {
286	let mut file = File::open(path)?;
287	let len = file.metadata()?.len();
288	if len == 0 {
289	return Ok(None);
290	}
291	let read_len = len.min(TAIL_SCAN_BYTES);
292	let start = len - read_len;
293	file.seek(SeekFrom::Start(start))?;
294
295	let mut buf = vec![0u8; read_len as usize];
296	file.read_exact(&mut buf)?;
297
298	// Walk backward through complete lines (newline-terminated or EOF-terminated).
299	for line in buf.rsplit(\|b\| *b == b'\n') {
300	if line.is_empty() {
301	continue;
302	}
303	let text = match std::str::from_utf8(line) {
304	Ok(s) => s,
305	Err(_) => continue,
306	};
307	if let Ok(ev) = serde_json::from_str::<RawEvent>(text) {
308	if let Some(ts) = ev.timestamp {
309	return Ok(Some(ts));
310	}
311	}
312	}
313	Ok(None)
314	}
315
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::tempdir;

    /// Project id handed to `summarize` by every test in this module.
    const PROJECT_ID: &str = "-Users-me-repo";

    /// A small but realistic session head: one metadata line, one
    /// attachment, one human prompt, and two assistant replies.
    const FIXTURE_LINES: &[&str] = &[
        r#"{"type":"permission-mode","permissionMode":"default","sessionId":"abc"}"#,
        r#"{"type":"attachment","uuid":"u1","timestamp":"2026-04-11T00:55:32.249Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","attachment":{"type":"hook_success"}}"#,
        r#"{"type":"user","uuid":"u2","timestamp":"2026-04-11T00:55:35.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"role":"user","content":"plan the thread browser feature"}}"#,
        r#"{"type":"assistant","uuid":"u3","timestamp":"2026-04-11T00:55:40.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"model":"claude-opus-4-6","content":[{"type":"text","text":"Let's plan."}]}}"#,
        r#"{"type":"assistant","uuid":"u4","timestamp":"2026-04-11T01:05:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"model":"claude-opus-4-6","content":[{"type":"text","text":"Done."}]}}"#,
    ];

    /// Create `file_name` in a fresh temp dir, write each of `lines`
    /// followed by a newline, append the raw `trailing` bytes verbatim,
    /// and return the summary of the resulting file.
    fn summarize_fixture(file_name: &str, lines: &[&str], trailing: &[u8]) -> SessionSummary {
        let dir = tempdir().unwrap();
        let path = dir.path().join(file_name);

        let mut out = File::create(&path).unwrap();
        for line in lines {
            writeln!(out, "{line}").unwrap();
        }
        out.write_all(trailing).unwrap();
        drop(out);

        summarize(&path, PROJECT_ID).unwrap()
    }

    #[test]
    fn extracts_summary_from_realistic_head() {
        let summary = summarize_fixture("abc.jsonl", FIXTURE_LINES, b"");

        assert_eq!(summary.id, "abc");
        assert_eq!(summary.project_id, "-Users-me-repo");
        assert_eq!(summary.title, "plan the thread browser feature");
        assert_eq!(summary.model.as_deref(), Some("claude-opus-4-6"));
        assert_eq!(summary.git_branch.as_deref(), Some("main"));
        assert_eq!(summary.version.as_deref(), Some("2.1.101"));
        // The permission-mode line is metadata and is not counted, leaving
        // attachment + user + 2 assistants = 4 timeline events.
        assert_eq!(summary.message_count, 4);
        assert_eq!(summary.prompt_count, 1);
        assert!(summary.started_at.is_some());
        assert!(summary.last_activity_at.is_some());
        assert_ne!(summary.started_at, summary.last_activity_at);
    }

    #[test]
    fn survives_truncated_last_line() {
        // Good lines followed by a partial JSON line with no newline.
        let summary = summarize_fixture(
            "abc.jsonl",
            FIXTURE_LINES,
            br#"{"type":"assistant","timestamp":"2026-04-11T"#,
        );

        assert_eq!(summary.title, "plan the thread browser feature");
        // The preceding complete line must still supply a last activity.
        assert!(summary.last_activity_at.is_some());
    }

    #[test]
    fn ignores_meta_user_events_for_title() {
        let lines = [
            r#"{"type":"permission-mode","permissionMode":"default","sessionId":"abc"}"#,
            r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","isMeta":true,"message":{"role":"user","content":"<local-command-caveat>noise</local-command-caveat>"}}"#,
            r#"{"type":"user","uuid":"u2","timestamp":"2026-04-11T00:55:30.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"role":"user","content":"actual first question"}}"#,
        ];

        let summary = summarize_fixture("abc.jsonl", &lines, b"");
        assert_eq!(summary.title, "actual first question");
    }

    #[test]
    fn prefers_custom_title_over_user_message() {
        let lines = [
            r#"{"type":"custom-title","customTitle":"The Real Title","sessionId":"abc"}"#,
            r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"role":"user","content":"something else"}}"#,
        ];

        let summary = summarize_fixture("abc.jsonl", &lines, b"");
        assert_eq!(summary.title, "The Real Title");
    }

    #[test]
    fn empty_file_returns_fallback_title() {
        let summary = summarize_fixture("empty.jsonl", &[], b"");

        assert_eq!(summary.title, "(untitled)");
        assert_eq!(summary.message_count, 0);
        assert_eq!(summary.prompt_count, 0);
        // With no events at all, last activity comes from the file mtime.
        assert!(summary.last_activity_at.is_some());
    }

    #[test]
    fn prompt_count_excludes_tool_result_returns() {
        let lines = [
            // Real human prompt.
            r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":"run the tests"}}"#,
            // Assistant's tool_use.
            r#"{"type":"assistant","uuid":"u2","timestamp":"2026-04-11T00:55:10.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"model":"claude-opus-4-6","content":[{"type":"tool_use","id":"t1","name":"Bash","input":{"command":"cargo test"}}]}}"#,
            // Tool result delivered as a user event: not a prompt.
            r#"{"type":"user","uuid":"u3","timestamp":"2026-04-11T00:55:20.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"t1","content":"ok","is_error":false}]}}"#,
            // Another real human prompt.
            r#"{"type":"user","uuid":"u4","timestamp":"2026-04-11T00:55:30.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":"now commit"}}"#,
        ];

        let summary = summarize_fixture("abc.jsonl", &lines, b"");
        // All four events render in the viewer timeline.
        assert_eq!(summary.message_count, 4);
        // Only the two human prompts count; the tool_result does not.
        assert_eq!(summary.prompt_count, 2);
    }
}
450