tenseleyflow/claudex / 6b46cf2

Browse files

perf: cap disk reads at 2000 messages + run async off sync pool

Authored by espadonne
SHA
6b46cf28b1d371dc4e6fdd50a4003f9e34aa0ce6
Parents
6c20a65
Tree
5deb3b2

2 changed files

StatusFile+-
M src-tauri/src/commands.rs 34 8
M src-tauri/src/core/reader.rs 119 21
src-tauri/src/commands.rsmodified
@@ -26,7 +26,7 @@ use crate::core::pty::{
2626
     resize_pty as core_resize_pty, spawn_pty as core_spawn_pty, write_pty as core_write_pty,
2727
     PtyHandle, PtyRequest, RingBuffer,
2828
 };
29
-use crate::core::reader::read_session as core_read_session;
29
+use crate::core::reader::read_session_limited as core_read_session_limited;
3030
 use crate::core::schema::{
3131
     Message, PermissionMode, Project, SessionDetail, SessionSource, SessionSummary,
3232
 };
@@ -686,6 +686,13 @@ pub fn list_sessions(
686686
     Ok(sessions)
687687
 }
688688
 
689
+/// Default tail cap for disk session reads. Bounded so a 100 MB+
690
+/// session file can't hang the main thread during IPC serialize /
691
+/// deserialize. The frontend can opt in to the full transcript by
692
+/// passing an explicit `messageLimit` (future API) once we have
693
+/// lazy-scroll in the viewer.
694
+const DEFAULT_SESSION_MESSAGE_LIMIT: usize = 2000;
695
+
689696
 /// Full viewer payload for one session. Dispatches on `source`:
690697
 ///
691698
 /// - `Disk` (the default): `encoded_dir` is the physical source-dir
@@ -695,32 +702,51 @@ pub fn list_sessions(
695702
 ///   `projectId` field on an archive SessionSummary), and we
696703
 ///   synthesize a prompt-only [`SessionDetail`] from the in-memory
697704
 ///   `history.jsonl` log.
705
+///
706
+/// **Async** so the file read + JSONL parse + serialize-to-IPC
707
+/// pipeline runs on the tokio pool rather than Tauri's sync
708
+/// command pool; this prevents a slow 170 MB read from blocking
709
+/// other commands (`list_ptys`, `write_pty`, etc.) while it runs.
698710
 #[tauri::command]
699
-pub fn read_session(
711
+pub async fn read_session(
700712
     encoded_dir: String,
701713
     session_id: String,
702714
     #[allow(non_snake_case)] source: Option<SessionSource>,
703715
     state: State<'_, AppState>,
704716
 ) -> IpcResult<SessionDetail> {
705717
     match source.unwrap_or(SessionSource::Disk) {
706
-        SessionSource::Disk => read_disk_session(&encoded_dir, &session_id, &state),
718
+        SessionSource::Disk => {
719
+            let projects_root = state.projects_root.clone();
720
+            // Move the blocking file read off the async runtime.
721
+            let encoded_dir_owned = encoded_dir;
722
+            let session_id_owned = session_id;
723
+            tauri::async_runtime::spawn_blocking(move || {
724
+                read_disk_session_blocking(
725
+                    &projects_root,
726
+                    &encoded_dir_owned,
727
+                    &session_id_owned,
728
+                )
729
+            })
730
+            .await
731
+            .map_err(|e| format!("read_session join error: {e}"))?
732
+        }
707733
         SessionSource::Archive => read_archive_session(&encoded_dir, &state),
708734
     }
709735
 }
710736
 
711
-fn read_disk_session(
737
+fn read_disk_session_blocking(
738
+    projects_root: &Path,
712739
     encoded_dir: &str,
713740
     session_id: &str,
714
-    state: &AppState,
715741
 ) -> IpcResult<SessionDetail> {
716
-    let path = state
717
-        .projects_root
742
+    let path = projects_root
718743
         .join(encoded_dir)
719744
         .join(format!("{session_id}.jsonl"));
720745
     if !path.exists() {
721746
         return Err(format!("session not found: {encoded_dir}/{session_id}"));
722747
     }
723
-    core_read_session(&path, encoded_dir).map_err(|e| e.to_string())
748
+    core_read_session_limited(&path, encoded_dir, Some(DEFAULT_SESSION_MESSAGE_LIMIT))
749
+        .map_err(|e| e.to_string())
724750
 }
725751
 
726752
 fn read_archive_session(project_path: &str, state: &AppState) -> IpcResult<SessionDetail> {
src-tauri/src/core/reader.rsmodified
@@ -21,38 +21,89 @@ use crate::core::metadata::summarize;
2121
 use crate::core::schema::{ContentBlock, Message, RawEvent, SessionDetail, Usage};
2222
 
2323
 pub fn read_session(path: &Path, project_id: &str) -> CoreResult<SessionDetail> {
24
+    read_session_limited(path, project_id, None)
25
+}
26
+
27
+/// Read a session, optionally capping the result to the most recent
28
+/// `limit` messages. Required for host performance on very large
29
+/// sessions — one user's armfortas session is 171 MB of JSONL and
30
+/// that's enough to hang the main thread for seconds during IPC
31
+/// deserialization. Passing `Some(N)` streams the whole file but
32
+/// only retains the tail, so the wire payload stays bounded.
33
+///
34
+/// The returned [`SessionDetail`] is identical in shape regardless
35
+/// of whether `limit` was set; truncation is invisible to the
36
+/// frontend except that `messages.len() < summary.message_count`.
37
+pub fn read_session_limited(
38
+    path: &Path,
39
+    project_id: &str,
40
+    limit: Option<usize>,
41
+) -> CoreResult<SessionDetail> {
2442
     let summary = summarize(path, project_id)?;
25
-    let messages = read_messages(path)?;
43
+    let messages = read_messages(path, limit)?;
2644
     Ok(SessionDetail { summary, messages })
2745
 }
2846
 
29
-fn read_messages(path: &Path) -> CoreResult<Vec<Message>> {
47
+fn read_messages(path: &Path, limit: Option<usize>) -> CoreResult<Vec<Message>> {
3048
     let file = File::open(path)?;
3149
     let reader = BufReader::new(file);
32
-    let mut out = Vec::new();
3350
     let mut fallback_counter: u32 = 0;
3451
 
35
-    for line in reader.lines() {
36
-        let line = match line {
37
-            Ok(l) => l,
38
-            Err(_) => continue,
39
-        };
40
-        if line.is_empty() {
41
-            continue;
42
-        }
43
-        let ev: RawEvent = match serde_json::from_str(&line) {
44
-            Ok(e) => e,
45
-            Err(_) => continue,
46
-        };
47
-        if ev.is_sidechain.unwrap_or(false) {
48
-            continue;
52
+    // When a cap is set, keep a rolling tail so we never allocate a
53
+    // `Vec<Message>` bigger than the cap + 1. This keeps memory
54
+    // bounded on multi-hundred-megabyte files.
55
+    match limit {
56
+        Some(cap) if cap > 0 => {
57
+            let mut tail: std::collections::VecDeque<Message> =
58
+                std::collections::VecDeque::with_capacity(cap);
59
+            for line in reader.lines() {
60
+                let line = match line {
61
+                    Ok(l) => l,
62
+                    Err(_) => continue,
63
+                };
64
+                if line.is_empty() {
65
+                    continue;
66
+                }
67
+                let ev: RawEvent = match serde_json::from_str(&line) {
68
+                    Ok(e) => e,
69
+                    Err(_) => continue,
70
+                };
71
+                if ev.is_sidechain.unwrap_or(false) {
72
+                    continue;
73
+                }
74
+                if let Some(msg) = raw_to_message(ev, &mut fallback_counter) {
75
+                    if tail.len() == cap {
76
+                        tail.pop_front();
77
+                    }
78
+                    tail.push_back(msg);
79
+                }
80
+            }
81
+            Ok(tail.into_iter().collect())
4982
         }
50
-        if let Some(msg) = raw_to_message(ev, &mut fallback_counter) {
51
-            out.push(msg);
83
+        _ => {
84
+            let mut out = Vec::new();
85
+            for line in reader.lines() {
86
+                let line = match line {
87
+                    Ok(l) => l,
88
+                    Err(_) => continue,
89
+                };
90
+                if line.is_empty() {
91
+                    continue;
92
+                }
93
+                let ev: RawEvent = match serde_json::from_str(&line) {
94
+                    Ok(e) => e,
95
+                    Err(_) => continue,
96
+                };
97
+                if ev.is_sidechain.unwrap_or(false) {
98
+                    continue;
99
+                }
100
+                if let Some(msg) = raw_to_message(ev, &mut fallback_counter) {
101
+                    out.push(msg);
102
+                }
103
+            }
104
+            Ok(out)
52105
         }
53106
     }
54
-
55
-    Ok(out)
56107
 }
57108
 
58109
 /// Convert one raw event into a timeline message. Returns `None` for
@@ -460,6 +511,53 @@ mod tests {
460511
         assert_eq!(text, "main");
461512
     }
462513
 
514
+    #[test]
515
+    fn limit_returns_tail_and_preserves_order() {
516
+        let tmp = tempdir().unwrap();
517
+        let path = tmp.path().join("s.jsonl");
518
+        let mut lines: Vec<String> = Vec::new();
519
+        for i in 0..10 {
520
+            lines.push(format!(
521
+                "{{\"type\":\"user\",\"uuid\":\"u{i}\",\"timestamp\":\"2026-04-11T00:55:{:02}.000Z\",\"sessionId\":\"abc\",\"message\":{{\"role\":\"user\",\"content\":\"msg{i}\"}}}}",
522
+                i
523
+            ));
524
+        }
525
+        write_fixture(
526
+            &path,
527
+            &lines.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
528
+        );
529
+
530
+        let detail = read_session_limited(&path, "-Users-me-repo", Some(3)).unwrap();
531
+        assert_eq!(detail.messages.len(), 3);
532
+        // Summary still reflects total message count.
533
+        assert_eq!(detail.summary.message_count, 10);
534
+        let texts: Vec<&str> = detail
535
+            .messages
536
+            .iter()
537
+            .filter_map(|m| match m {
538
+                Message::User { text, .. } => Some(text.as_str()),
539
+                _ => None,
540
+            })
541
+            .collect();
542
+        assert_eq!(texts, vec!["msg7", "msg8", "msg9"]);
543
+    }
544
+
545
+    #[test]
546
+    fn limit_zero_is_treated_as_unlimited() {
547
+        // 0 is a weird edge case; we treat it as "no cap" so a
548
+        // caller mishandling its limit arg still gets data.
549
+        let tmp = tempdir().unwrap();
550
+        let path = tmp.path().join("s.jsonl");
551
+        write_fixture(
552
+            &path,
553
+            &[
554
+                r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:35.000Z","sessionId":"abc","message":{"role":"user","content":"hi"}}"#,
555
+            ],
556
+        );
557
+        let detail = read_session_limited(&path, "-Users-me-repo", Some(0)).unwrap();
558
+        assert_eq!(detail.messages.len(), 1);
559
+    }
560
+
463561
     #[test]
464562
     fn survives_partial_last_line() {
465563
         let tmp = tempdir().unwrap();