tenseleyflow/claudex / c728a02

Browse files

feat: prompt_count + viewer-aligned message_count in summarize

Authored by espadonne
SHA
c728a027c4ef9d1864150ebf37100c3764dce275
Parents
534d88d
Tree
e078243

8 changed files

StatusFile+-
M src-tauri/src/commands.rs 3 0
M src-tauri/src/core/grouping.rs 4 0
M src-tauri/src/core/metadata.rs 99 7
M src-tauri/src/core/reader.rs 61 0
M src-tauri/src/core/schema.rs 12 1
M src/lib/ipc/generated/SessionSummary.ts 14 2
M src/lib/store/sessions.test.ts 2 0
M src/lib/store/sessions.ts 1 0
src-tauri/src/commands.rsmodified
@@ -821,6 +821,9 @@ fn read_archive_session(project_path: &str, state: &AppState) -> IpcResult<Sessi
821821
         last_activity_at: last_activity,
822822
         model: None,
823823
         message_count: entries.len() as u32,
824
+        // Archive sessions are reconstructed from ~/.claude/history.jsonl
825
+        // which stores only human prompts — every entry IS a prompt.
826
+        prompt_count: entries.len() as u32,
824827
         git_branch: None,
825828
         version: None,
826829
         slug: None,
src-tauri/src/core/grouping.rsmodified
@@ -135,6 +135,9 @@ pub fn build_archive_projects(
135135
             last_activity_at: epoch_ms_to_utc(latest),
136136
             model: None,
137137
             message_count: entries.len() as u32,
138
+            // Archive entries come from ~/.claude/history.jsonl which
139
+            // only stores human prompts.
140
+            prompt_count: entries.len() as u32,
138141
             git_branch: None,
139142
             version: None,
140143
             slug: None,
@@ -414,6 +417,7 @@ mod tests {
414417
                 .map(|dt| dt.with_timezone(&Utc)),
415418
             model: None,
416419
             message_count: 0,
420
+            prompt_count: 0,
417421
             git_branch: None,
418422
             version: None,
419423
             slug: None,
src-tauri/src/core/metadata.rsmodified
@@ -21,6 +21,7 @@ use chrono::{DateTime, Utc};
2121
 use serde_json::Value;
2222
 
2323
 use crate::core::error::CoreResult;
24
+use crate::core::reader::{is_timeline_event, is_user_prompt, NON_TIMELINE_KINDS};
2425
 use crate::core::schema::{RawEvent, SessionSummary};
2526
 use crate::core::title::sanitize_title;
2627
 
@@ -43,11 +44,17 @@ pub fn summarize(session_path: &Path, project_id: &str) -> CoreResult<SessionSum
4344
     let file = File::open(session_path)?;
4445
     let file_meta = file.metadata()?;
4546
 
46
-    // Single-pass head scan: parse up to HEAD_SCAN_LINES while also
47
-    // counting the total line count of the file.
47
+    // Single-pass scan:
48
+    //   * For each line, cheaply classify it as timeline / prompt
49
+    //     via prefix + substring checks so that counting 171 MB of
50
+    //     JSONL doesn't cost a full serde parse per line.
51
+    //   * For the first HEAD_SCAN_LINES (while head info is still
52
+    //     unsaturated) also do a full RawEvent parse to pick up
53
+    //     title / cwd / model / version metadata.
4854
     let mut head = HeadInfo::default();
4955
     let mut user_title_fallback: Option<String> = None;
50
-    let mut total_lines: u32 = 0;
56
+    let mut message_count: u32 = 0;
57
+    let mut prompt_count: u32 = 0;
5158
 
5259
     {
5360
         let reader = BufReader::new(&file);
@@ -59,9 +66,11 @@ pub fn summarize(session_path: &Path, project_id: &str) -> CoreResult<SessionSum
5966
             if line.is_empty() {
6067
                 continue;
6168
             }
62
-            total_lines += 1;
6369
 
64
-            if i < HEAD_SCAN_LINES && !head.is_saturated() {
70
+            // Head path: full parse, feeds both counters (via the
71
+            // shared helpers) and head metadata.
72
+            let head_parse = i < HEAD_SCAN_LINES && !head.is_saturated();
73
+            if head_parse {
6574
                 if let Ok(ev) = serde_json::from_str::<RawEvent>(&line) {
6675
                     head.absorb(&ev);
6776
                     if user_title_fallback.is_none() {
@@ -69,6 +78,37 @@ pub fn summarize(session_path: &Path, project_id: &str) -> CoreResult<SessionSum
6978
                             user_title_fallback = Some(t);
7079
                         }
7180
                     }
81
+                    if is_timeline_event(&ev) {
82
+                        message_count += 1;
83
+                    }
84
+                    if is_user_prompt(&ev) {
85
+                        prompt_count += 1;
86
+                    }
87
+                    continue;
88
+                }
89
+                // Fall through to fast path if JSON parse fails.
90
+            }
91
+
92
+            // Fast path: skip full parse. Claude Code's jsonl
93
+            // serializer emits `type` first and field order is
94
+            // stable, so these byte-level checks match what the
95
+            // full helpers would return without allocating.
96
+            if is_sidechain_line(&line) {
97
+                continue;
98
+            }
99
+            let kind = parse_kind_prefix(&line);
100
+            match kind {
101
+                Some(k) if NON_TIMELINE_KINDS.contains(&k) => continue,
102
+                Some(k) => {
103
+                    message_count += 1;
104
+                    if k == "user" && !has_tool_result(&line) {
105
+                        prompt_count += 1;
106
+                    }
107
+                }
108
+                None => {
109
+                    // Unrecognized shape — count as timeline so we
110
+                    // don't silently lose events on schema drift.
111
+                    message_count += 1;
72112
                 }
73113
             }
74114
         }
@@ -101,7 +141,8 @@ pub fn summarize(session_path: &Path, project_id: &str) -> CoreResult<SessionSum
101141
         started_at: head.started_at,
102142
         last_activity_at,
103143
         model: head.model,
104
-        message_count: total_lines,
144
+        message_count,
145
+        prompt_count,
105146
         git_branch: head.git_branch,
106147
         version: head.version,
107148
         slug: head.slug,
@@ -112,6 +153,28 @@ pub fn summarize(session_path: &Path, project_id: &str) -> CoreResult<SessionSum
112153
     })
113154
 }
114155
 
156
/// Extracts the value of a leading `type` field from a JSONL line.
///
/// The line must open with `{`, immediately followed by the key `"type"`,
/// a colon, and a string value. Insignificant whitespace before the brace,
/// around the key and around the colon is skipped, so both
/// `{"type":"user",...}` and `{ "type": "user", ...}` yield `Some("user")`.
/// This keeps the byte-level check aligned with what a real JSON parser
/// would accept for the same line.
///
/// Returns `None` when `type` is not the first key, when its value is not
/// a string, or when the value's closing quote is missing.
///
/// Zero-allocation: the returned slice borrows from `line`.
fn parse_kind_prefix(line: &str) -> Option<&str> {
    let after_brace = line.trim_start().strip_prefix('{')?;
    let after_key = after_brace.trim_start().strip_prefix("\"type\"")?;
    let after_colon = after_key.trim_start().strip_prefix(':')?;
    let value = after_colon.trim_start().strip_prefix('"')?;
    // Everything up to the next quote is the kind; event kinds are plain
    // identifiers, so no escape handling is attempted here.
    value.split_once('"').map(|(kind, _)| kind)
}
164
+
165
/// Reports whether the raw JSONL line carries `"isSidechain"` set to `true`.
///
/// This is a byte-level scan, not a JSON parse: every occurrence of the
/// quoted key is inspected, and the line matches when one of them is
/// followed by a colon and the literal `true`. Whitespace around the colon
/// is tolerated so that `"isSidechain":true` and `"isSidechain": true` are
/// classified the same way.
///
/// Occurrences inside JSON string values do not match, because their
/// quotes are backslash-escaped in the raw line.
fn is_sidechain_line(line: &str) -> bool {
    const KEY: &str = "\"isSidechain\"";
    line.match_indices(KEY).any(|(at, _)| {
        // `at + KEY.len()` is a char boundary: KEY is pure ASCII.
        line[at + KEY.len()..]
            .trim_start()
            .strip_prefix(':')
            .map_or(false, |value| value.trim_start().starts_with("true"))
    })
}
171
+
172
/// Reports whether the raw JSONL line contains a `"type"` key whose value
/// is the string `"tool_result"`.
///
/// Used to classify a `user` event as a tool return rather than a human
/// prompt. This is a byte-level scan over the whole line, not a JSON
/// parse, so a match anywhere in the line counts. Whitespace around the
/// colon is tolerated so that `"type":"tool_result"` and
/// `"type": "tool_result"` are classified the same way.
///
/// Occurrences inside JSON string values do not match, because their
/// quotes are backslash-escaped in the raw line.
fn has_tool_result(line: &str) -> bool {
    const KEY: &str = "\"type\"";
    const VALUE: &str = "\"tool_result\"";
    line.match_indices(KEY).any(|(at, _)| {
        // `at + KEY.len()` is a char boundary: KEY is pure ASCII.
        line[at + KEY.len()..]
            .trim_start()
            .strip_prefix(':')
            .map_or(false, |value| value.trim_start().starts_with(VALUE))
    })
}
177
+
115178
 /// Scratchpad for everything we're learning from the file's head.
116179
 #[derive(Debug, Default)]
117180
 struct HeadInfo {
@@ -284,7 +347,10 @@ mod tests {
284347
         assert_eq!(s.model.as_deref(), Some("claude-opus-4-6"));
285348
         assert_eq!(s.git_branch.as_deref(), Some("main"));
286349
         assert_eq!(s.version.as_deref(), Some("2.1.101"));
287
-        assert_eq!(s.message_count, 5);
350
+        // permission-mode is metadata → not counted in message_count.
351
+        // attachment + user + 2 assistants = 4 timeline events.
352
+        assert_eq!(s.message_count, 4);
353
+        assert_eq!(s.prompt_count, 1);
288354
         assert!(s.started_at.is_some());
289355
         assert!(s.last_activity_at.is_some());
290356
         assert_ne!(s.started_at, s.last_activity_at);
@@ -351,7 +417,33 @@ mod tests {
351417
         let s = summarize(&path, "-Users-me-repo").unwrap();
352418
         assert_eq!(s.title, "(untitled)");
353419
         assert_eq!(s.message_count, 0);
420
+        assert_eq!(s.prompt_count, 0);
354421
         // last_activity falls back to mtime.
355422
         assert!(s.last_activity_at.is_some());
356423
     }
424
+
425
+    #[test]
426
+    fn prompt_count_excludes_tool_result_returns() {
427
+        let tmp = tempdir().unwrap();
428
+        let path = tmp.path().join("abc.jsonl");
429
+        write_fixture(
430
+            &path,
431
+            &[
432
+                // Real human prompt.
433
+                r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":"run the tests"}}"#,
434
+                // Assistant's tool_use.
435
+                r#"{"type":"assistant","uuid":"u2","timestamp":"2026-04-11T00:55:10.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"model":"claude-opus-4-6","content":[{"type":"tool_use","id":"t1","name":"Bash","input":{"command":"cargo test"}}]}}"#,
436
+                // Tool_result posing as a user event — NOT a prompt.
437
+                r#"{"type":"user","uuid":"u3","timestamp":"2026-04-11T00:55:20.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"t1","content":"ok","is_error":false}]}}"#,
438
+                // Another real human prompt.
439
+                r#"{"type":"user","uuid":"u4","timestamp":"2026-04-11T00:55:30.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":"now commit"}}"#,
440
+            ],
441
+        );
442
+
443
+        let s = summarize(&path, "-Users-me-repo").unwrap();
444
+        // All 4 events render in the viewer timeline.
445
+        assert_eq!(s.message_count, 4);
446
+        // Only the two real human prompts count, not the tool_result.
447
+        assert_eq!(s.prompt_count, 2);
448
+    }
357449
 }
src-tauri/src/core/reader.rsmodified
@@ -20,6 +20,67 @@ use crate::core::error::CoreResult;
2020
 use crate::core::metadata::summarize;
2121
 use crate::core::schema::{ContentBlock, Message, RawEvent, SessionDetail, Usage};
2222
 
23
+/// The set of event kinds that `raw_to_message` drops on the floor
24
+/// — session-level metadata that doesn't belong in the viewer
25
+/// timeline. Kept here so `metadata::summarize` can apply the same
26
+/// filter when computing counts, ensuring the sidebar's "N events"
27
+/// matches what the viewer actually renders.
28
+pub const NON_TIMELINE_KINDS: &[&str] = &[
29
+    "permission-mode",
30
+    "custom-title",
31
+    "agent-name",
32
+    "ai-title",
33
+    "file-history-snapshot",
34
+    "queue-operation",
35
+    "progress",
36
+    "last-prompt",
37
+    "pr-link",
38
+];
39
+
40
+/// Returns `true` if this event would render as a row in the
41
+/// viewer timeline. Mirrors the filter at the top of
42
+/// [`raw_to_message`] without allocating the full `Message`.
43
+/// Used by the metadata summarizer for its `message_count`.
44
+///
45
+/// Note: this does NOT reproduce the "empty assistant shell"
46
+/// drop that `raw_to_message` applies — doing so would require
47
+/// parsing `message.content`, which would double the summarize
48
+/// cost for a negligible accuracy gain. Empty-shell assistants
49
+/// are a small fraction in real sessions.
50
+pub fn is_timeline_event(ev: &RawEvent) -> bool {
51
+    if ev.is_sidechain.unwrap_or(false) {
52
+        return false;
53
+    }
54
+    !NON_TIMELINE_KINDS.contains(&ev.kind.as_str())
55
+}
56
+
57
+/// Returns `true` if this event is an actual human-typed prompt
58
+/// — a `user` event whose `content` is either a plain string or
59
+/// an array that doesn't carry a `tool_result` block. After every
60
+/// `tool_use`, Claude Code writes a corresponding `user` event
61
+/// whose content is `[{"type":"tool_result",...}]`; those are
62
+/// tool returns, not prompts, and counting them as "messages I
63
+/// sent" massively inflates the sidebar count for any session
64
+/// with tool calls.
65
+pub fn is_user_prompt(ev: &RawEvent) -> bool {
66
+    if ev.kind != "user" {
67
+        return false;
68
+    }
69
+    if ev.is_sidechain.unwrap_or(false) {
70
+        return false;
71
+    }
72
+    let Some(msg) = ev.message.as_ref() else {
73
+        return false;
74
+    };
75
+    match msg.get("content") {
76
+        Some(Value::String(_)) => true,
77
+        Some(Value::Array(blocks)) => !blocks.iter().any(|b| {
78
+            b.get("type").and_then(Value::as_str) == Some("tool_result")
79
+        }),
80
+        _ => false,
81
+    }
82
+}
83
+
2384
 pub fn read_session(path: &Path, project_id: &str) -> CoreResult<SessionDetail> {
2485
     read_session_limited(path, project_id, None)
2586
 }
src-tauri/src/core/schema.rsmodified
@@ -155,8 +155,19 @@ pub struct SessionSummary {
155155
     pub last_activity_at: Option<DateTime<Utc>>,
156156
     /// First assistant message's `message.model`, if observed.
157157
     pub model: Option<String>,
158
-    /// Approximate; counts every line without re-parsing.
158
+    /// Count of events that render as rows in the viewer timeline.
159
+    /// Excludes session-level metadata events (`permission-mode`,
160
+    /// `file-history-snapshot`, etc.) and sidechain subagent
161
+    /// events. Close to `read_session`'s `messages.len()` (empty assistant shells still count).
159162
     pub message_count: u32,
163
+    /// Count of actual human-typed prompts. A `user` event whose
164
+    /// `content` is a plain string, or an array that does NOT
165
+    /// contain a `tool_result` block, counts as a prompt.
166
+    /// Tool-result return events (which Claude Code writes as
167
+    /// `user` events after every `tool_use`) are excluded so the
168
+    /// sidebar reflects turns the human actually drove.
169
+    #[serde(default)]
170
+    pub prompt_count: u32,
160171
     pub git_branch: Option<String>,
161172
     pub version: Option<String>,
162173
     pub slug: Option<String>,
src/lib/ipc/generated/SessionSummary.tsmodified
@@ -30,9 +30,21 @@ lastActivityAt: string | null,
3030
  */
3131
 model: string | null, 
3232
 /**
33
- * Approximate; counts every line without re-parsing.
33
+ * Count of events that render as rows in the viewer timeline.
34
+ * Excludes session-level metadata events (`permission-mode`,
35
+ * `file-history-snapshot`, etc.) and sidechain subagent
36
+ * events. Matches `read_session`'s `messages.len()`.
3437
  */
35
-messageCount: number, gitBranch: string | null, version: string | null, slug: string | null, 
38
+messageCount: number, 
39
+/**
40
+ * Count of actual human-typed prompts. A `user` event whose
41
+ * `content` is a plain string, or an array that does NOT
42
+ * contain a `tool_result` block, counts as a prompt.
43
+ * Tool-result return events (which Claude Code writes as
44
+ * `user` events after every `tool_use`) are excluded so the
45
+ * sidebar reflects turns the human actually drove.
46
+ */
47
+promptCount: number, gitBranch: string | null, version: string | null, slug: string | null, 
3648
 /**
3749
  * Verified working directory from the first event that carried a
3850
  * `cwd` field. Projects use this as the authoritative cwd when
src/lib/store/sessions.test.tsmodified
@@ -57,6 +57,7 @@ function seedSelectedSession(sessionId: string): SessionDetail {
5757
       lastActivityAt: null,
5858
       model: null,
5959
       messageCount: 0,
60
+      promptCount: 0,
6061
       gitBranch: null,
6162
       version: null,
6263
       slug: null,
@@ -259,6 +260,7 @@ function makeSessionSummary(
259260
     lastActivityAt: null,
260261
     model: null,
261262
     messageCount: 0,
263
+    promptCount: 0,
262264
     gitBranch: null,
263265
     version: null,
264266
     slug: null,
src/lib/store/sessions.tsmodified
@@ -529,6 +529,7 @@ export const useSessionStore = create<SessionStore>((set, get) => ({
529529
       lastActivityAt: now,
530530
       model: null,
531531
       messageCount: 0,
532
+      promptCount: 0,
532533
       gitBranch: null,
533534
       version: null,
534535
       slug: null,