`c728a02`

feat: prompt_count + viewer-aligned message_count in summarize

Authored by

espadonne 1 month ago

SHA: c728a027c4ef9d1864150ebf37100c3764dce275
Parents: 534d88d
Tree: e078243

8 changed files

Status	File	+	-
M	`src-tauri/src/commands.rs`	3	0
M	`src-tauri/src/core/grouping.rs`	4	0
M	`src-tauri/src/core/metadata.rs`	99	7
M	`src-tauri/src/core/reader.rs`	61	0
M	`src-tauri/src/core/schema.rs`	12	1
M	`src/lib/ipc/generated/SessionSummary.ts`	14	2
M	`src/lib/store/sessions.test.ts`	2	0
M	`src/lib/store/sessions.ts`	1	0

src-tauri/src/commands.rsmodified

          last_activity_at: last_activity,
          model: None,
          message_count: entries.len() as u32,
 +        // Archive sessions are reconstructed from ~/.claude/history.jsonl
 +        // which stores only human prompts — every entry IS a prompt.
 +        prompt_count: entries.len() as u32,
          git_branch: None,
          version: None,
          slug: None,

src-tauri/src/core/grouping.rsmodified

              last_activity_at: epoch_ms_to_utc(latest),
              model: None,
              message_count: entries.len() as u32,
 +            // Archive entries come from ~/.claude/history.jsonl which
 +            // only stores human prompts.
 +            prompt_count: entries.len() as u32,
              git_branch: None,
              version: None,
              slug: None,
                  .map(|dt| dt.with_timezone(&Utc)),
              model: None,
              message_count: 0,
 +            prompt_count: 0,
              git_branch: None,
              version: None,
              slug: None,

src-tauri/src/core/metadata.rsmodified

  use serde_json::Value;
  use crate::core::error::CoreResult;
 +use crate::core::reader::{is_timeline_event, is_user_prompt, NON_TIMELINE_KINDS};
  use crate::core::schema::{RawEvent, SessionSummary};
  use crate::core::title::sanitize_title;
      let file = File::open(session_path)?;
      let file_meta = file.metadata()?;
 -    // Single-pass head scan: parse up to HEAD_SCAN_LINES while also
 -    // counting the total line count of the file.
 +    // Single-pass scan:
 +    //   * For each line, cheaply classify it as timeline / prompt
 +    //     via prefix + substring checks so that counting 171 MB of
 +    //     JSONL doesn't cost a full serde parse per line.
 +    //   * For the first HEAD_SCAN_LINES (while head info is still
 +    //     unsaturated) also do a full RawEvent parse to pick up
 +    //     title / cwd / model / version metadata.
      let mut head = HeadInfo::default();
      let mut user_title_fallback: Option<String> = None;
 -    let mut total_lines: u32 = 0;
 +    let mut message_count: u32 = 0;
 +    let mut prompt_count: u32 = 0;
+     {
          let reader = BufReader::new(&file);
              if line.is_empty() {
                  continue;
+             }
 -            total_lines += 1;
 -            if i < HEAD_SCAN_LINES && !head.is_saturated() {
 +            // Head path: full parse, feeds both counters (via the
 +            // shared helpers) and head metadata.
 +            let head_parse = i < HEAD_SCAN_LINES && !head.is_saturated();
 +            if head_parse {
                  if let Ok(ev) = serde_json::from_str::<RawEvent>(&line) {
                      head.absorb(&ev);
                      if user_title_fallback.is_none() {
                              user_title_fallback = Some(t);
+                         }
+                     }
 +                    if is_timeline_event(&ev) {
 +                        message_count += 1;
 +                    }
 +                    if is_user_prompt(&ev) {
 +                        prompt_count += 1;
 +                    }
 +                    continue;
 +                }
 +                // Fall through to fast path if JSON parse fails.
 +            }
++
 +            // Fast path: skip full parse. This assumes Claude Code's
 +            // jsonl serializer emits `type` first with a stable field
 +            // order, so these byte-level checks approximate what the
 +            // full helpers would return without a serde parse per line.
 +            if is_sidechain_line(&line) {
 +                continue;
 +            }
 +            let kind = parse_kind_prefix(&line);
 +            match kind {
 +                Some(k) if NON_TIMELINE_KINDS.contains(&k) => continue,
 +                Some(k) => {
 +                    message_count += 1;
 +                    if k == "user" && !has_tool_result(&line) {
 +                        prompt_count += 1;
 +                    }
 +                }
 +                None => {
 +                    // Unrecognized shape — count as timeline so we
 +                    // don't silently lose events on schema drift.
 +                    message_count += 1;
+                 }
+             }
+         }
          started_at: head.started_at,
          last_activity_at,
          model: head.model,
 -        message_count: total_lines,
 +        message_count,
 +        prompt_count,
          git_branch: head.git_branch,
          version: head.version,
          slug: head.slug,
      })
+ }
 +/// Return the `type` value of a JSONL line that begins with exactly
 +/// `{"type":"` (no leading whitespace), ending at the next `"`; any
 +/// other shape yields `None`. The result borrows from `line`.
 +fn parse_kind_prefix(line: &str) -> Option<&str> {
 +    let rest = line.strip_prefix("{\"type\":\"")?;
 +    let end = rest.find('"')?;
 +    Some(&rest[..end])
 +}
++
 +/// Report whether `line` contains the exact bytes `"isSidechain":true`
 +/// anywhere, so the key's position in the object does not matter.
 +/// A serializer that puts whitespace after the colon would not match.
 +fn is_sidechain_line(line: &str) -> bool {
 +    line.contains("\"isSidechain\":true")
 +}
++
 +/// Report whether `line` contains `"type":"tool_result"` at any
 +/// depth; the fast-path scan uses it to tell tool returns from prompts.
 +fn has_tool_result(line: &str) -> bool {
 +    line.contains("\"type\":\"tool_result\"")
 +}
++
  /// Scratchpad for everything we're learning from the file's head.
  #[derive(Debug, Default)]
  struct HeadInfo {
          assert_eq!(s.model.as_deref(), Some("claude-opus-4-6"));
          assert_eq!(s.git_branch.as_deref(), Some("main"));
          assert_eq!(s.version.as_deref(), Some("2.1.101"));
 -        assert_eq!(s.message_count, 5);
 +        // permission-mode is metadata → not counted in message_count.
 +        // attachment + user + 2 assistants = 4 timeline events.
 +        assert_eq!(s.message_count, 4);
 +        assert_eq!(s.prompt_count, 1);
          assert!(s.started_at.is_some());
          assert!(s.last_activity_at.is_some());
          assert_ne!(s.started_at, s.last_activity_at);
          let s = summarize(&path, "-Users-me-repo").unwrap();
          assert_eq!(s.title, "(untitled)");
          assert_eq!(s.message_count, 0);
 +        assert_eq!(s.prompt_count, 0);
          // last_activity falls back to mtime.
          assert!(s.last_activity_at.is_some());
+     }
++
 +    #[test]
 +    fn prompt_count_excludes_tool_result_returns() {
 +        let tmp = tempdir().unwrap();
 +        let path = tmp.path().join("abc.jsonl");
 +        write_fixture(
 +            &path,
 +            &[
 +                // Human prompt: `content` is a plain string.
 +                r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:00.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":"run the tests"}}"#,
 +                // Assistant turn carrying a `tool_use` block.
 +                r#"{"type":"assistant","uuid":"u2","timestamp":"2026-04-11T00:55:10.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"model":"claude-opus-4-6","content":[{"type":"tool_use","id":"t1","name":"Bash","input":{"command":"cargo test"}}]}}"#,
 +                // `user` event holding a `tool_result` block — a tool return, NOT a prompt.
 +                r#"{"type":"user","uuid":"u3","timestamp":"2026-04-11T00:55:20.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"t1","content":"ok","is_error":false}]}}"#,
 +                // Second human prompt, again with plain-string content.
 +                r#"{"type":"user","uuid":"u4","timestamp":"2026-04-11T00:55:30.000Z","cwd":"/Users/me/repo","sessionId":"abc","message":{"role":"user","content":"now commit"}}"#,
 +            ],
 +        );
++
 +        let s = summarize(&path, "-Users-me-repo").unwrap();
 +        // None of the 4 events is metadata or sidechain, so all are counted.
 +        assert_eq!(s.message_count, 4);
 +        // Only the two plain-string prompts count; the tool_result does not.
 +        assert_eq!(s.prompt_count, 2);
 +    }
+ }

src-tauri/src/core/reader.rsmodified

  use crate::core::metadata::summarize;
  use crate::core::schema::{ContentBlock, Message, RawEvent, SessionDetail, Usage};
 +/// Event kinds that carry session-level metadata rather than
 +/// viewer-timeline rows; `raw_to_message` discards all of them.
 +/// Exported so `metadata::summarize` filters on the very same list
 +/// when computing counts, keeping the sidebar's "N events" in step
 +/// with the rows the viewer actually renders.
 +pub const NON_TIMELINE_KINDS: &[&str] = &[
 +    "permission-mode",
 +    "custom-title",
 +    "agent-name",
 +    "ai-title",
 +    "file-history-snapshot",
 +    "queue-operation",
 +    "progress",
 +    "last-prompt",
 +    "pr-link",
 +];
++
 +/// Decide whether `ev` counts toward the viewer timeline: it must
 +/// not be flagged `is_sidechain` (a missing flag is treated as
 +/// `false`) and its `kind` must not be in [`NON_TIMELINE_KINDS`].
 +/// Mirrors the filter at the top of [`raw_to_message`]; feeds `message_count`.
 +///
 +/// Note: the "empty assistant shell" drop that `raw_to_message`
 +/// applies is deliberately NOT reproduced, because it would mean
 +/// parsing `message.content` for every event. Counts built on
 +/// this helper can therefore be slightly higher than the number
 +/// of rows the viewer actually renders.
 +pub fn is_timeline_event(ev: &RawEvent) -> bool {
 +    if ev.is_sidechain.unwrap_or(false) {
 +        return false;
 +    }
 +    !NON_TIMELINE_KINDS.contains(&ev.kind.as_str())
 +}
++
 +/// Returns `true` for a human-typed prompt: a non-sidechain `user`
 +/// event whose `message.content` is a plain string, or an array
 +/// with no `tool_result` block (an empty array also qualifies).
 +/// A missing `message`, or `content` of any other JSON type,
 +/// yields `false`. Claude Code records each tool return as a
 +/// `user` event shaped `[{"type":"tool_result",...}]`; counting
 +/// those as "messages I sent" would massively inflate the sidebar
 +/// count for any session with tool calls.
 +pub fn is_user_prompt(ev: &RawEvent) -> bool {
 +    if ev.kind != "user" {
 +        return false;
 +    }
 +    if ev.is_sidechain.unwrap_or(false) {
 +        return false;
 +    }
 +    let Some(msg) = ev.message.as_ref() else {
 +        return false;
 +    };
 +    match msg.get("content") {
 +        Some(Value::String(_)) => true,
 +        Some(Value::Array(blocks)) => !blocks.iter().any(|b| {
 +            b.get("type").and_then(Value::as_str) == Some("tool_result")
 +        }),
 +        _ => false,
 +    }
 +}
++
  pub fn read_session(path: &Path, project_id: &str) -> CoreResult<SessionDetail> {
      read_session_limited(path, project_id, None)
+ }

src-tauri/src/core/schema.rsmodified

      pub last_activity_at: Option<DateTime<Utc>>,
      /// First assistant message's `message.model`, if observed.
      pub model: Option<String>,
 -    /// Approximate; counts every line without re-parsing.
 +    /// Count of events that render as rows in the viewer timeline.
 +    /// Excludes session-level metadata events (`permission-mode`,
 +    /// `file-history-snapshot`, etc.) and sidechain subagent
 +    /// events. Approximates `read_session`'s `messages.len()`:
 +    /// empty assistant shells the viewer drops are still counted.
      pub message_count: u32,
 +    /// Count of actual human-typed prompts. A `user` event whose
 +    /// `content` is a plain string, or an array that does NOT
 +    /// contain a `tool_result` block, counts as a prompt.
 +    /// Tool-result return events (which Claude Code writes as
 +    /// `user` events after every `tool_use`) are excluded so the
 +    /// sidebar reflects turns the human actually drove.
 +    #[serde(default)]
 +    pub prompt_count: u32,
      pub git_branch: Option<String>,
      pub version: Option<String>,
      pub slug: Option<String>,

src/lib/ipc/generated/SessionSummary.tsmodified

   */
  model: string | null,
  /**
 - * Approximate; counts every line without re-parsing.
 + * Count of events that render as rows in the viewer timeline.
 + * Excludes session-level metadata events (`permission-mode`,
 + * `file-history-snapshot`, etc.) and sidechain subagent
 + * events. Approximates `read_session`'s `messages.len()`:
 + * empty assistant shells the viewer drops are still counted.
   */
 -messageCount: number, gitBranch: string | null, version: string | null, slug: string | null,
 +messageCount: number,
 +/**
 + * Count of actual human-typed prompts. A `user` event whose
 + * `content` is a plain string, or an array that does NOT
 + * contain a `tool_result` block, counts as a prompt.
 + * Tool-result return events (which Claude Code writes as
 + * `user` events after every `tool_use`) are excluded so the
 + * sidebar reflects turns the human actually drove.
 + */
 +promptCount: number, gitBranch: string | null, version: string | null, slug: string | null,
  /**
   * Verified working directory from the first event that carried a
   * `cwd` field. Projects use this as the authoritative cwd when

src/lib/store/sessions.test.tsmodified

        lastActivityAt: null,
        model: null,
        messageCount: 0,
 +      promptCount: 0,
        gitBranch: null,
        version: null,
        slug: null,
      lastActivityAt: null,
      model: null,
      messageCount: 0,
 +    promptCount: 0,
      gitBranch: null,
      version: null,
      slug: null,

src/lib/store/sessions.tsmodified

        lastActivityAt: now,
        model: null,
        messageCount: 0,
 +      promptCount: 0,
        gitBranch: null,
        version: null,
        slug: null,