Rust · 23560 bytes Raw Blame History
1 //! Full session reader. Streams a `.jsonl` file, converts each event
2 //! to a frontend-facing [`Message`], and assembles a [`SessionDetail`]
3 //! alongside the cheap metadata summary.
4 //!
//! Session-level metadata events (every kind listed in
//! [`NON_TIMELINE_KINDS`], e.g. `permission-mode`, `custom-title`,
//! `agent-name`, `file-history-snapshot`, `queue-operation`) are
//! filtered out — they don't belong in the viewer timeline.
8 //!
9 //! Sidechain events are also filtered in v0. Partial/broken lines are
10 //! skipped silently; the reader never fails on malformed data.
11
12 use std::fs::File;
13 use std::io::{BufRead, BufReader};
14 use std::path::Path;
15
16 use chrono::{DateTime, Utc};
17 use serde_json::Value;
18
19 use crate::core::error::CoreResult;
20 use crate::core::metadata::summarize;
21 use crate::core::schema::{ContentBlock, Message, RawEvent, SessionDetail, Usage};
22
/// Event kinds that never become a row in the viewer timeline.
///
/// These are the session-level metadata kinds that `raw_to_message`
/// drops on the floor. The list is public so `metadata::summarize` can
/// apply the same filter when computing counts, ensuring the sidebar's
/// "N events" matches what the viewer actually renders.
///
/// Matching is by exact, case-sensitive comparison against the event's
/// `type` string (see [`is_timeline_event`]).
pub const NON_TIMELINE_KINDS: &[&str] = &[
    "permission-mode",
    "custom-title",
    "agent-name",
    "ai-title",
    "file-history-snapshot",
    "queue-operation",
    "progress",
    "last-prompt",
    "pr-link",
];
39
40 /// Returns `true` if this event would render as a row in the
41 /// viewer timeline. Mirrors the filter at the top of
42 /// [`raw_to_message`] without allocating the full `Message`.
43 /// Used by the metadata summarizer for its `message_count`.
44 ///
45 /// Note: this does NOT reproduce the "empty assistant shell"
46 /// drop that `raw_to_message` applies — doing so would require
47 /// parsing `message.content`, which would double the summarize
48 /// cost for a negligible accuracy gain. Empty-shell assistants
49 /// are a small fraction in real sessions.
50 pub fn is_timeline_event(ev: &RawEvent) -> bool {
51 if ev.is_sidechain.unwrap_or(false) {
52 return false;
53 }
54 !NON_TIMELINE_KINDS.contains(&ev.kind.as_str())
55 }
56
57 /// Returns `true` if this event is an actual human-typed prompt
58 /// — a `user` event whose `content` is either a plain string or
59 /// an array that doesn't carry a `tool_result` block. After every
60 /// `tool_use`, Claude Code writes a corresponding `user` event
61 /// whose content is `[{"type":"tool_result",...}]`; those are
62 /// tool returns, not prompts, and counting them as "messages I
63 /// sent" massively inflates the sidebar count for any session
64 /// with tool calls.
65 pub fn is_user_prompt(ev: &RawEvent) -> bool {
66 if ev.kind != "user" {
67 return false;
68 }
69 if ev.is_sidechain.unwrap_or(false) {
70 return false;
71 }
72 let Some(msg) = ev.message.as_ref() else {
73 return false;
74 };
75 match msg.get("content") {
76 Some(Value::String(_)) => true,
77 Some(Value::Array(blocks)) => !blocks.iter().any(|b| {
78 b.get("type").and_then(Value::as_str) == Some("tool_result")
79 }),
80 _ => false,
81 }
82 }
83
84 pub fn read_session(path: &Path, project_id: &str) -> CoreResult<SessionDetail> {
85 read_session_limited(path, project_id, None)
86 }
87
88 /// Read a session, optionally capping the result to the most recent
89 /// `limit` messages. Required for host performance on very large
90 /// sessions — one user's armfortas session is 171 MB of JSONL and
91 /// that's enough to hang the main thread for seconds during IPC
92 /// deserialization. Passing `Some(N)` streams the whole file but
93 /// only retains the tail, so the wire payload stays bounded.
94 ///
95 /// The returned [`SessionDetail`] is identical in shape regardless
96 /// of whether `limit` was set; truncation is invisible to the
97 /// frontend except that `messages.len() < summary.message_count`.
98 pub fn read_session_limited(
99 path: &Path,
100 project_id: &str,
101 limit: Option<usize>,
102 ) -> CoreResult<SessionDetail> {
103 let summary = summarize(path, project_id)?;
104 let messages = read_messages(path, limit)?;
105 Ok(SessionDetail { summary, messages })
106 }
107
108 fn read_messages(path: &Path, limit: Option<usize>) -> CoreResult<Vec<Message>> {
109 let file = File::open(path)?;
110 let reader = BufReader::new(file);
111 let mut fallback_counter: u32 = 0;
112
113 // When a cap is set, keep a rolling tail so we never allocate a
114 // `Vec<Message>` bigger than the cap + 1. This keeps memory
115 // bounded on multi-hundred-megabyte files.
116 match limit {
117 Some(cap) if cap > 0 => {
118 let mut tail: std::collections::VecDeque<Message> =
119 std::collections::VecDeque::with_capacity(cap);
120 for line in reader.lines() {
121 let line = match line {
122 Ok(l) => l,
123 Err(_) => continue,
124 };
125 if line.is_empty() {
126 continue;
127 }
128 let ev: RawEvent = match serde_json::from_str(&line) {
129 Ok(e) => e,
130 Err(_) => continue,
131 };
132 if ev.is_sidechain.unwrap_or(false) {
133 continue;
134 }
135 if let Some(msg) = raw_to_message(ev, &mut fallback_counter) {
136 if tail.len() == cap {
137 tail.pop_front();
138 }
139 tail.push_back(msg);
140 }
141 }
142 Ok(tail.into_iter().collect())
143 }
144 _ => {
145 let mut out = Vec::new();
146 for line in reader.lines() {
147 let line = match line {
148 Ok(l) => l,
149 Err(_) => continue,
150 };
151 if line.is_empty() {
152 continue;
153 }
154 let ev: RawEvent = match serde_json::from_str(&line) {
155 Ok(e) => e,
156 Err(_) => continue,
157 };
158 if ev.is_sidechain.unwrap_or(false) {
159 continue;
160 }
161 if let Some(msg) = raw_to_message(ev, &mut fallback_counter) {
162 out.push(msg);
163 }
164 }
165 Ok(out)
166 }
167 }
168 }
169
170 /// Convert one raw event into a timeline message. Returns `None` for
171 /// session-level metadata events and for anything we can't construct
172 /// a stable id/timestamp for.
173 pub(crate) fn raw_to_message(ev: RawEvent, fallback_counter: &mut u32) -> Option<Message> {
174 match ev.kind.as_str() {
175 "permission-mode"
176 | "custom-title"
177 | "agent-name"
178 | "ai-title"
179 | "file-history-snapshot"
180 | "queue-operation"
181 | "progress"
182 | "last-prompt"
183 | "pr-link" => return None,
184 _ => {}
185 }
186
187 let id = ev.uuid.clone().unwrap_or_else(|| {
188 *fallback_counter += 1;
189 format!("synthetic-{}", fallback_counter)
190 });
191 let at = ev.timestamp;
192
193 match ev.kind.as_str() {
194 "user" => {
195 let text = ev.message.as_ref().and_then(extract_user_text)?;
196 Some(Message::User {
197 id,
198 at: at?,
199 text,
200 is_meta: ev.is_meta.unwrap_or(false),
201 })
202 }
203
204 "assistant" => {
205 let (model, blocks, stop_reason, usage) = extract_assistant(ev.message.as_ref());
206 if blocks.is_empty() && model.is_none() {
207 // Skip empty assistant shells.
208 return None;
209 }
210 Some(Message::Assistant {
211 id,
212 at: at?,
213 model,
214 blocks,
215 stop_reason,
216 usage,
217 status: None,
218 })
219 }
220
221 "system" => {
222 let text = extract_system_text(&ev);
223 Some(Message::System {
224 id,
225 at: at?,
226 text,
227 subtype: ev.subtype.clone(),
228 })
229 }
230
231 "attachment" => {
232 let (attachment_type, hook_name, text) = extract_attachment(ev.attachment.as_ref());
233 Some(Message::Attachment {
234 id,
235 at: at?,
236 attachment_type,
237 hook_name,
238 text,
239 })
240 }
241
242 raw_type => Some(Message::Unknown {
243 id,
244 at,
245 raw_type: raw_type.to_string(),
246 raw: raw_event_to_value(&ev),
247 }),
248 }
249 }
250
251 fn extract_user_text(msg: &Value) -> Option<String> {
252 match msg.get("content") {
253 Some(Value::String(s)) => Some(s.clone()),
254 Some(Value::Array(blocks)) => {
255 // A tool_result block looks like:
256 // { "type": "tool_result", "tool_use_id": "...", "content": "...", "is_error": ... }
257 // Mixed blocks are rendered as concatenated text for v0.
258 let mut parts = Vec::new();
259 for b in blocks {
260 match b.get("type").and_then(Value::as_str) {
261 Some("text") => {
262 if let Some(t) = b.get("text").and_then(Value::as_str) {
263 parts.push(t.to_string());
264 }
265 }
266 Some("tool_result") => {
267 let is_error = b
268 .get("is_error")
269 .and_then(Value::as_bool)
270 .unwrap_or(false);
271 let prefix = if is_error { "[tool error] " } else { "[tool result] " };
272 let body = match b.get("content") {
273 Some(Value::String(s)) => s.clone(),
274 Some(Value::Array(arr)) => arr
275 .iter()
276 .filter_map(|c| c.get("text").and_then(Value::as_str))
277 .collect::<Vec<_>>()
278 .join("\n"),
279 _ => String::new(),
280 };
281 parts.push(format!("{prefix}{body}"));
282 }
283 _ => {
284 if let Some(t) = b.get("text").and_then(Value::as_str) {
285 parts.push(t.to_string());
286 }
287 }
288 }
289 }
290 if parts.is_empty() {
291 None
292 } else {
293 Some(parts.join("\n"))
294 }
295 }
296 _ => None,
297 }
298 }
299
300 fn extract_assistant(
301 msg: Option<&Value>,
302 ) -> (Option<String>, Vec<ContentBlock>, Option<String>, Option<Usage>) {
303 let Some(msg) = msg else {
304 return (None, Vec::new(), None, None);
305 };
306 let model = msg
307 .get("model")
308 .and_then(Value::as_str)
309 .map(str::to_owned);
310 let stop_reason = msg
311 .get("stop_reason")
312 .and_then(Value::as_str)
313 .map(str::to_owned);
314 let usage = msg
315 .get("usage")
316 .cloned()
317 .and_then(|v| serde_json::from_value::<Usage>(v).ok());
318 let blocks = msg
319 .get("content")
320 .and_then(Value::as_array)
321 .map(|arr| arr.iter().filter_map(parse_content_block).collect())
322 .unwrap_or_default();
323 (model, blocks, stop_reason, usage)
324 }
325
326 fn parse_content_block(block: &Value) -> Option<ContentBlock> {
327 let kind = block.get("type").and_then(Value::as_str)?;
328 match kind {
329 "text" => {
330 let text = block.get("text").and_then(Value::as_str)?.to_string();
331 Some(ContentBlock::Text { text })
332 }
333 "thinking" => {
334 // Observed field is `thinking` not `text`.
335 let text = block
336 .get("thinking")
337 .and_then(Value::as_str)
338 .or_else(|| block.get("text").and_then(Value::as_str))
339 .unwrap_or("")
340 .to_string();
341 Some(ContentBlock::Thinking { text })
342 }
343 "tool_use" => {
344 let id = block.get("id").and_then(Value::as_str)?.to_string();
345 let name = block.get("name").and_then(Value::as_str)?.to_string();
346 let input = block.get("input").cloned().unwrap_or(Value::Null);
347 Some(ContentBlock::ToolUse { id, name, input })
348 }
349 "tool_result" => {
350 // Rare inside assistant content but handle it anyway.
351 let tool_use_id = block
352 .get("tool_use_id")
353 .and_then(Value::as_str)?
354 .to_string();
355 let content = match block.get("content") {
356 Some(Value::String(s)) => s.clone(),
357 Some(Value::Array(arr)) => arr
358 .iter()
359 .filter_map(|c| c.get("text").and_then(Value::as_str))
360 .collect::<Vec<_>>()
361 .join("\n"),
362 _ => String::new(),
363 };
364 let is_error = block
365 .get("is_error")
366 .and_then(Value::as_bool)
367 .unwrap_or(false);
368 Some(ContentBlock::ToolResult {
369 tool_use_id,
370 content,
371 is_error,
372 })
373 }
374 _ => None,
375 }
376 }
377
378 fn extract_system_text(ev: &RawEvent) -> String {
379 // `system` events in the wild carry hookInfos, stopReason, etc. as
380 // top-level fields rather than a tidy `text`. Fall back to subtype
381 // as a label.
382 ev.subtype
383 .clone()
384 .unwrap_or_else(|| "system notice".to_string())
385 }
386
387 fn extract_attachment(att: Option<&Value>) -> (String, Option<String>, String) {
388 let Some(att) = att else {
389 return ("unknown".into(), None, String::new());
390 };
391 let attachment_type = att
392 .get("type")
393 .and_then(Value::as_str)
394 .unwrap_or("attachment")
395 .to_string();
396 let hook_name = att
397 .get("hookName")
398 .and_then(Value::as_str)
399 .map(str::to_owned);
400 // Prefer stdout, then content, then command.
401 let text = att
402 .get("stdout")
403 .and_then(Value::as_str)
404 .filter(|s| !s.is_empty())
405 .or_else(|| att.get("content").and_then(Value::as_str))
406 .or_else(|| att.get("command").and_then(Value::as_str))
407 .unwrap_or("")
408 .to_string();
409 (attachment_type, hook_name, text)
410 }
411
412 /// Best-effort raw event → Value for Unknown variant. Loses the fields
413 /// we didn't carry on RawEvent but preserves the recognisable shape.
414 fn raw_event_to_value(ev: &RawEvent) -> Value {
415 serde_json::json!({
416 "type": ev.kind,
417 "uuid": ev.uuid,
418 "parentUuid": ev.parent_uuid,
419 "timestamp": ev.timestamp.map(|t: DateTime<Utc>| t.to_rfc3339()),
420 "cwd": ev.cwd,
421 "subtype": ev.subtype,
422 })
423 }
424
// Integration-style tests: each one writes a small `.jsonl` fixture
// into a temporary directory and reads it back through the public
// entry points.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::tempdir;

    /// Write `lines` to `path`, one per line, each followed by a newline.
    fn write_fixture(path: &Path, lines: &[&str]) {
        let mut f = File::create(path).unwrap();
        for line in lines {
            writeln!(f, "{line}").unwrap();
        }
    }

    /// A plain user prompt followed by an assistant reply yields two
    /// messages with text, model, blocks and usage populated.
    #[test]
    fn reads_user_assistant_pair() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("s.jsonl");
        write_fixture(
            &path,
            &[
                r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:35.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"role":"user","content":"hello"}}"#,
                r#"{"type":"assistant","uuid":"u2","timestamp":"2026-04-11T00:55:40.000Z","cwd":"/Users/me/repo","sessionId":"abc","version":"2.1.101","gitBranch":"main","message":{"model":"claude-opus-4-6","content":[{"type":"text","text":"hi there"}],"usage":{"input_tokens":5,"output_tokens":10,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}"#,
            ],
        );

        let detail = read_session(&path, "-Users-me-repo").unwrap();
        assert_eq!(detail.messages.len(), 2);
        match &detail.messages[0] {
            Message::User { text, is_meta, .. } => {
                assert_eq!(text, "hello");
                assert!(!is_meta);
            }
            other => panic!("expected user, got {other:?}"),
        }
        match &detail.messages[1] {
            Message::Assistant {
                model,
                blocks,
                usage,
                ..
            } => {
                assert_eq!(model.as_deref(), Some("claude-opus-4-6"));
                assert_eq!(blocks.len(), 1);
                assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "hi there"));
                assert_eq!(usage.as_ref().unwrap().output_tokens, 10);
            }
            other => panic!("expected assistant, got {other:?}"),
        }
    }

    /// Thinking, text and tool_use blocks are parsed in order.
    #[test]
    fn parses_tool_use_blocks() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("s.jsonl");
        write_fixture(
            &path,
            &[
                r#"{"type":"assistant","uuid":"u1","timestamp":"2026-04-11T00:55:40.000Z","sessionId":"abc","message":{"model":"claude-opus-4-6","content":[{"type":"thinking","thinking":"hmm"},{"type":"text","text":"Let me check."},{"type":"tool_use","id":"tu_1","name":"Read","input":{"file_path":"/tmp/x"}}]}}"#,
            ],
        );

        let detail = read_session(&path, "-Users-me-repo").unwrap();
        let Message::Assistant { blocks, .. } = &detail.messages[0] else {
            panic!("expected assistant");
        };
        assert_eq!(blocks.len(), 3);
        assert!(matches!(&blocks[0], ContentBlock::Thinking { text } if text == "hmm"));
        assert!(matches!(&blocks[1], ContentBlock::Text { .. }));
        assert!(matches!(&blocks[2], ContentBlock::ToolUse { name, .. } if name == "Read"));
    }

    /// A user event carrying a tool_result is rendered as prefixed
    /// text, and its `isMeta` flag is preserved.
    #[test]
    fn tool_result_inside_user_content_folds_to_text() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("s.jsonl");
        write_fixture(
            &path,
            &[
                r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:35.000Z","sessionId":"abc","isMeta":true,"message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"tu_1","content":"file contents here","is_error":false}]}}"#,
            ],
        );
        let detail = read_session(&path, "-Users-me-repo").unwrap();
        match &detail.messages[0] {
            Message::User { text, is_meta, .. } => {
                assert!(*is_meta);
                assert!(text.starts_with("[tool result]"));
                assert!(text.contains("file contents here"));
            }
            other => panic!("expected user/meta, got {other:?}"),
        }
    }

    /// Session-level metadata events are dropped; only the user
    /// message survives.
    #[test]
    fn filters_session_metadata_events() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("s.jsonl");
        write_fixture(
            &path,
            &[
                r#"{"type":"permission-mode","permissionMode":"default","sessionId":"abc"}"#,
                r#"{"type":"custom-title","customTitle":"t","sessionId":"abc"}"#,
                r#"{"type":"agent-name","agentName":"a","sessionId":"abc"}"#,
                r#"{"type":"file-history-snapshot","messageId":"m","snapshot":{}}"#,
                r#"{"type":"queue-operation","operation":"enqueue","sessionId":"abc"}"#,
                r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:35.000Z","sessionId":"abc","message":{"role":"user","content":"hi"}}"#,
            ],
        );
        let detail = read_session(&path, "-Users-me-repo").unwrap();
        assert_eq!(detail.messages.len(), 1);
        assert!(matches!(detail.messages[0], Message::User { .. }));
    }

    /// An unrecognised event kind is surfaced as `Message::Unknown`
    /// carrying the original type string.
    #[test]
    fn unknown_event_becomes_unknown_variant() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("s.jsonl");
        write_fixture(
            &path,
            &[
                r#"{"type":"brand-new-event-kind","uuid":"u1","timestamp":"2026-04-11T00:55:35.000Z","sessionId":"abc"}"#,
            ],
        );
        let detail = read_session(&path, "-Users-me-repo").unwrap();
        match &detail.messages[0] {
            Message::Unknown { raw_type, .. } => {
                assert_eq!(raw_type, "brand-new-event-kind");
            }
            other => panic!("expected unknown, got {other:?}"),
        }
    }

    /// Events flagged `isSidechain: true` are excluded from the result.
    #[test]
    fn skips_sidechain_events() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("s.jsonl");
        write_fixture(
            &path,
            &[
                r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:35.000Z","isSidechain":true,"sessionId":"abc","message":{"role":"user","content":"subagent"}}"#,
                r#"{"type":"user","uuid":"u2","timestamp":"2026-04-11T00:55:36.000Z","isSidechain":false,"sessionId":"abc","message":{"role":"user","content":"main"}}"#,
            ],
        );
        let detail = read_session(&path, "-Users-me-repo").unwrap();
        assert_eq!(detail.messages.len(), 1);
        let Message::User { text, .. } = &detail.messages[0] else {
            panic!("expected user");
        };
        assert_eq!(text, "main");
    }

    /// With a limit of 3 over 10 messages, the last three are returned
    /// in file order while the summary still counts all ten.
    #[test]
    fn limit_returns_tail_and_preserves_order() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("s.jsonl");
        let mut lines: Vec<String> = Vec::new();
        for i in 0..10 {
            lines.push(format!(
                "{{\"type\":\"user\",\"uuid\":\"u{i}\",\"timestamp\":\"2026-04-11T00:55:{:02}.000Z\",\"sessionId\":\"abc\",\"message\":{{\"role\":\"user\",\"content\":\"msg{i}\"}}}}",
                i
            ));
        }
        write_fixture(
            &path,
            &lines.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
        );

        let detail = read_session_limited(&path, "-Users-me-repo", Some(3)).unwrap();
        assert_eq!(detail.messages.len(), 3);
        // Summary still reflects total message count.
        assert_eq!(detail.summary.message_count, 10);
        let texts: Vec<&str> = detail
            .messages
            .iter()
            .filter_map(|m| match m {
                Message::User { text, .. } => Some(text.as_str()),
                _ => None,
            })
            .collect();
        assert_eq!(texts, vec!["msg7", "msg8", "msg9"]);
    }

    /// `Some(0)` behaves like no limit at all.
    #[test]
    fn limit_zero_is_treated_as_unlimited() {
        // 0 is a weird edge case; we treat it as "no cap" so a
        // caller mishandling its limit arg still gets data.
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("s.jsonl");
        write_fixture(
            &path,
            &[
                r#"{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:35.000Z","sessionId":"abc","message":{"role":"user","content":"hi"}}"#,
            ],
        );
        let detail = read_session_limited(&path, "-Users-me-repo", Some(0)).unwrap();
        assert_eq!(detail.messages.len(), 1);
    }

    /// A truncated final line (no closing brace, no newline) is
    /// skipped without failing the read.
    #[test]
    fn survives_partial_last_line() {
        let tmp = tempdir().unwrap();
        let path = tmp.path().join("s.jsonl");
        let mut f = File::create(&path).unwrap();
        writeln!(f, r#"{{"type":"user","uuid":"u1","timestamp":"2026-04-11T00:55:35.000Z","sessionId":"abc","message":{{"role":"user","content":"valid"}}}}"#).unwrap();
        f.write_all(br#"{"type":"assistant","uuid":"u2","timestamp":"2026-04"#).unwrap();
        drop(f);

        let detail = read_session(&path, "-Users-me-repo").unwrap();
        assert_eq!(detail.messages.len(), 1);
    }
}
635