| 1 | //! Parser for `~/.claude/history.jsonl` — Claude Code's **prompt |
| 2 | //! input history**. |
| 3 | //! |
| 4 | //! This file is wildly undocumented. Each line is a single JSON |
| 5 | //! object with at minimum: |
| 6 | //! |
| 7 | //! - `display` — the raw text the user typed into the prompt box |
| 8 | //! - `pastedContents` — sidecar blob for `@`-inserted paste content |
| 9 | //! - `timestamp` — epoch milliseconds (note: **not** seconds) |
| 10 | //! - `project` — the absolute path to the cwd the user was in |
| 11 | //! |
| 12 | //! Newer entries (Claude Code 2.x) also carry: |
| 13 | //! |
| 14 | //! - `sessionId` — the content session this prompt belonged to |
| 15 | //! |
//! We use this file for two things:
| 17 | //! |
| 18 | //! 1. **Archive discovery** — every distinct `project` path that |
| 19 | //! has entries here but isn't currently represented under |
| 20 | //! `~/.claude/projects/` becomes a ghost [`ProjectCategory::Archive`] |
| 21 | //! project in the sidebar. The raw transcripts are gone, but the |
| 22 | //! user's prompts are preserved. |
| 23 | //! 2. **Prompt-only session reconstruction** — given a project path, |
| 24 | //! [`HistoryLog::entries_for`] returns a chronologically-ordered |
| 25 | //! slice of prompts which the reader can render as a sequence of |
| 26 | //! user-only [`crate::core::schema::Message::User`] cards. |
| 27 | |
| 28 | use std::collections::HashMap; |
| 29 | use std::fs::File; |
| 30 | use std::io::{BufRead, BufReader}; |
| 31 | use std::path::{Path, PathBuf}; |
| 32 | |
| 33 | use serde::Deserialize; |
| 34 | use serde_json::Value; |
| 35 | |
| 36 | /// Default location of Claude Code's history file. |
| 37 | pub fn default_history_path() -> Option<PathBuf> { |
| 38 | dirs::home_dir().map(|h| h.join(".claude/history.jsonl")) |
| 39 | } |
| 40 | |
| 41 | /// One parsed line of `history.jsonl`. |
| 42 | #[derive(Debug, Clone)] |
| 43 | pub struct HistoryEntry { |
| 44 | /// Absolute filesystem path (e.g. `/Users/me/MusicCPR/CPR-Music`). |
| 45 | pub project: String, |
| 46 | /// Epoch milliseconds. |
| 47 | pub timestamp_ms: i64, |
| 48 | /// Raw user input. |
| 49 | pub display: String, |
| 50 | /// Optional session linkage (only on Claude Code 2.x entries — |
| 51 | /// roughly the most recent ~32% of entries on an older account). |
| 52 | pub session_id: Option<String>, |
| 53 | /// Optional paste sidecar — usually an empty object. |
| 54 | pub pasted_contents: Option<Value>, |
| 55 | } |
| 56 | |
| 57 | #[derive(Debug, Deserialize)] |
| 58 | #[serde(rename_all = "camelCase")] |
| 59 | struct RawLine { |
| 60 | #[serde(default)] |
| 61 | display: Option<String>, |
| 62 | #[serde(default)] |
| 63 | timestamp: Option<i64>, |
| 64 | #[serde(default)] |
| 65 | project: Option<String>, |
| 66 | #[serde(default)] |
| 67 | session_id: Option<String>, |
| 68 | #[serde(default)] |
| 69 | pasted_contents: Option<Value>, |
| 70 | } |
| 71 | |
| 72 | /// Full in-memory view of `history.jsonl`, indexed by project path. |
| 73 | #[derive(Debug, Default)] |
| 74 | pub struct HistoryLog { |
| 75 | /// Project path (absolute) → every entry for that project, |
| 76 | /// sorted oldest-first. |
| 77 | by_project: HashMap<String, Vec<HistoryEntry>>, |
| 78 | } |
| 79 | |
| 80 | impl HistoryLog { |
| 81 | pub fn empty() -> Self { |
| 82 | Self::default() |
| 83 | } |
| 84 | |
| 85 | /// Best-effort load. Missing file, partial lines, and unexpected |
| 86 | /// shapes all degrade gracefully to an empty log — this is a |
| 87 | /// nice-to-have source, never a hard dependency. |
| 88 | pub fn load(path: &Path) -> Self { |
| 89 | let Ok(file) = File::open(path) else { |
| 90 | return Self::empty(); |
| 91 | }; |
| 92 | let reader = BufReader::new(file); |
| 93 | |
| 94 | let mut by_project: HashMap<String, Vec<HistoryEntry>> = HashMap::new(); |
| 95 | for line in reader.lines() { |
| 96 | let Ok(line) = line else { continue }; |
| 97 | if line.trim().is_empty() { |
| 98 | continue; |
| 99 | } |
| 100 | let Ok(raw) = serde_json::from_str::<RawLine>(&line) else { |
| 101 | continue; |
| 102 | }; |
| 103 | let (Some(project), Some(timestamp_ms), Some(display)) = |
| 104 | (raw.project, raw.timestamp, raw.display) |
| 105 | else { |
| 106 | continue; |
| 107 | }; |
| 108 | if project.is_empty() { |
| 109 | continue; |
| 110 | } |
| 111 | by_project.entry(project.clone()).or_default().push(HistoryEntry { |
| 112 | project, |
| 113 | timestamp_ms, |
| 114 | display, |
| 115 | session_id: raw.session_id, |
| 116 | pasted_contents: raw.pasted_contents, |
| 117 | }); |
| 118 | } |
| 119 | |
| 120 | // Sort each project's entries oldest-first so downstream |
| 121 | // synthesis can walk them linearly. |
| 122 | for entries in by_project.values_mut() { |
| 123 | entries.sort_by_key(|e| e.timestamp_ms); |
| 124 | } |
| 125 | |
| 126 | Self { by_project } |
| 127 | } |
| 128 | |
| 129 | /// Number of distinct project paths in the log. |
| 130 | pub fn project_count(&self) -> usize { |
| 131 | self.by_project.len() |
| 132 | } |
| 133 | |
| 134 | /// Total entries across all projects. |
| 135 | pub fn total_entries(&self) -> usize { |
| 136 | self.by_project.values().map(|v| v.len()).sum() |
| 137 | } |
| 138 | |
| 139 | /// Iterate every project path in the log. |
| 140 | pub fn projects(&self) -> impl Iterator<Item = &str> { |
| 141 | self.by_project.keys().map(|k| k.as_str()) |
| 142 | } |
| 143 | |
| 144 | /// All entries (oldest-first) for one project path. Empty slice |
| 145 | /// if we've never seen that project. |
| 146 | pub fn entries_for(&self, project: &str) -> &[HistoryEntry] { |
| 147 | self.by_project |
| 148 | .get(project) |
| 149 | .map(|v| v.as_slice()) |
| 150 | .unwrap_or(&[]) |
| 151 | } |
| 152 | |
| 153 | /// Earliest timestamp seen for a project, or `None` if no entries. |
| 154 | pub fn earliest_for(&self, project: &str) -> Option<i64> { |
| 155 | self.by_project |
| 156 | .get(project) |
| 157 | .and_then(|v| v.first()) |
| 158 | .map(|e| e.timestamp_ms) |
| 159 | } |
| 160 | |
| 161 | /// Latest timestamp seen for a project, or `None` if no entries. |
| 162 | pub fn latest_for(&self, project: &str) -> Option<i64> { |
| 163 | self.by_project |
| 164 | .get(project) |
| 165 | .and_then(|v| v.last()) |
| 166 | .map(|e| e.timestamp_ms) |
| 167 | } |
| 168 | } |
| 169 | |
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Writes `lines` to `path`, terminating each one with a newline.
    fn write_fixture(path: &Path, lines: &[&str]) {
        let contents: String = lines.iter().map(|line| format!("{line}\n")).collect();
        std::fs::write(path, contents).unwrap();
    }

    /// Writes `lines` to a fresh `history.jsonl` inside a temporary
    /// directory and loads it. The directory is removed when this helper
    /// returns, which is fine because `load` has already finished reading.
    fn load_fixture(lines: &[&str]) -> HistoryLog {
        let dir = tempdir().unwrap();
        let file = dir.path().join("history.jsonl");
        write_fixture(&file, lines);
        HistoryLog::load(&file)
    }

    #[test]
    fn load_missing_file_returns_empty() {
        let dir = tempdir().unwrap();
        let missing = dir.path().join("no_such.jsonl");

        let log = HistoryLog::load(&missing);

        assert_eq!(log.project_count(), 0);
        assert_eq!(log.total_entries(), 0);
    }

    #[test]
    fn parses_real_shape() {
        let log = load_fixture(&[
            r#"{"display":"/model ","pastedContents":{},"timestamp":1759257932714,"project":"/Users/me/MusicCPR/CPR-Music"}"#,
            r#"{"display":"fix the parser bug","pastedContents":{},"timestamp":1759257940000,"project":"/Users/me/MusicCPR/CPR-Music","sessionId":"abc-123"}"#,
            r#"{"display":"plan the thread browser","pastedContents":{},"timestamp":1775900000000,"project":"/Users/me/claudex","sessionId":"def-456"}"#,
        ]);

        assert_eq!(log.project_count(), 2);
        assert_eq!(log.total_entries(), 3);

        // Two prompts for the first project, returned oldest-first.
        let cpr = log.entries_for("/Users/me/MusicCPR/CPR-Music");
        assert_eq!(cpr.len(), 2);
        assert_eq!(cpr[0].display, "/model ");
        assert_eq!(cpr[1].display, "fix the parser bug");
        assert_eq!(cpr[1].session_id.as_deref(), Some("abc-123"));

        let claudex = log.entries_for("/Users/me/claudex");
        assert_eq!(claudex.len(), 1);
        assert_eq!(claudex[0].display, "plan the thread browser");
    }

    #[test]
    fn survives_garbage_lines() {
        let log = load_fixture(&[
            r#"{"display":"good","timestamp":1,"project":"/p"}"#,
            r#"not even json"#,
            r#"{"unrelated":"shape"}"#,
            // Empty project → skipped.
            r#"{"display":"","timestamp":2,"project":""}"#,
            r#"{"display":"also good","timestamp":3,"project":"/p"}"#,
        ]);

        assert_eq!(log.total_entries(), 2);
    }

    #[test]
    fn earliest_and_latest_are_correct() {
        // Deliberately out of order in the file.
        let log = load_fixture(&[
            r#"{"display":"a","timestamp":200,"project":"/p"}"#,
            r#"{"display":"b","timestamp":100,"project":"/p"}"#,
            r#"{"display":"c","timestamp":300,"project":"/p"}"#,
        ]);

        assert_eq!(log.earliest_for("/p"), Some(100));
        assert_eq!(log.latest_for("/p"), Some(300));
    }

    #[test]
    fn unknown_project_returns_empty_slice() {
        let log = HistoryLog::empty();

        assert!(log.entries_for("/nowhere").is_empty());
        assert!(log.earliest_for("/nowhere").is_none());
    }
}
| 262 |