| 1 | //! Core syntax highlighting engine |
| 2 | |
| 3 | #![allow(dead_code)] |
| 4 | |
| 5 | use super::languages::{Language, LanguageDef}; |
| 6 | use crossterm::style::Color; |
| 7 | |
| 8 | /// Token types for syntax highlighting |
| 9 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] |
pub enum TokenType {
    /// Unstyled text rendered in the terminal's default color.
    Plain,
    /// Identifier found in the language's keyword list.
    Keyword,
    /// String literal (single-line, multiline, or unterminated-to-EOL).
    String,
    /// Numeric literal (decimal, float, hex/octal/binary, with optional suffix).
    Number,
    /// Line comment or (possibly multiline) block comment.
    Comment,
    /// Operator matched from the language's operator list.
    Operator,
    /// Identifier found in the language's type list.
    Type,
    /// Identifier immediately followed by `(` (heuristic function call/def).
    Function,
    /// `#`-introduced directive at line start, for languages with a preprocessor.
    Preprocessor,
    /// Rust `#[...]`/`#![...]` attribute or Python `@name` decorator.
    Attribute,
    /// Single punctuation character from the language's punctuation set.
    Punctuation,
}
| 23 | |
| 24 | impl TokenType { |
| 25 | /// Get the foreground color for this token type |
| 26 | pub fn color(&self) -> Color { |
| 27 | match self { |
| 28 | TokenType::Plain => Color::Reset, |
| 29 | TokenType::Keyword => Color::Blue, |
| 30 | TokenType::String => Color::Green, |
| 31 | TokenType::Number => Color::Magenta, |
| 32 | TokenType::Comment => Color::DarkGrey, |
| 33 | TokenType::Operator => Color::Yellow, |
| 34 | TokenType::Type => Color::Cyan, |
| 35 | TokenType::Function => Color::Cyan, |
| 36 | TokenType::Preprocessor => Color::Magenta, |
| 37 | TokenType::Attribute => Color::Yellow, |
| 38 | TokenType::Punctuation => Color::DarkGrey, |
| 39 | } |
| 40 | } |
| 41 | |
| 42 | /// Whether this token type should be bold |
| 43 | pub fn bold(&self) -> bool { |
| 44 | matches!(self, TokenType::Keyword | TokenType::Function) |
| 45 | } |
| 46 | } |
| 47 | |
| 48 | /// A token in a line of text |
| 49 | #[derive(Debug, Clone)] |
pub struct Token {
    /// Token type (determines color/weight when rendered)
    pub token_type: TokenType,
    /// Start column (character index into the line, not byte offset)
    pub start: usize,
    /// End column (exclusive, character index — `start..end` spans the token)
    pub end: usize,
}
| 58 | |
| 59 | /// State for multiline constructs (comments, strings) |
| 60 | #[derive(Debug, Clone, Default, PartialEq)] |
pub struct HighlightState {
    /// Currently inside a multiline block comment (carried across lines).
    pub in_block_comment: bool,
    /// Currently inside a multiline string; stores the closing delimiter
    /// (e.g. `"""`) to match against. `None` when not in a string.
    /// The `Default` state (false / None) means "no multiline construct open".
    pub in_multiline_string: Option<String>,
}
| 67 | |
| 68 | /// Syntax highlighter for a specific language |
| 69 | #[derive(Debug)] |
pub struct Highlighter {
    /// Current language definition; `None` disables highlighting entirely.
    language: Option<LanguageDef>,
    /// State for multiline constructs (reset on cache invalidation).
    state: HighlightState,
    /// Cached state at the END of each line (state_cache[i] = state after processing line i).
    /// This allows O(1) lookup of the starting state for any line.
    state_cache: Vec<HighlightState>,
    /// Line index from which cache is invalid: entries for lines in
    /// `0..cache_valid_until` are trustworthy; everything from this line
    /// onward needs recalculation.
    cache_valid_until: usize,
}
| 81 | |
| 82 | impl Default for Highlighter { |
| 83 | fn default() -> Self { |
| 84 | Self::new() |
| 85 | } |
| 86 | } |
| 87 | |
| 88 | impl Highlighter { |
| 89 | /// Create a new highlighter with no language |
| 90 | pub fn new() -> Self { |
| 91 | Self { |
| 92 | language: None, |
| 93 | state: HighlightState::default(), |
| 94 | state_cache: Vec::new(), |
| 95 | cache_valid_until: 0, |
| 96 | } |
| 97 | } |
| 98 | |
| 99 | /// Detect and set language based on filename |
| 100 | pub fn detect_language(&mut self, filename: &str) { |
| 101 | self.language = Language::detect(filename).map(|l| l.definition()); |
| 102 | self.invalidate_cache(0); |
| 103 | } |
| 104 | |
| 105 | /// Set language explicitly |
| 106 | pub fn set_language(&mut self, lang: Language) { |
| 107 | self.language = Some(lang.definition()); |
| 108 | self.invalidate_cache(0); |
| 109 | } |
| 110 | |
| 111 | /// Clear language (disable highlighting) |
| 112 | pub fn clear_language(&mut self) { |
| 113 | self.language = None; |
| 114 | self.invalidate_cache(0); |
| 115 | } |
| 116 | |
| 117 | /// Check if highlighting is enabled |
| 118 | pub fn is_enabled(&self) -> bool { |
| 119 | self.language.is_some() |
| 120 | } |
| 121 | |
| 122 | /// Get current language name |
| 123 | pub fn language_name(&self) -> Option<&str> { |
| 124 | self.language.as_ref().map(|l| l.name) |
| 125 | } |
| 126 | |
| 127 | /// Get the line comment prefix for the current language (e.g., "//", "#", "--") |
| 128 | pub fn line_comment(&self) -> Option<&'static str> { |
| 129 | self.language.as_ref().and_then(|l| l.line_comment) |
| 130 | } |
| 131 | |
| 132 | /// Reset multiline state (call when buffer changes significantly) |
| 133 | pub fn reset_state(&mut self) { |
| 134 | self.invalidate_cache(0); |
| 135 | } |
| 136 | |
| 137 | /// Invalidate the highlight state cache from a specific line onward. |
| 138 | /// Call this when the buffer content changes at or after line `from_line`. |
| 139 | pub fn invalidate_cache(&mut self, from_line: usize) { |
| 140 | self.cache_valid_until = self.cache_valid_until.min(from_line); |
| 141 | self.state = HighlightState::default(); |
| 142 | } |
| 143 | |
| 144 | /// Get the starting highlight state for a given line by looking up the cache. |
| 145 | /// Returns the state after processing (line_idx - 1), or default state for line 0. |
| 146 | pub fn get_state_for_line(&self, line_idx: usize) -> HighlightState { |
| 147 | if line_idx == 0 { |
| 148 | HighlightState::default() |
| 149 | } else if line_idx <= self.cache_valid_until && line_idx <= self.state_cache.len() { |
| 150 | self.state_cache[line_idx - 1].clone() |
| 151 | } else { |
| 152 | // Cache miss - caller needs to rebuild from last valid point |
| 153 | HighlightState::default() |
| 154 | } |
| 155 | } |
| 156 | |
| 157 | /// Update the state cache after tokenizing a line. |
| 158 | /// Call this after tokenize_line() with the resulting state. |
| 159 | pub fn update_cache(&mut self, line_idx: usize, state: &HighlightState) { |
| 160 | // Ensure cache is large enough |
| 161 | if line_idx >= self.state_cache.len() { |
| 162 | self.state_cache.resize(line_idx + 1, HighlightState::default()); |
| 163 | } |
| 164 | self.state_cache[line_idx] = state.clone(); |
| 165 | // Update valid range if this extends it |
| 166 | if line_idx >= self.cache_valid_until { |
| 167 | self.cache_valid_until = line_idx + 1; |
| 168 | } |
| 169 | } |
| 170 | |
| 171 | /// Get the line number from which the cache is valid |
| 172 | pub fn cache_valid_from(&self) -> usize { |
| 173 | self.cache_valid_until |
| 174 | } |
| 175 | |
| 176 | /// Tokenize a single line, returning tokens and updated state |
| 177 | /// The state should be passed from the previous line for correct multiline handling |
| 178 | pub fn tokenize_line(&self, line: &str, state: &mut HighlightState) -> Vec<Token> { |
| 179 | let lang = match &self.language { |
| 180 | Some(l) => l, |
| 181 | None => return vec![], |
| 182 | }; |
| 183 | |
| 184 | let mut tokens = Vec::new(); |
| 185 | let chars: Vec<char> = line.chars().collect(); |
| 186 | let mut i = 0; |
| 187 | |
| 188 | while i < chars.len() { |
| 189 | // Handle continuing multiline comment |
| 190 | if state.in_block_comment { |
| 191 | if let Some((end_start, end_len)) = self.find_block_comment_end(lang, &chars, i) { |
| 192 | tokens.push(Token { |
| 193 | token_type: TokenType::Comment, |
| 194 | start: i, |
| 195 | end: end_start + end_len, |
| 196 | }); |
| 197 | i = end_start + end_len; |
| 198 | state.in_block_comment = false; |
| 199 | continue; |
| 200 | } else { |
| 201 | // Rest of line is comment |
| 202 | tokens.push(Token { |
| 203 | token_type: TokenType::Comment, |
| 204 | start: i, |
| 205 | end: chars.len(), |
| 206 | }); |
| 207 | break; |
| 208 | } |
| 209 | } |
| 210 | |
| 211 | // Handle continuing multiline string |
| 212 | if let Some(delim) = state.in_multiline_string.as_ref() { |
| 213 | if let Some(end_pos) = self.find_string_end(&chars, i, delim) { |
| 214 | tokens.push(Token { |
| 215 | token_type: TokenType::String, |
| 216 | start: i, |
| 217 | end: end_pos, |
| 218 | }); |
| 219 | i = end_pos; |
| 220 | state.in_multiline_string = None; |
| 221 | continue; |
| 222 | } else { |
| 223 | // Rest of line is string |
| 224 | tokens.push(Token { |
| 225 | token_type: TokenType::String, |
| 226 | start: i, |
| 227 | end: chars.len(), |
| 228 | }); |
| 229 | break; |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | // Skip whitespace |
| 234 | if chars[i].is_whitespace() { |
| 235 | i += 1; |
| 236 | continue; |
| 237 | } |
| 238 | |
| 239 | // Check for line comment |
| 240 | if let Some(ref comment) = lang.line_comment { |
| 241 | if self.matches_at(&chars, i, comment) { |
| 242 | tokens.push(Token { |
| 243 | token_type: TokenType::Comment, |
| 244 | start: i, |
| 245 | end: chars.len(), |
| 246 | }); |
| 247 | break; |
| 248 | } |
| 249 | } |
| 250 | |
| 251 | // Check for block comment start |
| 252 | if let (Some(ref start), Some(_)) = (&lang.block_comment_start, &lang.block_comment_end) { |
| 253 | if self.matches_at(&chars, i, start) { |
| 254 | let comment_start = i; |
| 255 | i += start.chars().count(); |
| 256 | |
| 257 | if let Some((end_start, end_len)) = self.find_block_comment_end(lang, &chars, i) { |
| 258 | tokens.push(Token { |
| 259 | token_type: TokenType::Comment, |
| 260 | start: comment_start, |
| 261 | end: end_start + end_len, |
| 262 | }); |
| 263 | i = end_start + end_len; |
| 264 | } else { |
| 265 | // Multiline comment continues |
| 266 | tokens.push(Token { |
| 267 | token_type: TokenType::Comment, |
| 268 | start: comment_start, |
| 269 | end: chars.len(), |
| 270 | }); |
| 271 | state.in_block_comment = true; |
| 272 | break; |
| 273 | } |
| 274 | continue; |
| 275 | } |
| 276 | } |
| 277 | |
| 278 | // Check for strings |
| 279 | if let Some((token, new_i, multiline_delim)) = self.try_parse_string(lang, &chars, i) { |
| 280 | tokens.push(token); |
| 281 | i = new_i; |
| 282 | if let Some(delim) = multiline_delim { |
| 283 | state.in_multiline_string = Some(delim); |
| 284 | break; |
| 285 | } |
| 286 | continue; |
| 287 | } |
| 288 | |
| 289 | // Check for numbers |
| 290 | if let Some((token, new_i)) = self.try_parse_number(&chars, i) { |
| 291 | tokens.push(token); |
| 292 | i = new_i; |
| 293 | continue; |
| 294 | } |
| 295 | |
| 296 | // Check for preprocessor directives |
| 297 | if lang.has_preprocessor && chars[i] == '#' && self.is_line_start(&chars, i) { |
| 298 | tokens.push(Token { |
| 299 | token_type: TokenType::Preprocessor, |
| 300 | start: i, |
| 301 | end: chars.len(), |
| 302 | }); |
| 303 | break; |
| 304 | } |
| 305 | |
| 306 | // Check for attributes (Rust #[], Python @) |
| 307 | if let Some((token, new_i)) = self.try_parse_attribute(lang, &chars, i) { |
| 308 | tokens.push(token); |
| 309 | i = new_i; |
| 310 | continue; |
| 311 | } |
| 312 | |
| 313 | // Check for identifiers (keywords, types, functions) |
| 314 | if chars[i].is_alphabetic() || chars[i] == '_' { |
| 315 | let start = i; |
| 316 | while i < chars.len() && (chars[i].is_alphanumeric() || chars[i] == '_') { |
| 317 | i += 1; |
| 318 | } |
| 319 | let word: String = chars[start..i].iter().collect(); |
| 320 | |
| 321 | let token_type = if lang.keywords.contains(&word.as_str()) { |
| 322 | TokenType::Keyword |
| 323 | } else if lang.types.contains(&word.as_str()) { |
| 324 | TokenType::Type |
| 325 | } else if i < chars.len() && chars[i] == '(' { |
| 326 | TokenType::Function |
| 327 | } else { |
| 328 | TokenType::Plain |
| 329 | }; |
| 330 | |
| 331 | if token_type != TokenType::Plain { |
| 332 | tokens.push(Token { |
| 333 | token_type, |
| 334 | start, |
| 335 | end: i, |
| 336 | }); |
| 337 | } |
| 338 | continue; |
| 339 | } |
| 340 | |
| 341 | // Check for operators |
| 342 | if let Some((token, new_i)) = self.try_parse_operator(lang, &chars, i) { |
| 343 | tokens.push(token); |
| 344 | i = new_i; |
| 345 | continue; |
| 346 | } |
| 347 | |
| 348 | // Check for punctuation |
| 349 | if lang.punctuation.contains(&chars[i]) { |
| 350 | tokens.push(Token { |
| 351 | token_type: TokenType::Punctuation, |
| 352 | start: i, |
| 353 | end: i + 1, |
| 354 | }); |
| 355 | i += 1; |
| 356 | continue; |
| 357 | } |
| 358 | |
| 359 | // Skip unknown character |
| 360 | i += 1; |
| 361 | } |
| 362 | |
| 363 | tokens |
| 364 | } |
| 365 | |
| 366 | fn matches_at(&self, chars: &[char], pos: usize, pattern: &str) -> bool { |
| 367 | let pattern_chars: Vec<char> = pattern.chars().collect(); |
| 368 | if pos + pattern_chars.len() > chars.len() { |
| 369 | return false; |
| 370 | } |
| 371 | for (i, &pc) in pattern_chars.iter().enumerate() { |
| 372 | if chars[pos + i] != pc { |
| 373 | return false; |
| 374 | } |
| 375 | } |
| 376 | true |
| 377 | } |
| 378 | |
| 379 | fn find_block_comment_end(&self, lang: &LanguageDef, chars: &[char], start: usize) -> Option<(usize, usize)> { |
| 380 | let end_pattern = lang.block_comment_end.as_ref()?; |
| 381 | let end_chars: Vec<char> = end_pattern.chars().collect(); |
| 382 | |
| 383 | for i in start..chars.len() { |
| 384 | if self.matches_at(chars, i, end_pattern) { |
| 385 | return Some((i, end_chars.len())); |
| 386 | } |
| 387 | } |
| 388 | None |
| 389 | } |
| 390 | |
| 391 | fn try_parse_string(&self, lang: &LanguageDef, chars: &[char], start: usize) -> Option<(Token, usize, Option<String>)> { |
| 392 | let c = chars[start]; |
| 393 | |
| 394 | // Check for string delimiters |
| 395 | if !lang.string_delimiters.contains(&c) { |
| 396 | return None; |
| 397 | } |
| 398 | |
| 399 | // Check for triple-quoted strings (Python, etc.) |
| 400 | if lang.multiline_strings { |
| 401 | let triple: String = std::iter::repeat(c).take(3).collect(); |
| 402 | if self.matches_at(chars, start, &triple) { |
| 403 | let delim_len = 3; |
| 404 | let mut i = start + delim_len; |
| 405 | |
| 406 | while i < chars.len() { |
| 407 | if self.matches_at(chars, i, &triple) { |
| 408 | return Some(( |
| 409 | Token { |
| 410 | token_type: TokenType::String, |
| 411 | start, |
| 412 | end: i + delim_len, |
| 413 | }, |
| 414 | i + delim_len, |
| 415 | None, |
| 416 | )); |
| 417 | } |
| 418 | if chars[i] == '\\' && i + 1 < chars.len() { |
| 419 | i += 2; |
| 420 | } else { |
| 421 | i += 1; |
| 422 | } |
| 423 | } |
| 424 | |
| 425 | // String continues on next line |
| 426 | return Some(( |
| 427 | Token { |
| 428 | token_type: TokenType::String, |
| 429 | start, |
| 430 | end: chars.len(), |
| 431 | }, |
| 432 | chars.len(), |
| 433 | Some(triple), |
| 434 | )); |
| 435 | } |
| 436 | } |
| 437 | |
| 438 | // Regular string |
| 439 | let mut i = start + 1; |
| 440 | while i < chars.len() { |
| 441 | if chars[i] == c { |
| 442 | return Some(( |
| 443 | Token { |
| 444 | token_type: TokenType::String, |
| 445 | start, |
| 446 | end: i + 1, |
| 447 | }, |
| 448 | i + 1, |
| 449 | None, |
| 450 | )); |
| 451 | } |
| 452 | if chars[i] == '\\' && i + 1 < chars.len() { |
| 453 | i += 2; |
| 454 | } else { |
| 455 | i += 1; |
| 456 | } |
| 457 | } |
| 458 | |
| 459 | // Unterminated string - highlight to end of line |
| 460 | Some(( |
| 461 | Token { |
| 462 | token_type: TokenType::String, |
| 463 | start, |
| 464 | end: chars.len(), |
| 465 | }, |
| 466 | chars.len(), |
| 467 | None, |
| 468 | )) |
| 469 | } |
| 470 | |
| 471 | fn find_string_end(&self, chars: &[char], start: usize, delim: &str) -> Option<usize> { |
| 472 | let mut i = start; |
| 473 | while i < chars.len() { |
| 474 | if self.matches_at(chars, i, delim) { |
| 475 | return Some(i + delim.chars().count()); |
| 476 | } |
| 477 | if chars[i] == '\\' && i + 1 < chars.len() { |
| 478 | i += 2; |
| 479 | } else { |
| 480 | i += 1; |
| 481 | } |
| 482 | } |
| 483 | None |
| 484 | } |
| 485 | |
| 486 | fn try_parse_number(&self, chars: &[char], start: usize) -> Option<(Token, usize)> { |
| 487 | let c = chars[start]; |
| 488 | |
| 489 | // Must start with digit, or . followed by digit |
| 490 | if !c.is_ascii_digit() { |
| 491 | if c == '.' && start + 1 < chars.len() && chars[start + 1].is_ascii_digit() { |
| 492 | // .5 style float |
| 493 | } else { |
| 494 | return None; |
| 495 | } |
| 496 | } |
| 497 | |
| 498 | let mut i = start; |
| 499 | let mut has_dot = c == '.'; |
| 500 | let mut has_exp = false; |
| 501 | |
| 502 | // Handle hex, octal, binary |
| 503 | if c == '0' && i + 1 < chars.len() { |
| 504 | match chars[i + 1] { |
| 505 | 'x' | 'X' => { |
| 506 | i += 2; |
| 507 | while i < chars.len() && (chars[i].is_ascii_hexdigit() || chars[i] == '_') { |
| 508 | i += 1; |
| 509 | } |
| 510 | return Some((Token { token_type: TokenType::Number, start, end: i }, i)); |
| 511 | } |
| 512 | 'o' | 'O' => { |
| 513 | i += 2; |
| 514 | while i < chars.len() && (chars[i].is_digit(8) || chars[i] == '_') { |
| 515 | i += 1; |
| 516 | } |
| 517 | return Some((Token { token_type: TokenType::Number, start, end: i }, i)); |
| 518 | } |
| 519 | 'b' | 'B' => { |
| 520 | i += 2; |
| 521 | while i < chars.len() && (chars[i] == '0' || chars[i] == '1' || chars[i] == '_') { |
| 522 | i += 1; |
| 523 | } |
| 524 | return Some((Token { token_type: TokenType::Number, start, end: i }, i)); |
| 525 | } |
| 526 | _ => {} |
| 527 | } |
| 528 | } |
| 529 | |
| 530 | // Decimal number (possibly float) |
| 531 | while i < chars.len() { |
| 532 | let ch = chars[i]; |
| 533 | if ch.is_ascii_digit() || ch == '_' { |
| 534 | i += 1; |
| 535 | } else if ch == '.' && !has_dot && !has_exp { |
| 536 | // Check it's not a method call like 5.to_string() |
| 537 | if i + 1 < chars.len() && chars[i + 1].is_ascii_digit() { |
| 538 | has_dot = true; |
| 539 | i += 1; |
| 540 | } else if i + 1 >= chars.len() { |
| 541 | has_dot = true; |
| 542 | i += 1; |
| 543 | } else { |
| 544 | break; |
| 545 | } |
| 546 | } else if (ch == 'e' || ch == 'E') && !has_exp { |
| 547 | has_exp = true; |
| 548 | i += 1; |
| 549 | if i < chars.len() && (chars[i] == '+' || chars[i] == '-') { |
| 550 | i += 1; |
| 551 | } |
| 552 | } else { |
| 553 | break; |
| 554 | } |
| 555 | } |
| 556 | |
| 557 | // Handle type suffixes (f32, i64, etc.) |
| 558 | if i < chars.len() && chars[i].is_alphabetic() { |
| 559 | let suffix_start = i; |
| 560 | while i < chars.len() && (chars[i].is_alphanumeric() || chars[i] == '_') { |
| 561 | i += 1; |
| 562 | } |
| 563 | // Common numeric suffixes |
| 564 | let suffix: String = chars[suffix_start..i].iter().collect(); |
| 565 | let valid_suffixes = ["f32", "f64", "i8", "i16", "i32", "i64", "i128", "isize", |
| 566 | "u8", "u16", "u32", "u64", "u128", "usize", "f", "d", "l", "L"]; |
| 567 | if !valid_suffixes.contains(&suffix.as_str()) { |
| 568 | i = suffix_start; // Not a valid suffix, rollback |
| 569 | } |
| 570 | } |
| 571 | |
| 572 | if i > start { |
| 573 | Some((Token { token_type: TokenType::Number, start, end: i }, i)) |
| 574 | } else { |
| 575 | None |
| 576 | } |
| 577 | } |
| 578 | |
| 579 | fn try_parse_operator(&self, lang: &LanguageDef, chars: &[char], start: usize) -> Option<(Token, usize)> { |
| 580 | // Try longer operators first |
| 581 | for &op in &lang.operators { |
| 582 | if self.matches_at(chars, start, op) { |
| 583 | let len = op.chars().count(); |
| 584 | return Some(( |
| 585 | Token { |
| 586 | token_type: TokenType::Operator, |
| 587 | start, |
| 588 | end: start + len, |
| 589 | }, |
| 590 | start + len, |
| 591 | )); |
| 592 | } |
| 593 | } |
| 594 | None |
| 595 | } |
| 596 | |
| 597 | fn try_parse_attribute(&self, lang: &LanguageDef, chars: &[char], start: usize) -> Option<(Token, usize)> { |
| 598 | // Rust attributes: #[...] or #![...] |
| 599 | if lang.name == "Rust" && chars[start] == '#' { |
| 600 | let mut i = start + 1; |
| 601 | if i < chars.len() && chars[i] == '!' { |
| 602 | i += 1; |
| 603 | } |
| 604 | if i < chars.len() && chars[i] == '[' { |
| 605 | let attr_start = start; |
| 606 | let mut bracket_depth = 1; |
| 607 | i += 1; |
| 608 | while i < chars.len() && bracket_depth > 0 { |
| 609 | match chars[i] { |
| 610 | '[' => bracket_depth += 1, |
| 611 | ']' => bracket_depth -= 1, |
| 612 | _ => {} |
| 613 | } |
| 614 | i += 1; |
| 615 | } |
| 616 | return Some(( |
| 617 | Token { |
| 618 | token_type: TokenType::Attribute, |
| 619 | start: attr_start, |
| 620 | end: i, |
| 621 | }, |
| 622 | i, |
| 623 | )); |
| 624 | } |
| 625 | } |
| 626 | |
| 627 | // Python decorators: @name |
| 628 | if lang.name == "Python" && chars[start] == '@' { |
| 629 | let mut i = start + 1; |
| 630 | while i < chars.len() && (chars[i].is_alphanumeric() || chars[i] == '_' || chars[i] == '.') { |
| 631 | i += 1; |
| 632 | } |
| 633 | if i > start + 1 { |
| 634 | return Some(( |
| 635 | Token { |
| 636 | token_type: TokenType::Attribute, |
| 637 | start, |
| 638 | end: i, |
| 639 | }, |
| 640 | i, |
| 641 | )); |
| 642 | } |
| 643 | } |
| 644 | |
| 645 | None |
| 646 | } |
| 647 | |
| 648 | fn is_line_start(&self, chars: &[char], pos: usize) -> bool { |
| 649 | for i in 0..pos { |
| 650 | if !chars[i].is_whitespace() { |
| 651 | return false; |
| 652 | } |
| 653 | } |
| 654 | true |
| 655 | } |
| 656 | } |
| 657 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a highlighter preconfigured for Rust.
    fn rust_highlighter() -> Highlighter {
        let mut hl = Highlighter::new();
        hl.set_language(Language::Rust);
        hl
    }

    #[test]
    fn test_rust_keywords() {
        let hl = rust_highlighter();
        let mut state = HighlightState::default();

        let tokens = hl.tokenize_line("let x = 42;", &mut state);
        // "let" must be a keyword and "42" a number literal
        assert!(tokens.iter().any(|t| t.token_type == TokenType::Keyword));
        assert!(tokens.iter().any(|t| t.token_type == TokenType::Number));
    }

    #[test]
    fn test_string_parsing() {
        let hl = rust_highlighter();
        let mut state = HighlightState::default();

        let tokens = hl.tokenize_line(r#"let s = "hello";"#, &mut state);
        assert!(tokens.iter().any(|t| t.token_type == TokenType::String));
    }

    #[test]
    fn test_comment_parsing() {
        let hl = rust_highlighter();
        let mut state = HighlightState::default();

        // A full-line comment produces exactly one token spanning the line
        let tokens = hl.tokenize_line("// this is a comment", &mut state);
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].token_type, TokenType::Comment);
    }
}
| 694 |