//
// https://vt100.net/emu/dec_ansi_parser
//
// The parser is heavily inspired by the vte (https://crates.io/crates/vte) crate.
// We tried to use that crate, but it doesn't work for the opposite direction
// (terminal -> sequence), because there are a couple of exceptions we have to handle
// and it doesn't make much sense to add them to the vte crate. An example is the Esc
// key, where we need to know whether additional input is available or not; based on
// that, the Esc char is either dispatched immediately (the user hit just the Esc key)
// or treated as the start of an escape/csi/... sequence.
//
const MAX_PARAMETERS: usize = 30;
const DEFAULT_PARAMETER_VALUE: u64 = 0;
const MAX_UTF8_CODE_POINTS: usize = 4;

/// A parser engine state.
///
/// All these variant names come from the
/// [A parser for DEC’s ANSI-compatible video terminals](https://vt100.net/emu/dec_ansi_parser)
/// description.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum State {
    /// Initial state.
    Ground,
    /// Escape sequence started.
    ///
    /// `Esc` received with a flag that there's more data available.
    Escape,
    /// Escape sequence and we're collecting intermediates.
    ///
    /// # Notes
    ///
    /// This implementation doesn't collect intermediates. It just handles the state
    /// to distinguish between (im)proper sequences.
    EscapeIntermediate,
    /// CSI sequence started.
    ///
    /// `Esc` followed by the `[` received.
    CsiEntry,
    /// CSI sequence should be consumed, but not dispatched.
    CsiIgnore,
    /// CSI sequence and we're collecting parameters.
    CsiParameter,
    /// CSI sequence and we're collecting intermediates.
    ///
    /// # Notes
    ///
    /// This implementation doesn't collect intermediates. It just handles the state
    /// to distinguish between (im)proper sequences.
    CsiIntermediate,
    /// Possible UTF-8 sequence and we're collecting UTF-8 code points.
    Utf8,
}

/// Receiver of everything the [`Engine`] recognizes in the byte stream.
pub(crate) trait Provide {
    /// Called with a single dispatched character (printable, control, decoded UTF-8
    /// character or a standalone `Esc`).
    fn provide_char(&mut self, ch: char);

    /// Called with the final character of a recognized escape sequence.
    fn provide_esc_sequence(&mut self, ch: char);

    /// Called when a CSI sequence is complete.
    ///
    /// * `parameters` - the collected parameters (at most `MAX_PARAMETERS` of them),
    /// * `ignored_count` - number of parameters dropped because the limit was exceeded,
    /// * `ch` - the final character of the sequence.
    fn provide_csi_sequence(&mut self, parameters: &[u64], ignored_count: usize, ch: char);
}

/// A byte-at-a-time state machine recognizing characters, escape and CSI sequences.
pub(crate) struct Engine {
    /// Storage for the parameters of the CSI sequence being parsed.
    parameters: [u64; MAX_PARAMETERS],
    /// Number of valid items in `parameters`.
    parameters_count: usize,
    /// Value of the parameter currently being accumulated.
    parameter: u64,
    /// Number of parameters that did not fit into `parameters`.
    ignored_parameters_count: usize,
    /// Current state of the machine.
    state: State,
    /// Bytes of the UTF-8 character being collected.
    utf8_points: [u8; MAX_UTF8_CODE_POINTS],
    /// Number of valid bytes in `utf8_points`.
    utf8_points_count: usize,
    /// Total number of bytes announced by the UTF-8 lead byte.
    utf8_points_expected_count: usize,
}

impl Default for Engine {
    fn default() -> Self {
        Engine {
            parameters: [DEFAULT_PARAMETER_VALUE; MAX_PARAMETERS],
            parameters_count: 0,
            parameter: DEFAULT_PARAMETER_VALUE,
            ignored_parameters_count: 0,
            state: State::Ground,
            utf8_points: [0; MAX_UTF8_CODE_POINTS],
            utf8_points_count: 0,
            utf8_points_expected_count: 0,
        }
    }
}

impl Engine {
    /// Switches the machine to `state`.
    ///
    /// Entering `Ground` or `Escape` always starts from a clean slate: collected
    /// parameters and partially collected UTF-8 bytes are discarded. `Escape` must be
    /// included because an `Esc` byte can interrupt a sequence in any state (see
    /// `handle_possible_esc`); without the reset, the leftovers of the interrupted CSI
    /// sequence would be dispatched as part of the next one.
    fn set_state(&mut self, state: State) {
        match state {
            State::Ground | State::Escape => {
                self.parameters_count = 0;
                self.parameter = DEFAULT_PARAMETER_VALUE;
                self.ignored_parameters_count = 0;
                self.utf8_points_count = 0;
                self.utf8_points_expected_count = 0;
            }
            _ => {}
        }
        self.state = state;
    }

    /// Finishes the parameter being accumulated.
    ///
    /// The value is appended to `parameters` if there's room left, otherwise it is only
    /// counted as ignored. The accumulator is reset to the default value either way.
    fn store_parameter(&mut self) {
        if self.parameters_count < MAX_PARAMETERS {
            self.parameters[self.parameters_count] = self.parameter;
            self.parameters_count += 1;
        } else {
            self.ignored_parameters_count += 1;
        }
        self.parameter = DEFAULT_PARAMETER_VALUE;
    }

    /// Hands the collected CSI sequence with the final character `ch` over to the
    /// provider and returns to the `Ground` state.
    fn dispatch_csi(&mut self, provider: &mut dyn Provide, ch: char) {
        provider.provide_csi_sequence(
            &self.parameters[..self.parameters_count],
            self.ignored_parameters_count,
            ch,
        );
        self.set_state(State::Ground);
    }

    /// Handles the `Esc` (0x1B) byte in any state.
    ///
    /// `more` tells whether additional input is available; it decides whether the byte
    /// is the Esc key itself or the start of a sequence.
    ///
    /// Returns `true` if `byte` was `Esc` (and thus consumed), `false` otherwise.
    fn handle_possible_esc(&mut self, provider: &mut dyn Provide, byte: u8, more: bool) -> bool {
        if byte != 0x1B {
            return false;
        }

        match (self.state, more) {
            // More input means possible Esc sequence, just switch state and wait
            (State::Ground, true) => self.set_state(State::Escape),

            // No more input means Esc key, dispatch it
            (State::Ground, false) => provider.provide_char('\x1B'),

            // More input means possible Esc sequence, dispatch the previous Esc char
            (State::Escape, true) => provider.provide_char('\x1B'),

            // No more input means Esc key, dispatch the previous & current Esc char
            (State::Escape, false) => {
                provider.provide_char('\x1B');
                provider.provide_char('\x1B');
                self.set_state(State::Ground);
            }

            // Discard any state
            // More input means possible Esc sequence
            (_, true) => self.set_state(State::Escape),

            // Discard any state
            // No more input means Esc key, dispatch it
            (_, false) => {
                provider.provide_char('\x1B');
                self.set_state(State::Ground);
            }
        }

        true
    }

    /// Handles a byte that may be an ASCII character or a UTF-8 lead byte.
    ///
    /// ASCII bytes are dispatched right away. A lead byte of a 2, 3 or 4 byte UTF-8
    /// character is remembered and the machine switches to the `Utf8` state.
    ///
    /// Returns `true` if the byte was consumed, `false` if it is neither ASCII nor
    /// a recognized lead byte.
    fn handle_possible_utf8_code_points(&mut self, provider: &mut dyn Provide, byte: u8) -> bool {
        let expected_count = if byte & 0b1000_0000 == 0b0000_0000 {
            provider.provide_char(byte as char);
            return true;
        } else if byte & 0b1110_0000 == 0b1100_0000 {
            2
        } else if byte & 0b1111_0000 == 0b1110_0000 {
            3
        } else if byte & 0b1111_1000 == 0b1111_0000 {
            4
        } else {
            return false;
        };

        self.utf8_points_count = 1;
        self.utf8_points[0] = byte;
        self.utf8_points_expected_count = expected_count;
        self.set_state(State::Utf8);
        true
    }

    /// Handles a byte in the `Ground` state.
    fn advance_ground_state(&mut self, provider: &mut dyn Provide, byte: u8) {
        // Every byte below 0x80 is dispatched by the helper and every UTF-8 lead byte
        // switches the machine to the `Utf8` state. Anything the helper refuses (stray
        // continuation bytes, 0xF8..=0xFF) can't start a character and is dropped.
        let _consumed = self.handle_possible_utf8_code_points(provider, byte);
    }

    /// Handles a byte in the `Escape` state.
    fn advance_escape_state(&mut self, provider: &mut dyn Provide, byte: u8) {
        match byte {
            // `advance` consumes every Esc byte in `handle_possible_esc`
            0x1B => unreachable!(),

            // Intermediate bytes to collect
            0x20..=0x2F => self.set_state(State::EscapeIntermediate),

            // Escape followed by '[' (0x5B)
            // -> CSI sequence start
            0x5B => self.set_state(State::CsiEntry),

            // Escape sequence final character
            0x30..=0x4F | 0x51..=0x57 | 0x59 | 0x5A | 0x5C | 0x60..=0x7E => {
                provider.provide_esc_sequence(byte as char);
                self.set_state(State::Ground);
            }

            // Execute
            0x00..=0x17 | 0x19 | 0x1C..=0x1F => provider.provide_char(byte as char),

            // TODO Does it mean we should ignore the whole sequence?
            // Ignore
            0x7F => {}

            // Other bytes are considered as invalid -> cancel whatever we have
            _ => self.set_state(State::Ground),
        };
    }

    /// Handles a byte in the `EscapeIntermediate` state.
    fn advance_escape_intermediate_state(&mut self, provider: &mut dyn Provide, byte: u8) {
        match byte {
            // `advance` consumes every Esc byte in `handle_possible_esc`
            0x1B => unreachable!(),

            // Intermediate bytes to collect
            0x20..=0x2F => {}

            // Escape followed by '[' (0x5B)
            // -> CSI sequence start
            0x5B => self.set_state(State::CsiEntry),

            // Escape sequence final character
            0x30..=0x5A | 0x5C..=0x7E => {
                provider.provide_esc_sequence(byte as char);
                self.set_state(State::Ground);
            }

            // Execute
            0x00..=0x17 | 0x19 | 0x1C..=0x1F => provider.provide_char(byte as char),

            // TODO Does it mean we should ignore the whole sequence?
            // Ignore
            0x7F => {}

            // Other bytes are considered as invalid -> cancel whatever we have
            _ => self.set_state(State::Ground),
        };
    }

    /// Handles a byte in the `CsiEntry` state.
    fn advance_csi_entry_state(&mut self, provider: &mut dyn Provide, byte: u8) {
        match byte {
            // `advance` consumes every Esc byte in `handle_possible_esc`
            0x1B => unreachable!(),

            // Semicolon = parameter delimiter
            0x3B => {
                self.store_parameter();
                self.set_state(State::CsiParameter);
            }

            // '0' ..= '9' = parameter value
            0x30..=0x39 => {
                self.parameter = (byte as u64) - 0x30;
                self.set_state(State::CsiParameter);
            }

            0x3A => self.set_state(State::CsiIgnore),

            // CSI sequence final character
            // -> dispatch CSI sequence
            0x40..=0x7E => self.dispatch_csi(provider, byte as char),

            // Execute
            0x00..=0x17 | 0x19 | 0x1C..=0x1F => provider.provide_char(byte as char),

            // TODO Does it mean we should ignore the whole sequence?
            // Ignore
            0x7F => {}

            // Collect rest as parameters
            _ => {
                self.parameter = byte as u64;
                self.store_parameter();
            }
        };
    }

    /// Handles a byte in the `CsiIgnore` state.
    fn advance_csi_ignore_state(&mut self, provider: &mut dyn Provide, byte: u8) {
        match byte {
            // `advance` consumes every Esc byte in `handle_possible_esc`
            0x1B => unreachable!(),

            // Execute
            0x00..=0x17 | 0x19 | 0x1C..=0x1F => provider.provide_char(byte as char),

            // TODO Does it mean we should ignore the whole sequence?
            // Ignore
            0x20..=0x3F | 0x7F => {}

            // CSI sequence final character
            // -> the ignored sequence ends here, nothing is dispatched
            0x40..=0x7E => self.set_state(State::Ground),

            // Other bytes are considered as invalid -> cancel whatever we have
            _ => self.set_state(State::Ground),
        };
    }

    /// Handles a byte in the `CsiParameter` state.
    fn advance_csi_parameter_state(&mut self, provider: &mut dyn Provide, byte: u8) {
        match byte {
            // `advance` consumes every Esc byte in `handle_possible_esc`
            0x1B => unreachable!(),

            // '0' ..= '9' = parameter value
            // Saturating arithmetic -> an overlong number can't overflow
            0x30..=0x39 => {
                self.parameter = self
                    .parameter
                    .saturating_mul(10)
                    .saturating_add((byte as u64) - 0x30);
            }

            // Semicolon = parameter delimiter
            0x3B => self.store_parameter(),

            // CSI sequence final character
            // -> dispatch CSI sequence
            0x40..=0x7E => {
                self.store_parameter();
                self.dispatch_csi(provider, byte as char);
            }

            // Intermediates to collect
            0x20..=0x2F => {
                self.store_parameter();
                self.set_state(State::CsiIntermediate);
            }

            // Ignore
            0x3A | 0x3C..=0x3F => self.set_state(State::CsiIgnore),

            // Execute
            0x00..=0x17 | 0x19 | 0x1C..=0x1F => provider.provide_char(byte as char),

            // TODO Does it mean we should ignore the whole sequence?
            // Ignore
            0x7F => {}

            // Other bytes are considered as invalid -> cancel whatever we have
            _ => self.set_state(State::Ground),
        };
    }

    /// Handles a byte in the `CsiIntermediate` state.
    fn advance_csi_intermediate_state(&mut self, provider: &mut dyn Provide, byte: u8) {
        match byte {
            // `advance` consumes every Esc byte in `handle_possible_esc`
            0x1B => unreachable!(),

            // Intermediates to collect
            0x20..=0x2F => {}

            // CSI sequence final character
            // -> dispatch CSI sequence
            0x40..=0x7E => self.dispatch_csi(provider, byte as char),

            // Execute
            0x00..=0x17 | 0x19 | 0x1C..=0x1F => provider.provide_char(byte as char),

            // TODO Does it mean we should ignore the whole sequence?
            // Ignore
            0x7F => {}

            // Other bytes are considered as invalid -> cancel whatever we have
            _ => self.set_state(State::Ground),
        }
    }

    /// Handles a byte in the `Utf8` state.
    ///
    /// Continuation bytes are collected until the number of bytes announced by the lead
    /// byte is reached; the character is then dispatched if the bytes form valid UTF-8.
    /// Any other byte cancels the character and the byte itself is dropped.
    fn advance_utf8_state(&mut self, provider: &mut dyn Provide, byte: u8) {
        if byte & 0b1100_0000 != 0b1000_0000 {
            self.set_state(State::Ground);
            return;
        }

        self.utf8_points[self.utf8_points_count] = byte;
        self.utf8_points_count += 1;

        if self.utf8_points_count == self.utf8_points_expected_count {
            if let Some(ch) = std::str::from_utf8(&self.utf8_points[..self.utf8_points_count])
                .ok()
                .and_then(|s| s.chars().next())
            {
                provider.provide_char(ch);
            }
            self.set_state(State::Ground);
        }
    }

    /// Feeds a single byte into the machine.
    ///
    /// * `provider` - receiver of whatever the byte completes,
    /// * `byte` - the input byte,
    /// * `more` - `true` if additional input is available after this byte; used to
    ///   distinguish the Esc key from the start of an escape sequence.
    pub(crate) fn advance(&mut self, provider: &mut dyn Provide, byte: u8, more: bool) {
        // eprintln!("advance: {:?} {} {}", self.state, byte, more);

        if self.handle_possible_esc(provider, byte, more) {
            return;
        }

        match self.state {
            State::Ground => self.advance_ground_state(provider, byte),
            State::Escape => self.advance_escape_state(provider, byte),
            State::EscapeIntermediate => self.advance_escape_intermediate_state(provider, byte),
            State::CsiEntry => self.advance_csi_entry_state(provider, byte),
            State::CsiIgnore => self.advance_csi_ignore_state(provider, byte),
            State::CsiParameter => self.advance_csi_parameter_state(provider, byte),
            State::CsiIntermediate => self.advance_csi_intermediate_state(provider, byte),
            State::Utf8 => self.advance_utf8_state(provider, byte),
        };
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn esc_char() {
        let mut engine = Engine::default();
        let mut provider = CharProvider::default();

        // No more input means that the Esc character should be dispatched immediately
        engine.advance(&mut provider, 0x1B, false);
        assert_eq!(provider.chars, &['\x1B']);

        // There's more input so the machine should wait before dispatching Esc character
        engine.advance(&mut provider, 0x1B, true);
        assert_eq!(provider.chars, &['\x1B']);

        // Another Esc character, but no more input, machine should dispatch the postponed Esc
        // character and the new one too.
        engine.advance(&mut provider, 0x1B, false);
        assert_eq!(provider.chars, &['\x1B', '\x1B', '\x1B']);
    }

    #[test]
    fn esc_without_intermediates() {
        let mut engine = Engine::default();
        let mut provider = EscProvider::default();

        let input = b"\x1B0\x1B~";
        advance(&mut engine, &mut provider, input, false);

        assert_eq!(provider.chars.len(), 2);

        assert_eq!(provider.chars[0], '0');

        assert_eq!(provider.chars[1], '~');
    }

    #[test]
    fn csi_without_parameters() {
        let mut engine = Engine::default();
        let mut provider = CsiProvider::default();

        let input = b"\x1B\x5Bm";
        advance(&mut engine, &mut provider, input, false);

        assert_eq!(provider.parameters.len(), 1);
        assert!(provider.parameters[0].is_empty());
        assert_eq!(provider.chars.len(), 1);
        assert_eq!(provider.chars[0], 'm');
    }

    #[test]
    fn csi_with_two_default_parameters() {
        let mut engine = Engine::default();
        let mut provider = CsiProvider::default();

        let input = b"\x1B\x5B;m";
        advance(&mut engine, &mut provider, input, false);

        assert_eq!(provider.parameters.len(), 1);
        assert_eq!(
            provider.parameters[0],
            &[DEFAULT_PARAMETER_VALUE, DEFAULT_PARAMETER_VALUE]
        );
        assert_eq!(provider.chars.len(), 1);
        assert_eq!(provider.chars[0], 'm');
    }

    #[test]
    fn csi_with_trailing_semicolon() {
        let mut engine = Engine::default();
        let mut provider = CsiProvider::default();

        let input = b"\x1B\x5B123;m";
        advance(&mut engine, &mut provider, input, false);

        assert_eq!(provider.parameters.len(), 1);
        assert_eq!(provider.parameters[0], &[123, DEFAULT_PARAMETER_VALUE]);
        assert_eq!(provider.chars.len(), 1);
        assert_eq!(provider.chars[0], 'm');
    }

    #[test]
    fn csi_max_parameters() {
        let mut engine = Engine::default();
        let mut provider = CsiProvider::default();

        let input = b"\x1B\x5B1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;17;18;19;20;21;22;23;24;25;26;27;28;29;30m";
        advance(&mut engine, &mut provider, input, false);

        assert_eq!(provider.parameters.len(), 1);
        assert_eq!(provider.parameters[0].len(), MAX_PARAMETERS);
        assert_eq!(
            provider.parameters[0],
            &[
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                24, 25, 26, 27, 28, 29, 30
            ]
        );
        assert_eq!(provider.chars.len(), 1);
        assert_eq!(provider.chars[0], 'm');
    }

    #[test]
    fn csi_interrupted_by_esc_does_not_leak_parameters() {
        let mut engine = Engine::default();
        let mut provider = CsiProvider::default();

        // The first CSI sequence is cut short by the Esc starting the second one
        let input = b"\x1B\x5B1;2\x1B\x5Bm";
        advance(&mut engine, &mut provider, input, false);

        assert_eq!(provider.parameters.len(), 1);
        assert!(provider.parameters[0].is_empty());
        assert_eq!(provider.chars.len(), 1);
        assert_eq!(provider.chars[0], 'm');
    }

    #[test]
    fn test_parse_utf8_character() {
        let mut engine = Engine::default();
        let mut provider = CharProvider::default();

        advance(&mut engine, &mut provider, &[b'a'], false);
        assert_eq!(provider.chars.len(), 1);
        assert_eq!(provider.chars[0], 'a');

        advance(&mut engine, &mut provider, &[0xC3, 0xB1], false);
        assert_eq!(provider.chars.len(), 2);
        assert_eq!(provider.chars[1], 'ñ');

        advance(&mut engine, &mut provider, &[0xE2, 0x81, 0xA1], false);
        assert_eq!(provider.chars.len(), 3);
        assert_eq!(provider.chars[2], '\u{2061}');

        // F0 90 8C BC is the UTF-8 encoding of U+1033C
        advance(&mut engine, &mut provider, &[0xF0, 0x90, 0x8C, 0xBC], false);
        assert_eq!(provider.chars.len(), 4);
        assert_eq!(provider.chars[3], '\u{1033C}');
    }

    /// Feeds all `bytes` into the `engine`; `more` applies to the last byte only, every
    /// preceding byte is followed by more input by definition.
    fn advance(engine: &mut Engine, provider: &mut dyn Provide, bytes: &[u8], more: bool) {
        let len = bytes.len();

        for (i, byte) in bytes.iter().enumerate() {
            engine.advance(provider, *byte, i < len - 1 || more);
        }
    }

    /// Records dispatched characters only.
    #[derive(Default)]
    struct CharProvider {
        chars: Vec<char>,
    }

    impl Provide for CharProvider {
        fn provide_char(&mut self, ch: char) {
            self.chars.push(ch);
        }

        fn provide_esc_sequence(&mut self, _ch: char) {}

        fn provide_csi_sequence(&mut self, _parameters: &[u64], _ignored_count: usize, _ch: char) {}
    }

    /// Records dispatched CSI sequences only.
    #[derive(Default)]
    struct CsiProvider {
        parameters: Vec<Vec<u64>>,
        chars: Vec<char>,
    }

    impl Provide for CsiProvider {
        fn provide_char(&mut self, _ch: char) {}

        fn provide_esc_sequence(&mut self, _ch: char) {}

        fn provide_csi_sequence(&mut self, parameters: &[u64], _ignored_count: usize, ch: char) {
            self.parameters.push(parameters.to_vec());
            self.chars.push(ch);
        }
    }

    /// Records dispatched escape sequences only.
    #[derive(Default)]
    struct EscProvider {
        chars: Vec<char>,
    }

    impl Provide for EscProvider {
        fn provide_char(&mut self, _ch: char) {}

        fn provide_esc_sequence(&mut self, ch: char) {
            self.chars.push(ch);
        }

        fn provide_csi_sequence(&mut self, _parameters: &[u64], _ignored_count: usize, _ch: char) {}
    }
}