1 //! Contains an implementation of pull-based XML parser. 2 3 use crate::common::{is_xml10_char, is_xml11_char, is_xml11_char_not_restricted, is_name_char, is_name_start_char, is_whitespace_char}; 4 use crate::common::{Position, TextPosition, XmlVersion}; 5 use crate::name::OwnedName; 6 use crate::namespace::NamespaceStack; 7 use crate::reader::config::ParserConfig2; 8 use crate::reader::error::SyntaxError; 9 use crate::reader::events::XmlEvent; 10 use crate::reader::indexset::AttributesSet; 11 use crate::reader::lexer::{Lexer, Token}; 12 use super::{Error, ErrorKind}; 13 14 use std::collections::HashMap; 15 use std::io::Read; 16 17 macro_rules! gen_takes( 18 ($($field:ident -> $method:ident, $t:ty, $def:expr);+) => ( 19 $( 20 impl MarkupData { 21 #[inline] 22 #[allow(clippy::mem_replace_option_with_none)] 23 fn $method(&mut self) -> $t { 24 std::mem::replace(&mut self.$field, $def) 25 } 26 } 27 )+ 28 ) 29 ); 30 31 gen_takes!( 32 name -> take_name, String, String::new(); 33 ref_data -> take_ref_data, String, String::new(); 34 35 encoding -> take_encoding, Option<String>, None; 36 37 element_name -> take_element_name, Option<OwnedName>, None; 38 39 attr_name -> take_attr_name, Option<OwnedName>, None; 40 attributes -> take_attributes, AttributesSet, AttributesSet::new() 41 ); 42 43 mod inside_cdata; 44 mod inside_closing_tag_name; 45 mod inside_comment; 46 mod inside_declaration; 47 mod inside_doctype; 48 mod inside_opening_tag; 49 mod inside_processing_instruction; 50 mod inside_reference; 51 mod outside_tag; 52 53 static DEFAULT_VERSION: XmlVersion = XmlVersion::Version10; 54 static DEFAULT_STANDALONE: Option<bool> = None; 55 56 type ElementStack = Vec<OwnedName>; 57 pub type Result = super::Result<XmlEvent>; 58 59 /// Pull-based XML parser. 60 pub(crate) struct PullParser { 61 config: ParserConfig2, 62 lexer: Lexer, 63 st: State, 64 state_after_reference: State, 65 buf: String, 66 67 /// From DTD internal subset 68 entities: HashMap<String, String>, 69 70 nst: NamespaceStack, 71 72 data: MarkupData, 73 final_result: Option<Result>, 74 next_event: Option<Result>, 75 est: ElementStack, 76 pos: Vec<TextPosition>, 77 78 encountered: Encountered, 79 inside_whitespace: bool, 80 read_prefix_separator: bool, 81 pop_namespace: bool, 82 } 83 84 // Keeps track when XML declaration can happen 85 #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] 86 enum Encountered { 87 None = 0, 88 AnyChars, // whitespace before <?xml is not allowed 89 Declaration, 90 Comment, 91 Doctype, 92 Element, 93 } 94 95 impl PullParser { 96 /// Returns a new parser using the given config. 97 #[inline] new(config: impl Into<ParserConfig2>) -> PullParser98 pub fn new(config: impl Into<ParserConfig2>) -> PullParser { 99 let config = config.into(); 100 Self::new_with_config2(config) 101 } 102 103 #[inline] new_with_config2(config: ParserConfig2) -> PullParser104 fn new_with_config2(config: ParserConfig2) -> PullParser { 105 let mut lexer = Lexer::new(&config); 106 if let Some(enc) = config.override_encoding { 107 lexer.set_encoding(enc); 108 } 109 110 let mut pos = Vec::with_capacity(16); 111 pos.push(TextPosition::new()); 112 113 PullParser { 114 config, 115 lexer, 116 st: State::DocumentStart, 117 state_after_reference: State::OutsideTag, 118 buf: String::new(), 119 entities: HashMap::new(), 120 nst: NamespaceStack::default(), 121 122 data: MarkupData { 123 name: String::new(), 124 version: None, 125 encoding: None, 126 standalone: None, 127 ref_data: String::new(), 128 element_name: None, 129 quote: None, 130 attr_name: None, 131 attributes: AttributesSet::new(), 132 }, 133 final_result: None, 134 next_event: None, 135 est: Vec::new(), 136 pos, 137 138 encountered: Encountered::None, 139 inside_whitespace: true, 140 read_prefix_separator: false, 141 pop_namespace: false, 142 } 143 } 144 145 /// Checks if this parser ignores the end of stream errors. is_ignoring_end_of_stream(&self) -> bool146 pub fn is_ignoring_end_of_stream(&self) -> bool { self.config.c.ignore_end_of_stream } 147 148 #[inline(never)] set_encountered(&mut self, new_encounter: Encountered) -> Option<Result>149 fn set_encountered(&mut self, new_encounter: Encountered) -> Option<Result> { 150 if new_encounter <= self.encountered { 151 return None; 152 } 153 let prev_enc = self.encountered; 154 self.encountered = new_encounter; 155 156 // If declaration was not parsed and we have encountered an element, 157 // emit this declaration as the next event. 158 if prev_enc == Encountered::None { 159 self.push_pos(); 160 Some(Ok(XmlEvent::StartDocument { 161 version: DEFAULT_VERSION, 162 encoding: self.lexer.encoding().to_string(), 163 standalone: DEFAULT_STANDALONE, 164 })) 165 } else { 166 None 167 } 168 } 169 } 170 171 impl Position for PullParser { 172 /// Returns the position of the last event produced by the parser 173 #[inline] position(&self) -> TextPosition174 fn position(&self) -> TextPosition { 175 self.pos[0] 176 } 177 } 178 179 #[derive(Copy, Clone, PartialEq)] 180 pub enum State { 181 OutsideTag, 182 InsideOpeningTag(OpeningTagSubstate), 183 InsideClosingTag(ClosingTagSubstate), 184 InsideProcessingInstruction(ProcessingInstructionSubstate), 185 InsideComment, 186 InsideCData, 187 InsideDeclaration(DeclarationSubstate), 188 InsideDoctype(DoctypeSubstate), 189 InsideReference, 190 DocumentStart, 191 } 192 193 #[derive(Copy, Clone, PartialEq)] 194 pub enum DoctypeSubstate { 195 Outside, 196 String, 197 InsideName, 198 BeforeEntityName, 199 EntityName, 200 BeforeEntityValue, 201 EntityValue, 202 NumericReferenceStart, 203 NumericReference, 204 /// expansion 205 PEReferenceInValue, 206 PEReferenceInDtd, 207 /// name definition 208 PEReferenceDefinitionStart, 209 PEReferenceDefinition, 210 SkipDeclaration, 211 Comment, 212 } 213 214 #[derive(Copy, Clone, PartialEq)] 215 pub enum OpeningTagSubstate { 216 InsideName, 217 218 InsideTag, 219 220 InsideAttributeName, 221 AfterAttributeName, 222 223 InsideAttributeValue, 224 AfterAttributeValue, 225 } 226 227 #[derive(Copy, Clone, PartialEq)] 228 pub enum ClosingTagSubstate { 229 CTInsideName, 230 CTAfterName, 231 } 232 233 #[derive(Copy, Clone, PartialEq)] 234 pub enum ProcessingInstructionSubstate { 235 PIInsideName, 236 PIInsideData, 237 } 238 239 #[derive(Copy, Clone, PartialEq)] 240 pub enum DeclarationSubstate { 241 BeforeVersion, 242 InsideVersion, 243 AfterVersion, 244 245 InsideVersionValue, 246 AfterVersionValue, 247 248 BeforeEncoding, 249 InsideEncoding, 250 AfterEncoding, 251 252 InsideEncodingValue, 253 AfterEncodingValue, 254 255 BeforeStandaloneDecl, 256 InsideStandaloneDecl, 257 AfterStandaloneDecl, 258 259 InsideStandaloneDeclValue, 260 AfterStandaloneDeclValue, 261 } 262 263 #[derive(PartialEq)] 264 enum QualifiedNameTarget { 265 AttributeNameTarget, 266 OpeningTagNameTarget, 267 ClosingTagNameTarget, 268 } 269 270 #[derive(Copy, Clone, PartialEq, Eq)] 271 enum QuoteToken { 272 SingleQuoteToken, 273 DoubleQuoteToken, 274 } 275 276 impl QuoteToken { from_token(t: &Token) -> QuoteToken277 fn from_token(t: &Token) -> QuoteToken { 278 match *t { 279 Token::SingleQuote => QuoteToken::SingleQuoteToken, 280 Token::DoubleQuote => QuoteToken::DoubleQuoteToken, 281 _ => panic!("Unexpected token: {t}"), 282 } 283 } 284 as_token(self) -> Token285 fn as_token(self) -> Token { 286 match self { 287 QuoteToken::SingleQuoteToken => Token::SingleQuote, 288 QuoteToken::DoubleQuoteToken => Token::DoubleQuote, 289 } 290 } 291 } 292 293 struct MarkupData { 294 name: String, // used for processing instruction name 295 ref_data: String, // used for reference content 296 297 version: Option<XmlVersion>, // used for XML declaration version 298 encoding: Option<String>, // used for XML declaration encoding 299 standalone: Option<bool>, // used for XML declaration standalone parameter 300 301 element_name: Option<OwnedName>, // used for element name 302 303 quote: Option<QuoteToken>, // used to hold opening quote for attribute value 304 attr_name: Option<OwnedName>, // used to hold attribute name 305 attributes: AttributesSet, // used to hold all accumulated attributes 306 } 307 308 impl PullParser { 309 /// Returns next event read from the given buffer. 310 /// 311 /// This method should be always called with the same buffer. If you call it 312 /// providing different buffers each time, the result will be undefined. next<R: Read>(&mut self, r: &mut R) -> Result313 pub fn next<R: Read>(&mut self, r: &mut R) -> Result { 314 if let Some(ref ev) = self.final_result { 315 return ev.clone(); 316 } 317 318 if let Some(ev) = self.next_event.take() { 319 return ev; 320 } 321 322 if self.pop_namespace { 323 self.pop_namespace = false; 324 self.nst.pop(); 325 } 326 327 loop { 328 debug_assert!(self.next_event.is_none()); 329 debug_assert!(!self.pop_namespace); 330 331 // While lexer gives us Ok(maybe_token) -- we loop. 332 // Upon having a complete XML-event -- we return from the whole function. 333 match self.lexer.next_token(r) { 334 Ok(Some(token)) => { 335 match self.dispatch_token(token) { 336 None => {} // continue 337 Some(Ok(xml_event)) => { 338 self.next_pos(); 339 return Ok(xml_event) 340 }, 341 Some(Err(xml_error)) => { 342 self.next_pos(); 343 return self.set_final_result(Err(xml_error)) 344 }, 345 } 346 }, 347 Ok(None) => break, 348 Err(lexer_error) => { 349 return self.set_final_result(Err(lexer_error)) 350 }, 351 } 352 } 353 354 self.handle_eof() 355 } 356 357 /// Handle end of stream handle_eof(&mut self) -> std::result::Result<XmlEvent, super::Error>358 fn handle_eof(&mut self) -> std::result::Result<XmlEvent, super::Error> { 359 // Forward pos to the lexer head 360 self.next_pos(); 361 let ev = if self.depth() == 0 { 362 if self.encountered == Encountered::Element && self.st == State::OutsideTag { // all is ok 363 Ok(XmlEvent::EndDocument) 364 } else if self.encountered < Encountered::Element { 365 self.error(SyntaxError::NoRootElement) 366 } else { // self.st != State::OutsideTag 367 self.error(SyntaxError::UnexpectedEof) // TODO: add expected hint? 368 } 369 } else if self.config.c.ignore_end_of_stream { 370 self.final_result = None; 371 self.lexer.reset_eof_handled(); 372 return self.error(SyntaxError::UnbalancedRootElement); 373 } else { 374 self.error(SyntaxError::UnbalancedRootElement) 375 }; 376 self.set_final_result(ev) 377 } 378 379 // This function is to be called when a terminal event is reached. 380 // The function sets up the `self.final_result` into `Some(result)` and return `result`. 381 #[inline] set_final_result(&mut self, result: Result) -> Result382 fn set_final_result(&mut self, result: Result) -> Result { 383 self.final_result = Some(result.clone()); 384 result 385 } 386 387 #[cold] error(&self, e: SyntaxError) -> Result388 fn error(&self, e: SyntaxError) -> Result { 389 Err(Error { 390 pos: self.lexer.position(), 391 kind: ErrorKind::Syntax(e.to_cow()), 392 }) 393 } 394 395 #[inline] next_pos(&mut self)396 fn next_pos(&mut self) { 397 // unfortunately calls to next_pos will never be perfectly balanced with push_pos, 398 // at very least because parse errors and EOF can happen unexpectedly without a prior push. 399 if !self.pos.is_empty() { 400 if self.pos.len() > 1 { 401 self.pos.remove(0); 402 } else { 403 self.pos[0] = self.lexer.position(); 404 } 405 } 406 } 407 408 #[inline] 409 #[track_caller] push_pos(&mut self)410 fn push_pos(&mut self) { 411 debug_assert!(self.pos.len() != self.pos.capacity(), "You've found a bug in xml-rs, caused by calls to push_pos() in states that don't end up emitting events. 412 This case is ignored in release mode, and merely causes document positions to be out of sync. 413 Please file a bug and include the XML document that triggers this assert."); 414 415 // it has capacity preallocated for more than it ever needs, so this reduces code size 416 if self.pos.len() != self.pos.capacity() { 417 self.pos.push(self.lexer.position()); 418 } else if self.pos.len() > 1 { 419 self.pos.remove(0); // this mitigates the excessive push_pos() call 420 } 421 } 422 423 #[inline(never)] dispatch_token(&mut self, t: Token) -> Option<Result>424 fn dispatch_token(&mut self, t: Token) -> Option<Result> { 425 match self.st { 426 State::OutsideTag => self.outside_tag(t), 427 State::InsideOpeningTag(s) => self.inside_opening_tag(t, s), 428 State::InsideClosingTag(s) => self.inside_closing_tag_name(t, s), 429 State::InsideReference => self.inside_reference(t), 430 State::InsideComment => self.inside_comment(t), 431 State::InsideCData => self.inside_cdata(t), 432 State::InsideProcessingInstruction(s) => self.inside_processing_instruction(t, s), 433 State::InsideDoctype(s) => self.inside_doctype(t, s), 434 State::InsideDeclaration(s) => self.inside_declaration(t, s), 435 State::DocumentStart => self.document_start(t), 436 } 437 } 438 439 #[inline] depth(&self) -> usize440 fn depth(&self) -> usize { 441 self.est.len() 442 } 443 444 #[inline] buf_has_data(&self) -> bool445 fn buf_has_data(&self) -> bool { 446 !self.buf.is_empty() 447 } 448 449 #[inline] take_buf(&mut self) -> String450 fn take_buf(&mut self) -> String { 451 std::mem::take(&mut self.buf) 452 } 453 454 #[inline] into_state(&mut self, st: State, ev: Option<Result>) -> Option<Result>455 fn into_state(&mut self, st: State, ev: Option<Result>) -> Option<Result> { 456 self.st = st; 457 ev 458 } 459 460 #[inline] into_state_continue(&mut self, st: State) -> Option<Result>461 fn into_state_continue(&mut self, st: State) -> Option<Result> { 462 self.into_state(st, None) 463 } 464 465 #[inline] into_state_emit(&mut self, st: State, ev: Result) -> Option<Result>466 fn into_state_emit(&mut self, st: State, ev: Result) -> Option<Result> { 467 self.into_state(st, Some(ev)) 468 } 469 470 /// Dispatches tokens in order to process qualified name. If qualified name cannot be parsed, 471 /// an error is returned. 472 /// 473 /// # Parameters 474 /// * `t` --- next token; 475 /// * `on_name` --- a callback which is executed when whitespace is encountered. read_qualified_name<F>(&mut self, t: Token, target: QualifiedNameTarget, on_name: F) -> Option<Result> where F: Fn(&mut PullParser, Token, OwnedName) -> Option<Result>476 fn read_qualified_name<F>(&mut self, t: Token, target: QualifiedNameTarget, on_name: F) -> Option<Result> 477 where F: Fn(&mut PullParser, Token, OwnedName) -> Option<Result> { 478 // We can get here for the first time only when self.data.name contains zero or one character, 479 // but first character cannot be a colon anyway 480 if self.buf.len() <= 1 { 481 self.read_prefix_separator = false; 482 } 483 484 let invoke_callback = move |this: &mut PullParser, t| { 485 let name = this.take_buf(); 486 match name.parse() { 487 Ok(name) => on_name(this, t, name), 488 Err(_) => Some(this.error(SyntaxError::InvalidQualifiedName(name.into()))), 489 } 490 }; 491 492 match t { 493 // There can be only one colon, and not as the first character 494 Token::Character(':') if self.buf_has_data() && !self.read_prefix_separator => { 495 self.buf.push(':'); 496 self.read_prefix_separator = true; 497 None 498 } 499 500 Token::Character(c) if c != ':' && (self.buf.is_empty() && is_name_start_char(c) || 501 self.buf_has_data() && is_name_char(c)) => { 502 if self.buf.len() > self.config.max_name_length { 503 return Some(self.error(SyntaxError::ExceededConfiguredLimit)); 504 } 505 self.buf.push(c); 506 None 507 }, 508 509 Token::EqualsSign if target == QualifiedNameTarget::AttributeNameTarget => invoke_callback(self, t), 510 511 Token::EmptyTagEnd if target == QualifiedNameTarget::OpeningTagNameTarget => invoke_callback(self, t), 512 513 Token::TagEnd if target == QualifiedNameTarget::OpeningTagNameTarget || 514 target == QualifiedNameTarget::ClosingTagNameTarget => invoke_callback(self, t), 515 516 Token::Character(c) if is_whitespace_char(c) => invoke_callback(self, t), 517 518 _ => Some(self.error(SyntaxError::UnexpectedQualifiedName(t))), 519 } 520 } 521 522 /// Dispatches tokens in order to process attribute value. 523 /// 524 /// # Parameters 525 /// * `t` --- next token; 526 /// * `on_value` --- a callback which is called when terminating quote is encountered. read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result> where F: Fn(&mut PullParser, String) -> Option<Result>527 fn read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result> 528 where F: Fn(&mut PullParser, String) -> Option<Result> { 529 match t { 530 Token::Character(c) if self.data.quote.is_none() && is_whitespace_char(c) => None, // skip leading whitespace 531 532 Token::DoubleQuote | Token::SingleQuote => match self.data.quote { 533 None => { // Entered attribute value 534 self.data.quote = Some(QuoteToken::from_token(&t)); 535 None 536 } 537 Some(q) if q.as_token() == t => { 538 self.data.quote = None; 539 let value = self.take_buf(); 540 on_value(self, value) 541 } 542 _ => { 543 if let Token::Character(c) = t { 544 if !self.is_valid_xml_char_not_restricted(c) { 545 return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))); 546 } 547 } 548 if self.buf.len() > self.config.max_attribute_length { 549 return Some(self.error(SyntaxError::ExceededConfiguredLimit)); 550 } 551 t.push_to_string(&mut self.buf); 552 None 553 } 554 }, 555 556 Token::ReferenceStart if self.data.quote.is_some() => { 557 self.state_after_reference = self.st; 558 self.into_state_continue(State::InsideReference) 559 }, 560 561 Token::OpeningTagStart => Some(self.error(SyntaxError::UnexpectedOpeningTag)), 562 563 Token::Character(c) if !self.is_valid_xml_char_not_restricted(c) => { 564 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))) 565 }, 566 567 // Every character except " and ' and < is okay 568 _ if self.data.quote.is_some() => { 569 if self.buf.len() > self.config.max_attribute_length { 570 return Some(self.error(SyntaxError::ExceededConfiguredLimit)); 571 } 572 t.push_to_string(&mut self.buf); 573 None 574 } 575 576 _ => Some(self.error(SyntaxError::UnexpectedToken(t))), 577 } 578 } 579 emit_start_element(&mut self, emit_end_element: bool) -> Option<Result>580 fn emit_start_element(&mut self, emit_end_element: bool) -> Option<Result> { 581 let mut name = self.data.take_element_name()?; 582 let mut attributes = self.data.take_attributes().into_vec(); 583 584 // check whether the name prefix is bound and fix its namespace 585 match self.nst.get(name.borrow().prefix_repr()) { 586 Some("") => name.namespace = None, // default namespace 587 Some(ns) => name.namespace = Some(ns.into()), 588 None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into()))) 589 } 590 591 // check and fix accumulated attributes prefixes 592 for attr in &mut attributes { 593 if let Some(ref pfx) = attr.name.prefix { 594 let new_ns = match self.nst.get(pfx) { 595 Some("") => None, // default namespace 596 Some(ns) => Some(ns.into()), 597 None => return Some(self.error(SyntaxError::UnboundAttribute(attr.name.to_string().into()))) 598 }; 599 attr.name.namespace = new_ns; 600 } 601 } 602 603 if emit_end_element { 604 self.pop_namespace = true; 605 self.next_event = Some(Ok(XmlEvent::EndElement { 606 name: name.clone() 607 })); 608 } else { 609 self.est.push(name.clone()); 610 } 611 let namespace = self.nst.squash(); 612 self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartElement { 613 name, 614 attributes, 615 namespace 616 })) 617 } 618 emit_end_element(&mut self) -> Option<Result>619 fn emit_end_element(&mut self) -> Option<Result> { 620 let mut name = self.data.take_element_name()?; 621 622 // check whether the name prefix is bound and fix its namespace 623 match self.nst.get(name.borrow().prefix_repr()) { 624 Some("") => name.namespace = None, // default namespace 625 Some(ns) => name.namespace = Some(ns.into()), 626 None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into()))) 627 } 628 629 let op_name = self.est.pop()?; 630 631 if name == op_name { 632 self.pop_namespace = true; 633 self.into_state_emit(State::OutsideTag, Ok(XmlEvent::EndElement { name })) 634 } else { 635 Some(self.error(SyntaxError::UnexpectedClosingTag(format!("{name} != {op_name}").into()))) 636 } 637 } 638 639 #[inline] is_valid_xml_char(&self, c: char) -> bool640 fn is_valid_xml_char(&self, c: char) -> bool { 641 if Some(XmlVersion::Version11) == self.data.version { 642 is_xml11_char(c) 643 } else { 644 is_xml10_char(c) 645 } 646 } 647 648 #[inline] is_valid_xml_char_not_restricted(&self, c: char) -> bool649 fn is_valid_xml_char_not_restricted(&self, c: char) -> bool { 650 if Some(XmlVersion::Version11) == self.data.version { 651 is_xml11_char_not_restricted(c) 652 } else { 653 is_xml10_char(c) 654 } 655 } 656 } 657 658 #[cfg(test)] 659 mod tests { 660 use std::io::BufReader; 661 use crate::attribute::OwnedAttribute; 662 use crate::common::TextPosition; 663 use crate::name::OwnedName; 664 use crate::reader::events::XmlEvent; 665 use crate::reader::parser::PullParser; 666 use crate::reader::ParserConfig; 667 new_parser() -> PullParser668 fn new_parser() -> PullParser { 669 PullParser::new(ParserConfig::new()) 670 } 671 672 macro_rules! expect_event( 673 ($r:expr, $p:expr, $t:pat) => ( 674 match $p.next(&mut $r) { 675 $t => {} 676 e => panic!("Unexpected event: {e:?}\nExpected: {}", stringify!($t)) 677 } 678 ); 679 ($r:expr, $p:expr, $t:pat => $c:expr ) => ( 680 match $p.next(&mut $r) { 681 $t if $c => {} 682 e => panic!("Unexpected event: {e:?}\nExpected: {} if {}", stringify!($t), stringify!($c)) 683 } 684 ) 685 ); 686 687 macro_rules! test_data( 688 ($d:expr) => ({ 689 static DATA: &'static str = $d; 690 let r = BufReader::new(DATA.as_bytes()); 691 let p = new_parser(); 692 (r, p) 693 }) 694 ); 695 696 #[test] issue_3_semicolon_in_attribute_value()697 fn issue_3_semicolon_in_attribute_value() { 698 let (mut r, mut p) = test_data!(r#" 699 <a attr="zzz;zzz" /> 700 "#); 701 702 expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); 703 expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, ref attributes, ref namespace }) => 704 *name == OwnedName::local("a") && 705 attributes.len() == 1 && 706 attributes[0] == OwnedAttribute::new(OwnedName::local("attr"), "zzz;zzz") && 707 namespace.is_essentially_empty() 708 ); 709 expect_event!(r, p, Ok(XmlEvent::EndElement { ref name }) => *name == OwnedName::local("a")); 710 expect_event!(r, p, Ok(XmlEvent::EndDocument)); 711 } 712 713 #[test] issue_140_entity_reference_inside_tag()714 fn issue_140_entity_reference_inside_tag() { 715 let (mut r, mut p) = test_data!(r#" 716 <bla>♫</bla> 717 "#); 718 719 expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); 720 expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, .. }) => *name == OwnedName::local("bla")); 721 expect_event!(r, p, Ok(XmlEvent::Characters(ref s)) => s == "\u{266b}"); 722 expect_event!(r, p, Ok(XmlEvent::EndElement { ref name, .. }) => *name == OwnedName::local("bla")); 723 expect_event!(r, p, Ok(XmlEvent::EndDocument)); 724 } 725 726 #[test] issue_220_comment()727 fn issue_220_comment() { 728 let (mut r, mut p) = test_data!(r#"<x><!-- <!--></x>"#); 729 expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); 730 expect_event!(r, p, Ok(XmlEvent::StartElement { .. })); 731 expect_event!(r, p, Ok(XmlEvent::EndElement { .. })); 732 expect_event!(r, p, Ok(XmlEvent::EndDocument)); 733 734 let (mut r, mut p) = test_data!(r#"<x><!-- <!---></x>"#); 735 expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); 736 expect_event!(r, p, Ok(XmlEvent::StartElement { .. })); 737 expect_event!(r, p, Err(_)); // ---> is forbidden in comments 738 739 let (mut r, mut p) = test_data!(r#"<x><!--<text&x;> <!--></x>"#); 740 p.config.c.ignore_comments = false; 741 expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); 742 expect_event!(r, p, Ok(XmlEvent::StartElement { .. })); 743 expect_event!(r, p, Ok(XmlEvent::Comment(s)) => s == "<text&x;> <!"); 744 expect_event!(r, p, Ok(XmlEvent::EndElement { .. })); 745 expect_event!(r, p, Ok(XmlEvent::EndDocument)); 746 } 747 748 #[test] malformed_declaration_attrs()749 fn malformed_declaration_attrs() { 750 let (mut r, mut p) = test_data!(r#"<?xml version x="1.0"?>"#); 751 expect_event!(r, p, Err(_)); 752 753 let (mut r, mut p) = test_data!(r#"<?xml version="1.0" version="1.0"?>"#); 754 expect_event!(r, p, Err(_)); 755 756 let (mut r, mut p) = test_data!(r#"<?xml version="1.0"encoding="utf-8"?>"#); 757 expect_event!(r, p, Err(_)); 758 759 let (mut r, mut p) = test_data!(r#"<?xml version="1.0"standalone="yes"?>"#); 760 expect_event!(r, p, Err(_)); 761 762 let (mut r, mut p) = test_data!(r#"<?xml version="1.0" encoding="utf-8"standalone="yes"?>"#); 763 expect_event!(r, p, Err(_)); 764 } 765 766 #[test] opening_tag_in_attribute_value()767 fn opening_tag_in_attribute_value() { 768 use crate::reader::error::{SyntaxError, Error, ErrorKind}; 769 770 let (mut r, mut p) = test_data!(r#" 771 <a attr="zzz<zzz" /> 772 "#); 773 774 expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); 775 expect_event!(r, p, Err(ref e) => 776 *e == Error { 777 kind: ErrorKind::Syntax(SyntaxError::UnexpectedOpeningTag.to_cow()), 778 pos: TextPosition { row: 1, column: 24 } 779 } 780 ); 781 } 782 783 #[test] reference_err()784 fn reference_err() { 785 let (mut r, mut p) = test_data!(r#" 786 <a>&&</a> 787 "#); 788 789 expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); 790 expect_event!(r, p, Ok(XmlEvent::StartElement { .. })); 791 expect_event!(r, p, Err(_)); 792 } 793 794 #[test] state_size()795 fn state_size() { 796 assert_eq!(2, std::mem::size_of::<super::State>()); 797 assert_eq!(1, std::mem::size_of::<super::DoctypeSubstate>()); 798 } 799 } 800