1 //! Contains an implementation of pull-based XML parser.
2 
3 use crate::common::{is_xml10_char, is_xml11_char, is_xml11_char_not_restricted, is_name_char, is_name_start_char, is_whitespace_char};
4 use crate::common::{Position, TextPosition, XmlVersion};
5 use crate::name::OwnedName;
6 use crate::namespace::NamespaceStack;
7 use crate::reader::config::ParserConfig2;
8 use crate::reader::error::SyntaxError;
9 use crate::reader::events::XmlEvent;
10 use crate::reader::indexset::AttributesSet;
11 use crate::reader::lexer::{Lexer, Token};
12 use super::{Error, ErrorKind};
13 
14 use std::collections::HashMap;
15 use std::io::Read;
16 
/// Generates "take" accessors on `MarkupData`.
///
/// The input is a `;`-separated list of `field -> method, Type, default`
/// entries. Every entry expands to a method `method(&mut self) -> Type`
/// which moves the current value out of `self.field` and stores `default`
/// in its place.
macro_rules! gen_takes {
    ($($field:ident -> $method:ident, $t:ty, $def:expr);+) => {
        impl MarkupData {
            $(
                #[inline]
                // The same expansion serves `Option` and non-`Option` fields,
                // so `mem::replace(.., None)` is expected for some of them.
                #[allow(clippy::mem_replace_option_with_none)]
                fn $method(&mut self) -> $t {
                    std::mem::replace(&mut self.$field, $def)
                }
            )+
        }
    };
}
30 
31 gen_takes!(
32     name         -> take_name, String, String::new();
33     ref_data     -> take_ref_data, String, String::new();
34 
35     encoding     -> take_encoding, Option<String>, None;
36 
37     element_name -> take_element_name, Option<OwnedName>, None;
38 
39     attr_name    -> take_attr_name, Option<OwnedName>, None;
40     attributes   -> take_attributes, AttributesSet, AttributesSet::new()
41 );
42 
43 mod inside_cdata;
44 mod inside_closing_tag_name;
45 mod inside_comment;
46 mod inside_declaration;
47 mod inside_doctype;
48 mod inside_opening_tag;
49 mod inside_processing_instruction;
50 mod inside_reference;
51 mod outside_tag;
52 
53 static DEFAULT_VERSION: XmlVersion = XmlVersion::Version10;
54 static DEFAULT_STANDALONE: Option<bool> = None;
55 
56 type ElementStack = Vec<OwnedName>;
57 pub type Result = super::Result<XmlEvent>;
58 
59 /// Pull-based XML parser.
60 pub(crate) struct PullParser {
61     config: ParserConfig2,
62     lexer: Lexer,
63     st: State,
64     state_after_reference: State,
65     buf: String,
66 
67     /// From DTD internal subset
68     entities: HashMap<String, String>,
69 
70     nst: NamespaceStack,
71 
72     data: MarkupData,
73     final_result: Option<Result>,
74     next_event: Option<Result>,
75     est: ElementStack,
76     pos: Vec<TextPosition>,
77 
78     encountered: Encountered,
79     inside_whitespace: bool,
80     read_prefix_separator: bool,
81     pop_namespace: bool,
82 }
83 
/// Keeps track of what has been seen so far, which determines when an XML
/// declaration can still happen.
///
/// The variants are compared through the derived `Ord` (see
/// `PullParser::set_encountered` and `handle_eof`), so their declaration order
/// is significant and must not be changed.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum Encountered {
    /// Nothing has been seen yet.
    None = 0,
    /// Some characters were seen; whitespace before `<?xml` is not allowed.
    AnyChars,
    Declaration,
    Comment,
    Doctype,
    Element,
}
94 
95 impl PullParser {
96     /// Returns a new parser using the given config.
97     #[inline]
new(config: impl Into<ParserConfig2>) -> PullParser98     pub fn new(config: impl Into<ParserConfig2>) -> PullParser {
99         let config = config.into();
100         Self::new_with_config2(config)
101     }
102 
103     #[inline]
new_with_config2(config: ParserConfig2) -> PullParser104     fn new_with_config2(config: ParserConfig2) -> PullParser {
105         let mut lexer = Lexer::new(&config);
106         if let Some(enc) = config.override_encoding {
107             lexer.set_encoding(enc);
108         }
109 
110         let mut pos = Vec::with_capacity(16);
111         pos.push(TextPosition::new());
112 
113         PullParser {
114             config,
115             lexer,
116             st: State::DocumentStart,
117             state_after_reference: State::OutsideTag,
118             buf: String::new(),
119             entities: HashMap::new(),
120             nst: NamespaceStack::default(),
121 
122             data: MarkupData {
123                 name: String::new(),
124                 version: None,
125                 encoding: None,
126                 standalone: None,
127                 ref_data: String::new(),
128                 element_name: None,
129                 quote: None,
130                 attr_name: None,
131                 attributes: AttributesSet::new(),
132             },
133             final_result: None,
134             next_event: None,
135             est: Vec::new(),
136             pos,
137 
138             encountered: Encountered::None,
139             inside_whitespace: true,
140             read_prefix_separator: false,
141             pop_namespace: false,
142         }
143     }
144 
145     /// Checks if this parser ignores the end of stream errors.
is_ignoring_end_of_stream(&self) -> bool146     pub fn is_ignoring_end_of_stream(&self) -> bool { self.config.c.ignore_end_of_stream }
147 
148     #[inline(never)]
set_encountered(&mut self, new_encounter: Encountered) -> Option<Result>149     fn set_encountered(&mut self, new_encounter: Encountered) -> Option<Result> {
150         if new_encounter <= self.encountered {
151             return None;
152         }
153         let prev_enc = self.encountered;
154         self.encountered = new_encounter;
155 
156         // If declaration was not parsed and we have encountered an element,
157         // emit this declaration as the next event.
158         if prev_enc == Encountered::None {
159             self.push_pos();
160             Some(Ok(XmlEvent::StartDocument {
161                 version: DEFAULT_VERSION,
162                 encoding: self.lexer.encoding().to_string(),
163                 standalone: DEFAULT_STANDALONE,
164             }))
165         } else {
166             None
167         }
168     }
169 }
170 
171 impl Position for PullParser {
172     /// Returns the position of the last event produced by the parser
173     #[inline]
position(&self) -> TextPosition174     fn position(&self) -> TextPosition {
175         self.pos[0]
176     }
177 }
178 
179 #[derive(Copy, Clone, PartialEq)]
180 pub enum State {
181     OutsideTag,
182     InsideOpeningTag(OpeningTagSubstate),
183     InsideClosingTag(ClosingTagSubstate),
184     InsideProcessingInstruction(ProcessingInstructionSubstate),
185     InsideComment,
186     InsideCData,
187     InsideDeclaration(DeclarationSubstate),
188     InsideDoctype(DoctypeSubstate),
189     InsideReference,
190     DocumentStart,
191 }
192 
/// Sub-state carried by `State::InsideDoctype`.
#[derive(Copy, Clone, PartialEq)]
pub enum DoctypeSubstate {
    Outside,
    String,
    InsideName,
    BeforeEntityName,
    EntityName,
    BeforeEntityValue,
    EntityValue,
    NumericReferenceStart,
    NumericReference,
    /// expansion
    PEReferenceInValue,
    PEReferenceInDtd,
    /// name definition
    PEReferenceDefinitionStart,
    PEReferenceDefinition,
    SkipDeclaration,
    Comment,
}
213 
/// Sub-state carried by `State::InsideOpeningTag`.
#[derive(Copy, Clone, PartialEq)]
pub enum OpeningTagSubstate {
    // Element name
    InsideName,

    // Between the pieces of the tag
    InsideTag,

    // Attribute name
    InsideAttributeName,
    AfterAttributeName,

    // Attribute value
    InsideAttributeValue,
    AfterAttributeValue,
}
226 
/// Sub-state carried by `State::InsideClosingTag`.
#[derive(Copy, Clone, PartialEq)]
pub enum ClosingTagSubstate {
    CTInsideName,
    CTAfterName,
}
232 
/// Sub-state carried by `State::InsideProcessingInstruction`.
#[derive(Copy, Clone, PartialEq)]
pub enum ProcessingInstructionSubstate {
    PIInsideName,
    PIInsideData,
}
238 
/// Sub-state carried by `State::InsideDeclaration`.
///
/// Going by the variant names, the states come in three groups, one for each
/// of `version`, `encoding` and `standalone`.
#[derive(Copy, Clone, PartialEq)]
pub enum DeclarationSubstate {
    // version
    BeforeVersion,
    InsideVersion,
    AfterVersion,

    InsideVersionValue,
    AfterVersionValue,

    // encoding
    BeforeEncoding,
    InsideEncoding,
    AfterEncoding,

    InsideEncodingValue,
    AfterEncodingValue,

    // standalone
    BeforeStandaloneDecl,
    InsideStandaloneDecl,
    AfterStandaloneDecl,

    InsideStandaloneDeclValue,
    AfterStandaloneDeclValue,
}
262 
/// Tells `PullParser::read_qualified_name` what kind of name is being read,
/// which in turn decides which tokens may terminate it.
#[derive(PartialEq)]
enum QualifiedNameTarget {
    /// An attribute name; may be terminated by `=`.
    AttributeNameTarget,
    /// The name of an opening tag; may be terminated by `>` or `/>`.
    OpeningTagNameTarget,
    /// The name of a closing tag; may be terminated by `>`.
    ClosingTagNameTarget,
}
269 
/// The kind of quote that opened an attribute value.
#[derive(Copy, Clone, PartialEq, Eq)]
enum QuoteToken {
    /// Corresponds to `Token::SingleQuote`.
    SingleQuoteToken,
    /// Corresponds to `Token::DoubleQuote`.
    DoubleQuoteToken,
}
275 
276 impl QuoteToken {
from_token(t: &Token) -> QuoteToken277     fn from_token(t: &Token) -> QuoteToken {
278         match *t {
279             Token::SingleQuote => QuoteToken::SingleQuoteToken,
280             Token::DoubleQuote => QuoteToken::DoubleQuoteToken,
281             _ => panic!("Unexpected token: {t}"),
282         }
283     }
284 
as_token(self) -> Token285     fn as_token(self) -> Token {
286         match self {
287             QuoteToken::SingleQuoteToken => Token::SingleQuote,
288             QuoteToken::DoubleQuoteToken => Token::DoubleQuote,
289         }
290     }
291 }
292 
293 struct MarkupData {
294     name: String,     // used for processing instruction name
295     ref_data: String,  // used for reference content
296 
297     version: Option<XmlVersion>,  // used for XML declaration version
298     encoding: Option<String>,  // used for XML declaration encoding
299     standalone: Option<bool>,  // used for XML declaration standalone parameter
300 
301     element_name: Option<OwnedName>,  // used for element name
302 
303     quote: Option<QuoteToken>,  // used to hold opening quote for attribute value
304     attr_name: Option<OwnedName>,  // used to hold attribute name
305     attributes: AttributesSet,   // used to hold all accumulated attributes
306 }
307 
308 impl PullParser {
309     /// Returns next event read from the given buffer.
310     ///
311     /// This method should be always called with the same buffer. If you call it
312     /// providing different buffers each time, the result will be undefined.
next<R: Read>(&mut self, r: &mut R) -> Result313     pub fn next<R: Read>(&mut self, r: &mut R) -> Result {
314         if let Some(ref ev) = self.final_result {
315             return ev.clone();
316         }
317 
318         if let Some(ev) = self.next_event.take() {
319             return ev;
320         }
321 
322         if self.pop_namespace {
323             self.pop_namespace = false;
324             self.nst.pop();
325         }
326 
327         loop {
328             debug_assert!(self.next_event.is_none());
329             debug_assert!(!self.pop_namespace);
330 
331             // While lexer gives us Ok(maybe_token) -- we loop.
332             // Upon having a complete XML-event -- we return from the whole function.
333             match self.lexer.next_token(r) {
334                 Ok(Some(token)) => {
335                     match self.dispatch_token(token) {
336                         None => {} // continue
337                         Some(Ok(xml_event)) => {
338                             self.next_pos();
339                             return Ok(xml_event)
340                         },
341                         Some(Err(xml_error)) => {
342                             self.next_pos();
343                             return self.set_final_result(Err(xml_error))
344                         },
345                     }
346                 },
347                 Ok(None) => break,
348                 Err(lexer_error) => {
349                     return self.set_final_result(Err(lexer_error))
350                 },
351             }
352         }
353 
354         self.handle_eof()
355     }
356 
357     /// Handle end of stream
handle_eof(&mut self) -> std::result::Result<XmlEvent, super::Error>358     fn handle_eof(&mut self) -> std::result::Result<XmlEvent, super::Error> {
359         // Forward pos to the lexer head
360         self.next_pos();
361         let ev = if self.depth() == 0 {
362             if self.encountered == Encountered::Element && self.st == State::OutsideTag {  // all is ok
363                 Ok(XmlEvent::EndDocument)
364             } else if self.encountered < Encountered::Element {
365                 self.error(SyntaxError::NoRootElement)
366             } else {  // self.st != State::OutsideTag
367                 self.error(SyntaxError::UnexpectedEof)  // TODO: add expected hint?
368             }
369         } else if self.config.c.ignore_end_of_stream {
370             self.final_result = None;
371             self.lexer.reset_eof_handled();
372             return self.error(SyntaxError::UnbalancedRootElement);
373         } else {
374             self.error(SyntaxError::UnbalancedRootElement)
375         };
376         self.set_final_result(ev)
377     }
378 
379     // This function is to be called when a terminal event is reached.
380     // The function sets up the `self.final_result` into `Some(result)` and return `result`.
381     #[inline]
set_final_result(&mut self, result: Result) -> Result382     fn set_final_result(&mut self, result: Result) -> Result {
383         self.final_result = Some(result.clone());
384         result
385     }
386 
387     #[cold]
error(&self, e: SyntaxError) -> Result388     fn error(&self, e: SyntaxError) -> Result {
389         Err(Error {
390             pos: self.lexer.position(),
391             kind: ErrorKind::Syntax(e.to_cow()),
392         })
393     }
394 
395     #[inline]
next_pos(&mut self)396     fn next_pos(&mut self) {
397         // unfortunately calls to next_pos will never be perfectly balanced with push_pos,
398         // at very least because parse errors and EOF can happen unexpectedly without a prior push.
399         if !self.pos.is_empty() {
400             if self.pos.len() > 1 {
401                 self.pos.remove(0);
402             } else {
403                 self.pos[0] = self.lexer.position();
404             }
405         }
406     }
407 
408     #[inline]
409     #[track_caller]
push_pos(&mut self)410     fn push_pos(&mut self) {
411         debug_assert!(self.pos.len() != self.pos.capacity(), "You've found a bug in xml-rs, caused by calls to push_pos() in states that don't end up emitting events.
412             This case is ignored in release mode, and merely causes document positions to be out of sync.
413             Please file a bug and include the XML document that triggers this assert.");
414 
415         // it has capacity preallocated for more than it ever needs, so this reduces code size
416         if self.pos.len() != self.pos.capacity() {
417             self.pos.push(self.lexer.position());
418         } else if self.pos.len() > 1 {
419             self.pos.remove(0); // this mitigates the excessive push_pos() call
420         }
421     }
422 
423     #[inline(never)]
dispatch_token(&mut self, t: Token) -> Option<Result>424     fn dispatch_token(&mut self, t: Token) -> Option<Result> {
425         match self.st {
426             State::OutsideTag                     => self.outside_tag(t),
427             State::InsideOpeningTag(s)            => self.inside_opening_tag(t, s),
428             State::InsideClosingTag(s)            => self.inside_closing_tag_name(t, s),
429             State::InsideReference                => self.inside_reference(t),
430             State::InsideComment                  => self.inside_comment(t),
431             State::InsideCData                    => self.inside_cdata(t),
432             State::InsideProcessingInstruction(s) => self.inside_processing_instruction(t, s),
433             State::InsideDoctype(s)               => self.inside_doctype(t, s),
434             State::InsideDeclaration(s)           => self.inside_declaration(t, s),
435             State::DocumentStart                  => self.document_start(t),
436         }
437     }
438 
439     #[inline]
depth(&self) -> usize440     fn depth(&self) -> usize {
441         self.est.len()
442     }
443 
444     #[inline]
buf_has_data(&self) -> bool445     fn buf_has_data(&self) -> bool {
446         !self.buf.is_empty()
447     }
448 
449     #[inline]
take_buf(&mut self) -> String450     fn take_buf(&mut self) -> String {
451         std::mem::take(&mut self.buf)
452     }
453 
454     #[inline]
into_state(&mut self, st: State, ev: Option<Result>) -> Option<Result>455     fn into_state(&mut self, st: State, ev: Option<Result>) -> Option<Result> {
456         self.st = st;
457         ev
458     }
459 
460     #[inline]
into_state_continue(&mut self, st: State) -> Option<Result>461     fn into_state_continue(&mut self, st: State) -> Option<Result> {
462         self.into_state(st, None)
463     }
464 
465     #[inline]
into_state_emit(&mut self, st: State, ev: Result) -> Option<Result>466     fn into_state_emit(&mut self, st: State, ev: Result) -> Option<Result> {
467         self.into_state(st, Some(ev))
468     }
469 
470     /// Dispatches tokens in order to process qualified name. If qualified name cannot be parsed,
471     /// an error is returned.
472     ///
473     /// # Parameters
474     /// * `t`       --- next token;
475     /// * `on_name` --- a callback which is executed when whitespace is encountered.
read_qualified_name<F>(&mut self, t: Token, target: QualifiedNameTarget, on_name: F) -> Option<Result> where F: Fn(&mut PullParser, Token, OwnedName) -> Option<Result>476     fn read_qualified_name<F>(&mut self, t: Token, target: QualifiedNameTarget, on_name: F) -> Option<Result>
477       where F: Fn(&mut PullParser, Token, OwnedName) -> Option<Result> {
478         // We can get here for the first time only when self.data.name contains zero or one character,
479         // but first character cannot be a colon anyway
480         if self.buf.len() <= 1 {
481             self.read_prefix_separator = false;
482         }
483 
484         let invoke_callback = move |this: &mut PullParser, t| {
485             let name = this.take_buf();
486             match name.parse() {
487                 Ok(name) => on_name(this, t, name),
488                 Err(_) => Some(this.error(SyntaxError::InvalidQualifiedName(name.into()))),
489             }
490         };
491 
492         match t {
493             // There can be only one colon, and not as the first character
494             Token::Character(':') if self.buf_has_data() && !self.read_prefix_separator => {
495                 self.buf.push(':');
496                 self.read_prefix_separator = true;
497                 None
498             }
499 
500             Token::Character(c) if c != ':' && (self.buf.is_empty() && is_name_start_char(c) ||
501                                           self.buf_has_data() && is_name_char(c)) => {
502                 if self.buf.len() > self.config.max_name_length {
503                     return Some(self.error(SyntaxError::ExceededConfiguredLimit));
504                 }
505                 self.buf.push(c);
506                 None
507             },
508 
509             Token::EqualsSign if target == QualifiedNameTarget::AttributeNameTarget => invoke_callback(self, t),
510 
511             Token::EmptyTagEnd if target == QualifiedNameTarget::OpeningTagNameTarget => invoke_callback(self, t),
512 
513             Token::TagEnd if target == QualifiedNameTarget::OpeningTagNameTarget ||
514                       target == QualifiedNameTarget::ClosingTagNameTarget => invoke_callback(self, t),
515 
516             Token::Character(c) if is_whitespace_char(c) => invoke_callback(self, t),
517 
518             _ => Some(self.error(SyntaxError::UnexpectedQualifiedName(t))),
519         }
520     }
521 
522     /// Dispatches tokens in order to process attribute value.
523     ///
524     /// # Parameters
525     /// * `t`        --- next token;
526     /// * `on_value` --- a callback which is called when terminating quote is encountered.
read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result> where F: Fn(&mut PullParser, String) -> Option<Result>527     fn read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result>
528       where F: Fn(&mut PullParser, String) -> Option<Result> {
529         match t {
530             Token::Character(c) if self.data.quote.is_none() && is_whitespace_char(c) => None, // skip leading whitespace
531 
532             Token::DoubleQuote | Token::SingleQuote => match self.data.quote {
533                 None => {  // Entered attribute value
534                     self.data.quote = Some(QuoteToken::from_token(&t));
535                     None
536                 }
537                 Some(q) if q.as_token() == t => {
538                     self.data.quote = None;
539                     let value = self.take_buf();
540                     on_value(self, value)
541                 }
542                 _ => {
543                     if let Token::Character(c) = t {
544                         if !self.is_valid_xml_char_not_restricted(c) {
545                             return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)));
546                         }
547                     }
548                     if self.buf.len() > self.config.max_attribute_length {
549                         return Some(self.error(SyntaxError::ExceededConfiguredLimit));
550                     }
551                     t.push_to_string(&mut self.buf);
552                     None
553                 }
554             },
555 
556             Token::ReferenceStart if self.data.quote.is_some() => {
557                 self.state_after_reference = self.st;
558                 self.into_state_continue(State::InsideReference)
559             },
560 
561             Token::OpeningTagStart => Some(self.error(SyntaxError::UnexpectedOpeningTag)),
562 
563             Token::Character(c) if !self.is_valid_xml_char_not_restricted(c) => {
564                 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
565             },
566 
567             // Every character except " and ' and < is okay
568             _ if self.data.quote.is_some() => {
569                 if self.buf.len() > self.config.max_attribute_length {
570                     return Some(self.error(SyntaxError::ExceededConfiguredLimit));
571                 }
572                 t.push_to_string(&mut self.buf);
573                 None
574             }
575 
576             _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
577         }
578     }
579 
emit_start_element(&mut self, emit_end_element: bool) -> Option<Result>580     fn emit_start_element(&mut self, emit_end_element: bool) -> Option<Result> {
581         let mut name = self.data.take_element_name()?;
582         let mut attributes = self.data.take_attributes().into_vec();
583 
584         // check whether the name prefix is bound and fix its namespace
585         match self.nst.get(name.borrow().prefix_repr()) {
586             Some("") => name.namespace = None, // default namespace
587             Some(ns) => name.namespace = Some(ns.into()),
588             None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into())))
589         }
590 
591         // check and fix accumulated attributes prefixes
592         for attr in &mut attributes {
593             if let Some(ref pfx) = attr.name.prefix {
594                 let new_ns = match self.nst.get(pfx) {
595                     Some("") => None, // default namespace
596                     Some(ns) => Some(ns.into()),
597                     None => return Some(self.error(SyntaxError::UnboundAttribute(attr.name.to_string().into())))
598                 };
599                 attr.name.namespace = new_ns;
600             }
601         }
602 
603         if emit_end_element {
604             self.pop_namespace = true;
605             self.next_event = Some(Ok(XmlEvent::EndElement {
606                 name: name.clone()
607             }));
608         } else {
609             self.est.push(name.clone());
610         }
611         let namespace = self.nst.squash();
612         self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartElement {
613             name,
614             attributes,
615             namespace
616         }))
617     }
618 
emit_end_element(&mut self) -> Option<Result>619     fn emit_end_element(&mut self) -> Option<Result> {
620         let mut name = self.data.take_element_name()?;
621 
622         // check whether the name prefix is bound and fix its namespace
623         match self.nst.get(name.borrow().prefix_repr()) {
624             Some("") => name.namespace = None, // default namespace
625             Some(ns) => name.namespace = Some(ns.into()),
626             None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into())))
627         }
628 
629         let op_name = self.est.pop()?;
630 
631         if name == op_name {
632             self.pop_namespace = true;
633             self.into_state_emit(State::OutsideTag, Ok(XmlEvent::EndElement { name }))
634         } else {
635             Some(self.error(SyntaxError::UnexpectedClosingTag(format!("{name} != {op_name}").into())))
636         }
637     }
638 
639     #[inline]
is_valid_xml_char(&self, c: char) -> bool640     fn is_valid_xml_char(&self, c: char) -> bool {
641         if Some(XmlVersion::Version11) == self.data.version {
642             is_xml11_char(c)
643         } else {
644             is_xml10_char(c)
645         }
646     }
647 
648     #[inline]
is_valid_xml_char_not_restricted(&self, c: char) -> bool649     fn is_valid_xml_char_not_restricted(&self, c: char) -> bool {
650         if Some(XmlVersion::Version11) == self.data.version {
651             is_xml11_char_not_restricted(c)
652         } else {
653             is_xml10_char(c)
654         }
655     }
656 }
657 
#[cfg(test)]
mod tests {
    use crate::attribute::OwnedAttribute;
    use crate::common::TextPosition;
    use crate::name::OwnedName;
    use crate::reader::events::XmlEvent;
    use crate::reader::parser::PullParser;
    use crate::reader::ParserConfig;
    use std::io::BufReader;

    /// Creates a parser with the default configuration.
    fn new_parser() -> PullParser {
        PullParser::new(ParserConfig::new())
    }

    /// Pulls the next event from parser `$p` reading from `$r` and panics unless
    /// it matches pattern `$t` (and, in the second form, satisfies condition `$c`).
    macro_rules! expect_event {
        ($r:expr, $p:expr, $t:pat) => {
            match $p.next(&mut $r) {
                $t => {}
                e => panic!("Unexpected event: {e:?}\nExpected: {}", stringify!($t))
            }
        };
        ($r:expr, $p:expr, $t:pat => $c:expr ) => {
            match $p.next(&mut $r) {
                $t if $c => {}
                e => panic!("Unexpected event: {e:?}\nExpected: {} if {}", stringify!($t), stringify!($c))
            }
        };
    }

    /// Evaluates to a `(reader, parser)` pair for the given document text.
    macro_rules! test_data {
        ($d:expr) => {{
            static DATA: &str = $d;
            let r = BufReader::new(DATA.as_bytes());
            let p = new_parser();
            (r, p)
        }};
    }

    /// A `;` inside an attribute value is ordinary text.
    #[test]
    fn issue_3_semicolon_in_attribute_value() {
        let (mut r, mut p) = test_data!(r#"
            <a attr="zzz;zzz" />
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, ref attributes, ref namespace }) =>
            *name == OwnedName::local("a") &&
             attributes.len() == 1 &&
             attributes[0] == OwnedAttribute::new(OwnedName::local("attr"), "zzz;zzz") &&
             namespace.is_essentially_empty()
        );
        expect_event!(r, p, Ok(XmlEvent::EndElement { ref name }) => *name == OwnedName::local("a"));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    /// A numeric character reference in element content is expanded.
    #[test]
    fn issue_140_entity_reference_inside_tag() {
        let (mut r, mut p) = test_data!(r#"
            <bla>&#9835;</bla>
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, .. }) => *name == OwnedName::local("bla"));
        expect_event!(r, p, Ok(XmlEvent::Characters(ref s)) => s == "\u{266b}");
        expect_event!(r, p, Ok(XmlEvent::EndElement { ref name, .. }) => *name == OwnedName::local("bla"));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    /// Comment terminators: `<!--` inside a comment is fine, `--->` is not.
    #[test]
    fn issue_220_comment() {
        // comment is skipped with the default configuration
        let (mut r, mut p) = test_data!(r#"<x><!-- <!--></x>"#);
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));

        let (mut r, mut p) = test_data!(r#"<x><!-- <!---></x>"#);
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Err(_)); // ---> is forbidden in comments

        // with comments enabled, markup-like text inside is reported verbatim
        let (mut r, mut p) = test_data!(r#"<x><!--<text&x;> <!--></x>"#);
        p.config.c.ignore_comments = false;
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::Comment(s)) => s == "<text&x;> <!");
        expect_event!(r, p, Ok(XmlEvent::EndElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    /// Malformed pseudo-attributes in the XML declaration are rejected.
    #[test]
    fn malformed_declaration_attrs() {
        let (mut r, mut p) = test_data!(r#"<?xml version x="1.0"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0" version="1.0"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0"encoding="utf-8"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0"standalone="yes"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0" encoding="utf-8"standalone="yes"?>"#);
        expect_event!(r, p, Err(_));
    }

    /// `<` inside an attribute value is an error reported at its position.
    #[test]
    fn opening_tag_in_attribute_value() {
        use crate::reader::error::{Error, ErrorKind, SyntaxError};

        let (mut r, mut p) = test_data!(r#"
            <a attr="zzz<zzz" />
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Err(ref e) =>
            *e == Error {
                kind: ErrorKind::Syntax(SyntaxError::UnexpectedOpeningTag.to_cow()),
                pos: TextPosition { row: 1, column: 24 }
            }
        );
    }

    /// A stray `&` in front of a reference is an error.
    #[test]
    fn reference_err() {
        let (mut r, mut p) = test_data!(r#"
            <a>&&amp;</a>
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Err(_));
    }

    /// Guards the in-memory size of the state enums.
    #[test]
    fn state_size() {
        assert_eq!(2, std::mem::size_of::<super::State>());
        assert_eq!(1, std::mem::size_of::<super::DoctypeSubstate>());
    }
}
800