1 use crate::Encoding;
2 use crate::reader::lexer::Token;
3 
4 use std::borrow::Cow;
5 use std::error;
6 use std::error::Error as _;
7 use std::fmt;
8 use std::io;
9 use std::str;
10 
11 use crate::common::{Position, TextPosition};
12 use crate::util;
13 
/// The category of failure stored inside an [`Error`].
///
/// `Clone`, `PartialEq` and `Eq` are implemented by hand further down in this file
/// instead of being derived, because the `Io` payload (`io::Error`) supports neither
/// cloning nor comparison.
#[derive(Debug)]
pub enum ErrorKind {
    /// A syntax problem; the payload is the message that `Display` prints verbatim.
    Syntax(Cow<'static, str>),
    /// Wraps an `io::Error`.
    Io(io::Error),
    /// Wraps a `str::Utf8Error`.
    Utf8(str::Utf8Error),
    /// Rendered as "Unexpected EOF".
    UnexpectedEof,
}
21 
/// Crate-internal catalogue of XML syntax errors.
///
/// Each variant is turned into its human-readable message by `SyntaxError::to_cow`, which
/// also backs the `Display` impl; tuple payloads carry the name, token or value that is
/// interpolated into that message. A test in this file asserts that the whole enum stays
/// within 24 bytes.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub(crate) enum SyntaxError {
    /// Rendered as "Cannot redefine XMLNS prefix".
    CannotRedefineXmlnsPrefix,
    /// Rendered as "Default XMLNS prefix cannot be rebound to another value".
    CannotRedefineXmlPrefix,
    /// Recursive custom entity expanded to too many chars, it could be DoS
    EntityTooBig,
    /// An empty entity was encountered.
    EmptyEntity,
    /// The stream ended and no root element was found.
    NoRootElement,
    /// A processing instruction without a name was encountered.
    ProcessingInstructionWithoutName,
    /// The stream ended while still inside the root element.
    UnbalancedRootElement,
    /// The stream ended unexpectedly.
    UnexpectedEof,
    /// `<` appeared in an attribute, where it is not allowed.
    UnexpectedOpeningTag,
    /// Missing `]]>`
    UnclosedCdata,
    /// Unexpected token inside a qualified name.
    UnexpectedQualifiedName(Token),
    /// Unexpected characters outside the root element.
    UnexpectedTokenOutsideRoot(Token),
    /// Unexpected token, with no further context in the message.
    UnexpectedToken(Token),
    /// Unexpected token inside an entity.
    UnexpectedTokenInEntity(Token),
    /// Unexpected token inside a closing tag.
    UnexpectedTokenInClosingTag(Token),
    /// Unexpected token inside an opening tag.
    UnexpectedTokenInOpeningTag(Token),
    /// A qualified name is invalid; the payload is appended to the message.
    InvalidQualifiedName(Box<str>),
    /// The prefix of the named attribute is unbound.
    UnboundAttribute(Box<str>),
    /// The prefix of the named element is unbound.
    UnboundElementPrefix(Box<str>),
    /// Unexpected closing tag; the payload is appended to the message as-is
    /// (presumably a pre-formatted expected/got description — verify against the caller).
    UnexpectedClosingTag(Box<str>),
    /// Unexpected name.
    UnexpectedName(Box<str>),
    /// Found <?xml-like PI not at the beginning of a document,
    /// which is an error, see section 2.6 of XML 1.1 spec
    ///
    /// NOTE(review): the rendered message for this variant is "Unexpected token inside
    /// processing instruction: <?{buf}{token}", while the `<?xml`-like wording is what
    /// `InvalidXmlProcessingInstruction` prints — confirm which variant the comment
    /// above is meant to describe.
    UnexpectedProcessingInstruction(Box<str>, Token),
    /// The named prefix cannot be undefined.
    CannotUndefinePrefix(Box<str>),
    /// Invalid character; the payload is printed as `U+XXXX` (upper-case hex, at least 4 digits).
    InvalidCharacterEntity(u32),
    /// The given namespace cannot be the default namespace.
    InvalidDefaultNamespace(Box<str>),
    /// The given string cannot be an element name prefix.
    InvalidNamePrefix(Box<str>),
    /// Invalid numeric entity.
    InvalidNumericEntity(Box<str>),
    /// Invalid value of the standalone declaration.
    InvalidStandaloneDeclaration(Box<str>),
    /// Invalid processing instruction: a `<?xml`-like PI is only valid at the beginning
    /// of the document.
    InvalidXmlProcessingInstruction(Box<str>),
    /// The named attribute is redefined.
    RedefinedAttribute(Box<str>),
    /// Undefined entity.
    UndefinedEntity(Box<str>),
    /// Unexpected entity.
    UnexpectedEntity(Box<str>),
    /// Unexpected name inside the XML declaration.
    UnexpectedNameInsideXml(Box<str>),
    /// Unsupported encoding.
    UnsupportedEncoding(Box<str>),
    /// In DTD
    UnknownMarkupDeclaration(Box<str>),
    /// Invalid XML version.
    UnexpectedXmlVersion(Box<str>),
    /// The declared encoding (first field) differs from the one the document uses (second field).
    ConflictingEncoding(Encoding, Encoding),
    /// Rendered as "Unexpected token '{0}' before '{1}'".
    UnexpectedTokenBefore(&'static str, char),
    /// Document has more stuff than `ParserConfig` allows
    ExceededConfiguredLimit,
}
71 
72 impl fmt::Display for SyntaxError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result73     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74         self.to_cow().fmt(f)
75     }
76 }
77 
78 impl SyntaxError {
79     #[inline(never)]
80     #[cold]
to_cow(&self) -> Cow<'static, str>81     pub(crate) fn to_cow(&self) -> Cow<'static, str> {
82         match *self {
83             Self::CannotRedefineXmlnsPrefix => "Cannot redefine XMLNS prefix".into(),
84             Self::CannotRedefineXmlPrefix => "Default XMLNS prefix cannot be rebound to another value".into(),
85             Self::EmptyEntity => "Encountered empty entity".into(),
86             Self::EntityTooBig => "Entity too big".into(),
87             Self::NoRootElement => "Unexpected end of stream: no root element found".into(),
88             Self::ProcessingInstructionWithoutName => "Encountered processing instruction without a name".into(),
89             Self::UnbalancedRootElement => "Unexpected end of stream: still inside the root element".into(),
90             Self::UnclosedCdata => "Unclosed <![CDATA[".into(),
91             Self::UnexpectedEof => "Unexpected end of stream".into(),
92             Self::UnexpectedOpeningTag => "'<' is not allowed in attributes".into(),
93             Self::CannotUndefinePrefix(ref ln) => format!("Cannot undefine prefix '{ln}'").into(),
94             Self::ConflictingEncoding(a, b) => format!("Declared encoding {a}, but uses {b}").into(),
95             Self::InvalidCharacterEntity(num) => format!("Invalid character U+{num:04X}").into(),
96             Self::InvalidDefaultNamespace(ref name) => format!( "Namespace '{name}' cannot be default").into(),
97             Self::InvalidNamePrefix(ref prefix) => format!("'{prefix}' cannot be an element name prefix").into(),
98             Self::InvalidNumericEntity(ref v) => format!("Invalid numeric entity: {v}").into(),
99             Self::InvalidQualifiedName(ref e) => format!("Qualified name is invalid: {e}").into(),
100             Self::InvalidStandaloneDeclaration(ref value) => format!("Invalid standalone declaration value: {value}").into(),
101             Self::InvalidXmlProcessingInstruction(ref name) => format!("Invalid processing instruction: <?{name} - \"<?xml\"-like PI is only valid at the beginning of the document").into(),
102             Self::RedefinedAttribute(ref name) => format!("Attribute '{name}' is redefined").into(),
103             Self::UnboundAttribute(ref name) => format!("Attribute {name} prefix is unbound").into(),
104             Self::UnboundElementPrefix(ref name) => format!("Element {name} prefix is unbound").into(),
105             Self::UndefinedEntity(ref v) => format!("Undefined entity: {v}").into(),
106             Self::UnexpectedClosingTag(ref expected_got) => format!("Unexpected closing tag: {expected_got}").into(),
107             Self::UnexpectedEntity(ref name) => format!("Unexpected entity: {name}").into(),
108             Self::UnexpectedName(ref name) => format!("Unexpected name: {name}").into(),
109             Self::UnexpectedNameInsideXml(ref name) => format!("Unexpected name inside XML declaration: {name}").into(),
110             Self::UnexpectedProcessingInstruction(ref buf, token) => format!("Unexpected token inside processing instruction: <?{buf}{token}").into(),
111             Self::UnexpectedQualifiedName(e) => format!("Unexpected token inside qualified name: {e}").into(),
112             Self::UnexpectedToken(token) => format!("Unexpected token: {token}").into(),
113             Self::UnexpectedTokenBefore(before, c) => format!("Unexpected token '{before}' before '{c}'").into(),
114             Self::UnexpectedTokenInClosingTag(token) => format!("Unexpected token inside closing tag: {token}").into(),
115             Self::UnexpectedTokenInEntity(token) => format!("Unexpected token inside entity: {token}").into(),
116             Self::UnexpectedTokenInOpeningTag(token) => format!("Unexpected token inside opening tag: {token}").into(),
117             Self::UnexpectedTokenOutsideRoot(token) => format!("Unexpected characters outside the root element: {token}").into(),
118             Self::UnexpectedXmlVersion(ref version) => format!("Invalid XML version: {version}").into(),
119             Self::UnknownMarkupDeclaration(ref v) => format!("Unknown markup declaration: {v}").into(),
120             Self::UnsupportedEncoding(ref v) => format!("Unsupported encoding: {v}").into(),
121             Self::ExceededConfiguredLimit => "This document is larger/more complex than allowed by the parser's configuration".into(),
122         }
123     }
124 }
125 
/// An XML parsing error.
///
/// Consists of a 2D position in a document and an [`ErrorKind`] describing the error.
/// The `Display` impl prints the position, a single space, and then the message of the kind.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Error {
    /// Position attached to the error; returned by the `Position` impl.
    pub(crate) pos: TextPosition,
    /// What went wrong; exposed to callers through `Error::kind`.
    pub(crate) kind: ErrorKind,
}
134 
135 impl fmt::Display for Error {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result136     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
137         use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8};
138 
139         write!(f, "{} ", self.pos)?;
140         match &self.kind {
141             Io(io_error) => io_error.fmt(f),
142             Utf8(reason) => reason.fmt(f),
143             Syntax(msg) => f.write_str(msg),
144             UnexpectedEof => f.write_str("Unexpected EOF"),
145         }
146     }
147 }
148 
149 impl Position for Error {
150     #[inline]
position(&self) -> TextPosition151     fn position(&self) -> TextPosition { self.pos }
152 }
153 
154 impl Error {
155     /// Returns a reference to a message which is contained inside this error.
156     #[cold]
157     #[doc(hidden)]
158     #[allow(deprecated)]
msg(&self) -> &str159     #[must_use] pub fn msg(&self) -> &str {
160         use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8};
161         match &self.kind {
162             Io(io_error) => io_error.description(),
163             Utf8(reason) => reason.description(),
164             Syntax(msg) => msg.as_ref(),
165             UnexpectedEof => "Unexpected EOF",
166         }
167     }
168 
169     #[must_use]
170     #[inline]
kind(&self) -> &ErrorKind171     pub fn kind(&self) -> &ErrorKind {
172         &self.kind
173     }
174 }
175 
176 impl error::Error for Error {
177     #[allow(deprecated)]
178     #[cold]
description(&self) -> &str179     fn description(&self) -> &str { self.msg() }
180 }
181 
182 impl<'a, P, M> From<(&'a P, M)> for Error where P: Position, M: Into<Cow<'static, str>> {
183     #[cold]
from(orig: (&'a P, M)) -> Self184     fn from(orig: (&'a P, M)) -> Self {
185         Error {
186             pos: orig.0.position(),
187             kind: ErrorKind::Syntax(orig.1.into()),
188         }
189     }
190 }
191 
192 impl From<util::CharReadError> for Error {
193     #[cold]
from(e: util::CharReadError) -> Self194     fn from(e: util::CharReadError) -> Self {
195         use crate::util::CharReadError::{Io, UnexpectedEof, Utf8};
196         Error {
197             pos: TextPosition::new(),
198             kind: match e {
199                 UnexpectedEof => ErrorKind::UnexpectedEof,
200                 Utf8(reason) => ErrorKind::Utf8(reason),
201                 Io(io_error) => ErrorKind::Io(io_error),
202             },
203         }
204     }
205 }
206 
207 impl From<io::Error> for Error {
208     #[cold]
from(e: io::Error) -> Self209     fn from(e: io::Error) -> Self {
210         Error {
211             pos: TextPosition::new(),
212             kind: ErrorKind::Io(e),
213         }
214     }
215 }
216 
217 impl Clone for ErrorKind {
218     #[cold]
clone(&self) -> Self219     fn clone(&self) -> Self {
220         use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8};
221         match self {
222             UnexpectedEof => UnexpectedEof,
223             Utf8(reason) => Utf8(*reason),
224             Io(io_error) => Io(io::Error::new(io_error.kind(), io_error.to_string())),
225             Syntax(msg) => Syntax(msg.clone()),
226         }
227     }
228 }
229 impl PartialEq for ErrorKind {
230     #[allow(deprecated)]
eq(&self, other: &ErrorKind) -> bool231     fn eq(&self, other: &ErrorKind) -> bool {
232         use self::ErrorKind::{Io, Syntax, UnexpectedEof, Utf8};
233         match (self, other) {
234             (UnexpectedEof, UnexpectedEof) => true,
235             (Utf8(left), Utf8(right)) => left == right,
236             (Io(left), Io(right)) =>
237                 left.kind() == right.kind() &&
238                 left.description() == right.description(),
239             (Syntax(left), Syntax(right)) =>
240                 left == right,
241 
242             (_, _) => false,
243         }
244     }
245 }
246 impl Eq for ErrorKind {}
247 
// Size guard: fails if `SyntaxError` ever grows beyond 24 bytes.
#[test]
fn err_size() {
    assert!(std::mem::size_of::<SyntaxError>() <= 24);
}
252