//! Contains functions for escaping XML special characters.
2 
3 use std::{borrow::Cow, marker::PhantomData, fmt::{Display, Result, Formatter}};
4 
/// A table of byte-level escapes.
///
/// Implementors supply `escape`; the other methods are derived from it.
///
/// Only ASCII bytes should be given a replacement: the `Display` impl of `Escaped`
/// splits the input `str` at the index of every escaped byte and then skips exactly
/// one byte, so that index must fall on a char boundary.
pub(crate) trait Escapes {
    /// Returns the replacement text for byte `c`, or `None` if `c` is written unchanged.
    fn escape(c: u8) -> Option<&'static str>;

    /// Returns `true` if `escape` yields a replacement for byte `c`.
    fn byte_needs_escaping(c: u8) -> bool {
        Self::escape(c).is_some()
    }

    /// Returns `true` if at least one byte of `s` needs escaping.
    fn str_needs_escaping(s: &str) -> bool {
        // Go through the per-byte predicate so that an implementor's override of
        // `byte_needs_escaping` is honoured here as well.
        s.bytes().any(Self::byte_needs_escaping)
    }
}
16 
/// A borrowed string paired with the escape table `E` to apply when it is formatted.
///
/// The `E: Escapes` requirement is stated on the `impl` blocks that need it rather
/// than on the type itself.
pub(crate) struct Escaped<'a, E> {
    // `E` is only used at the type level, so it is carried as a zero-sized marker.
    _escape_phantom: PhantomData<E>,
    // The text that has not been escaped yet.
    to_escape: &'a str,
}
21 
22 impl<'a, E: Escapes> Escaped<'a, E> {
new(s: &'a str) -> Self23     pub fn new(s: &'a str) -> Self {
24         Escaped {
25             _escape_phantom: PhantomData,
26             to_escape: s,
27         }
28     }
29 }
30 
31 impl<'a, E: Escapes> Display for Escaped<'a, E> {
fmt(&self, f: &mut Formatter<'_>) -> Result32     fn fmt(&self, f: &mut Formatter<'_>) -> Result {
33         let mut total_remaining = self.to_escape;
34 
35         // find the next occurence
36         while let Some(n) = total_remaining
37             .bytes()
38             .position(E::byte_needs_escaping)
39         {
40             let (start, remaining) = total_remaining.split_at(n);
41 
42             f.write_str(start)?;
43 
44             // unwrap is safe because we checked is_some for position n earlier
45             let next_byte = remaining.bytes().next().unwrap();
46             let replacement = E::escape(next_byte).unwrap();
47             f.write_str(replacement)?;
48 
49             total_remaining = &remaining[1..];
50         }
51 
52         f.write_str(total_remaining)
53     }
54 }
55 
escape_str<E: Escapes>(s: &str) -> Cow<'_, str>56 fn escape_str<E: Escapes>(s: &str) -> Cow<'_, str> {
57     if E::str_needs_escaping(s) {
58         Cow::Owned(format!("{}", Escaped::<E>::new(s)))
59     } else {
60         Cow::Borrowed(s)
61     }
62 }
63 
/// Defines a unit struct `$name` and implements `Escapes` for it from a list of
/// `byte pattern => replacement` pairs.
///
/// Each key is used as a `match` pattern on the input byte, so it is captured as a
/// pattern (a byte literal such as `b'<'`, or a range such as `b'0'..=b'9'`).
/// Bytes that match none of the keys are reported as not needing an escape.
macro_rules! escapes {
    {
        $name: ident,
        $($k: pat => $v: expr),* $(,)?
    } => {
        pub(crate) struct $name;

        impl Escapes for $name {
            fn escape(c: u8) -> Option<&'static str> {
                match c {
                    $( $k => Some($v),)*
                    _ => None
                }
            }
        }
    };
}
81 
82 escapes!(
83     AttributeEscapes,
84     b'<'  => "&lt;",
85     b'>'  => "&gt;",
86     b'"'  => "&quot;",
87     b'\'' => "&apos;",
88     b'&'  => "&amp;",
89     b'\n' => "&#xA;",
90     b'\r' => "&#xD;",
91 );
92 
93 escapes!(
94     PcDataEscapes,
95     b'<' => "&lt;",
96     b'&' => "&amp;",
97 );
98 
99 /// Performs escaping of common XML characters inside an attribute value.
100 ///
101 /// This function replaces several important markup characters with their
102 /// entity equivalents:
103 ///
104 /// * `<` → `&lt;`
105 /// * `>` → `&gt;`
106 /// * `"` → `&quot;`
107 /// * `'` → `&apos;`
108 /// * `&` → `&amp;`
109 ///
110 /// The following characters are escaped so that attributes are printed on
111 /// a single line:
112 /// * `\n` → `&#xA;`
113 /// * `\r` → `&#xD;`
114 ///
115 /// The resulting string is safe to use inside XML attribute values or in PCDATA sections.
116 ///
117 /// Does not perform allocations if the given string does not contain escapable characters.
118 #[inline]
119 #[must_use]
escape_str_attribute(s: &str) -> Cow<'_, str>120 pub fn escape_str_attribute(s: &str) -> Cow<'_, str> {
121     escape_str::<AttributeEscapes>(s)
122 }
123 
124 /// Performs escaping of common XML characters inside PCDATA.
125 ///
126 /// This function replaces several important markup characters with their
127 /// entity equivalents:
128 ///
129 /// * `<` → `&lt;`
130 /// * `&` → `&amp;`
131 ///
132 /// The resulting string is safe to use inside PCDATA sections but NOT inside attribute values.
133 ///
134 /// Does not perform allocations if the given string does not contain escapable characters.
135 #[inline]
136 #[must_use]
escape_str_pcdata(s: &str) -> Cow<'_, str>137 pub fn escape_str_pcdata(s: &str) -> Cow<'_, str> {
138     escape_str::<PcDataEscapes>(s)
139 }
140 
#[cfg(test)]
mod tests {
    use super::{escape_str_attribute, escape_str_pcdata};
    use std::borrow::Cow;

    /// Every byte of the attribute table is replaced; other text passes through.
    #[test]
    fn test_escape_str_attribute() {
        assert_eq!(escape_str_attribute("<>'\"&\n\r"), "&lt;&gt;&apos;&quot;&amp;&#xA;&#xD;");
        assert_eq!(escape_str_attribute("no_escapes"), "no_escapes");
    }

    /// Only `<` and `&` are replaced in PCDATA.
    #[test]
    fn test_escape_str_pcdata() {
        assert_eq!(escape_str_pcdata("<&"), "&lt;&amp;");
        assert_eq!(escape_str_pcdata("no_escapes"), "no_escapes");
    }

    /// Multi-byte code points before an escaped byte must survive intact.
    #[test]
    fn test_escape_multibyte_code_points() {
        assert_eq!(escape_str_attribute("☃<"), "☃&lt;");
        assert_eq!(escape_str_pcdata("☃<"), "☃&lt;");
    }

    /// Text before, between and after escaped bytes is copied through verbatim.
    #[test]
    fn test_escape_preserves_surrounding_text() {
        assert_eq!(escape_str_attribute("a<b & \"c\""), "a&lt;b &amp; &quot;c&quot;");
        assert_eq!(escape_str_pcdata("a<b & \"c\">"), "a&lt;b &amp; \"c\">");
    }

    /// The empty string is returned as-is.
    #[test]
    fn test_escape_empty_input() {
        assert_eq!(escape_str_attribute(""), "");
        assert_eq!(escape_str_pcdata(""), "");
    }

    /// Inputs without escapable characters are borrowed, not copied.
    #[test]
    fn test_escape_borrows_when_unchanged() {
        assert!(matches!(escape_str_attribute("no_escapes"), Cow::Borrowed(_)));
        assert!(matches!(escape_str_pcdata("'\">\n"), Cow::Borrowed(_)));
        assert!(matches!(escape_str_attribute("'"), Cow::Owned(_)));
        assert!(matches!(escape_str_pcdata("<"), Cow::Owned(_)));
    }
}
163