//! Contains functions for performing XML special characters escaping.

use std::{borrow::Cow, fmt::{Display, Formatter, Result}, marker::PhantomData};

/// A table of byte-to-entity replacements used when escaping XML text.
///
/// Implementors only have to provide [`Escapes::escape`]; the two predicates
/// have default implementations derived from it.
pub(crate) trait Escapes {
    /// Returns the replacement text for byte `c`, or `None` when the byte
    /// has no replacement in this table.
    fn escape(c: u8) -> Option<&'static str>;

    /// Returns `true` if byte `c` has a replacement in this table.
    fn byte_needs_escaping(c: u8) -> bool {
        Self::escape(c).is_some()
    }

    /// Returns `true` if at least one byte of `s` has a replacement.
    fn str_needs_escaping(s: &str) -> bool {
        // Delegate to the per-byte predicate so both checks always agree.
        s.bytes().any(Self::byte_needs_escaping)
    }
}

17 pub(crate) struct Escaped<'a, E: Escapes> {
18 _escape_phantom: PhantomData<E>,
19 to_escape: &'a str,
20 }
21
22 impl<'a, E: Escapes> Escaped<'a, E> {
new(s: &'a str) -> Self23 pub fn new(s: &'a str) -> Self {
24 Escaped {
25 _escape_phantom: PhantomData,
26 to_escape: s,
27 }
28 }
29 }
30
31 impl<'a, E: Escapes> Display for Escaped<'a, E> {
fmt(&self, f: &mut Formatter<'_>) -> Result32 fn fmt(&self, f: &mut Formatter<'_>) -> Result {
33 let mut total_remaining = self.to_escape;
34
35 // find the next occurence
36 while let Some(n) = total_remaining
37 .bytes()
38 .position(E::byte_needs_escaping)
39 {
40 let (start, remaining) = total_remaining.split_at(n);
41
42 f.write_str(start)?;
43
44 // unwrap is safe because we checked is_some for position n earlier
45 let next_byte = remaining.bytes().next().unwrap();
46 let replacement = E::escape(next_byte).unwrap();
47 f.write_str(replacement)?;
48
49 total_remaining = &remaining[1..];
50 }
51
52 f.write_str(total_remaining)
53 }
54 }
55
escape_str<E: Escapes>(s: &str) -> Cow<'_, str>56 fn escape_str<E: Escapes>(s: &str) -> Cow<'_, str> {
57 if E::str_needs_escaping(s) {
58 Cow::Owned(format!("{}", Escaped::<E>::new(s)))
59 } else {
60 Cow::Borrowed(s)
61 }
62 }
63
/// Declares a unit struct `$name` and implements [`Escapes`] for it from a
/// comma-separated list of `byte => replacement` pairs.
///
/// Bytes that are not listed get no replacement. A trailing comma after the
/// last pair is accepted.
macro_rules! escapes {
    {
        $name: ident,
        $($k: expr => $v: expr),* $(,)?
    } => {
        pub(crate) struct $name;

        impl Escapes for $name {
            fn escape(c: u8) -> Option<&'static str> {
                match c {
                    // One match arm per listed pair.
                    $( $k => Some($v),)*
                    _ => None
                }
            }
        }
    };
}

82 escapes!(
83 AttributeEscapes,
84 b'<' => "<",
85 b'>' => ">",
86 b'"' => """,
87 b'\'' => "'",
88 b'&' => "&",
89 b'\n' => "
",
90 b'\r' => "
",
91 );
92
93 escapes!(
94 PcDataEscapes,
95 b'<' => "<",
96 b'&' => "&",
97 );
98
99 /// Performs escaping of common XML characters inside an attribute value.
100 ///
101 /// This function replaces several important markup characters with their
102 /// entity equivalents:
103 ///
104 /// * `<` → `<`
105 /// * `>` → `>`
106 /// * `"` → `"`
107 /// * `'` → `'`
108 /// * `&` → `&`
109 ///
110 /// The following characters are escaped so that attributes are printed on
111 /// a single line:
112 /// * `\n` → `
`
113 /// * `\r` → `
`
114 ///
115 /// The resulting string is safe to use inside XML attribute values or in PCDATA sections.
116 ///
117 /// Does not perform allocations if the given string does not contain escapable characters.
118 #[inline]
119 #[must_use]
escape_str_attribute(s: &str) -> Cow<'_, str>120 pub fn escape_str_attribute(s: &str) -> Cow<'_, str> {
121 escape_str::<AttributeEscapes>(s)
122 }
123
124 /// Performs escaping of common XML characters inside PCDATA.
125 ///
126 /// This function replaces several important markup characters with their
127 /// entity equivalents:
128 ///
129 /// * `<` → `<`
130 /// * `&` → `&`
131 ///
132 /// The resulting string is safe to use inside PCDATA sections but NOT inside attribute values.
133 ///
134 /// Does not perform allocations if the given string does not contain escapable characters.
135 #[inline]
136 #[must_use]
escape_str_pcdata(s: &str) -> Cow<'_, str>137 pub fn escape_str_pcdata(s: &str) -> Cow<'_, str> {
138 escape_str::<PcDataEscapes>(s)
139 }
140
#[cfg(test)]
mod tests {
    use super::{escape_str_attribute, escape_str_pcdata};

    // Every byte in the attribute table is replaced; other text is untouched.
    #[test]
    fn test_escape_str_attribute() {
        assert_eq!(
            escape_str_attribute("<>'\"&\n\r"),
            "&lt;&gt;&apos;&quot;&amp;&#xA;&#xD;"
        );
        assert_eq!(escape_str_attribute("no_escapes"), "no_escapes");
    }

    // Only `<` and `&` are replaced in PCDATA.
    #[test]
    fn test_escape_str_pcdata() {
        assert_eq!(escape_str_pcdata("<&"), "&lt;&amp;");
        assert_eq!(escape_str_pcdata("no_escapes"), "no_escapes");
    }

    // Multi-byte code points before an escaped byte pass through intact.
    #[test]
    fn test_escape_multibyte_code_points() {
        assert_eq!(escape_str_attribute("☃<"), "☃&lt;");
        assert_eq!(escape_str_pcdata("☃<"), "☃&lt;");
    }
}