1 //! Provides [Alphabet] and constants for alphabets commonly used in the wild.
2 
3 use crate::PAD_BYTE;
4 use core::{convert, fmt};
5 #[cfg(any(feature = "std", test))]
6 use std::error;
7 
/// Number of symbols in an [`Alphabet`]; also the exact byte length required of the
/// string passed to [`Alphabet::new`].
const ALPHABET_SIZE: usize = 64;
9 
/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
///
/// Common alphabets are provided as constants, and custom alphabets
/// can be made via [`Alphabet::new`] or the `TryFrom<&str>` implementation.
///
/// # Examples
///
/// Building and using a custom Alphabet:
///
/// ```
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
///
/// let engine = base64::engine::GeneralPurpose::new(
///     &custom,
///     base64::engine::general_purpose::PAD);
/// ```
///
/// Building a const:
///
/// ```
/// use base64::alphabet::Alphabet;
///
/// static CUSTOM: Alphabet = {
///     // Result::unwrap() isn't const yet, but panic!() is OK
///     match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
///         Ok(x) => x,
///         Err(_) => panic!("creation of alphabet failed"),
///     }
/// };
/// ```
///
/// Building lazily:
///
/// ```
/// use base64::{
///     alphabet::Alphabet,
///     engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
/// };
/// use once_cell::sync::Lazy;
///
/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
///     Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
/// );
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alphabet {
    /// The symbol bytes, stored in the same order as the string the alphabet was built from.
    pub(crate) symbols: [u8; ALPHABET_SIZE],
}
58 
59 impl Alphabet {
60     /// Performs no checks so that it can be const.
61     /// Used only for known-valid strings.
from_str_unchecked(alphabet: &str) -> Self62     const fn from_str_unchecked(alphabet: &str) -> Self {
63         let mut symbols = [0_u8; ALPHABET_SIZE];
64         let source_bytes = alphabet.as_bytes();
65 
66         // a way to copy that's allowed in const fn
67         let mut index = 0;
68         while index < ALPHABET_SIZE {
69             symbols[index] = source_bytes[index];
70             index += 1;
71         }
72 
73         Self { symbols }
74     }
75 
76     /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
77     ///
78     /// The `=` byte is not allowed as it is used for padding.
new(alphabet: &str) -> Result<Self, ParseAlphabetError>79     pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
80         let bytes = alphabet.as_bytes();
81         if bytes.len() != ALPHABET_SIZE {
82             return Err(ParseAlphabetError::InvalidLength);
83         }
84 
85         {
86             let mut index = 0;
87             while index < ALPHABET_SIZE {
88                 let byte = bytes[index];
89 
90                 // must be ascii printable. 127 (DEL) is commonly considered printable
91                 // for some reason but clearly unsuitable for base64.
92                 if !(byte >= 32_u8 && byte <= 126_u8) {
93                     return Err(ParseAlphabetError::UnprintableByte(byte));
94                 }
95                 // = is assumed to be padding, so cannot be used as a symbol
96                 if byte == PAD_BYTE {
97                     return Err(ParseAlphabetError::ReservedByte(byte));
98                 }
99 
100                 // Check for duplicates while staying within what const allows.
101                 // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
102                 // microsecond range.
103 
104                 let mut probe_index = 0;
105                 while probe_index < ALPHABET_SIZE {
106                     if probe_index == index {
107                         probe_index += 1;
108                         continue;
109                     }
110 
111                     let probe_byte = bytes[probe_index];
112 
113                     if byte == probe_byte {
114                         return Err(ParseAlphabetError::DuplicatedByte(byte));
115                     }
116 
117                     probe_index += 1;
118                 }
119 
120                 index += 1;
121             }
122         }
123 
124         Ok(Self::from_str_unchecked(alphabet))
125     }
126 
127     /// Create a `&str` from the symbols in the `Alphabet`
as_str(&self) -> &str128     pub fn as_str(&self) -> &str {
129         core::str::from_utf8(&self.symbols).unwrap()
130     }
131 }
132 
133 impl convert::TryFrom<&str> for Alphabet {
134     type Error = ParseAlphabetError;
135 
try_from(value: &str) -> Result<Self, Self::Error>136     fn try_from(value: &str) -> Result<Self, Self::Error> {
137         Self::new(value)
138     }
139 }
140 
/// Possible errors when constructing an [Alphabet] from a `str`.
///
/// Variants that carry a `u8` hold the offending byte.
#[derive(Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
    /// The input was not exactly 64 bytes long; alphabets must be 64 ASCII bytes.
    InvalidLength,
    /// The contained byte occurs more than once; all bytes must be unique.
    DuplicatedByte(u8),
    /// The contained byte is not printable; all bytes must be in the range `[32, 126]`.
    UnprintableByte(u8),
    /// The contained byte is `=`, which is reserved for padding and cannot be used as a symbol.
    ReservedByte(u8),
}
153 
154 impl fmt::Display for ParseAlphabetError {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result155     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156         match self {
157             Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
158             Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
159             Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
160             Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
161         }
162     }
163 }
164 
// `error::Error` comes from `std`, so this impl only exists when the `std` feature is
// enabled or when building tests. The default trait methods are used as-is.
#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}
167 
// The constants below are built with `from_str_unchecked`, so their literals are NOT
// validated here: each must be exactly 64 unique, printable, non-`=` ASCII bytes.

/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
///
/// Symbols are ordered `A-Z`, `a-z`, `0-9`, `+`, `/`.
///
/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
);

/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
///
/// Symbols are ordered `A-Z`, `a-z`, `0-9`, `-`, `_`.
///
/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
);

/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
///
/// Symbols are ordered `.`, `/`, `0-9`, `A-Z`, `a-z`.
///
/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
    "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
);

/// The bcrypt alphabet.
///
/// Symbols are ordered `.`, `/`, `A-Z`, `a-z`, `0-9`; note that this differs from [`CRYPT`],
/// which places the digits before the letters.
pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
    "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
);

/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
///
/// Symbols are ordered `A-Z`, `a-z`, `0-9`, `+`, `,`.
///
/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
);

/// The alphabet used in BinHex 4.0 files.
///
/// Unlike the other alphabets here, it consists largely of punctuation and skips several
/// letters and digits, so the literal below should be compared against the specification
/// rather than against a simple character range.
///
/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
    "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr",
);
207 
#[cfg(test)]
mod tests {
    //! Unit tests for alphabet validation and the `Alphabet` <-> `str` round trip.

    use crate::alphabet::*;
    use core::convert::TryFrom as _;

    #[test]
    fn detects_duplicate_start() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_end() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'/'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_middle() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_length() {
        // too long (and also contains a duplicate): the length check must win
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new(
                "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
            )
            .unwrap_err()
        );
    }

    #[test]
    fn detects_length_too_short() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new("ABC").unwrap_err()
        );
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new("").unwrap_err()
        );
    }

    #[test]
    fn detects_padding() {
        assert_eq!(
            ParseAlphabetError::ReservedByte(b'='),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_unprintable() {
        // form feed
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc),
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_del() {
        // 127 (DEL) sits just above the accepted range and must be rejected
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0x7f),
            Alphabet::new("\x7fBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_non_ascii() {
        // 62 ASCII bytes followed by U+00E9, which is the two UTF-8 bytes 0xc3 0xa9:
        // 64 bytes total, so the length check passes and the first non-ASCII byte is reported
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc3),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\u{e9}")
                .unwrap_err()
        );
    }

    #[test]
    fn accepts_printable_range_endpoints() {
        // space (32) and tilde (126) are the inclusive bounds of the accepted range
        let alphabet = " ~CDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let a = Alphabet::new(alphabet).unwrap();
        assert_eq!(alphabet, a.as_str());
    }

    #[test]
    fn same_as_unchecked() {
        assert_eq!(
            STANDARD,
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap()
        );
    }

    #[test]
    fn str_same_as_input() {
        let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let a = Alphabet::try_from(alphabet).unwrap();
        assert_eq!(alphabet, a.as_str());
    }
}
286