1 //! Provides [Alphabet] and constants for alphabets commonly used in the wild. 2 3 use crate::PAD_BYTE; 4 use core::{convert, fmt}; 5 #[cfg(any(feature = "std", test))] 6 use std::error; 7 8 const ALPHABET_SIZE: usize = 64; 9 10 /// An alphabet defines the 64 ASCII characters (symbols) used for base64. 11 /// 12 /// Common alphabets are provided as constants, and custom alphabets 13 /// can be made via `from_str` or the `TryFrom<str>` implementation. 14 /// 15 /// # Examples 16 /// 17 /// Building and using a custom Alphabet: 18 /// 19 /// ``` 20 /// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap(); 21 /// 22 /// let engine = base64::engine::GeneralPurpose::new( 23 /// &custom, 24 /// base64::engine::general_purpose::PAD); 25 /// ``` 26 /// 27 /// Building a const: 28 /// 29 /// ``` 30 /// use base64::alphabet::Alphabet; 31 /// 32 /// static CUSTOM: Alphabet = { 33 /// // Result::unwrap() isn't const yet, but panic!() is OK 34 /// match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") { 35 /// Ok(x) => x, 36 /// Err(_) => panic!("creation of alphabet failed"), 37 /// } 38 /// }; 39 /// ``` 40 /// 41 /// Building lazily: 42 /// 43 /// ``` 44 /// use base64::{ 45 /// alphabet::Alphabet, 46 /// engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig}, 47 /// }; 48 /// use once_cell::sync::Lazy; 49 /// 50 /// static CUSTOM: Lazy<Alphabet> = Lazy::new(|| 51 /// Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap() 52 /// ); 53 /// ``` 54 #[derive(Clone, Debug, Eq, PartialEq)] 55 pub struct Alphabet { 56 pub(crate) symbols: [u8; ALPHABET_SIZE], 57 } 58 59 impl Alphabet { 60 /// Performs no checks so that it can be const. 61 /// Used only for known-valid strings. from_str_unchecked(alphabet: &str) -> Self62 const fn from_str_unchecked(alphabet: &str) -> Self { 63 let mut symbols = [0_u8; ALPHABET_SIZE]; 64 let source_bytes = alphabet.as_bytes(); 65 66 // a way to copy that's allowed in const fn 67 let mut index = 0; 68 while index < ALPHABET_SIZE { 69 symbols[index] = source_bytes[index]; 70 index += 1; 71 } 72 73 Self { symbols } 74 } 75 76 /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes. 77 /// 78 /// The `=` byte is not allowed as it is used for padding. new(alphabet: &str) -> Result<Self, ParseAlphabetError>79 pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> { 80 let bytes = alphabet.as_bytes(); 81 if bytes.len() != ALPHABET_SIZE { 82 return Err(ParseAlphabetError::InvalidLength); 83 } 84 85 { 86 let mut index = 0; 87 while index < ALPHABET_SIZE { 88 let byte = bytes[index]; 89 90 // must be ascii printable. 127 (DEL) is commonly considered printable 91 // for some reason but clearly unsuitable for base64. 92 if !(byte >= 32_u8 && byte <= 126_u8) { 93 return Err(ParseAlphabetError::UnprintableByte(byte)); 94 } 95 // = is assumed to be padding, so cannot be used as a symbol 96 if byte == PAD_BYTE { 97 return Err(ParseAlphabetError::ReservedByte(byte)); 98 } 99 100 // Check for duplicates while staying within what const allows. 101 // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit 102 // microsecond range. 103 104 let mut probe_index = 0; 105 while probe_index < ALPHABET_SIZE { 106 if probe_index == index { 107 probe_index += 1; 108 continue; 109 } 110 111 let probe_byte = bytes[probe_index]; 112 113 if byte == probe_byte { 114 return Err(ParseAlphabetError::DuplicatedByte(byte)); 115 } 116 117 probe_index += 1; 118 } 119 120 index += 1; 121 } 122 } 123 124 Ok(Self::from_str_unchecked(alphabet)) 125 } 126 127 /// Create a `&str` from the symbols in the `Alphabet` as_str(&self) -> &str128 pub fn as_str(&self) -> &str { 129 core::str::from_utf8(&self.symbols).unwrap() 130 } 131 } 132 133 impl convert::TryFrom<&str> for Alphabet { 134 type Error = ParseAlphabetError; 135 try_from(value: &str) -> Result<Self, Self::Error>136 fn try_from(value: &str) -> Result<Self, Self::Error> { 137 Self::new(value) 138 } 139 } 140 141 /// Possible errors when constructing an [Alphabet] from a `str`. 142 #[derive(Debug, Eq, PartialEq)] 143 pub enum ParseAlphabetError { 144 /// Alphabets must be 64 ASCII bytes 145 InvalidLength, 146 /// All bytes must be unique 147 DuplicatedByte(u8), 148 /// All bytes must be printable (in the range `[32, 126]`). 149 UnprintableByte(u8), 150 /// `=` cannot be used 151 ReservedByte(u8), 152 } 153 154 impl fmt::Display for ParseAlphabetError { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result155 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 156 match self { 157 Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"), 158 Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b), 159 Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b), 160 Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b), 161 } 162 } 163 } 164 165 #[cfg(any(feature = "std", test))] 166 impl error::Error for ParseAlphabetError {} 167 168 /// The standard alphabet (with `+` and `/`) specified in [RFC 4648][]. 169 /// 170 /// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4 171 pub const STANDARD: Alphabet = Alphabet::from_str_unchecked( 172 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", 173 ); 174 175 /// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][]. 176 /// 177 /// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5 178 pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked( 179 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", 180 ); 181 182 /// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters). 183 /// 184 /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses. 185 pub const CRYPT: Alphabet = Alphabet::from_str_unchecked( 186 "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", 187 ); 188 189 /// The bcrypt alphabet. 190 pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked( 191 "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 192 ); 193 194 /// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`). 195 /// 196 /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3) 197 pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked( 198 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,", 199 ); 200 201 /// The alphabet used in BinHex 4.0 files. 202 /// 203 /// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt) 204 pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked( 205 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr", 206 ); 207 208 #[cfg(test)] 209 mod tests { 210 use crate::alphabet::*; 211 use core::convert::TryFrom as _; 212 213 #[test] detects_duplicate_start()214 fn detects_duplicate_start() { 215 assert_eq!( 216 ParseAlphabetError::DuplicatedByte(b'A'), 217 Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") 218 .unwrap_err() 219 ); 220 } 221 222 #[test] detects_duplicate_end()223 fn detects_duplicate_end() { 224 assert_eq!( 225 ParseAlphabetError::DuplicatedByte(b'/'), 226 Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//") 227 .unwrap_err() 228 ); 229 } 230 231 #[test] detects_duplicate_middle()232 fn detects_duplicate_middle() { 233 assert_eq!( 234 ParseAlphabetError::DuplicatedByte(b'Z'), 235 Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/") 236 .unwrap_err() 237 ); 238 } 239 240 #[test] detects_length()241 fn detects_length() { 242 assert_eq!( 243 ParseAlphabetError::InvalidLength, 244 Alphabet::new( 245 "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/", 246 ) 247 .unwrap_err() 248 ); 249 } 250 251 #[test] detects_padding()252 fn detects_padding() { 253 assert_eq!( 254 ParseAlphabetError::ReservedByte(b'='), 255 Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=") 256 .unwrap_err() 257 ); 258 } 259 260 #[test] detects_unprintable()261 fn detects_unprintable() { 262 // form feed 263 assert_eq!( 264 ParseAlphabetError::UnprintableByte(0xc), 265 Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") 266 .unwrap_err() 267 ); 268 } 269 270 #[test] same_as_unchecked()271 fn same_as_unchecked() { 272 assert_eq!( 273 STANDARD, 274 Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") 275 .unwrap() 276 ); 277 } 278 279 #[test] str_same_as_input()280 fn str_same_as_input() { 281 let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 282 let a = Alphabet::try_from(alphabet).unwrap(); 283 assert_eq!(alphabet, a.as_str()) 284 } 285 } 286