1 use std::{fmt, ops::Range}; 2 3 use crate::{ 4 Buffer, ParseError, 5 err::{perr, ParseErrorKind::*}, 6 escape::{scan_raw_string, unescape_string}, 7 }; 8 9 10 /// A byte string or raw byte string literal, e.g. `b"hello"` or `br#"abc"def"#`. 11 /// 12 /// See [the reference][ref] for more information. 13 /// 14 /// [ref]: https://doc.rust-lang.org/reference/tokens.html#byte-string-literals 15 #[derive(Debug, Clone, PartialEq, Eq)] 16 pub struct ByteStringLit<B: Buffer> { 17 /// The raw input. 18 raw: B, 19 20 /// The string value (with all escaped unescaped), or `None` if there were 21 /// no escapes. In the latter case, `input` is the string value. 22 value: Option<Vec<u8>>, 23 24 /// The number of hash signs in case of a raw string literal, or `None` if 25 /// it's not a raw string literal. 26 num_hashes: Option<u32>, 27 } 28 29 impl<B: Buffer> ByteStringLit<B> { 30 /// Parses the input as a (raw) byte string literal. Returns an error if the 31 /// input is invalid or represents a different kind of literal. parse(input: B) -> Result<Self, ParseError>32 pub fn parse(input: B) -> Result<Self, ParseError> { 33 if input.is_empty() { 34 return Err(perr(None, Empty)); 35 } 36 if !input.starts_with(r#"b""#) && !input.starts_with("br") { 37 return Err(perr(None, InvalidByteStringLiteralStart)); 38 } 39 40 Self::parse_impl(input) 41 } 42 43 /// Returns the string value this literal represents (where all escapes have 44 /// been turned into their respective values). value(&self) -> &[u8]45 pub fn value(&self) -> &[u8] { 46 self.value.as_deref().unwrap_or(&self.raw.as_bytes()[self.inner_range()]) 47 } 48 49 /// Like `value` but returns a potentially owned version of the value. 50 /// 51 /// The return value is either `Cow<'static, [u8]>` if `B = String`, or 52 /// `Cow<'a, [u8]>` if `B = &'a str`. into_value(self) -> B::ByteCow53 pub fn into_value(self) -> B::ByteCow { 54 let inner_range = self.inner_range(); 55 let Self { raw, value, .. } = self; 56 value.map(B::ByteCow::from).unwrap_or_else(|| raw.cut(inner_range).into_byte_cow()) 57 } 58 59 /// Returns whether this literal is a raw string literal (starting with 60 /// `r`). is_raw_byte_string(&self) -> bool61 pub fn is_raw_byte_string(&self) -> bool { 62 self.num_hashes.is_some() 63 } 64 65 /// Returns the raw input that was passed to `parse`. raw_input(&self) -> &str66 pub fn raw_input(&self) -> &str { 67 &self.raw 68 } 69 70 /// Returns the raw input that was passed to `parse`, potentially owned. into_raw_input(self) -> B71 pub fn into_raw_input(self) -> B { 72 self.raw 73 } 74 75 /// The range within `self.raw` that excludes the quotes and potential `r#`. inner_range(&self) -> Range<usize>76 fn inner_range(&self) -> Range<usize> { 77 match self.num_hashes { 78 None => 2..self.raw.len() - 1, 79 Some(n) => 2 + n as usize + 1..self.raw.len() - n as usize - 1, 80 } 81 } 82 83 /// Precondition: input has to start with either `b"` or `br`. parse_impl(input: B) -> Result<Self, ParseError>84 pub(crate) fn parse_impl(input: B) -> Result<Self, ParseError> { 85 if input.starts_with(r"br") { 86 let (value, num_hashes) = scan_raw_string::<u8>(&input, 2)?; 87 Ok(Self { 88 raw: input, 89 value: value.map(|s| s.into_bytes()), 90 num_hashes: Some(num_hashes), 91 }) 92 } else { 93 let value = unescape_string::<u8>(&input, 2)?.map(|s| s.into_bytes()); 94 Ok(Self { 95 raw: input, 96 value, 97 num_hashes: None, 98 }) 99 } 100 } 101 } 102 103 impl ByteStringLit<&str> { 104 /// Makes a copy of the underlying buffer and returns the owned version of 105 /// `Self`. into_owned(self) -> ByteStringLit<String>106 pub fn into_owned(self) -> ByteStringLit<String> { 107 ByteStringLit { 108 raw: self.raw.to_owned(), 109 value: self.value, 110 num_hashes: self.num_hashes, 111 } 112 } 113 } 114 115 impl<B: Buffer> fmt::Display for ByteStringLit<B> { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result116 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 117 f.pad(&self.raw) 118 } 119 } 120 121 122 #[cfg(test)] 123 mod tests; 124