1 use std::fmt;
2 
3 use crate::{
4     Buffer, ParseError,
5     err::{perr, ParseErrorKind::*},
6     parse::{first_byte_or_empty, hex_digit_value},
7 };
8 
9 
/// An integer literal, e.g. `27`, `0x7F`, `0b101010u8` or `5_000_000i64`.
///
/// An integer literal consists of an optional base prefix (`0b`, `0o`, `0x`),
/// the main part (digits and underscores), and an optional type suffix
/// (e.g. `u64` or `i8`). See [the reference][ref] for more information.
///
/// Note that integer literals are always positive: the grammar does not contain
/// the minus sign at all. The minus sign is just the unary negate operator,
/// not part of the literal. This matters for cases like `- 128i8`: here, the
/// literal itself would overflow the specified type (`i8` cannot represent
/// 128). That's why in rustc, the literal overflow check is performed as a
/// lint after parsing, not during the lexing stage. Similarly,
/// [`IntegerLit::parse`] does not perform an overflow check.
///
/// [ref]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct IntegerLit<B: Buffer> {
    // The complete input that was parsed: prefix, main part and type suffix.
    raw: B,
    // Byte index into `raw` of the first byte of the main number part (i.e.
    // the first byte after the base prefix; `0` if there is no prefix).
    start_main_part: usize,
    // Byte index into `raw` of the first byte that is no longer part of the
    // main number part (i.e. where the type suffix starts, or `raw.len()`).
    end_main_part: usize,
    // Base indicated by the prefix (decimal if there is no prefix).
    base: IntegerBase,
    // Parsed type suffix; `None` if the literal has no suffix.
    type_suffix: Option<IntegerType>,
}
36 
/// The bases in which an integer can be specified.
///
/// The base is selected by the literal's prefix; a literal without prefix is
/// decimal.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerBase {
    /// Base 2, indicated by the prefix `0b`.
    Binary,
    /// Base 8, indicated by the prefix `0o`.
    Octal,
    /// Base 10, used when the literal has no base prefix.
    Decimal,
    /// Base 16, indicated by the prefix `0x`.
    Hexadecimal,
}
45 
/// All possible integer type suffixes.
///
/// Each variant corresponds to the suffix spelled like the primitive type of
/// the same name, e.g. [`IntegerType::U8`] for the suffix `u8`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerType {
    /// The suffix `u8`.
    U8,
    /// The suffix `u16`.
    U16,
    /// The suffix `u32`.
    U32,
    /// The suffix `u64`.
    U64,
    /// The suffix `u128`.
    U128,
    /// The suffix `usize`.
    Usize,
    /// The suffix `i8`.
    I8,
    /// The suffix `i16`.
    I16,
    /// The suffix `i32`.
    I32,
    /// The suffix `i64`.
    I64,
    /// The suffix `i128`.
    I128,
    /// The suffix `isize`.
    Isize,
}
62 
63 impl IntegerBase {
64     /// Returns the literal prefix that indicates this base, i.e. `"0b"`,
65     /// `"0o"`, `""` and `"0x"`.
prefix(self) -> &'static str66     pub fn prefix(self) -> &'static str {
67         match self {
68             Self::Binary => "0b",
69             Self::Octal => "0o",
70             Self::Decimal => "",
71             Self::Hexadecimal => "0x",
72         }
73     }
74 }
75 
76 impl<B: Buffer> IntegerLit<B> {
77     /// Parses the input as an integer literal. Returns an error if the input is
78     /// invalid or represents a different kind of literal.
parse(input: B) -> Result<Self, ParseError>79     pub fn parse(input: B) -> Result<Self, ParseError> {
80         match first_byte_or_empty(&input)? {
81             digit @ b'0'..=b'9' => {
82                 // TODO: simplify once RFC 2528 is stabilized
83                 let IntegerLit {
84                     start_main_part,
85                     end_main_part,
86                     base,
87                     type_suffix,
88                     ..
89                 } =  parse_impl(&input, digit)?;
90 
91                 Ok(Self {
92                     raw: input,
93                     start_main_part,
94                     end_main_part,
95                     base,
96                     type_suffix,
97                 })
98             },
99             _ => Err(perr(0, DoesNotStartWithDigit)),
100         }
101     }
102 
103     /// Performs the actual string to int conversion to obtain the integer
104     /// value. The optional type suffix of the literal **is ignored by this
105     /// method**. This means `N` does not need to match the type suffix!
106     ///
107     /// Returns `None` if the literal overflows `N`.
value<N: FromIntegerLiteral>(&self) -> Option<N>108     pub fn value<N: FromIntegerLiteral>(&self) -> Option<N> {
109         let base = match self.base {
110             IntegerBase::Binary => N::from_small_number(2),
111             IntegerBase::Octal => N::from_small_number(8),
112             IntegerBase::Decimal => N::from_small_number(10),
113             IntegerBase::Hexadecimal => N::from_small_number(16),
114         };
115 
116         let mut acc = N::from_small_number(0);
117         for digit in self.raw_main_part().bytes() {
118             if digit == b'_' {
119                 continue;
120             }
121 
122             // We don't actually need the base here: we already know this main
123             // part only contains digits valid for the specified base.
124             let digit = hex_digit_value(digit)
125                 .unwrap_or_else(|| unreachable!("bug: integer main part contains non-digit"));
126 
127             acc = acc.checked_mul(base)?;
128             acc = acc.checked_add(N::from_small_number(digit))?;
129         }
130 
131         Some(acc)
132     }
133 
134     /// The base of this integer literal.
base(&self) -> IntegerBase135     pub fn base(&self) -> IntegerBase {
136         self.base
137     }
138 
139     /// The main part containing the digits and potentially `_`. Do not try to
140     /// parse this directly as that would ignore the base!
raw_main_part(&self) -> &str141     pub fn raw_main_part(&self) -> &str {
142         &(*self.raw)[self.start_main_part..self.end_main_part]
143     }
144 
145     /// The type suffix, if specified.
type_suffix(&self) -> Option<IntegerType>146     pub fn type_suffix(&self) -> Option<IntegerType> {
147         self.type_suffix
148     }
149 
150     /// Returns the raw input that was passed to `parse`.
raw_input(&self) -> &str151     pub fn raw_input(&self) -> &str {
152         &self.raw
153     }
154 
155     /// Returns the raw input that was passed to `parse`, potentially owned.
into_raw_input(self) -> B156     pub fn into_raw_input(self) -> B {
157         self.raw
158     }
159 }
160 
161 impl IntegerLit<&str> {
162     /// Makes a copy of the underlying buffer and returns the owned version of
163     /// `Self`.
to_owned(&self) -> IntegerLit<String>164     pub fn to_owned(&self) -> IntegerLit<String> {
165         IntegerLit {
166             raw: self.raw.to_owned(),
167             start_main_part: self.start_main_part,
168             end_main_part: self.end_main_part,
169             base: self.base,
170             type_suffix: self.type_suffix,
171         }
172     }
173 }
174 
175 impl<B: Buffer> fmt::Display for IntegerLit<B> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result176     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
177         write!(f, "{}", &*self.raw)
178     }
179 }
180 
/// Integer literal types. *Implementation detail*.
///
/// Implemented for all primitive integer types (`u8` to `u128`, `usize`, `i8`
/// to `i128` and `isize`). This trait is sealed and cannot be implemented
/// outside of this crate. The trait's methods are implementation detail of
/// this library and are not subject to semver.
pub trait FromIntegerLiteral: self::sealed::Sealed + Copy {
    /// Creates itself from the given number. `n` is guaranteed to be `<= 16`.
    #[doc(hidden)]
    fn from_small_number(n: u8) -> Self;

    /// Adds `rhs` to `self`, returning `None` if the result overflows.
    #[doc(hidden)]
    fn checked_add(self, rhs: Self) -> Option<Self>;

    /// Multiplies `self` with `rhs`, returning `None` if the result overflows.
    #[doc(hidden)]
    fn checked_mul(self, rhs: Self) -> Option<Self>;

    /// Returns the [`IntegerType`] variant corresponding to `Self`.
    #[doc(hidden)]
    fn ty() -> IntegerType;
}
200 
201 macro_rules! impl_from_int_literal {
202     ($( $ty:ty => $variant:ident ,)* ) => {
203         $(
204             impl self::sealed::Sealed for $ty {}
205             impl FromIntegerLiteral for $ty {
206                 fn from_small_number(n: u8) -> Self {
207                     n as Self
208                 }
209                 fn checked_add(self, rhs: Self) -> Option<Self> {
210                     self.checked_add(rhs)
211                 }
212                 fn checked_mul(self, rhs: Self) -> Option<Self> {
213                     self.checked_mul(rhs)
214                 }
215                 fn ty() -> IntegerType {
216                     IntegerType::$variant
217                 }
218             }
219         )*
220     };
221 }
222 
223 impl_from_int_literal!(
224     u8 => U8, u16 => U16, u32 => U32, u64 => U64, u128 => U128, usize => Usize,
225     i8 => I8, i16 => I16, i32 => I32, i64 => I64, i128 => I128, isize => Isize,
226 );
227 
// Private module holding the supertrait of `FromIntegerLiteral`. As this
// module is not public, code outside of this crate cannot name `Sealed` and
// thus cannot implement `FromIntegerLiteral` for its own types.
mod sealed {
    pub trait Sealed {}
}
231 
232 /// Precondition: first byte of string has to be in `b'0'..=b'9'`.
233 #[inline(never)]
parse_impl(input: &str, first: u8) -> Result<IntegerLit<&str>, ParseError>234 pub(crate) fn parse_impl(input: &str, first: u8) -> Result<IntegerLit<&str>, ParseError> {
235     // Figure out base and strip prefix base, if it exists.
236     let (end_prefix, base) = match (first, input.as_bytes().get(1)) {
237         (b'0', Some(b'b')) => (2, IntegerBase::Binary),
238         (b'0', Some(b'o')) => (2, IntegerBase::Octal),
239         (b'0', Some(b'x')) => (2, IntegerBase::Hexadecimal),
240 
241         // Everything else is treated as decimal. Several cases are caught
242         // by this:
243         // - "123"
244         // - "0"
245         // - "0u8"
246         // - "0r" -> this will error later
247         _ => (0, IntegerBase::Decimal),
248     };
249     let without_prefix = &input[end_prefix..];
250 
251     // Find end of main part.
252     let end_main = without_prefix.bytes()
253             .position(|b| !matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_'))
254             .unwrap_or(without_prefix.len());
255     let (main_part, type_suffix) = without_prefix.split_at(end_main);
256 
257     // Check for invalid digits and make sure there is at least one valid digit.
258     let invalid_digit_pos = match base {
259         IntegerBase::Binary => main_part.bytes()
260             .position(|b| !matches!(b, b'0' | b'1' | b'_')),
261         IntegerBase::Octal => main_part.bytes()
262             .position(|b| !matches!(b, b'0'..=b'7' | b'_')),
263         IntegerBase::Decimal => main_part.bytes()
264             .position(|b| !matches!(b, b'0'..=b'9' | b'_')),
265         IntegerBase::Hexadecimal => None,
266     };
267 
268     if let Some(pos) = invalid_digit_pos {
269         return Err(perr(end_prefix + pos, InvalidDigit));
270     }
271 
272     if main_part.bytes().filter(|&b| b != b'_').count() == 0 {
273         return Err(perr(end_prefix..end_prefix + end_main, NoDigits));
274     }
275 
276 
277     // Parse type suffix
278     let type_suffix = match type_suffix {
279         "" => None,
280         "u8" => Some(IntegerType::U8),
281         "u16" => Some(IntegerType::U16),
282         "u32" => Some(IntegerType::U32),
283         "u64" => Some(IntegerType::U64),
284         "u128" => Some(IntegerType::U128),
285         "usize" => Some(IntegerType::Usize),
286         "i8" => Some(IntegerType::I8),
287         "i16" => Some(IntegerType::I16),
288         "i32" => Some(IntegerType::I32),
289         "i64" => Some(IntegerType::I64),
290         "i128" => Some(IntegerType::I128),
291         "isize" => Some(IntegerType::Isize),
292         _ => return Err(perr(end_main + end_prefix..input.len(), InvalidIntegerTypeSuffix)),
293     };
294 
295     Ok(IntegerLit {
296         raw: input,
297         start_main_part: end_prefix,
298         end_main_part: end_main + end_prefix,
299         base,
300         type_suffix,
301     })
302 }
303 
304 
// The unit tests of this module live in the separate `tests` submodule, which
// is only compiled for test builds.
#[cfg(test)]
mod tests;
307