lexer.rs (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b) - OpenGrok cross reference for /aosp_15_r20/external/cronet/third_party/rust/chromium_crates_io/vendor/fend-core-1.4.6/src/lexer.rs

use crate::date::Date;
use crate::error::{FendError, Interrupt};
use crate::ident::Ident;
use crate::num::{Base, Number};
use crate::result::FResult;
use std::{borrow, convert, fmt};

#[derive(Clone, Debug)]
pub(crate) enum Token {
	Num(Number),
	Ident(Ident),
	Symbol(Symbol),
	StringLiteral(borrow::Cow<'static, str>),
	Date(Date),
}

#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub(crate) enum Symbol {
	OpenParens,
	CloseParens,
	Add,
	Sub,
	Mul,
	Div,
	Mod,
	Pow,
	BitwiseAnd,
	BitwiseOr,
	BitwiseXor,
	UnitConversion,
	Factorial,
	Fn,
	Backslash,
	Dot,
	Of,
	ShiftLeft,
	ShiftRight,
	Semicolon,
	Equals,       // used for assignment
	DoubleEquals, // used for equality
	NotEquals,
	Combination,
	Permutation,
}

impl fmt::Display for Symbol {
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
		let s = match self {
			Self::OpenParens => "(",
			Self::CloseParens => ")",
			Self::Add => "+",
			Self::Sub => "-",
			Self::Mul => "*",
			Self::Div => "/",
			Self::Mod => "mod",
			Self::Pow => "^",
			Self::BitwiseAnd => "&",
			Self::BitwiseOr => "|",
			Self::BitwiseXor => " xor ",
			Self::UnitConversion => "to",
			Self::Factorial => "!",
			Self::Fn => ":",
			Self::Backslash => "\"",
			Self::Dot => ".",
			Self::Of => "of",
			Self::ShiftLeft => "<<",
			Self::ShiftRight => ">>",
			Self::Semicolon => ";",
			Self::Equals => "=",
			Self::DoubleEquals => "==",
			Self::NotEquals => "!=",
			Self::Combination => "nCr",
			Self::Permutation => "nPr",
		};
		write!(f, "{s}")?;
		Ok(())
	}
}

fn parse_char(input: &str) -> FResult<(char, &str)> {
	input
		.chars()
		.next()
		.map_or(Err(FendError::ExpectedACharacter), |ch| {
			let (_, b) = input.split_at(ch.len_utf8());
			Ok((ch, b))
		})
}

fn parse_ascii_digit(input: &str, base: Base) -> FResult<(u8, &str)> {
	let (ch, input) = parse_char(input)?;
	let possible_digit = ch.to_digit(base.base_as_u8().into());
	possible_digit
		.and_then(|d| <u32 as convert::TryInto<u8>>::try_into(d).ok())
		.map_or(Err(FendError::ExpectedADigit(ch)), |digit| {
			Ok((digit, input))
		})
}

fn parse_fixed_char(input: &str, ch: char) -> FResult<((), &str)> {
	let (parsed_ch, input) = parse_char(input)?;
	if parsed_ch == ch {
		Ok(((), input))
	} else {
		Err(FendError::ExpectedChar(ch, parsed_ch))
	}
}

fn parse_digit_separator(input: &str) -> FResult<((), &str)> {
	let (parsed_ch, input) = parse_char(input)?;
	if parsed_ch == '_' || parsed_ch == ',' {
		Ok(((), input))
	} else {
		Err(FendError::ExpectedDigitSeparator(parsed_ch))
	}
}

// Parses a plain integer with no whitespace and no base prefix.
// Leading minus sign is not allowed.
fn parse_integer<'a, E: From<FendError>>(
	input: &'a str,
	allow_digit_separator: bool,
	base: Base,
	process_digit: &mut impl FnMut(u8) -> Result<(), E>,
) -> Result<((), &'a str), E> {
	let (digit, mut input) = parse_ascii_digit(input, base)?;
	process_digit(digit)?;
	let mut parsed_digit_separator;
	loop {
		if let Ok(((), remaining)) = parse_digit_separator(input) {
			input = remaining;
			parsed_digit_separator = true;
			if !allow_digit_separator {
				return Err(FendError::DigitSeparatorsNotAllowed.into());
			}
		} else {
			parsed_digit_separator = false;
		}
		match parse_ascii_digit(input, base) {
			Err(_) => {
				if parsed_digit_separator {
					return Err(FendError::DigitSeparatorsOnlyBetweenDigits.into());
				}
				break;
			}
			Ok((digit, next_input)) => {
				process_digit(digit)?;
				input = next_input;
			}
		}
	}
	Ok(((), input))
}

fn parse_base_prefix(input: &str) -> FResult<(Base, &str)> {
	// 0x -> 16
	// 0o -> 8
	// 0b -> 2
	// base# -> base (where 2 <= base <= 36)
	// case-sensitive, no whitespace allowed
	if let Ok(((), input)) = parse_fixed_char(input, '0') {
		let (ch, input) = parse_char(input)?;
		Ok((Base::from_zero_based_prefix_char(ch)?, input))
	} else {
		let mut custom_base: u8 = 0;
		let ((), input) = parse_integer(input, false, Base::default(), &mut |digit| -> Result<
			(),
			FendError,
		> {
			let error = FendError::BaseTooLarge;
			if custom_base > 3 {
				return Err(error);
			}
			custom_base = 10 * custom_base + digit;
			if custom_base > 36 {
				return Err(error);
			}
			Ok(())
		})?;
		if custom_base < 2 {
			return Err(FendError::BaseTooSmall);
		}
		let ((), input) = parse_fixed_char(input, '#')?;
		Ok((Base::from_custom_base(custom_base)?, input))
	}
}

// Try and parse recurring digits in parentheses.
// '1.0(0)' -> success
// '1.0(a)', '1.0( 0)' -> Ok, but not parsed
// '1.0(3a)' -> FendError

fn parse_recurring_digits<'a, I: Interrupt>(
	input: &'a str,
	number: &mut Number,
	num_nonrec_digits: usize,
	base: Base,
	int: &I,
) -> FResult<((), &'a str)> {
	let original_input = input;
	// If there's no '(': return Ok but don't parse anything
	if parse_fixed_char(input, '(').is_err() {
		return Ok(((), original_input));
	}
	let ((), input) = parse_fixed_char(input, '(')?;
	if parse_ascii_digit(input, base).is_err() {
		// return Ok if there were no digits
		return Ok(((), original_input));
	}
	let mut recurring_number_num = Number::from(0);
	let mut recurring_number_den = Number::from(1);
	let base_as_u64 = u64::from(base.base_as_u8());
	let ((), input) = parse_integer(input, true, base, &mut |digit| -> FResult<()> {
		let digit_as_u64 = u64::from(digit);
		recurring_number_num = recurring_number_num
			.clone()
			.mul(base_as_u64.into(), int)?
			.add(digit_as_u64.into(), int)?;
		recurring_number_den = recurring_number_den.clone().mul(base_as_u64.into(), int)?;
		Ok(())
	})?;
	recurring_number_den = recurring_number_den.clone().sub(1.into(), int)?;
	for _ in 0..num_nonrec_digits {
		recurring_number_den = recurring_number_den.clone().mul(base_as_u64.into(), int)?;
	}
	*number = number
		.clone()
		.add(recurring_number_num.div(recurring_number_den, int)?, int)?;
	// return an error if there are any other characters before the closing parentheses
	let ((), input) = parse_fixed_char(input, ')')?;
	Ok(((), input))
}

#[allow(clippy::too_many_lines)]
fn parse_basic_number<'a, I: Interrupt>(
	mut input: &'a str,
	base: Base,
	int: &I,
) -> FResult<(Number, &'a str)> {
	let mut is_dice_with_no_count = false;
	if input.starts_with('d') && base.base_as_u8() <= 10 {
		let mut chars = input.chars();
		chars.next();
		let following = chars.next();
		if following.is_some() && following.unwrap().is_ascii_digit() {
			is_dice_with_no_count = true;
		}
	}

	// parse integer component
	let mut res = Number::zero_with_base(base);
	let base_as_u64 = u64::from(base.base_as_u8());
	let mut is_integer = true;

	if parse_fixed_char(input, '.').is_err() && !is_dice_with_no_count {
		let ((), remaining) = parse_integer(input, true, base, &mut |digit| -> FResult<()> {
			res = res
				.clone()
				.mul(base_as_u64.into(), int)?
				.add(u64::from(digit).into(), int)?;
			Ok(())
		})?;
		input = remaining;
	}

	// parse decimal point and at least one digit
	if let Ok(((), remaining)) = parse_fixed_char(input, '.') {
		is_integer = false;
		let mut num_nonrec_digits = 0;
		let mut numerator = Number::zero_with_base(base);
		let mut denominator = Number::zero_with_base(base).add(1.into(), int)?;
		if parse_fixed_char(remaining, '(').is_err() {
			let ((), remaining) = parse_integer(remaining, true, base, &mut |digit| -> Result<
				(),
				FendError,
			> {
				numerator = numerator
					.clone()
					.mul(base_as_u64.into(), int)?
					.add(u64::from(digit).into(), int)?;
				denominator = denominator.clone().mul(base_as_u64.into(), int)?;
				num_nonrec_digits += 1;
				Ok(())
			})?;
			input = remaining;
		} else {
			input = remaining;
		}
		res = res.add(numerator.div(denominator, int)?, int)?;

		// try parsing recurring decimals
		let ((), remaining) =
			parse_recurring_digits(input, &mut res, num_nonrec_digits, base, int)?;
		input = remaining;
	}

	// parse dice syntax
	if is_integer && base.base_as_u8() <= 10 {
		if let Ok(((), remaining)) = parse_fixed_char(input, 'd') {
			// peek to see if there's a digit immediately after the `d`:
			if parse_ascii_digit(remaining, base).is_ok() {
				let dice_count: u32 = if is_dice_with_no_count {
					1
				} else {
					convert::TryFrom::try_from(res.try_as_usize(int)?)
						.map_err(|_| FendError::InvalidDiceSyntax)?
				};
				let mut face_count = 0_u32;
				let ((), remaining2) =
					parse_integer(remaining, false, base, &mut |digit| -> FResult<()> {
						face_count = face_count
							.checked_mul(base.base_as_u8().into())
							.ok_or(FendError::InvalidDiceSyntax)?
							.checked_add(digit.into())
							.ok_or(FendError::InvalidDiceSyntax)?;
						Ok(())
					})?;
				if dice_count == 0 || face_count == 0 {
					return Err(FendError::InvalidDiceSyntax);
				}
				res = Number::new_die(dice_count, face_count, int)?;
				res = res.with_base(base);
				return Ok((res, remaining2));
			}
		}
	}

	// parse optional exponent, but only for base 10 and below
	if base.base_as_u8() <= 10 {
		let (parsed_exponent, remaining) = if let Ok(((), remaining)) = parse_fixed_char(input, 'e')
		{
			(true, remaining)
		} else if let Ok(((), remaining)) = parse_fixed_char(input, 'E') {
			(true, remaining)
		} else {
			(false, "")
		};

		if parsed_exponent {
			// peek ahead to the next char to determine if we should continue parsing an exponent
			let abort = if let Ok((ch, _)) = parse_char(remaining) {
				// abort if there is a non-digit non-plus or minus char after 'e',
				// such as '(', '/' or 'a'. Note that this is only parsed in base <= 10,
				// so letters can never be digits. We do want to include all digits even for
				// base < 10 though to avoid 6#3e9 from being valid.
				!(ch.is_ascii_digit() || ch == '+' || ch == '-')
			} else {
				// if there is no more input after the 'e', abort
				true
			};
			if !abort {
				input = remaining;
				let mut negative_exponent = false;
				if let Ok(((), remaining)) = parse_fixed_char(input, '-') {
					negative_exponent = true;
					input = remaining;
				} else if let Ok(((), remaining)) = parse_fixed_char(input, '+') {
					input = remaining;
				}
				let mut exp = Number::zero_with_base(base);
				let base_num = Number::from(u64::from(base.base_as_u8()));
				let ((), remaining2) =
					parse_integer(input, true, base, &mut |digit| -> FResult<()> {
						exp = (exp.clone().mul(base_num.clone(), int)?)
							.add(u64::from(digit).into(), int)?;
						Ok(())
					})?;
				if negative_exponent {
					exp = -exp;
				}
				let base_as_number: Number = base_as_u64.into();
				res = res.mul(base_as_number.pow(exp, int)?, int)?;
				input = remaining2;
			}
		}
	}

	// parse exponentiation via unicode superscript digits
	if base.base_as_u8() <= 10
		&& input
			.chars()
			.next()
			.is_some_and(|c| SUPERSCRIPT_DIGITS.contains(&c))
	{
		if let Ok((mut power_digits, remaining)) = parse_power_number(input) {
			let mut exponent = Number::zero_with_base(base);

			power_digits.reverse();

			for (i, digit) in power_digits.into_iter().enumerate() {
				let num = digit * 10u64.pow(u32::try_from(i).unwrap());
				exponent = exponent.add(num.into(), int)?;
			}

			res = res.pow(exponent, int)?;
			input = remaining;
		}
	}

	Ok((res, input))
}

const SUPERSCRIPT_DIGITS: [char; 10] = ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹'];

fn parse_power_number(input: &str) -> FResult<(Vec<u64>, &str)> {
	let mut digits: Vec<u64> = Vec::new();

	let (mut ch, mut input) = parse_char(input)?;
	while let Some((idx, _)) = SUPERSCRIPT_DIGITS
		.iter()
		.enumerate()
		.find(|(_, x)| **x == ch)
	{
		digits.push(idx as u64);
		if input.is_empty() {
			break;
		}
		(ch, input) = parse_char(input)?;
	}

	Ok((digits, input))
}

fn parse_number<'a, I: Interrupt>(input: &'a str, int: &I) -> FResult<(Number, &'a str)> {
	let (base, input) = parse_base_prefix(input).unwrap_or((Base::default(), input));
	let (res, input) = parse_basic_number(input, base, int)?;
	Ok((res, input))
}

fn is_valid_in_ident(ch: char, prev: Option<char>) -> bool {
	let allowed_chars = [
		',', '_', '⅛', '¼', '⅜', '½', '⅝', '¾', '⅞', '⅙', '⅓', '⅔', '⅚', '⅕', '⅖', '⅗', '⅘', '°',
		'$', '℃', '℉', '℧', '℈', '℥', '℔', '¢', '£', '¥', '€', '₩', '₪', '₤', '₨', '฿', '₡', '₣',
		'₦', '₧', '₫', '₭', '₮', '₯', '₱', '﷼', '﹩', '￠', '￡', '￥', '￦', '㍱', '㍲', '㍳',
		'㍴', '㍶', '㎀', '㎁', '㎂', '㎃', '㎄', '㎅', '㎆', '㎇', '㎈', '㎉', '㎊', '㎋', '㎌',
		'㎍', '㎎', '㎏', '㎐', '㎑', '㎒', '㎓', '㎔', '㎕', '㎖', '㎗', '㎘', '㎙', '㎚', '㎛',
		'㎜', '㎝', '㎞', '㎟', '㎠', '㎡', '㎢', '㎣', '㎤', '㎥', '㎦', '㎧', '㎨', '㎩', '㎪',
		'㎫', '㎬', '㎭', '㎮', '㎯', '㎰', '㎱', '㎲', '㎳', '㎴', '㎵', '㎶', '㎷', '㎸', '㎹',
		'㎺', '㎻', '㎼', '㎽', '㎾', '㎿', '㏀', '㏁', '㏃', '㏄', '㏅', '㏆', '㏈', '㏉', '㏊',
		'㏌', '㏏', '㏐', '㏓', '㏔', '㏕', '㏖', '㏗', '㏙', '㏛', '㏜', '㏝',
	];
	let only_valid_by_themselves = ['%', '‰', '‱', '′', '″', '’', '”', 'π'];
	let split_on_subsequent_digit = ['$', '£', '¥'];
	let always_invalid = ['λ'];
	if always_invalid.contains(&ch) {
		false
	} else if only_valid_by_themselves.contains(&ch) {
		// these are only valid if there was no previous char
		prev.is_none()
	} else if only_valid_by_themselves.contains(&prev.unwrap_or('a')) {
		// if prev was a char that's only valid by itself, then this next
		// char cannot be part of an identifier
		false
	} else if ch.is_alphabetic() || allowed_chars.contains(&ch) {
		true
	} else {
		// these are valid only if there was a previous non-$ char in this identifier
		prev.is_some()
			&& !(split_on_subsequent_digit.contains(&prev.unwrap_or('a')))
			&& ".0123456789'\"".contains(ch)
	}
}

fn parse_ident(input: &str, allow_dots: bool) -> FResult<(Token, &str)> {
	let (first_char, _) = parse_char(input)?;
	if !is_valid_in_ident(first_char, None) || first_char == '.' && !allow_dots {
		return Err(FendError::InvalidCharAtBeginningOfIdent(first_char));
	}
	let mut byte_idx = first_char.len_utf8();
	let (_, mut remaining) = input.split_at(byte_idx);
	let mut prev_char = first_char;
	while let Ok((next_char, remaining_input)) = parse_char(remaining) {
		if !is_valid_in_ident(next_char, Some(prev_char)) || next_char == '.' && !allow_dots {
			break;
		}
		remaining = remaining_input;
		byte_idx += next_char.len_utf8();
		prev_char = next_char;
	}
	let (ident, input) = input.split_at(byte_idx);
	Ok((
		match ident {
			"to" | "as" | "in" => Token::Symbol(Symbol::UnitConversion),
			"per" => Token::Symbol(Symbol::Div),
			"of" => Token::Symbol(Symbol::Of),
			"mod" => Token::Symbol(Symbol::Mod),
			"xor" | "XOR" => Token::Symbol(Symbol::BitwiseXor),
			"and" | "AND" => Token::Symbol(Symbol::BitwiseAnd),
			"or" | "OR" => Token::Symbol(Symbol::BitwiseOr),
			"nCr" | "choose" => Token::Symbol(Symbol::Combination),
			"nPr" | "permute" => Token::Symbol(Symbol::Permutation),
			_ => Token::Ident(Ident::new_string(ident.to_string())),
		},
		input,
	))
}

fn parse_symbol(ch: char, input: &mut &str) -> FResult<Token> {
	let mut test_next = |next: char| {
		if input.starts_with(next) {
			let (_, remaining) = input.split_at(next.len_utf8());
			*input = remaining;
			true
		} else {
			false
		}
	};
	Ok(Token::Symbol(match ch {
		'(' => Symbol::OpenParens,
		')' => Symbol::CloseParens,
		'+' => Symbol::Add,
		'!' => {
			if test_next('=') {
				Symbol::NotEquals
			} else {
				Symbol::Factorial
			}
		}
		// unicode minus sign
		'-' | '\u{2212}' => Symbol::Sub,
		'*' | '\u{d7}' | '\u{2715}' => {
			if test_next('*') {
				Symbol::Pow
			} else {
				Symbol::Mul
			}
		}
		'/' | '\u{f7}' | '\u{2215}' => Symbol::Div, // unicode division symbol and slash
		'^' => Symbol::Pow,
		'&' => Symbol::BitwiseAnd,
		'|' => Symbol::BitwiseOr,
		':' => Symbol::Fn,
		'=' => {
			if test_next('>') {
				Symbol::Fn
			} else if test_next('=') {
				Symbol::DoubleEquals
			} else {
				Symbol::Equals
			}
		}
		'\u{2260}' => Symbol::NotEquals,       // unicode not equal to symbol
		'\\' | '\u{3bb}' => Symbol::Backslash, // lambda symbol
		'.' => Symbol::Dot,
		'<' => {
			if test_next('<') {
				Symbol::ShiftLeft
			} else if test_next('>') {
				Symbol::NotEquals
			} else {
				return Err(FendError::UnexpectedChar(ch));
			}
		}
		'>' => {
			if test_next('>') {
				Symbol::ShiftRight
			} else {
				return Err(FendError::UnexpectedChar(ch));
			}
		}
		';' => Symbol::Semicolon,
		_ => return Err(FendError::UnexpectedChar(ch)),
	}))
}

fn parse_unicode_escape(chars_iter: &mut std::str::CharIndices<'_>) -> FResult<char> {
	if chars_iter
		.next()
		.ok_or(FendError::UnterminatedStringLiteral)?
		.1 != '{'
	{
		return Err(FendError::InvalidUnicodeEscapeSequence);
	}
	let mut result_value = 0;
	let mut zero_length = true;
	loop {
		let (_, ch) = chars_iter
			.next()
			.ok_or(FendError::UnterminatedStringLiteral)?;
		if ch.is_ascii_hexdigit() {
			zero_length = false;
			result_value *= 16;
			result_value += ch
				.to_digit(16)
				.ok_or(FendError::InvalidUnicodeEscapeSequence)?;
			if result_value > 0x10_ffff {
				return Err(FendError::InvalidUnicodeEscapeSequence);
			}
		} else if ch == '}' {
			break;
		} else {
			return Err(FendError::InvalidUnicodeEscapeSequence);
		}
	}
	if zero_length {
		return Err(FendError::InvalidUnicodeEscapeSequence);
	}
	if let Ok(ch) = <char as convert::TryFrom<u32>>::try_from(result_value) {
		Ok(ch)
	} else {
		Err(FendError::InvalidUnicodeEscapeSequence)
	}
}

fn parse_string_literal(input: &str, terminator: char) -> FResult<(Token, &str)> {
	let (_, input) = input.split_at(1);
	let mut chars_iter = input.char_indices();
	let mut literal_length = None;
	let mut literal_string = String::new();
	let mut skip_whitespace = false;
	while let Some((idx, ch)) = chars_iter.next() {
		if skip_whitespace {
			if ch.is_ascii_whitespace() {
				continue;
			}
			skip_whitespace = false;
		}
		if ch == terminator {
			literal_length = Some(idx);
			break;
		}
		if ch == '\\' {
			let (_, next) = chars_iter
				.next()
				.ok_or(FendError::UnterminatedStringLiteral)?;
			let escaped_char = match next {
				'\\' => Some('\\'),
				'"' => Some('"'),
				'\'' => Some('\''),
				'a' => Some('\u{7}'),  // bell
				'b' => Some('\u{8}'),  // backspace
				'e' => Some('\u{1b}'), // escape
				'f' => Some('\u{c}'),  // form feed
				'n' => Some('\n'),     // line feed
				'r' => Some('\r'),     // carriage return
				't' => Some('\t'),     // tab
				'v' => Some('\u{0b}'), // vertical tab
				'x' => {
					// two-character hex code
					let (_, hex1) = chars_iter
						.next()
						.ok_or(FendError::UnterminatedStringLiteral)?;
					let (_, hex2) = chars_iter
						.next()
						.ok_or(FendError::UnterminatedStringLiteral)?;
					let hex1: u8 = convert::TryInto::try_into(
						hex1.to_digit(8).ok_or(FendError::BackslashXOutOfRange)?,
					)
					.unwrap();
					let hex2: u8 = convert::TryInto::try_into(
						hex2.to_digit(16).ok_or(FendError::BackslashXOutOfRange)?,
					)
					.unwrap();
					Some((hex1 * 16 + hex2) as char)
				}
				'u' => Some(parse_unicode_escape(&mut chars_iter)?),
				'z' => {
					skip_whitespace = true;
					None
				}
				'^' => {
					// control character escapes
					let (_, letter) = chars_iter
						.next()
						.ok_or(FendError::UnterminatedStringLiteral)?;
					let code = letter as u8;
					if !(63..=95).contains(&code) {
						return Err(FendError::ExpectedALetterOrCode);
					}
					Some(if code == b'?' {
						'\x7f'
					} else {
						(code - 64) as char
					})
				}
				_ => return Err(FendError::UnknownBackslashEscapeSequence(next)),
			};
			if let Some(escaped_char) = escaped_char {
				literal_string.push(escaped_char);
			}
		} else {
			literal_string.push(ch);
		}
	}
	let literal_length = literal_length.ok_or(FendError::UnterminatedStringLiteral)?;
	let (_, remaining) = input.split_at(literal_length + 1);
	Ok((Token::StringLiteral(literal_string.into()), remaining))
}

// parses a unit beginning with ' or "
fn parse_quote_unit(input: &str) -> (Token, &str) {
	let mut split_idx = 1;
	if let Some(ch) = input.split_at(1).1.chars().next() {
		if ch.is_alphabetic() {
			split_idx += ch.len_utf8();
			let mut prev = ch;
			let (_, mut remaining) = input.split_at(split_idx);
			while let Some(next) = remaining.chars().next() {
				if !is_valid_in_ident(next, Some(prev)) {
					break;
				}
				split_idx += next.len_utf8();
				prev = next;
				let (_, remaining2) = input.split_at(split_idx);
				remaining = remaining2;
			}
		}
	}
	let (a, b) = input.split_at(split_idx);
	(Token::Ident(Ident::new_string(a.to_string())), b)
}

pub(crate) struct Lexer<'a, 'b, I: Interrupt> {
	input: &'a str,
	// normally 0; 1 after backslash; 2 after ident after backslash
	after_backslash_state: u8,
	after_number_or_to: bool,
	int: &'b I,
}

fn skip_whitespace_and_comments(input: &mut &str) {
	while !input.is_empty() {
		if input.starts_with("# ") || input.starts_with("#!") {
			if let Some(idx) = input.find('\n') {
				let (_, remaining) = input.split_at(idx);
				*input = remaining;
				continue;
			}
			*input = "";
			return;
		} else if let Some(ch) = input.chars().next() {
			if ch.is_whitespace() {
				let (_, remaining) = input.split_at(ch.len_utf8());
				*input = remaining;
				continue;
			}
		}
		break;
	}
}

fn parse_date(input: &str) -> FResult<(Date, &str)> {
	let (_, input) = input.split_at(1); // skip '@' symbol
	let mut input2 = input;
	let mut split_idx = 0;
	for i in 0..3 {
		let mut n = 0;
		while matches!(input2.chars().next(), Some('0'..='9')) {
			let (_, remaining) = input2.split_at(1);
			input2 = remaining;
			n += 1;
			split_idx += 1;
		}
		if n == 0 {
			return Err(FendError::ExpectedADateLiteral);
		}
		if i == 2 {
			break;
		}
		if !input2.starts_with('-') {
			return Err(FendError::ExpectedADateLiteral);
		}
		let (_, remaining) = input2.split_at(1);
		input2 = remaining;
		split_idx += 1;
	}
	let (date_str, result_remaining) = input.split_at(split_idx);
	let res = Date::parse(date_str)?;
	Ok((res, result_remaining))
}

impl<'a, 'b, I: Interrupt> Lexer<'a, 'b, I> {
	fn next_token(&mut self) -> FResult<Option<Token>> {
		skip_whitespace_and_comments(&mut self.input);
		let (ch, following) = {
			let mut chars = self.input.chars();
			let ch = chars.next();
			let following = chars.next();
			(ch, following)
		};
		Ok(Some(match ch {
			Some(ch) => {
				if ch.is_ascii_digit()
					|| (ch == '.' && self.after_backslash_state == 0)
					|| (ch == 'd' && following.is_some() && following.unwrap().is_ascii_digit())
				{
					let (num, remaining) = parse_number(self.input, self.int)?;
					self.input = remaining;
					Token::Num(num)
				} else if ch == '\'' || ch == '"' {
					if self.after_number_or_to {
						let (token, remaining) = parse_quote_unit(self.input);
						self.input = remaining;
						token
					} else {
						// normal string literal, with possible escape sequences
						let (token, remaining) = parse_string_literal(self.input, ch)?;
						self.input = remaining;
						token
					}
				} else if ch == '@' {
					// date literal, e.g. @1970-01-01
					let (date, remaining) = parse_date(self.input)?;
					self.input = remaining;
					Token::Date(date)
				} else if self.input.starts_with("#\"") {
					// raw string literal
					let (_, remaining) = self.input.split_at(2);
					let literal_length = remaining
						.match_indices("\"#")
						.next()
						.ok_or(FendError::UnterminatedStringLiteral)?
						.0;
					let (literal, remaining) = remaining.split_at(literal_length);
					let (_terminator, remaining) = remaining.split_at(2);
					self.input = remaining;
					Token::StringLiteral(literal.to_string().into())
				} else if is_valid_in_ident(ch, None) {
					// dots aren't allowed in idents after a backslash
					let (ident, remaining) =
						parse_ident(self.input, self.after_backslash_state != 1)?;
					self.input = remaining;
					ident
				} else {
					let (_, remaining) = self.input.split_at(ch.len_utf8());
					self.input = remaining;
					parse_symbol(ch, &mut self.input)?
				}
			}
			None => return Ok(None),
		}))
	}
}

impl<'a, I: Interrupt> Iterator for Lexer<'a, '_, I> {
	type Item = FResult<Token>;

	fn next(&mut self) -> Option<Self::Item> {
		let res = match self.next_token() {
			Err(e) => Some(Err(e)),
			Ok(None) => None,
			Ok(Some(t)) => Some(Ok(t)),
		};
		self.after_number_or_to = matches!(
			res,
			Some(Ok(Token::Num(_) | Token::Symbol(Symbol::UnitConversion)))
		);
		if matches!(res, Some(Ok(Token::Symbol(Symbol::Backslash)))) {
			self.after_backslash_state = 1;
		} else if self.after_backslash_state == 1 {
			if let Some(Ok(Token::Ident(_))) = res {
				self.after_backslash_state = 2;
			} else {
				self.after_backslash_state = 0;
			}
		} else {
			self.after_backslash_state = 0;
		}
		res
	}
}

pub(crate) fn lex<'a, 'b, I: Interrupt>(input: &'a str, int: &'b I) -> Lexer<'a, 'b, I> {
	Lexer {
		input,
		after_backslash_state: 0,
		after_number_or_to: false,
		int,
	}
}