rhai/src/tokenizer.rs

2531 lines
82 KiB
Rust
Raw Normal View History

//! Main module defining the lexer and parser.
2020-07-16 06:09:31 +02:00
use crate::engine::{
2021-03-14 03:47:29 +01:00
Precedence, KEYWORD_DEBUG, KEYWORD_EVAL, KEYWORD_FN_PTR, KEYWORD_FN_PTR_CALL,
KEYWORD_FN_PTR_CURRY, KEYWORD_IS_DEF_VAR, KEYWORD_PRINT, KEYWORD_THIS, KEYWORD_TYPE_OF,
2020-07-16 06:09:31 +02:00
};
2021-11-13 15:36:23 +01:00
use crate::func::native::OnParseTokenCallback;
2022-03-25 01:52:53 +01:00
use crate::{Engine, Identifier, LexError, SmartString, StaticVec, INT, UNSIGNED_INT};
2021-04-17 09:15:54 +02:00
#[cfg(feature = "no_std")]
use std::prelude::v1::*;
use std::{
borrow::Cow,
2022-07-25 07:40:23 +02:00
cell::RefCell,
2021-04-17 09:15:54 +02:00
char, fmt,
2021-04-04 09:06:13 +02:00
iter::{FusedIterator, Peekable},
2021-01-06 06:46:53 +01:00
num::NonZeroUsize,
ops::{Add, AddAssign},
rc::Rc,
str::{Chars, FromStr},
};
2020-11-16 16:10:14 +01:00
2021-07-25 16:56:05 +02:00
/// _(internals)_ Commands and shared data used to steer the tokenizer.
#[derive(Debug, Clone, Eq, PartialEq, Default, Hash)]
pub struct TokenizerControlBlock {
    /// `true` while the current tokenizer position is inside an interpolated text string.
    ///
    /// The flag lets the tokenizer switch back to _text_ parsing after an interpolation stream.
    pub is_within_text: bool,
    /// Global comments collected so far.
    #[cfg(feature = "metadata")]
    pub global_comments: Vec<SmartString>,
}

impl TokenizerControlBlock {
    /// Build a `TokenizerControlBlock` in its initial state.
    ///
    /// The tokenizer starts outside of any text string and, under the `metadata` feature,
    /// with an empty list of global comments.
    #[inline(always)]
    #[must_use]
    pub const fn new() -> Self {
        Self {
            #[cfg(feature = "metadata")]
            global_comments: Vec::new(),
            is_within_text: false,
        }
    }
}
2021-07-25 16:56:05 +02:00
/// _(internals)_ A shared object that allows control of the tokenizer from outside.
///
/// This is a reference-counted, interior-mutable handle to a [`TokenizerControlBlock`].
2022-07-25 07:40:23 +02:00
pub type TokenizerControl = Rc<RefCell<TokenizerControlBlock>>;
/// Short-hand alias for [`LexError`].
type LERR = LexError;
/// Separator character for numbers.
2021-06-28 12:06:05 +02:00
const NUMBER_SEPARATOR: char = '_';
/// A stream of tokens: a [`Peekable`] wrapper around a [`TokenIterator`].
2021-03-03 15:49:57 +01:00
pub type TokenStream<'a> = Peekable<TokenIterator<'a>>;
2020-06-11 12:13:33 +02:00
/// A location (line number + character position) in the input script.
///
/// # Limitations
///
/// In order to keep footprint small, both line number and character position have 16-bit resolution,
/// meaning they go up to a maximum of 65,535 lines and 65,535 characters per line.
///
/// Advancing beyond the maximum line length or maximum number of lines is not an error but has no effect.
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
pub struct Position {
    /// Line number: 0 = none
    #[cfg(not(feature = "no_position"))]
    line: u16,
    /// Character position: 0 = BOL
    #[cfg(not(feature = "no_position"))]
    pos: u16,
}

impl Position {
    /// A [`Position`] representing no position.
    pub const NONE: Self = Self {
        #[cfg(not(feature = "no_position"))]
        line: 0,
        #[cfg(not(feature = "no_position"))]
        pos: 0,
    };
    /// A [`Position`] representing the first position.
    pub const START: Self = Self {
        #[cfg(not(feature = "no_position"))]
        line: 1,
        #[cfg(not(feature = "no_position"))]
        pos: 0,
    };

    /// Create a new [`Position`].
    ///
    /// `line` must not be zero.
    ///
    /// If `position` is zero, then it is at the beginning of a line.
    ///
    /// # Panics
    ///
    /// Panics if `line` is zero.
    #[inline(always)]
    #[must_use]
    pub const fn new(line: u16, position: u16) -> Self {
        assert!(line != 0, "line cannot be zero");

        // Re-bind under an underscore-prefixed name: the value is not stored under `no_position`.
        let _pos = position;

        Self {
            #[cfg(not(feature = "no_position"))]
            line,
            #[cfg(not(feature = "no_position"))]
            pos: _pos,
        }
    }
    /// Get the line number (1-based), or [`None`] if there is no position.
    #[inline]
    #[must_use]
    pub const fn line(self) -> Option<usize> {
        #[cfg(not(feature = "no_position"))]
        return if self.is_none() {
            None
        } else {
            Some(self.line as usize)
        };

        #[cfg(feature = "no_position")]
        return None;
    }
    /// Get the character position (1-based), or [`None`] if there is no position
    /// or if at the beginning of a line.
    #[inline]
    #[must_use]
    pub const fn position(self) -> Option<usize> {
        #[cfg(not(feature = "no_position"))]
        return if self.is_none() || self.pos == 0 {
            None
        } else {
            Some(self.pos as usize)
        };

        #[cfg(feature = "no_position")]
        return None;
    }
    /// Advance by one character position.
    ///
    /// Has no effect once the maximum character position is reached.
    ///
    /// # Panics
    ///
    /// Panics if this is [`NONE`][Position::NONE] (unless under `no_position`).
    #[inline]
    pub(crate) fn advance(&mut self) {
        #[cfg(not(feature = "no_position"))]
        {
            assert!(!self.is_none(), "cannot advance Position::none");

            // Advance up to maximum position
            if self.pos < u16::MAX {
                self.pos += 1;
            }
        }
    }
    /// Go backwards by one character position.
    ///
    /// # Panics
    ///
    /// Panics if already at beginning of a line - cannot rewind to a previous line.
    /// Also panics if this is [`NONE`][Position::NONE].
    #[inline]
    pub(crate) fn rewind(&mut self) {
        #[cfg(not(feature = "no_position"))]
        {
            assert!(!self.is_none(), "cannot rewind Position::none");
            assert!(self.pos > 0, "cannot rewind at position 0");
            self.pos -= 1;
        }
    }
    /// Advance to the next line.
    ///
    /// Has no effect (the character position is not reset either) once the maximum
    /// line number is reached.
    ///
    /// # Panics
    ///
    /// Panics if this is [`NONE`][Position::NONE] (unless under `no_position`).
    #[inline]
    pub(crate) fn new_line(&mut self) {
        #[cfg(not(feature = "no_position"))]
        {
            assert!(!self.is_none(), "cannot advance Position::none");

            // Advance up to maximum line number
            if self.line < u16::MAX {
                self.line += 1;
                self.pos = 0;
            }
        }
    }
    /// Is this [`Position`] at the beginning of a line?
    #[inline]
    #[must_use]
    pub const fn is_beginning_of_line(self) -> bool {
        #[cfg(not(feature = "no_position"))]
        return self.pos == 0 && !self.is_none();
        #[cfg(feature = "no_position")]
        return false;
    }
    /// Is there no [`Position`]?
    ///
    /// Always `true` under `no_position`.
    #[inline]
    #[must_use]
    pub const fn is_none(self) -> bool {
        #[cfg(not(feature = "no_position"))]
        return self.line == 0 && self.pos == 0;
        #[cfg(feature = "no_position")]
        return true;
    }
    /// Return this [`Position`], or the fallback `pos` if this is [`NONE`][Position::NONE].
    #[inline]
    #[must_use]
    pub const fn or_else(self, pos: Self) -> Self {
        if self.is_none() {
            pos
        } else {
            self
        }
    }
    /// Print this [`Position`] for debug purposes.
    ///
    /// Writes `" @ "` followed by the debug form; writes nothing for [`NONE`][Position::NONE].
    #[inline]
    pub(crate) fn debug_print(self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if !self.is_none() {
            write!(_f, " @ {:?}", self)?;
        }
        Ok(())
    }
}

impl Default for Position {
    /// The default is [`START`][Position::START].
    #[inline(always)]
    fn default() -> Self {
        Self::START
    }
}

impl fmt::Display for Position {
    /// Formats as `none`, or `line L, position P`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.is_none() {
            write!(f, "none")?;
        } else {
            #[cfg(not(feature = "no_position"))]
            write!(f, "line {}, position {}", self.line, self.pos)?;
            #[cfg(feature = "no_position")]
            unreachable!("no position");
        }

        Ok(())
    }
}

impl fmt::Debug for Position {
    /// Formats as `none`, `L` (beginning of line) or `L:P`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.is_none() {
            f.write_str("none")
        } else {
            #[cfg(not(feature = "no_position"))]
            if self.is_beginning_of_line() {
                write!(f, "{}", self.line)
            } else {
                write!(f, "{}:{}", self.line, self.pos)
            }

            #[cfg(feature = "no_position")]
            unreachable!("no position");
        }
    }
}

impl Add for Position {
    type Output = Self;

    /// Offset this [`Position`] by `rhs`, where `rhs` is relative to [`START`][Position::START].
    ///
    /// Adding [`NONE`][Position::NONE] leaves `self` unchanged. If `rhs` is at the beginning
    /// of a line, the character position of `self` is kept. Results beyond the 16-bit range
    /// are clamped to the maximum instead of overflowing.
    fn add(self, rhs: Self) -> Self::Output {
        if rhs.is_none() {
            self
        } else {
            #[cfg(not(feature = "no_position"))]
            return {
                // `base + delta - 1`, computed in a wider type so that the intermediate
                // sum cannot overflow `u16`, then clamped back into range.
                let offset = |base: u16, delta: u16| -> u16 {
                    let sum = (u32::from(base) + u32::from(delta)).saturating_sub(1);
                    sum.min(u32::from(u16::MAX)) as u16
                };

                Self {
                    line: offset(self.line, rhs.line),
                    pos: if rhs.is_beginning_of_line() {
                        self.pos
                    } else {
                        offset(self.pos, rhs.pos)
                    },
                }
            };
            #[cfg(feature = "no_position")]
            unreachable!("no position");
        }
    }
}

impl AddAssign for Position {
    /// In-place version of [`Add`]: `*self = *self + rhs`.
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs;
    }
}
2022-02-08 16:01:47 +01:00
/// _(internals)_ A span consisting of a starting and an ending [positions][Position].
/// Exported under the `internals` feature only.
2022-07-04 11:42:24 +02:00
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
2022-02-08 16:01:47 +01:00
pub struct Span {
/// Starting [position][Position].
start: Position,
/// Ending [position][Position].
end: Position,
}
2022-07-04 11:42:24 +02:00
impl Default for Span {
fn default() -> Self {
Self::NONE
}
}
2022-02-08 16:01:47 +01:00
impl Span {
2022-06-05 12:17:44 +02:00
/// Empty [`Span`].
2022-02-08 16:01:47 +01:00
pub const NONE: Self = Self::new(Position::NONE, Position::NONE);
/// Create a new [`Span`].
#[inline(always)]
#[must_use]
pub const fn new(start: Position, end: Position) -> Self {
Self { start, end }
}
/// Is this [`Span`] non-existent?
#[inline(always)]
#[must_use]
pub const fn is_none(&self) -> bool {
self.start.is_none() && self.end.is_none()
}
/// Get the [`Span`]'s starting [position][Position].
#[inline(always)]
#[must_use]
pub const fn start(&self) -> Position {
self.start
}
/// Get the [`Span`]'s ending [position][Position].
#[inline(always)]
#[must_use]
pub const fn end(&self) -> Position {
self.end
}
}
impl fmt::Display for Span {
2022-08-18 15:16:42 +02:00
#[inline]
2022-02-08 16:01:47 +01:00
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2022-08-18 15:16:42 +02:00
let _f = f;
#[cfg(not(feature = "no_position"))]
2022-07-05 10:26:38 +02:00
match (self.start(), self.end()) {
2022-08-18 15:16:42 +02:00
(Position::NONE, Position::NONE) => write!(_f, "{:?}", Position::NONE),
(Position::NONE, end) => write!(_f, "..{:?}", end),
(start, Position::NONE) => write!(_f, "{:?}", start),
2022-07-05 10:26:38 +02:00
(start, end) if start.line() != end.line() => {
2022-08-18 15:16:42 +02:00
write!(_f, "{:?}-{:?}", start, end)
2022-02-08 16:01:47 +01:00
}
2022-07-05 10:26:38 +02:00
(start, end) => write!(
2022-08-18 15:16:42 +02:00
_f,
2022-02-08 16:01:47 +01:00
"{}:{}-{}",
2022-07-05 10:26:38 +02:00
start.line().unwrap(),
start.position().unwrap_or(0),
end.position().unwrap_or(0)
2022-02-08 16:01:47 +01:00
),
}
2022-08-18 15:16:42 +02:00
#[cfg(feature = "no_position")]
Ok(())
2022-02-08 16:01:47 +01:00
}
}
impl fmt::Debug for Span {
    /// The debug form of a [`Span`] is the same as its display form.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <Self as fmt::Display>::fmt(self, f)
    }
}
2021-07-25 16:56:05 +02:00
/// _(internals)_ A Rhai language token.
/// Exported under the `internals` feature only.
2021-02-12 16:07:28 +01:00
#[derive(Debug, PartialEq, Clone, Hash)]
2022-04-26 10:36:24 +02:00
#[non_exhaustive]
pub enum Token {
/// An `INT` constant.
IntegerConstant(INT),
2020-07-28 13:11:37 +02:00
/// A `FLOAT` constant.
///
2020-07-28 13:11:37 +02:00
/// Reserved under the `no_float` feature.
2020-04-17 14:08:41 +02:00
#[cfg(not(feature = "no_float"))]
2021-12-06 13:52:47 +01:00
FloatConstant(crate::ast::FloatWrapper<crate::FLOAT>),
/// A [`Decimal`][rust_decimal::Decimal] constant.
2021-02-13 13:57:56 +01:00
///
/// Requires the `decimal` feature.
#[cfg(feature = "decimal")]
2021-12-06 13:52:47 +01:00
DecimalConstant(rust_decimal::Decimal),
/// An identifier.
2022-03-25 01:52:53 +01:00
Identifier(Identifier),
/// A character constant.
CharConstant(char),
/// A string constant.
2022-02-26 10:28:58 +01:00
StringConstant(SmartString),
2021-04-04 07:13:07 +02:00
/// An interpolated string.
2022-02-26 10:28:58 +01:00
InterpolatedString(SmartString),
/// `{`
LeftBrace,
/// `}`
RightBrace,
/// `(`
LeftParen,
/// `)`
RightParen,
/// `[`
LeftBracket,
/// `]`
RightBracket,
2022-04-21 04:04:46 +02:00
/// `()`
Unit,
/// `+`
Plus,
/// `+` (unary)
UnaryPlus,
/// `-`
Minus,
/// `-` (unary)
UnaryMinus,
/// `*`
Multiply,
/// `/`
Divide,
/// `%`
Modulo,
2021-02-10 05:41:27 +01:00
/// `**`
PowerOf,
/// `<<`
LeftShift,
/// `>>`
RightShift,
/// `;`
SemiColon,
/// `:`
Colon,
/// `::`
2020-05-03 19:19:01 +02:00
DoubleColon,
2020-11-13 11:32:18 +01:00
/// `=>`
DoubleArrow,
/// `_`
Underscore,
/// `,`
Comma,
/// `.`
Period,
2022-06-10 04:26:06 +02:00
/// `?.`
2022-06-11 18:32:12 +02:00
///
/// Reserved under the `no_object` feature.
#[cfg(not(feature = "no_object"))]
2022-06-10 04:26:06 +02:00
Elvis,
2022-06-10 05:22:33 +02:00
/// `??`
DoubleQuestion,
2022-06-11 18:32:12 +02:00
/// `?[`
///
/// Reserved under the `no_object` feature.
#[cfg(not(feature = "no_index"))]
QuestionBracket,
2021-12-15 05:06:17 +01:00
/// `..`
ExclusiveRange,
/// `..=`
InclusiveRange,
/// `#{`
MapStart,
/// `=`
Equals,
/// `true`
True,
/// `false`
False,
/// `let`
Let,
/// `const`
Const,
/// `if`
If,
/// `else`
Else,
2020-11-13 11:32:18 +01:00
/// `switch`
Switch,
2020-11-20 15:23:37 +01:00
/// `do`
Do,
/// `while`
While,
2020-11-20 15:23:37 +01:00
/// `until`
Until,
/// `loop`
Loop,
/// `for`
For,
/// `in`
In,
/// `<`
LessThan,
/// `>`
GreaterThan,
/// `<=`
LessThanEqualsTo,
/// `>=`
GreaterThanEqualsTo,
/// `==`
EqualsTo,
/// `!=`
NotEqualsTo,
/// `!`
Bang,
/// `|`
Pipe,
/// `||`
Or,
/// `^`
XOr,
/// `&`
Ampersand,
/// `&&`
And,
/// `fn`
///
2020-07-28 13:11:37 +02:00
/// Reserved under the `no_function` feature.
#[cfg(not(feature = "no_function"))]
Fn,
/// `continue`
Continue,
/// `break`
Break,
/// `return`
Return,
/// `throw`
Throw,
2020-10-20 17:16:03 +02:00
/// `try`
Try,
/// `catch`
Catch,
/// `+=`
PlusAssign,
/// `-=`
MinusAssign,
/// `*=`
MultiplyAssign,
/// `/=`
DivideAssign,
/// `<<=`
LeftShiftAssign,
/// `>>=`
RightShiftAssign,
/// `&=`
AndAssign,
/// `|=`
OrAssign,
/// `^=`
XOrAssign,
/// `%=`
ModuloAssign,
2021-02-10 05:41:27 +01:00
/// `**=`
PowerOfAssign,
/// `private`
///
2020-07-28 13:11:37 +02:00
/// Reserved under the `no_function` feature.
2020-06-02 07:33:16 +02:00
#[cfg(not(feature = "no_function"))]
Private,
/// `import`
///
2020-07-28 13:11:37 +02:00
/// Reserved under the `no_module` feature.
2020-06-25 05:07:46 +02:00
#[cfg(not(feature = "no_module"))]
2020-05-04 11:43:54 +02:00
Import,
/// `export`
///
2020-07-28 13:11:37 +02:00
/// Reserved under the `no_module` feature.
#[cfg(not(feature = "no_module"))]
2020-05-04 11:43:54 +02:00
Export,
/// `as`
///
2020-07-28 13:11:37 +02:00
/// Reserved under the `no_module` feature.
2020-06-25 05:07:46 +02:00
#[cfg(not(feature = "no_module"))]
2020-05-04 11:43:54 +02:00
As,
/// A lexer error.
2022-02-26 16:18:47 +01:00
LexError(Box<LexError>),
/// A comment block.
2022-02-26 10:28:58 +01:00
Comment(SmartString),
/// A reserved symbol.
2022-02-26 10:28:58 +01:00
Reserved(SmartString),
/// A custom keyword.
2022-07-05 16:59:03 +02:00
///
/// Not available under `no_custom_syntax`.
2022-07-05 16:59:03 +02:00
#[cfg(not(feature = "no_custom_syntax"))]
2022-02-26 10:28:58 +01:00
Custom(SmartString),
/// End of the input stream.
EOF,
}
impl fmt::Display for Token {
    /// Writes the token's [syntax][Token::syntax].
    #[inline(always)]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.syntax();
        f.write_str(&text)
    }
}
impl Token {
2021-07-10 09:50:31 +02:00
/// Get the literal syntax of the token.
2021-06-12 16:47:43 +02:00
#[must_use]
2021-07-10 09:50:31 +02:00
pub const fn literal_syntax(&self) -> &'static str {
2021-03-23 13:04:54 +01:00
use Token::*;
match self {
LeftBrace => "{",
RightBrace => "}",
LeftParen => "(",
RightParen => ")",
LeftBracket => "[",
RightBracket => "]",
2022-04-21 04:04:46 +02:00
Unit => "()",
2021-03-23 13:04:54 +01:00
Plus => "+",
UnaryPlus => "+",
Minus => "-",
UnaryMinus => "-",
Multiply => "*",
Divide => "/",
SemiColon => ";",
Colon => ":",
DoubleColon => "::",
DoubleArrow => "=>",
Underscore => "_",
Comma => ",",
Period => ".",
2022-06-11 18:32:12 +02:00
#[cfg(not(feature = "no_object"))]
2022-06-10 04:26:06 +02:00
Elvis => "?.",
2022-06-10 05:22:33 +02:00
DoubleQuestion => "??",
2022-06-11 18:32:12 +02:00
#[cfg(not(feature = "no_index"))]
QuestionBracket => "?[",
2021-12-15 05:06:17 +01:00
ExclusiveRange => "..",
InclusiveRange => "..=",
2021-03-23 13:04:54 +01:00
MapStart => "#{",
Equals => "=",
True => "true",
False => "false",
Let => "let",
Const => "const",
If => "if",
Else => "else",
Switch => "switch",
Do => "do",
While => "while",
Until => "until",
Loop => "loop",
For => "for",
In => "in",
LessThan => "<",
GreaterThan => ">",
Bang => "!",
LessThanEqualsTo => "<=",
GreaterThanEqualsTo => ">=",
EqualsTo => "==",
NotEqualsTo => "!=",
Pipe => "|",
Or => "||",
Ampersand => "&",
And => "&&",
Continue => "continue",
Break => "break",
Return => "return",
Throw => "throw",
Try => "try",
Catch => "catch",
PlusAssign => "+=",
MinusAssign => "-=",
MultiplyAssign => "*=",
DivideAssign => "/=",
LeftShiftAssign => "<<=",
RightShiftAssign => ">>=",
AndAssign => "&=",
OrAssign => "|=",
XOrAssign => "^=",
LeftShift => "<<",
RightShift => ">>",
XOr => "^",
Modulo => "%",
ModuloAssign => "%=",
PowerOf => "**",
PowerOfAssign => "**=",
#[cfg(not(feature = "no_function"))]
Fn => "fn",
#[cfg(not(feature = "no_function"))]
Private => "private",
#[cfg(not(feature = "no_module"))]
Import => "import",
#[cfg(not(feature = "no_module"))]
Export => "export",
#[cfg(not(feature = "no_module"))]
As => "as",
2021-06-28 12:06:05 +02:00
_ => "ERROR: NOT A KEYWORD",
2021-03-23 13:04:54 +01:00
}
}
/// Get the syntax of the token.
2021-06-12 16:47:43 +02:00
#[must_use]
pub fn syntax(&self) -> Cow<'static, str> {
use Token::*;
match self {
IntegerConstant(i) => i.to_string().into(),
2020-04-17 14:08:41 +02:00
#[cfg(not(feature = "no_float"))]
FloatConstant(f) => f.to_string().into(),
2021-02-13 13:57:56 +01:00
#[cfg(feature = "decimal")]
DecimalConstant(d) => d.to_string().into(),
StringConstant(s) => format!("\"{s}\"").into(),
2022-02-08 02:46:14 +01:00
InterpolatedString(..) => "string".into(),
CharConstant(c) => c.to_string().into(),
2021-11-11 06:55:52 +01:00
Identifier(s) => s.to_string().into(),
Reserved(s) => s.to_string().into(),
2022-07-05 16:59:03 +02:00
#[cfg(not(feature = "no_custom_syntax"))]
2021-11-11 06:55:52 +01:00
Custom(s) => s.to_string().into(),
LexError(err) => err.to_string().into(),
2021-11-11 06:55:52 +01:00
Comment(s) => s.to_string().into(),
2020-12-12 13:09:29 +01:00
2021-03-23 13:04:54 +01:00
EOF => "{EOF}".into(),
2021-07-10 09:50:31 +02:00
token => token.literal_syntax().into(),
}
}
2021-04-24 05:55:40 +02:00
/// Is this token an op-assignment operator?
#[inline]
2021-06-12 16:47:43 +02:00
#[must_use]
2021-06-28 12:06:05 +02:00
pub const fn is_op_assignment(&self) -> bool {
2021-07-24 08:11:16 +02:00
matches!(
self,
2021-04-24 05:55:40 +02:00
Self::PlusAssign
2021-07-24 08:11:16 +02:00
| Self::MinusAssign
| Self::MultiplyAssign
| Self::DivideAssign
| Self::LeftShiftAssign
| Self::RightShiftAssign
| Self::ModuloAssign
| Self::PowerOfAssign
| Self::AndAssign
| Self::OrAssign
| Self::XOrAssign
)
2021-04-24 05:55:40 +02:00
}
2021-04-23 17:37:10 +02:00
/// Get the corresponding operator of the token if it is an op-assignment operator.
2021-06-12 16:47:43 +02:00
#[must_use]
pub const fn get_base_op_from_assignment(&self) -> Option<Self> {
2021-04-23 17:37:10 +02:00
Some(match self {
Self::PlusAssign => Self::Plus,
Self::MinusAssign => Self::Minus,
Self::MultiplyAssign => Self::Multiply,
Self::DivideAssign => Self::Divide,
Self::LeftShiftAssign => Self::LeftShift,
Self::RightShiftAssign => Self::RightShift,
Self::ModuloAssign => Self::Modulo,
Self::PowerOfAssign => Self::PowerOf,
Self::AndAssign => Self::Ampersand,
Self::OrAssign => Self::Pipe,
Self::XOrAssign => Self::XOr,
_ => return None,
})
}
2021-04-24 05:55:40 +02:00
/// Has this token a corresponding op-assignment operator?
#[inline]
2021-06-12 16:47:43 +02:00
#[must_use]
2021-06-28 12:06:05 +02:00
pub const fn has_op_assignment(&self) -> bool {
2021-07-24 08:11:16 +02:00
matches!(
self,
2021-04-24 05:55:40 +02:00
Self::Plus
2021-07-24 08:11:16 +02:00
| Self::Minus
| Self::Multiply
| Self::Divide
| Self::LeftShift
| Self::RightShift
| Self::Modulo
| Self::PowerOf
| Self::Ampersand
| Self::Pipe
| Self::XOr
)
2021-04-24 05:55:40 +02:00
}
2021-04-23 17:37:10 +02:00
/// Get the corresponding op-assignment operator of the token.
2021-06-12 16:47:43 +02:00
#[must_use]
pub const fn convert_to_op_assignment(&self) -> Option<Self> {
2021-04-23 17:37:10 +02:00
Some(match self {
Self::Plus => Self::PlusAssign,
Self::Minus => Self::MinusAssign,
Self::Multiply => Self::MultiplyAssign,
Self::Divide => Self::DivideAssign,
Self::LeftShift => Self::LeftShiftAssign,
Self::RightShift => Self::RightShiftAssign,
Self::Modulo => Self::ModuloAssign,
Self::PowerOf => Self::PowerOfAssign,
Self::Ampersand => Self::AndAssign,
Self::Pipe => Self::OrAssign,
Self::XOr => Self::XOrAssign,
_ => return None,
})
}
2020-07-09 13:54:28 +02:00
/// Reverse lookup a token from a piece of syntax.
2021-06-12 16:47:43 +02:00
#[must_use]
2022-01-04 08:22:48 +01:00
pub fn lookup_from_syntax(syntax: &str) -> Option<Self> {
2020-07-09 13:54:28 +02:00
use Token::*;
Some(match syntax {
"{" => LeftBrace,
"}" => RightBrace,
"(" => LeftParen,
")" => RightParen,
"[" => LeftBracket,
"]" => RightBracket,
2022-04-21 04:04:46 +02:00
"()" => Unit,
2020-07-09 13:54:28 +02:00
"+" => Plus,
"-" => Minus,
"*" => Multiply,
"/" => Divide,
";" => SemiColon,
":" => Colon,
"::" => DoubleColon,
2020-11-13 11:32:18 +01:00
"=>" => DoubleArrow,
"_" => Underscore,
2020-07-09 13:54:28 +02:00
"," => Comma,
"." => Period,
2022-06-11 18:32:12 +02:00
#[cfg(not(feature = "no_object"))]
2022-06-10 04:26:06 +02:00
"?." => Elvis,
2022-06-10 05:22:33 +02:00
"??" => DoubleQuestion,
2022-06-11 18:32:12 +02:00
#[cfg(not(feature = "no_index"))]
"?[" => QuestionBracket,
2021-12-15 05:06:17 +01:00
".." => ExclusiveRange,
"..=" => InclusiveRange,
2020-07-09 13:54:28 +02:00
"#{" => MapStart,
"=" => Equals,
"true" => True,
"false" => False,
"let" => Let,
"const" => Const,
"if" => If,
"else" => Else,
2020-11-13 11:32:18 +01:00
"switch" => Switch,
2020-11-20 15:23:37 +01:00
"do" => Do,
2020-07-09 13:54:28 +02:00
"while" => While,
2020-11-20 15:23:37 +01:00
"until" => Until,
2020-07-09 13:54:28 +02:00
"loop" => Loop,
"for" => For,
"in" => In,
"<" => LessThan,
">" => GreaterThan,
"!" => Bang,
"<=" => LessThanEqualsTo,
">=" => GreaterThanEqualsTo,
"==" => EqualsTo,
"!=" => NotEqualsTo,
"|" => Pipe,
"||" => Or,
"&" => Ampersand,
"&&" => And,
"continue" => Continue,
"break" => Break,
"return" => Return,
"throw" => Throw,
2020-10-20 17:16:03 +02:00
"try" => Try,
"catch" => Catch,
2020-07-09 13:54:28 +02:00
"+=" => PlusAssign,
"-=" => MinusAssign,
"*=" => MultiplyAssign,
"/=" => DivideAssign,
"<<=" => LeftShiftAssign,
">>=" => RightShiftAssign,
"&=" => AndAssign,
"|=" => OrAssign,
"^=" => XOrAssign,
"<<" => LeftShift,
">>" => RightShift,
"^" => XOr,
"%" => Modulo,
"%=" => ModuloAssign,
2021-02-10 05:41:27 +01:00
"**" => PowerOf,
"**=" => PowerOfAssign,
2022-08-20 15:55:00 +02:00
#[cfg(feature = "no_object")]
"?." => Reserved(syntax.into()),
#[cfg(feature = "no_index")]
"?[" => Reserved(syntax.into()),
#[cfg(not(feature = "no_function"))]
"fn" => Fn,
2020-07-09 13:54:28 +02:00
#[cfg(not(feature = "no_function"))]
"private" => Private,
#[cfg(feature = "no_function")]
"fn" | "private" => Reserved(syntax.into()),
2020-07-09 13:54:28 +02:00
#[cfg(not(feature = "no_module"))]
"import" => Import,
#[cfg(not(feature = "no_module"))]
"export" => Export,
#[cfg(not(feature = "no_module"))]
"as" => As,
#[cfg(feature = "no_module")]
"import" | "export" | "as" => Reserved(syntax.into()),
2021-12-24 07:59:14 +01:00
// List of reserved operators
2022-08-20 15:55:00 +02:00
"===" | "!==" | "->" | "<-" | "?" | ":=" | ":;" | "~" | "!." | "::<" | "(*" | "*)"
| "#" | "#!" | "@" | "$" | "++" | "--" | "..." | "<|" | "|>" => Reserved(syntax.into()),
2021-12-24 07:59:14 +01:00
// List of reserved keywords
"public" | "protected" | "super" | "new" | "use" | "module" | "package" | "var"
| "static" | "shared" | "with" | "is" | "goto" | "exit" | "match" | "case"
| "default" | "void" | "null" | "nil" | "spawn" | "thread" | "go" | "sync"
| "async" | "await" | "yield" => Reserved(syntax.into()),
2020-07-26 15:57:30 +02:00
2020-07-16 06:09:31 +02:00
KEYWORD_PRINT | KEYWORD_DEBUG | KEYWORD_TYPE_OF | KEYWORD_EVAL | KEYWORD_FN_PTR
2021-03-01 15:44:56 +01:00
| KEYWORD_FN_PTR_CALL | KEYWORD_FN_PTR_CURRY | KEYWORD_THIS | KEYWORD_IS_DEF_VAR => {
Reserved(syntax.into())
}
#[cfg(not(feature = "no_function"))]
2021-12-06 13:52:47 +01:00
crate::engine::KEYWORD_IS_DEF_FN => Reserved(syntax.into()),
2020-07-09 13:54:28 +02:00
_ => return None,
})
}
2022-06-05 12:17:44 +02:00
/// Is this token [`EOF`][Token::EOF]?
2020-10-08 16:25:50 +02:00
#[inline(always)]
2021-06-12 16:47:43 +02:00
#[must_use]
2021-06-28 12:06:05 +02:00
pub const fn is_eof(&self) -> bool {
2021-07-24 08:11:16 +02:00
matches!(self, Self::EOF)
}
2022-06-05 12:17:44 +02:00
/// If another operator is after these, it's probably a unary operator
/// (not sure about `fn` name).
2021-06-12 16:47:43 +02:00
#[must_use]
2021-06-28 12:06:05 +02:00
pub const fn is_next_unary(&self) -> bool {
use Token::*;
match self {
2022-06-10 04:26:06 +02:00
LexError(..) |
SemiColon | // ; - is unary
2021-11-13 02:50:49 +01:00
Colon | // #{ foo: - is unary
Comma | // ( ... , -expr ) - is unary
//Period |
2022-06-10 04:26:06 +02:00
//Elvis |
2022-06-10 05:22:33 +02:00
//DoubleQuestion |
2022-06-11 18:32:12 +02:00
//QuestionBracket |
2021-12-15 05:06:17 +01:00
ExclusiveRange | // .. - is unary
InclusiveRange | // ..= - is unary
LeftBrace | // { -expr } - is unary
// RightBrace | { expr } - expr not unary & is closing
LeftParen | // ( -expr ) - is unary
// RightParen | // ( expr ) - expr not unary & is closing
LeftBracket | // [ -expr ] - is unary
// RightBracket | // [ expr ] - expr not unary & is closing
Plus |
PlusAssign |
UnaryPlus |
Minus |
MinusAssign |
UnaryMinus |
Multiply |
MultiplyAssign |
Divide |
DivideAssign |
Modulo |
ModuloAssign |
PowerOf |
PowerOfAssign |
LeftShift |
LeftShiftAssign |
RightShift |
RightShiftAssign |
Equals |
EqualsTo |
NotEqualsTo |
LessThan |
GreaterThan |
Bang |
LessThanEqualsTo |
GreaterThanEqualsTo |
Pipe |
Ampersand |
If |
//Do |
While |
2020-11-20 15:23:37 +01:00
Until |
In |
And |
AndAssign |
Or |
OrAssign |
XOr |
XOrAssign |
Return |
Throw => true,
_ => false,
}
}
/// Get the precedence number of the token.
2021-06-12 16:47:43 +02:00
#[must_use]
2021-06-28 12:06:05 +02:00
pub const fn precedence(&self) -> Option<Precedence> {
use Token::*;
2021-03-14 03:47:29 +01:00
Precedence::new(match self {
2020-07-05 11:41:45 +02:00
Or | XOr | Pipe => 30,
2020-07-05 11:41:45 +02:00
And | Ampersand => 60,
EqualsTo | NotEqualsTo => 90,
2020-10-13 10:01:42 +02:00
In => 110,
2020-10-13 11:16:19 +02:00
LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo => 130,
2022-06-10 05:22:33 +02:00
DoubleQuestion => 135,
ExclusiveRange | InclusiveRange => 140,
Plus | Minus => 150,
2020-10-13 09:49:09 +02:00
Divide | Multiply | Modulo => 180,
2020-10-13 03:33:16 +02:00
2020-10-13 09:49:09 +02:00
PowerOf => 190,
LeftShift | RightShift => 210,
_ => 0,
2021-03-14 03:47:29 +01:00
})
}
/// Does an expression bind to the right (instead of left)?
2021-06-12 16:47:43 +02:00
#[must_use]
2021-06-28 12:06:05 +02:00
pub const fn is_bind_right(&self) -> bool {
use Token::*;
match self {
2021-02-10 05:41:27 +01:00
// Exponentiation binds to the right
PowerOf => true,
_ => false,
}
}
2020-07-05 09:23:51 +02:00
2021-02-10 05:41:27 +01:00
/// Is this token a standard symbol used in the language?
2021-06-12 16:47:43 +02:00
#[must_use]
2021-07-10 09:50:31 +02:00
pub const fn is_standard_symbol(&self) -> bool {
2020-07-05 09:23:51 +02:00
use Token::*;
match self {
LeftBrace | RightBrace | LeftParen | RightParen | LeftBracket | RightBracket | Plus
| UnaryPlus | Minus | UnaryMinus | Multiply | Divide | Modulo | PowerOf | LeftShift
2022-06-11 18:32:12 +02:00
| RightShift | SemiColon | Colon | DoubleColon | Comma | Period | DoubleQuestion
| ExclusiveRange | InclusiveRange | MapStart | Equals | LessThan | GreaterThan
| LessThanEqualsTo | GreaterThanEqualsTo | EqualsTo | NotEqualsTo | Bang | Pipe
| Or | XOr | Ampersand | And | PlusAssign | MinusAssign | MultiplyAssign
| DivideAssign | LeftShiftAssign | RightShiftAssign | AndAssign | OrAssign
| XOrAssign | ModuloAssign | PowerOfAssign => true,
#[cfg(not(feature = "no_object"))]
Elvis => true,
#[cfg(not(feature = "no_index"))]
QuestionBracket => true,
2020-07-05 09:23:51 +02:00
_ => false,
}
}
2021-07-10 09:50:31 +02:00
/// Is this token a standard keyword?
#[inline]
2021-06-12 16:47:43 +02:00
#[must_use]
2021-07-10 09:50:31 +02:00
pub const fn is_standard_keyword(&self) -> bool {
2020-07-05 09:23:51 +02:00
use Token::*;
match self {
#[cfg(not(feature = "no_function"))]
Fn | Private => true,
#[cfg(not(feature = "no_module"))]
Import | Export | As => true,
2020-11-20 15:23:37 +01:00
True | False | Let | Const | If | Else | Do | While | Until | Loop | For | In
| Continue | Break | Return | Throw | Try | Catch => true,
2020-07-05 09:23:51 +02:00
_ => false,
}
}
2021-07-10 09:50:31 +02:00
/// Is this token a reserved keyword or symbol?
2020-10-08 16:25:50 +02:00
#[inline(always)]
2021-06-12 16:47:43 +02:00
#[must_use]
2021-06-28 12:06:05 +02:00
pub const fn is_reserved(&self) -> bool {
2022-02-08 02:46:14 +01:00
matches!(self, Self::Reserved(..))
}
2020-07-26 16:25:30 +02:00
/// Convert a token into a function name, if possible.
2020-08-05 16:53:01 +02:00
#[cfg(not(feature = "no_function"))]
#[inline]
2022-02-26 10:28:58 +01:00
pub(crate) fn into_function_name_for_override(self) -> Result<SmartString, Self> {
2020-07-26 16:25:30 +02:00
match self {
2022-07-05 16:59:03 +02:00
#[cfg(not(feature = "no_custom_syntax"))]
Self::Custom(s) if is_valid_function_name(&s) => Ok(s),
Self::Identifier(s) if is_valid_function_name(&s) => Ok(s),
2020-07-26 16:25:30 +02:00
_ => Err(self),
}
}
/// Is this token a custom keyword?
2022-07-05 16:59:03 +02:00
#[cfg(not(feature = "no_custom_syntax"))]
2020-10-08 16:25:50 +02:00
#[inline(always)]
2021-06-12 16:47:43 +02:00
#[must_use]
2021-06-28 12:06:05 +02:00
pub const fn is_custom(&self) -> bool {
2022-02-08 02:46:14 +01:00
matches!(self, Self::Custom(..))
}
}
2020-05-04 13:36:58 +02:00
impl From<Token> for String {
    /// Convert a token into the owned text of its [syntax][Token::syntax].
    #[inline(always)]
    fn from(token: Token) -> Self {
        token.syntax().into_owned()
    }
}
2021-07-25 16:56:05 +02:00
/// _(internals)_ State of the tokenizer.
/// Exported under the `internals` feature only.
2022-08-19 07:21:47 +02:00
#[derive(Debug, Clone, Eq, PartialEq, Default)]
2020-06-26 13:44:50 +02:00
pub struct TokenizeState {
2021-04-05 17:06:48 +02:00
/// Maximum length of a string.
2021-01-06 06:46:53 +01:00
pub max_string_size: Option<NonZeroUsize>,
/// Can the next token be a unary operator?
pub next_token_cannot_be_unary: bool,
2022-07-25 07:40:23 +02:00
/// Shared object to allow controlling the tokenizer externally.
pub tokenizer_control: TokenizerControl,
2020-06-26 13:44:50 +02:00
/// Is the tokenizer currently inside a block comment?
2020-06-26 16:03:21 +02:00
pub comment_level: usize,
2020-06-26 13:44:50 +02:00
/// Include comments?
2020-06-26 16:03:21 +02:00
pub include_comments: bool,
/// Is the current tokenizer position within the text stream of an interpolated string?
pub is_within_text_terminated_by: Option<char>,
}
/// _(internals)_ Trait that encapsulates a peekable character input stream.
/// Exported under the `internals` feature only.
pub trait InputStream {
    /// Un-get a character back into the `InputStream`.
    /// The next [`get_next`][InputStream::get_next] or [`peek_next`][InputStream::peek_next]
    /// will return this character instead.
    fn unget(&mut self, ch: char);
    /// Get the next character from the `InputStream`, consuming it.
    /// Returns [`None`] when the stream is exhausted.
    fn get_next(&mut self) -> Option<char>;
    /// Peek the next character in the `InputStream` without consuming it.
    /// Returns [`None`] when the stream is exhausted.
    #[must_use]
    fn peek_next(&mut self) -> Option<char>;
}
2022-01-01 12:54:46 +01:00
/// _(internals)_ Parse a string literal ended by a specified termination character.
/// Exported under the `internals` feature only.
///
/// Returns the parsed string and a boolean indicating whether the string is
/// terminated by an interpolation `${`.
///
/// # Returns
///
2022-01-01 12:54:46 +01:00
/// | Type | Return Value |`state.is_within_text_terminated_by`|
/// |---------------------------------|:--------------------------:|:----------------------------------:|
/// |`"hello"` |`StringConstant("hello")` |`None` |
/// |`"hello`_{LF}_ or _{EOF}_ |`LexError` |`None` |
/// |`"hello\`_{EOF}_ or _{LF}{EOF}_ |`StringConstant("hello")` |`Some('"')` |
/// |`` `hello``_{EOF}_ |`StringConstant("hello")` |``Some('`')`` |
/// |`` `hello``_{LF}{EOF}_ |`StringConstant("hello\n")` |``Some('`')`` |
/// |`` `hello ${`` |`InterpolatedString("hello ")`<br/>next token is `{`|`None` |
2021-04-11 15:49:03 +02:00
/// |`` } hello` `` |`StringConstant(" hello")` |`None` |
/// |`} hello`_{EOF}_ |`StringConstant(" hello")` |``Some('`')`` |
///
/// This function does not throw a `LexError` for the following conditions:
///
/// * Unterminated literal string at _{EOF}_
///
/// * Unterminated normal string with continuation at _{EOF}_
///
/// This is to facilitate using this function to parse a script line-by-line, where the end of the
/// line (i.e. _{EOF}_) is not necessarily the end of the script.
///
/// Any time a [`StringConstant`][`Token::StringConstant`] is returned with
/// `state.is_within_text_terminated_by` set to `Some(_)` is one of the above conditions.
2020-06-26 13:44:50 +02:00
pub fn parse_string_literal(
stream: &mut impl InputStream,
state: &mut TokenizeState,
pos: &mut Position,
termination_char: char,
verbatim: bool,
2022-01-01 12:54:46 +01:00
allow_line_continuation: bool,
2021-04-04 07:13:07 +02:00
allow_interpolation: bool,
2022-02-26 10:28:58 +01:00
) -> Result<(SmartString, bool, Position), (LexError, Position)> {
2022-03-25 01:52:53 +01:00
let mut result = SmartString::new_const();
let mut escape = SmartString::new_const();
2020-06-26 13:44:50 +02:00
let start = *pos;
2022-02-10 07:51:31 +01:00
let mut first_char = Position::NONE;
2021-04-04 07:13:07 +02:00
let mut interpolated = false;
2021-04-22 17:02:25 +02:00
#[cfg(not(feature = "no_position"))]
let mut skip_whitespace_until = 0;
2021-04-10 04:20:17 +02:00
state.is_within_text_terminated_by = Some(termination_char);
2020-06-26 13:44:50 +02:00
loop {
2021-04-11 15:49:03 +02:00
assert!(
!verbatim || escape.is_empty(),
"verbatim strings should not have any escapes"
);
2021-04-10 04:20:17 +02:00
let next_char = match stream.get_next() {
Some(ch) => {
pos.advance();
ch
}
2021-04-11 15:49:03 +02:00
None if verbatim => {
assert_eq!(escape, "", "verbatim strings should not have any escapes");
2021-04-10 05:11:42 +02:00
pos.advance();
2021-04-11 15:49:03 +02:00
break;
2021-04-10 05:11:42 +02:00
}
2022-01-01 12:54:46 +01:00
None if allow_line_continuation && !escape.is_empty() => {
2021-04-11 15:49:03 +02:00
assert_eq!(escape, "\\", "unexpected escape {} at end of line", escape);
2021-04-10 04:20:17 +02:00
pos.advance();
break;
}
2021-04-11 15:49:03 +02:00
None => {
pos.advance();
state.is_within_text_terminated_by = None;
return Err((LERR::UnterminatedString, start));
}
2021-04-10 04:20:17 +02:00
};
2020-06-26 13:44:50 +02:00
2021-04-04 07:13:07 +02:00
// String interpolation?
if allow_interpolation
&& next_char == '$'
&& escape.is_empty()
2022-07-27 12:04:59 +02:00
&& stream.peek_next().map_or(false, |ch| ch == '{')
2021-04-04 07:13:07 +02:00
{
interpolated = true;
2021-04-10 04:20:17 +02:00
state.is_within_text_terminated_by = None;
2021-04-04 07:13:07 +02:00
break;
}
2021-01-06 06:46:53 +01:00
if let Some(max) = state.max_string_size {
if result.len() > max.get() {
return Err((LexError::StringTooLong(max.get()), *pos));
}
}
2022-02-10 07:51:31 +01:00
// Close wrapper
if termination_char == next_char && escape.is_empty() {
2022-02-10 11:24:04 +01:00
// Double wrapper
if stream.peek_next().map_or(false, |c| c == termination_char) {
eat_next(stream, pos);
} else {
state.is_within_text_terminated_by = None;
break;
}
2022-02-10 07:51:31 +01:00
}
if first_char.is_none() {
first_char = *pos;
}
2020-06-26 13:44:50 +02:00
match next_char {
// \r - ignore if followed by \n
2022-07-27 12:04:59 +02:00
'\r' if stream.peek_next().map_or(false, |ch| ch == '\n') => (),
2020-06-26 13:44:50 +02:00
// \...
2021-04-11 15:49:03 +02:00
'\\' if !verbatim && escape.is_empty() => {
2020-06-26 13:44:50 +02:00
escape.push('\\');
}
// \\
'\\' if !escape.is_empty() => {
escape.clear();
result.push('\\');
}
// \t
't' if !escape.is_empty() => {
escape.clear();
result.push('\t');
}
// \n
'n' if !escape.is_empty() => {
escape.clear();
result.push('\n');
}
// \r
'r' if !escape.is_empty() => {
escape.clear();
result.push('\r');
}
// \x??, \u????, \U????????
2022-07-27 12:04:59 +02:00
ch @ ('x' | 'u' | 'U') if !escape.is_empty() => {
2020-06-26 13:44:50 +02:00
let mut seq = escape.clone();
escape.clear();
2020-07-29 10:10:06 +02:00
seq.push(ch);
2020-06-26 13:44:50 +02:00
let mut out_val: u32 = 0;
let len = match ch {
'x' => 2,
'u' => 4,
'U' => 8,
2021-12-30 05:19:41 +01:00
c => unreachable!("x or u or U expected but gets '{}'", c),
2020-06-26 13:44:50 +02:00
};
for _ in 0..len {
2021-04-10 04:20:17 +02:00
let c = stream
.get_next()
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
2020-06-26 13:44:50 +02:00
seq.push(c);
pos.advance();
out_val *= 16;
2021-04-10 04:20:17 +02:00
out_val += c
.to_digit(16)
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
2020-06-26 13:44:50 +02:00
}
2021-04-10 04:20:17 +02:00
result.push(
char::from_u32(out_val)
2022-02-26 10:28:58 +01:00
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?,
2021-04-10 04:20:17 +02:00
);
2020-06-26 13:44:50 +02:00
}
// \{termination_char} - escaped
_ if termination_char == next_char && !escape.is_empty() => {
2020-06-26 13:44:50 +02:00
escape.clear();
2022-07-27 12:04:59 +02:00
result.push(next_char);
2020-06-26 13:44:50 +02:00
}
2021-04-11 15:49:03 +02:00
// Verbatim
'\n' if verbatim => {
assert_eq!(escape, "", "verbatim strings should not have any escapes");
pos.new_line();
result.push(next_char);
}
// Line continuation
2022-01-01 12:54:46 +01:00
'\n' if allow_line_continuation && !escape.is_empty() => {
2021-04-11 15:49:03 +02:00
assert_eq!(escape, "\\", "unexpected escape {} at end of line", escape);
escape.clear();
pos.new_line();
2021-04-22 17:02:25 +02:00
#[cfg(not(feature = "no_position"))]
{
2022-01-06 04:07:52 +01:00
let start_position = start.position().unwrap();
2021-05-22 13:14:24 +02:00
skip_whitespace_until = start_position + 1;
2021-04-22 17:02:25 +02:00
}
}
2021-04-11 15:49:03 +02:00
// Unterminated string
'\n' => {
pos.rewind();
2021-04-10 05:11:42 +02:00
state.is_within_text_terminated_by = None;
return Err((LERR::UnterminatedString, start));
}
2020-06-26 13:44:50 +02:00
// Unknown escape sequence
_ if !escape.is_empty() => {
escape.push(next_char);
2022-02-26 10:28:58 +01:00
return Err((LERR::MalformedEscapeSequence(escape.to_string()), *pos));
2020-07-29 10:10:06 +02:00
}
// Whitespace to skip
2021-04-22 17:02:25 +02:00
#[cfg(not(feature = "no_position"))]
2022-01-06 04:07:52 +01:00
_ if next_char.is_whitespace() && pos.position().unwrap() < skip_whitespace_until => {}
2020-06-14 08:25:47 +02:00
2020-06-26 13:44:50 +02:00
// All other characters
_ => {
2020-06-26 13:44:50 +02:00
escape.clear();
result.push(next_char);
2021-04-22 17:02:25 +02:00
#[cfg(not(feature = "no_position"))]
{
skip_whitespace_until = 0;
}
2020-06-26 13:44:50 +02:00
}
}
}
2021-01-06 06:46:53 +01:00
if let Some(max) = state.max_string_size {
2021-04-10 04:20:17 +02:00
if result.len() > max.get() {
2021-01-06 06:46:53 +01:00
return Err((LexError::StringTooLong(max.get()), *pos));
}
2020-06-26 13:44:50 +02:00
}
2022-02-26 10:28:58 +01:00
Ok((result, interpolated, first_char))
2020-06-26 13:44:50 +02:00
}
2020-06-26 13:44:50 +02:00
/// Consume the next character.
2020-10-08 16:25:50 +02:00
#[inline(always)]
2020-07-09 13:54:28 +02:00
fn eat_next(stream: &mut impl InputStream, pos: &mut Position) -> Option<char> {
2020-06-26 13:44:50 +02:00
pos.advance();
2020-07-09 13:54:28 +02:00
stream.get_next()
2020-06-26 13:44:50 +02:00
}
2020-06-26 13:44:50 +02:00
/// Scan for a block comment until the end.
2020-12-18 09:07:19 +01:00
fn scan_block_comment(
2020-06-26 13:44:50 +02:00
stream: &mut impl InputStream,
2021-08-13 07:42:39 +02:00
level: usize,
2020-06-26 13:44:50 +02:00
pos: &mut Position,
2022-03-25 01:52:53 +01:00
comment: Option<&mut SmartString>,
2020-12-12 13:09:29 +01:00
) -> usize {
2021-08-13 07:42:39 +02:00
let mut level = level;
let mut comment = comment;
2021-05-22 13:14:24 +02:00
2020-06-26 13:44:50 +02:00
while let Some(c) = stream.get_next() {
pos.advance();
2021-07-24 08:11:16 +02:00
if let Some(comment) = comment.as_mut() {
comment.push(c);
}
2020-06-26 13:44:50 +02:00
match c {
'/' => {
2021-07-26 16:22:27 +02:00
if let Some(c2) = stream.peek_next().filter(|&c2| c2 == '*') {
2021-05-25 04:54:48 +02:00
eat_next(stream, pos);
2021-07-24 08:11:16 +02:00
if let Some(comment) = comment.as_mut() {
comment.push(c2);
}
2021-05-25 04:54:48 +02:00
level += 1;
2021-07-26 16:22:27 +02:00
}
2020-06-26 13:44:50 +02:00
}
'*' => {
2021-07-26 16:22:27 +02:00
if let Some(c2) = stream.peek_next().filter(|&c2| c2 == '/') {
2021-05-25 04:54:48 +02:00
eat_next(stream, pos);
2021-07-24 08:11:16 +02:00
if let Some(comment) = comment.as_mut() {
comment.push(c2);
}
2021-05-25 04:54:48 +02:00
level -= 1;
2021-07-26 16:22:27 +02:00
}
}
2020-06-26 13:44:50 +02:00
'\n' => pos.new_line(),
_ => (),
}
2020-12-12 13:09:29 +01:00
if level == 0 {
2020-06-26 13:44:50 +02:00
break;
2020-06-14 08:25:47 +02:00
}
2020-06-26 13:44:50 +02:00
}
2020-12-12 13:09:29 +01:00
level
2020-06-26 13:44:50 +02:00
}
2020-06-14 08:25:47 +02:00
2022-01-01 12:54:46 +01:00
/// _(internals)_ Get the next token from the input stream.
/// Exported under the `internals` feature only.
#[inline]
2021-06-12 16:47:43 +02:00
#[must_use]
2020-06-26 16:33:27 +02:00
pub fn get_next_token(
2020-06-26 16:03:21 +02:00
stream: &mut impl InputStream,
state: &mut TokenizeState,
pos: &mut Position,
) -> Option<(Token, Position)> {
let result = get_next_token_inner(stream, state, pos);
// Save the last token's state
2022-02-08 02:02:15 +01:00
if let Some((ref token, ..)) = result {
state.next_token_cannot_be_unary = !token.is_next_unary();
2020-06-26 16:03:21 +02:00
}
result
}
/// Test if the given character is a hex character
/// (`0`-`9`, `a`-`f` or `A`-`F`; ASCII only).
#[inline(always)]
fn is_hex_digit(c: char) -> bool {
    c.is_ascii_hexdigit()
}
/// Test if the given character is a numeric digit (`0`-`9`; ASCII only).
#[inline(always)]
fn is_numeric_digit(c: char) -> bool {
    c.is_ascii_digit()
}
/// Test if the comment block is a doc-comment.
///
/// A doc-comment starts with exactly `///` or exactly `/**`; longer runs such as `////`
/// or `/***` are ordinary comments.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline]
#[must_use]
pub fn is_doc_comment(comment: &str) -> bool {
    // `marker` must be present, but not as part of the longer run `overlong`.
    let opens_with =
        |marker: &str, overlong: &str| comment.starts_with(marker) && !comment.starts_with(overlong);

    opens_with("///", "////") || opens_with("/**", "/***")
}
2020-06-26 16:03:21 +02:00
/// Get the next token.
2021-06-12 16:47:43 +02:00
#[must_use]
2020-06-26 16:03:21 +02:00
fn get_next_token_inner(
2020-06-26 13:44:50 +02:00
stream: &mut impl InputStream,
state: &mut TokenizeState,
pos: &mut Position,
) -> Option<(Token, Position)> {
// Still inside a comment?
if state.comment_level > 0 {
let start_pos = *pos;
2020-12-18 09:07:19 +01:00
let mut comment = if state.include_comments {
2022-03-25 01:52:53 +01:00
Some(SmartString::new_const())
2020-12-18 09:07:19 +01:00
} else {
None
};
2020-06-26 13:44:50 +02:00
2021-05-22 13:14:24 +02:00
state.comment_level =
scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
2020-12-18 09:07:19 +01:00
2021-05-22 13:14:24 +02:00
let return_comment = state.include_comments;
2021-04-09 17:13:33 +02:00
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
2021-11-13 05:23:35 +01:00
let return_comment = return_comment || is_doc_comment(comment.as_ref().expect("`Some`"));
2021-05-22 13:14:24 +02:00
if return_comment {
2022-07-20 14:28:17 +02:00
return Some((Token::Comment(comment.expect("`Some`")), start_pos));
2021-05-22 13:14:24 +02:00
}
if state.comment_level > 0 {
2021-04-09 17:13:33 +02:00
// Reached EOF without ending comment block
return None;
2020-06-26 13:44:50 +02:00
}
}
// Within text?
2021-04-05 17:06:48 +02:00
if let Some(ch) = state.is_within_text_terminated_by.take() {
2022-01-01 12:54:46 +01:00
return parse_string_literal(stream, state, pos, ch, true, false, true).map_or_else(
2022-02-26 16:18:47 +01:00
|(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
2022-02-10 07:51:31 +01:00
|(result, interpolated, start_pos)| {
if interpolated {
Some((Token::InterpolatedString(result), start_pos))
} else {
Some((Token::StringConstant(result), start_pos))
}
},
);
}
2021-04-22 17:02:25 +02:00
let mut negated: Option<Position> = None;
2020-06-26 13:44:50 +02:00
while let Some(c) = stream.get_next() {
pos.advance();
2020-06-26 13:44:50 +02:00
let start_pos = *pos;
2020-06-26 13:44:50 +02:00
match (c, stream.peek_next().unwrap_or('\0')) {
// \n
2022-02-08 02:02:15 +01:00
('\n', ..) => pos.new_line(),
2020-06-26 13:44:50 +02:00
// digit ...
2022-02-08 02:02:15 +01:00
('0'..='9', ..) => {
2022-09-14 06:11:18 +02:00
let mut result = SmartString::new_const();
2020-06-26 13:44:50 +02:00
let mut radix_base: Option<u32> = None;
2021-02-11 12:20:30 +01:00
let mut valid: fn(char) -> bool = is_numeric_digit;
2020-06-26 13:44:50 +02:00
result.push(c);
2020-06-26 13:44:50 +02:00
while let Some(next_char) = stream.peek_next() {
match next_char {
2022-09-14 06:11:18 +02:00
NUMBER_SEPARATOR => {
eat_next(stream, pos);
}
ch if valid(ch) => {
2020-06-26 13:44:50 +02:00
result.push(next_char);
eat_next(stream, pos);
}
2021-02-13 13:57:56 +01:00
#[cfg(any(not(feature = "no_float"), feature = "decimal"))]
2020-06-26 13:44:50 +02:00
'.' => {
2022-06-26 12:09:15 +02:00
stream.get_next().unwrap();
// Check if followed by digits or something that cannot start a property name
match stream.peek_next().unwrap_or('\0') {
// digits after period - accept the period
'0'..='9' => {
result.push(next_char);
2021-02-11 12:20:30 +01:00
pos.advance();
}
// _ - cannot follow a decimal point
'_' => {
2021-01-15 10:13:04 +01:00
stream.unget(next_char);
break;
}
// .. - reserved symbol, not a floating-point number
'.' => {
2021-01-15 10:13:04 +01:00
stream.unget(next_char);
break;
}
// symbol after period - probably a float
2021-07-24 08:11:16 +02:00
ch if !is_id_first_alphabetic(ch) => {
result.push(next_char);
pos.advance();
result.push('0');
}
// Not a floating-point number
_ => {
2021-01-15 10:13:04 +01:00
stream.unget(next_char);
break;
}
}
2021-02-11 12:20:30 +01:00
}
#[cfg(not(feature = "no_float"))]
'e' => {
2021-11-13 05:23:35 +01:00
stream.get_next().expect("`e`");
2021-02-11 12:20:30 +01:00
// Check if followed by digits or +/-
match stream.peek_next().unwrap_or('\0') {
// digits after e - accept the e
'0'..='9' => {
result.push(next_char);
pos.advance();
}
// +/- after e - accept the e and the sign
'+' | '-' => {
result.push(next_char);
pos.advance();
2022-06-26 12:09:15 +02:00
result.push(stream.get_next().unwrap());
2021-02-11 12:20:30 +01:00
pos.advance();
}
// Not a floating-point number
_ => {
stream.unget(next_char);
break;
}
}
2020-06-26 13:44:50 +02:00
}
2021-02-11 12:20:30 +01:00
// 0x????, 0o????, 0b???? at beginning
2022-07-27 12:04:59 +02:00
ch @ ('x' | 'o' | 'b' | 'X' | 'O' | 'B')
2021-02-11 12:20:30 +01:00
if c == '0' && result.len() <= 1 =>
2020-07-08 06:09:18 +02:00
{
2020-06-26 13:44:50 +02:00
result.push(next_char);
eat_next(stream, pos);
2021-02-11 12:20:30 +01:00
valid = match ch {
'x' | 'X' => is_hex_digit,
'o' | 'O' => is_numeric_digit,
'b' | 'B' => is_numeric_digit,
2021-12-30 05:19:41 +01:00
c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
2020-06-26 13:44:50 +02:00
};
radix_base = Some(match ch {
'x' | 'X' => 16,
'o' | 'O' => 8,
'b' | 'B' => 2,
2021-12-30 05:19:41 +01:00
c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
2020-06-26 13:44:50 +02:00
});
}
2020-06-26 13:44:50 +02:00
_ => break,
}
2020-06-26 13:44:50 +02:00
}
2021-05-25 04:54:48 +02:00
let num_pos = negated.map_or(start_pos, |negated_pos| {
2020-06-26 13:44:50 +02:00
result.insert(0, '-');
2021-04-16 15:59:05 +02:00
negated_pos
2021-05-25 04:54:48 +02:00
});
2020-06-26 13:44:50 +02:00
// Parse number
2021-04-05 17:06:48 +02:00
return Some((
if let Some(radix) = radix_base {
2022-09-14 06:11:18 +02:00
let result = &result[2..];
2022-09-14 06:11:18 +02:00
UNSIGNED_INT::from_str_radix(&result, radix)
.map(|v| v as INT)
2022-07-27 12:04:59 +02:00
.map_or_else(
|_| {
Token::LexError(
2022-09-14 06:11:18 +02:00
LERR::MalformedNumber(result.to_string()).into(),
2022-07-27 12:04:59 +02:00
)
},
Token::IntegerConstant,
)
2021-04-05 17:06:48 +02:00
} else {
2022-09-14 06:11:18 +02:00
let num = INT::from_str(&result).map(Token::IntegerConstant);
2020-06-26 13:44:50 +02:00
2021-04-05 17:06:48 +02:00
// If integer parsing is unnecessary, try float instead
#[cfg(not(feature = "no_float"))]
2021-12-06 13:52:47 +01:00
let num = num.or_else(|_| {
2022-09-14 06:11:18 +02:00
crate::ast::FloatWrapper::from_str(&result).map(Token::FloatConstant)
2021-12-06 13:52:47 +01:00
});
2020-06-26 13:44:50 +02:00
2021-04-05 17:06:48 +02:00
// Then try decimal
#[cfg(feature = "decimal")]
2021-12-06 13:52:47 +01:00
let num = num.or_else(|_| {
2022-09-14 06:11:18 +02:00
rust_decimal::Decimal::from_str(&result).map(Token::DecimalConstant)
2021-12-06 13:52:47 +01:00
});
2021-02-13 13:57:56 +01:00
2021-04-05 17:06:48 +02:00
// Then try decimal in scientific notation
#[cfg(feature = "decimal")]
let num = num.or_else(|_| {
2022-09-14 06:11:18 +02:00
rust_decimal::Decimal::from_scientific(&result)
.map(Token::DecimalConstant)
2021-04-05 17:06:48 +02:00
});
2020-06-26 13:44:50 +02:00
num.unwrap_or_else(|_| {
2022-09-14 06:11:18 +02:00
Token::LexError(LERR::MalformedNumber(result.to_string()).into())
2021-04-05 17:06:48 +02:00
})
},
2021-04-16 15:59:05 +02:00
num_pos,
2021-04-05 17:06:48 +02:00
));
2020-06-26 13:44:50 +02:00
}
2020-06-26 13:44:50 +02:00
// letter or underscore ...
2020-12-29 03:41:20 +01:00
#[cfg(not(feature = "unicode-xid-ident"))]
2022-07-27 12:04:59 +02:00
('a'..='z' | '_' | 'A'..='Z', ..) => {
2022-08-29 08:27:05 +02:00
return Some(get_identifier(stream, pos, start_pos, c));
2020-06-26 13:44:50 +02:00
}
2020-12-29 03:41:20 +01:00
#[cfg(feature = "unicode-xid-ident")]
2022-02-08 02:02:15 +01:00
(ch, ..) if unicode_xid::UnicodeXID::is_xid_start(ch) || ch == '_' => {
2022-08-29 08:27:05 +02:00
return Some(get_identifier(stream, pos, start_pos, c));
2020-12-29 03:41:20 +01:00
}
// " - string literal
2022-02-08 02:02:15 +01:00
('"', ..) => {
2022-01-01 12:54:46 +01:00
return parse_string_literal(stream, state, pos, c, false, true, false)
2021-04-04 07:13:07 +02:00
.map_or_else(
2022-02-26 16:18:47 +01:00
|(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
2022-02-08 02:02:15 +01:00
|(result, ..)| Some((Token::StringConstant(result), start_pos)),
2021-04-04 07:13:07 +02:00
);
}
// ` - string literal
2022-02-08 02:02:15 +01:00
('`', ..) => {
2021-04-10 04:20:17 +02:00
// Start from the next line if at the end of line
match stream.peek_next() {
// `\r - start from next line
Some('\r') => {
eat_next(stream, pos);
// `\r\n
if let Some('\n') = stream.peek_next() {
2021-04-10 04:20:17 +02:00
eat_next(stream, pos);
}
pos.new_line();
2021-04-10 04:20:17 +02:00
}
// `\n - start from next line
Some('\n') => {
eat_next(stream, pos);
pos.new_line();
}
_ => (),
}
2022-01-01 12:54:46 +01:00
return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
2022-02-26 16:18:47 +01:00
|(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
2022-02-10 07:51:31 +01:00
|(result, interpolated, ..)| {
2021-04-10 04:20:17 +02:00
if interpolated {
Some((Token::InterpolatedString(result), start_pos))
} else {
Some((Token::StringConstant(result), start_pos))
}
},
);
2020-07-08 06:09:18 +02:00
}
2020-06-26 13:44:50 +02:00
// ' - character literal
2020-07-08 06:09:18 +02:00
('\'', '\'') => {
return Some((
2022-02-26 16:18:47 +01:00
Token::LexError(LERR::MalformedChar("".to_string()).into()),
2020-07-08 06:09:18 +02:00
start_pos,
))
}
2022-02-08 02:02:15 +01:00
('\'', ..) => {
return Some(
2021-04-10 04:20:17 +02:00
parse_string_literal(stream, state, pos, c, false, false, false).map_or_else(
2022-02-26 16:18:47 +01:00
|(err, err_pos)| (Token::LexError(err.into()), err_pos),
2022-02-08 02:02:15 +01:00
|(result, ..)| {
2021-04-10 04:20:17 +02:00
let mut chars = result.chars();
2022-01-06 04:07:52 +01:00
let first = chars.next().unwrap();
2021-04-10 04:20:17 +02:00
if chars.next().is_some() {
2021-11-11 06:55:52 +01:00
(
2022-02-26 16:18:47 +01:00
Token::LexError(LERR::MalformedChar(result.to_string()).into()),
2021-11-11 06:55:52 +01:00
start_pos,
)
2021-04-10 04:20:17 +02:00
} else {
(Token::CharConstant(first), start_pos)
}
},
),
)
2020-07-08 06:09:18 +02:00
}
2020-06-26 13:44:50 +02:00
// Braces
2022-02-08 02:02:15 +01:00
('{', ..) => return Some((Token::LeftBrace, start_pos)),
('}', ..) => return Some((Token::RightBrace, start_pos)),
2020-06-26 13:44:50 +02:00
2022-04-21 04:04:46 +02:00
// Unit
('(', ')') => {
eat_next(stream, pos);
return Some((Token::Unit, start_pos));
}
2020-06-26 13:44:50 +02:00
// Parentheses
('(', '*') => {
eat_next(stream, pos);
return Some((Token::Reserved("(*".into()), start_pos));
}
2022-02-08 02:02:15 +01:00
('(', ..) => return Some((Token::LeftParen, start_pos)),
(')', ..) => return Some((Token::RightParen, start_pos)),
2020-06-26 13:44:50 +02:00
// Indexing
2022-02-08 02:02:15 +01:00
('[', ..) => return Some((Token::LeftBracket, start_pos)),
(']', ..) => return Some((Token::RightBracket, start_pos)),
2020-06-26 13:44:50 +02:00
// Map literal
#[cfg(not(feature = "no_object"))]
('#', '{') => {
eat_next(stream, pos);
return Some((Token::MapStart, start_pos));
}
2021-03-28 10:36:56 +02:00
// Shebang
('#', '!') => return Some((Token::Reserved("#!".into()), start_pos)),
('#', ' ') => {
eat_next(stream, pos);
let token = if stream.peek_next() == Some('{') {
eat_next(stream, pos);
"# {"
} else {
"#"
};
return Some((Token::Reserved(token.into()), start_pos));
}
2022-02-08 02:02:15 +01:00
('#', ..) => return Some((Token::Reserved("#".into()), start_pos)),
2020-06-26 13:44:50 +02:00
// Operators
('+', '=') => {
eat_next(stream, pos);
return Some((Token::PlusAssign, start_pos));
}
2020-10-10 16:14:10 +02:00
('+', '+') => {
eat_next(stream, pos);
return Some((Token::Reserved("++".into()), start_pos));
}
2022-02-08 02:02:15 +01:00
('+', ..) if !state.next_token_cannot_be_unary => {
return Some((Token::UnaryPlus, start_pos))
}
2022-02-08 02:02:15 +01:00
('+', ..) => return Some((Token::Plus, start_pos)),
2020-06-26 13:44:50 +02:00
('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos),
2020-06-26 13:44:50 +02:00
('-', '0'..='9') => return Some((Token::Minus, start_pos)),
('-', '=') => {
eat_next(stream, pos);
return Some((Token::MinusAssign, start_pos));
}
('-', '>') => {
eat_next(stream, pos);
return Some((Token::Reserved("->".into()), start_pos));
}
2020-10-10 16:14:10 +02:00
('-', '-') => {
eat_next(stream, pos);
return Some((Token::Reserved("--".into()), start_pos));
}
2022-02-08 02:02:15 +01:00
('-', ..) if !state.next_token_cannot_be_unary => {
return Some((Token::UnaryMinus, start_pos))
}
2022-02-08 02:02:15 +01:00
('-', ..) => return Some((Token::Minus, start_pos)),
2020-06-26 13:44:50 +02:00
('*', ')') => {
eat_next(stream, pos);
return Some((Token::Reserved("*)".into()), start_pos));
}
2020-06-26 13:44:50 +02:00
('*', '=') => {
eat_next(stream, pos);
return Some((Token::MultiplyAssign, start_pos));
}
2021-02-10 05:41:27 +01:00
('*', '*') => {
eat_next(stream, pos);
return Some((
if stream.peek_next() == Some('=') {
eat_next(stream, pos);
Token::PowerOfAssign
} else {
Token::PowerOf
},
start_pos,
));
}
2022-02-08 02:02:15 +01:00
('*', ..) => return Some((Token::Multiply, start_pos)),
2020-06-26 13:44:50 +02:00
// Comments
('/', '/') => {
eat_next(stream, pos);
2022-03-25 01:52:53 +01:00
let mut comment: Option<SmartString> = match stream.peek_next() {
2021-04-09 17:13:33 +02:00
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
2021-05-03 07:07:51 +02:00
Some('/') => {
2020-12-18 09:07:19 +01:00
eat_next(stream, pos);
// Long streams of `///...` are not doc-comments
match stream.peek_next() {
Some('/') => None,
2022-03-25 01:52:53 +01:00
_ => Some("///".into()),
}
2020-12-18 09:07:19 +01:00
}
2022-07-25 07:40:23 +02:00
#[cfg(feature = "metadata")]
Some('!') => {
eat_next(stream, pos);
Some("//!".into())
}
2022-03-25 01:52:53 +01:00
_ if state.include_comments => Some("//".into()),
2020-12-18 09:07:19 +01:00
_ => None,
2020-06-26 13:44:50 +02:00
};
while let Some(c) = stream.get_next() {
if c == '\r' {
// \r\n
if let Some('\n') = stream.peek_next() {
eat_next(stream, pos);
}
pos.new_line();
break;
}
2020-06-26 13:44:50 +02:00
if c == '\n' {
pos.new_line();
break;
}
2021-07-24 08:11:16 +02:00
if let Some(comment) = comment.as_mut() {
comment.push(c);
}
2020-06-26 13:44:50 +02:00
pos.advance();
}
2020-12-18 09:07:19 +01:00
if let Some(comment) = comment {
2022-07-25 07:40:23 +02:00
match comment {
#[cfg(feature = "metadata")]
_ if comment.starts_with("//!") => state
.tokenizer_control
.borrow_mut()
.global_comments
.push(comment),
_ => return Some((Token::Comment(comment), start_pos)),
}
}
2020-06-26 13:44:50 +02:00
}
('/', '*') => {
state.comment_level = 1;
eat_next(stream, pos);
2022-03-25 01:52:53 +01:00
let mut comment: Option<SmartString> = match stream.peek_next() {
2021-04-09 17:13:33 +02:00
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
2021-05-03 07:07:51 +02:00
Some('*') => {
2020-12-18 09:07:19 +01:00
eat_next(stream, pos);
// Long streams of `/****...` are not doc-comments
match stream.peek_next() {
Some('*') => None,
2022-03-25 01:52:53 +01:00
_ => Some("/**".into()),
}
2020-12-18 09:07:19 +01:00
}
2022-03-25 01:52:53 +01:00
_ if state.include_comments => Some("/*".into()),
2020-12-18 09:07:19 +01:00
_ => None,
2020-06-26 13:44:50 +02:00
};
2020-12-18 09:07:19 +01:00
state.comment_level =
2021-05-22 13:14:24 +02:00
scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
2020-12-18 09:07:19 +01:00
if let Some(comment) = comment {
2022-03-25 01:52:53 +01:00
return Some((Token::Comment(comment), start_pos));
}
2020-06-26 13:44:50 +02:00
}
2020-06-26 13:44:50 +02:00
('/', '=') => {
eat_next(stream, pos);
return Some((Token::DivideAssign, start_pos));
}
2022-02-08 02:02:15 +01:00
('/', ..) => return Some((Token::Divide, start_pos)),
2022-02-08 02:02:15 +01:00
(';', ..) => return Some((Token::SemiColon, start_pos)),
(',', ..) => return Some((Token::Comma, start_pos)),
2020-10-10 16:14:10 +02:00
('.', '.') => {
eat_next(stream, pos);
2021-12-15 05:06:17 +01:00
return Some((
match stream.peek_next() {
Some('.') => {
eat_next(stream, pos);
Token::Reserved("...".into())
}
Some('=') => {
eat_next(stream, pos);
Token::InclusiveRange
}
_ => Token::ExclusiveRange,
},
start_pos,
));
2020-10-10 16:14:10 +02:00
}
2022-02-08 02:02:15 +01:00
('.', ..) => return Some((Token::Period, start_pos)),
2020-06-26 13:44:50 +02:00
('=', '=') => {
eat_next(stream, pos);
2020-06-26 13:44:50 +02:00
if stream.peek_next() == Some('=') {
eat_next(stream, pos);
2020-07-08 06:09:18 +02:00
return Some((Token::Reserved("===".into()), start_pos));
}
2020-06-26 13:44:50 +02:00
return Some((Token::EqualsTo, start_pos));
}
('=', '>') => {
eat_next(stream, pos);
2020-11-13 11:32:18 +01:00
return Some((Token::DoubleArrow, start_pos));
}
2022-02-08 02:02:15 +01:00
('=', ..) => return Some((Token::Equals, start_pos)),
2020-06-26 13:44:50 +02:00
2021-10-29 11:01:29 +02:00
#[cfg(not(feature = "no_module"))]
2020-06-26 13:44:50 +02:00
(':', ':') => {
eat_next(stream, pos);
2020-07-08 06:09:18 +02:00
if stream.peek_next() == Some('<') {
eat_next(stream, pos);
2020-07-08 06:09:18 +02:00
return Some((Token::Reserved("::<".into()), start_pos));
}
2020-06-26 13:44:50 +02:00
return Some((Token::DoubleColon, start_pos));
}
(':', '=') => {
eat_next(stream, pos);
return Some((Token::Reserved(":=".into()), start_pos));
}
2022-01-07 05:19:01 +01:00
(':', ';') => {
eat_next(stream, pos);
return Some((Token::Reserved(":;".into()), start_pos));
}
2022-02-08 02:02:15 +01:00
(':', ..) => return Some((Token::Colon, start_pos)),
2020-06-26 13:44:50 +02:00
('<', '=') => {
eat_next(stream, pos);
return Some((Token::LessThanEqualsTo, start_pos));
}
('<', '-') => {
eat_next(stream, pos);
return Some((Token::Reserved("<-".into()), start_pos));
}
2020-06-26 13:44:50 +02:00
('<', '<') => {
eat_next(stream, pos);
return Some((
if stream.peek_next() == Some('=') {
eat_next(stream, pos);
Token::LeftShiftAssign
} else {
Token::LeftShift
},
start_pos,
));
}
2022-08-18 10:59:54 +02:00
('<', '|') => {
eat_next(stream, pos);
return Some((Token::Reserved("<|".into()), start_pos));
}
2022-02-08 02:02:15 +01:00
('<', ..) => return Some((Token::LessThan, start_pos)),
2020-04-22 11:36:51 +02:00
2020-06-26 13:44:50 +02:00
('>', '=') => {
eat_next(stream, pos);
return Some((Token::GreaterThanEqualsTo, start_pos));
}
('>', '>') => {
eat_next(stream, pos);
2020-06-26 13:44:50 +02:00
return Some((
if stream.peek_next() == Some('=') {
eat_next(stream, pos);
Token::RightShiftAssign
} else {
Token::RightShift
},
start_pos,
));
}
2022-02-08 02:02:15 +01:00
('>', ..) => return Some((Token::GreaterThan, start_pos)),
2020-05-03 19:19:01 +02:00
2020-06-26 13:44:50 +02:00
('!', '=') => {
eat_next(stream, pos);
2020-06-26 13:44:50 +02:00
if stream.peek_next() == Some('=') {
eat_next(stream, pos);
2020-07-08 06:09:18 +02:00
return Some((Token::Reserved("!==".into()), start_pos));
}
2020-06-26 13:44:50 +02:00
return Some((Token::NotEqualsTo, start_pos));
}
2022-08-18 10:59:54 +02:00
('!', '.') => {
eat_next(stream, pos);
return Some((Token::Reserved("!.".into()), start_pos));
}
2022-02-08 02:02:15 +01:00
('!', ..) => return Some((Token::Bang, start_pos)),
2020-06-26 13:44:50 +02:00
('|', '|') => {
eat_next(stream, pos);
return Some((Token::Or, start_pos));
}
('|', '=') => {
eat_next(stream, pos);
return Some((Token::OrAssign, start_pos));
}
2022-08-18 10:59:54 +02:00
('|', '>') => {
eat_next(stream, pos);
return Some((Token::Reserved("|>".into()), start_pos));
}
2022-02-08 02:02:15 +01:00
('|', ..) => return Some((Token::Pipe, start_pos)),
2020-04-22 11:36:51 +02:00
2020-06-26 13:44:50 +02:00
('&', '&') => {
eat_next(stream, pos);
return Some((Token::And, start_pos));
}
('&', '=') => {
eat_next(stream, pos);
return Some((Token::AndAssign, start_pos));
}
2022-02-08 02:02:15 +01:00
('&', ..) => return Some((Token::Ampersand, start_pos)),
2020-06-26 13:44:50 +02:00
('^', '=') => {
eat_next(stream, pos);
return Some((Token::XOrAssign, start_pos));
}
2022-02-08 02:02:15 +01:00
('^', ..) => return Some((Token::XOr, start_pos)),
2022-02-08 02:02:15 +01:00
('~', ..) => return Some((Token::Reserved("~".into()), start_pos)),
2021-02-10 05:41:27 +01:00
2020-06-26 13:44:50 +02:00
('%', '=') => {
eat_next(stream, pos);
return Some((Token::ModuloAssign, start_pos));
}
2022-02-08 02:02:15 +01:00
('%', ..) => return Some((Token::Modulo, start_pos)),
2022-02-08 02:02:15 +01:00
('@', ..) => return Some((Token::Reserved("@".into()), start_pos)),
2020-10-05 17:02:50 +02:00
2022-02-08 02:02:15 +01:00
('$', ..) => return Some((Token::Reserved("$".into()), start_pos)),
2020-07-08 06:09:18 +02:00
2022-06-10 04:26:06 +02:00
('?', '.') => {
eat_next(stream, pos);
2022-06-11 18:32:12 +02:00
return Some((
#[cfg(not(feature = "no_object"))]
Token::Elvis,
#[cfg(feature = "no_object")]
Token::Reserved("?.".into()),
start_pos,
));
2022-06-10 04:26:06 +02:00
}
2022-06-10 05:22:33 +02:00
('?', '?') => {
eat_next(stream, pos);
return Some((Token::DoubleQuestion, start_pos));
}
2022-06-11 18:32:12 +02:00
('?', '[') => {
eat_next(stream, pos);
return Some((
#[cfg(not(feature = "no_index"))]
Token::QuestionBracket,
#[cfg(feature = "no_index")]
Token::Reserved("?[".into()),
start_pos,
));
}
2022-06-10 02:47:22 +02:00
('?', ..) => return Some((Token::Reserved("?".into()), start_pos)),
2022-02-08 02:02:15 +01:00
(ch, ..) if ch.is_whitespace() => (),
2020-12-29 03:41:20 +01:00
2022-02-08 02:02:15 +01:00
(ch, ..) => {
2020-07-08 06:09:18 +02:00
return Some((
2022-02-26 16:18:47 +01:00
Token::LexError(LERR::UnexpectedInput(ch.to_string()).into()),
2020-07-08 06:09:18 +02:00
start_pos,
))
}
2020-06-26 13:44:50 +02:00
}
}
2020-06-26 13:44:50 +02:00
pos.advance();
2021-04-11 15:49:03 +02:00
Some((Token::EOF, *pos))
2020-06-26 13:44:50 +02:00
}
2020-07-28 22:26:57 +02:00
/// Get the next identifier.
fn get_identifier(
stream: &mut impl InputStream,
pos: &mut Position,
start_pos: Position,
first_char: char,
2022-08-29 08:27:05 +02:00
) -> (Token, Position) {
2022-09-14 06:11:18 +02:00
let mut identifier = SmartString::new_const();
identifier.push(first_char);
2020-07-28 22:26:57 +02:00
while let Some(next_char) = stream.peek_next() {
match next_char {
2020-07-28 23:24:41 +02:00
x if is_id_continue(x) => {
2022-09-14 06:11:18 +02:00
identifier.push(x);
2020-07-28 22:26:57 +02:00
eat_next(stream, pos);
}
_ => break,
}
}
2022-09-14 06:11:18 +02:00
let is_valid_identifier = is_valid_identifier(identifier.chars());
2020-11-13 11:32:18 +01:00
if let Some(token) = Token::lookup_from_syntax(&identifier) {
2022-08-29 08:27:05 +02:00
return (token, start_pos);
2020-11-13 11:32:18 +01:00
}
2020-07-28 22:26:57 +02:00
if !is_valid_identifier {
2022-08-29 08:27:05 +02:00
return (
2022-09-14 06:11:18 +02:00
Token::LexError(LERR::MalformedIdentifier(identifier.to_string()).into()),
2020-07-28 22:26:57 +02:00
start_pos,
2022-08-29 08:27:05 +02:00
);
2020-07-28 22:26:57 +02:00
}
2022-09-14 06:11:18 +02:00
(Token::Identifier(identifier), start_pos)
2020-07-28 22:26:57 +02:00
}
2020-07-28 23:24:41 +02:00
/// Is a keyword allowed as a function?
#[inline]
2021-06-12 16:47:43 +02:00
#[must_use]
2022-01-04 08:22:48 +01:00
pub fn is_keyword_function(name: &str) -> bool {
match name {
2020-07-31 12:43:34 +02:00
KEYWORD_PRINT | KEYWORD_DEBUG | KEYWORD_TYPE_OF | KEYWORD_EVAL | KEYWORD_FN_PTR
2021-03-01 15:44:56 +01:00
| KEYWORD_FN_PTR_CALL | KEYWORD_FN_PTR_CURRY | KEYWORD_IS_DEF_VAR => true,
#[cfg(not(feature = "no_function"))]
2021-12-06 13:52:47 +01:00
crate::engine::KEYWORD_IS_DEF_FN => true,
2021-03-01 15:44:56 +01:00
2020-07-31 12:43:34 +02:00
_ => false,
2020-07-31 12:06:01 +02:00
}
}
/// _(internals)_ Is a text string a valid identifier?
/// Exported under the `internals` feature only.
2021-06-12 16:47:43 +02:00
#[must_use]
2020-07-28 23:24:41 +02:00
pub fn is_valid_identifier(name: impl Iterator<Item = char>) -> bool {
let mut first_alphabetic = false;
for ch in name {
match ch {
'_' => (),
2020-07-28 23:54:23 +02:00
_ if is_id_first_alphabetic(ch) => first_alphabetic = true,
2020-07-28 23:24:41 +02:00
_ if !first_alphabetic => return false,
_ if char::is_ascii_alphanumeric(&ch) => (),
_ => return false,
}
}
first_alphabetic
}
/// _(internals)_ Is a text string a valid script-defined function name?
/// Exported under the `internals` feature only.
2021-08-30 09:42:47 +02:00
#[inline(always)]
#[must_use]
2022-01-04 08:22:48 +01:00
pub fn is_valid_function_name(name: &str) -> bool {
is_valid_identifier(name.chars())
2021-08-30 09:42:47 +02:00
}
2021-02-24 16:23:32 +01:00
/// Is a character valid to start an identifier?
///
/// This version is compiled when the `unicode-xid-ident` feature is enabled and defers to
/// `unicode_xid::UnicodeXID::is_xid_start`.
#[cfg(feature = "unicode-xid-ident")]
#[inline(always)]
#[must_use]
pub fn is_id_first_alphabetic(x: char) -> bool {
    unicode_xid::UnicodeXID::is_xid_start(x)
}
2021-02-24 16:23:32 +01:00
/// Is a character valid for an identifier?
///
/// This version is compiled when the `unicode-xid-ident` feature is enabled and defers to
/// `unicode_xid::UnicodeXID::is_xid_continue`.
#[cfg(feature = "unicode-xid-ident")]
#[inline(always)]
#[must_use]
pub fn is_id_continue(x: char) -> bool {
    unicode_xid::UnicodeXID::is_xid_continue(x)
}
2021-02-24 16:23:32 +01:00
/// Is a character valid to start an identifier?
///
/// This version is compiled when the `unicode-xid-ident` feature is disabled: only ASCII
/// letters (`a`-`z`, `A`-`Z`) qualify.
#[cfg(not(feature = "unicode-xid-ident"))]
#[inline(always)]
#[must_use]
pub const fn is_id_first_alphabetic(x: char) -> bool {
    x.is_ascii_alphabetic()
}
2021-02-24 16:23:32 +01:00
/// Is a character valid for an identifier?
///
/// This version is compiled when the `unicode-xid-ident` feature is disabled: only ASCII
/// letters, ASCII digits and the underscore qualify.
#[cfg(not(feature = "unicode-xid-ident"))]
#[inline(always)]
#[must_use]
pub const fn is_id_continue(x: char) -> bool {
    x.is_ascii_alphanumeric() || x == '_'
}
2021-09-24 03:26:35 +02:00
/// _(internals)_ A type that implements the [`InputStream`] trait.
/// Exported under the `internals` feature only.
///
2020-07-09 13:54:28 +02:00
/// Multiple character streams are jointed together to form one single stream.
2020-06-26 13:44:50 +02:00
pub struct MultiInputsStream<'a> {
/// Buffered character, if any.
2021-09-24 03:26:35 +02:00
pub buf: Option<char>,
2020-07-09 13:54:28 +02:00
/// The current stream index.
2021-09-24 03:26:35 +02:00
pub index: usize,
2021-03-12 15:30:08 +01:00
/// The input character streams.
2021-09-24 03:26:35 +02:00
pub streams: StaticVec<Peekable<Chars<'a>>>,
2020-06-26 13:44:50 +02:00
}
2020-06-26 13:44:50 +02:00
impl InputStream for MultiInputsStream<'_> {
    /// Put `ch` into the one-character buffer so that it is the next character returned.
    ///
    /// # Panics
    ///
    /// Panics if the buffer already holds a character.
    #[inline]
    fn unget(&mut self, ch: char) {
        assert!(self.buf.is_none(), "cannot unget two characters in a row");
        self.buf = Some(ch);
    }

    /// Consume and return the next character.
    ///
    /// The buffered character (if any) is returned first and the buffer is cleared.
    /// Otherwise characters come from the current stream; exhausted streams are skipped
    /// by advancing `index`. Returns `None` once every stream is exhausted.
    fn get_next(&mut self) -> Option<char> {
        if let Some(ch) = self.buf.take() {
            return Some(ch);
        }

        while let Some(stream) = self.streams.get_mut(self.index) {
            if let Some(ch) = stream.next() {
                // Next character in current stream
                return Some(ch);
            }
            // Jump to the next stream
            self.index += 1;
        }

        // No more streams
        None
    }

    /// Return the next character without consuming it.
    ///
    /// The buffered character (if any) is reported first and left in place. Exhausted
    /// streams are skipped by advancing `index`, exactly as in `get_next`. Returns `None`
    /// once every stream is exhausted.
    fn peek_next(&mut self) -> Option<char> {
        if self.buf.is_some() {
            return self.buf;
        }

        while let Some(stream) = self.streams.get_mut(self.index) {
            if let Some(&ch) = stream.peek() {
                // Next character in current stream
                return Some(ch);
            }
            // Jump to the next stream
            self.index += 1;
        }

        // No more streams
        None
    }
}
2021-09-24 03:26:35 +02:00
/// _(internals)_ An iterator on a [`Token`] stream.
/// Exported under the `internals` feature only.
2021-03-03 15:49:57 +01:00
pub struct TokenIterator<'a> {
2020-07-05 11:41:45 +02:00
/// Reference to the scripting `Engine`.
2021-09-24 03:26:35 +02:00
pub engine: &'a Engine,
2020-06-26 13:44:50 +02:00
/// Current state.
2021-09-24 03:26:35 +02:00
pub state: TokenizeState,
2020-06-26 13:44:50 +02:00
/// Current position.
2021-09-24 03:26:35 +02:00
pub pos: Position,
2020-06-26 13:44:50 +02:00
/// Input character stream.
2021-09-24 03:26:35 +02:00
pub stream: MultiInputsStream<'a>,
2020-12-29 03:41:20 +01:00
/// A processor function that maps a token to another.
2021-09-24 12:00:48 +02:00
pub token_mapper: Option<&'a OnParseTokenCallback>,
}
2021-03-03 15:49:57 +01:00
impl<'a> Iterator for TokenIterator<'a> {
type Item = (Token, Position);
fn next(&mut self) -> Option<Self::Item> {
2022-07-25 07:40:23 +02:00
{
let control = &mut *self.state.tokenizer_control.borrow_mut();
if control.is_within_text {
// Switch to text mode terminated by back-tick
self.state.is_within_text_terminated_by = Some('`');
// Reset it
control.is_within_text = false;
}
2021-04-04 07:13:07 +02:00
}
let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
// {EOF}
None => return None,
// {EOF} after unterminated string.
// The only case where `TokenizeState.is_within_text_terminated_by` is set is when
// a verbatim string or a string with continuation encounters {EOF}.
// This is necessary to handle such cases for line-by-line parsing, but for an entire
// script it is a syntax error.
2022-02-08 02:46:14 +01:00
Some((Token::StringConstant(..), pos)) if self.state.is_within_text_terminated_by.is_some() => {
self.state.is_within_text_terminated_by = None;
2022-02-26 16:18:47 +01:00
return Some((Token::LexError(LERR::UnterminatedString.into()), pos));
2021-04-10 04:20:17 +02:00
}
// Reserved keyword/symbol
Some((Token::Reserved(s), pos)) => (match
2022-07-06 06:56:15 +02:00
(&*s,
2022-07-05 16:59:03 +02:00
#[cfg(not(feature = "no_custom_syntax"))]
(!self.engine.custom_keywords.is_empty() && self.engine.custom_keywords.contains_key(&*s)),
#[cfg(feature = "no_custom_syntax")]
false
)
{
2021-11-11 06:55:52 +01:00
("===", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"'===' is not a valid operator. This is not JavaScript! Should it be '=='?".to_string(),
2022-02-26 16:18:47 +01:00
).into()),
2021-11-11 06:55:52 +01:00
("!==", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"'!==' is not a valid operator. This is not JavaScript! Should it be '!='?".to_string(),
2022-02-26 16:18:47 +01:00
).into()),
2021-11-11 06:55:52 +01:00
("->", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2022-02-26 16:18:47 +01:00
"'->' is not a valid symbol. This is not C or C++!".to_string()).into()),
2021-11-11 06:55:52 +01:00
("<-", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2020-07-08 06:09:18 +02:00
"'<-' is not a valid symbol. This is not Go! Should it be '<='?".to_string(),
2022-02-26 16:18:47 +01:00
).into()),
2021-11-11 06:55:52 +01:00
(":=", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2020-11-21 08:44:17 +01:00
"':=' is not a valid assignment operator. This is not Go or Pascal! Should it be simply '='?".to_string(),
2022-02-26 16:18:47 +01:00
).into()),
2022-01-07 05:19:01 +01:00
(":;", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"':;' is not a valid symbol. Should it be '::'?".to_string(),
2022-02-26 16:18:47 +01:00
).into()),
2021-11-11 06:55:52 +01:00
("::<", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"'::<>' is not a valid symbol. This is not Rust! Should it be '::'?".to_string(),
2022-02-26 16:18:47 +01:00
).into()),
2022-07-27 12:04:59 +02:00
("(*" | "*)", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"'(* .. *)' is not a valid comment format. This is not Pascal! Should it be '/* .. */'?".to_string(),
2022-02-26 16:18:47 +01:00
).into()),
("# {", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"'#' is not a valid symbol. Should it be '#{'?".to_string(),
2022-02-26 16:18:47 +01:00
).into()),
// Reserved keyword/operator that is custom.
2022-07-05 16:59:03 +02:00
#[cfg(not(feature = "no_custom_syntax"))]
2022-02-08 02:02:15 +01:00
(.., true) => Token::Custom(s),
2022-07-05 16:59:03 +02:00
#[cfg(feature = "no_custom_syntax")]
(.., true) => unreachable!("no custom operators"),
// Reserved keyword that is not custom and disabled.
2022-03-03 06:02:57 +01:00
(token, false) if !self.engine.disabled_symbols.is_empty() && self.engine.disabled_symbols.contains(token) => {
2022-08-11 13:01:23 +02:00
let msg = format!("reserved {} '{token}' is disabled", if is_valid_identifier(token.chars()) { "keyword"} else {"symbol"});
2022-02-26 16:18:47 +01:00
Token::LexError(LERR::ImproperSymbol(s.to_string(), msg).into())
},
// Reserved keyword/operator that is not custom.
2022-02-08 02:02:15 +01:00
(.., false) => Token::Reserved(s),
}, pos),
// Custom keyword
2022-07-05 16:59:03 +02:00
#[cfg(not(feature = "no_custom_syntax"))]
2022-03-03 06:02:57 +01:00
Some((Token::Identifier(s), pos)) if !self.engine.custom_keywords.is_empty() && self.engine.custom_keywords.contains_key(&*s) => {
(Token::Custom(s), pos)
}
// Custom keyword/symbol - must be disabled
2022-07-05 16:59:03 +02:00
#[cfg(not(feature = "no_custom_syntax"))]
2022-03-03 06:02:57 +01:00
Some((token, pos)) if !self.engine.custom_keywords.is_empty() && self.engine.custom_keywords.contains_key(token.literal_syntax()) => {
if !self.engine.disabled_symbols.is_empty() && self.engine.disabled_symbols.contains(token.literal_syntax()) {
2020-12-26 16:21:09 +01:00
// Disabled standard keyword/symbol
2022-02-26 10:28:58 +01:00
(Token::Custom(token.literal_syntax().into()), pos)
} else {
// Active standard keyword - should never be a custom keyword!
2021-12-30 05:19:41 +01:00
unreachable!("{:?} is an active keyword", token)
}
}
2020-12-26 16:21:09 +01:00
// Disabled symbol
2022-03-03 06:02:57 +01:00
Some((token, pos)) if !self.engine.disabled_symbols.is_empty() && self.engine.disabled_symbols.contains(token.literal_syntax()) => {
2022-02-26 10:28:58 +01:00
(Token::Reserved(token.literal_syntax().into()), pos)
2020-07-05 09:23:51 +02:00
}
2020-12-26 16:21:09 +01:00
// Normal symbol
Some(r) => r,
2020-07-26 16:25:30 +02:00
};
// Run the mapper, if any
2021-12-12 05:33:22 +01:00
let token = match self.token_mapper {
Some(map_func) => map_func(token, pos, &self.state),
None => token,
};
Some((token, pos))
}
}
2021-04-04 07:13:07 +02:00
// Marks `TokenIterator` as a fused iterator (marker trait only, no methods).
// NOTE(review): this relies on `next` continuing to return `None` once it has done so,
// which depends on `get_next_token` — confirm against that function.
impl FusedIterator for TokenIterator<'_> {}
impl Engine {
2021-07-25 16:56:05 +02:00
/// _(internals)_ Tokenize an input text stream.
2021-03-03 15:49:57 +01:00
/// Exported under the `internals` feature only.
#[cfg(feature = "internals")]
2020-12-29 03:41:20 +01:00
#[inline(always)]
2021-06-12 16:47:43 +02:00
#[must_use]
2021-04-04 07:13:07 +02:00
pub fn lex<'a>(
&'a self,
2021-11-28 16:06:33 +01:00
input: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2021-04-04 18:05:56 +02:00
) -> (TokenIterator<'a>, TokenizerControl) {
self.lex_raw(input, None)
2020-12-29 03:41:20 +01:00
}
2021-07-25 16:56:05 +02:00
/// _(internals)_ Tokenize an input text stream with a mapping function.
2021-03-03 15:49:57 +01:00
/// Exported under the `internals` feature only.
#[cfg(feature = "internals")]
#[inline(always)]
2021-06-12 16:47:43 +02:00
#[must_use]
2021-03-03 15:49:57 +01:00
pub fn lex_with_map<'a>(
&'a self,
2021-11-28 16:06:33 +01:00
input: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2021-09-24 12:00:48 +02:00
token_mapper: &'a OnParseTokenCallback,
2021-04-04 18:05:56 +02:00
) -> (TokenIterator<'a>, TokenizerControl) {
2021-09-24 12:00:48 +02:00
self.lex_raw(input, Some(token_mapper))
}
/// Tokenize an input text stream with an optional mapping function.
#[inline]
2021-06-12 16:47:43 +02:00
#[must_use]
2021-03-03 15:49:57 +01:00
pub(crate) fn lex_raw<'a>(
&'a self,
2021-11-27 16:04:45 +01:00
input: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2021-09-24 12:00:48 +02:00
token_mapper: Option<&'a OnParseTokenCallback>,
) -> (TokenIterator<'a>, TokenizerControl) {
2022-07-25 07:40:23 +02:00
let buffer: TokenizerControl = RefCell::new(TokenizerControlBlock::new()).into();
2021-04-04 07:13:07 +02:00
let buffer2 = buffer.clone();
(
TokenIterator {
engine: self,
state: TokenizeState {
#[cfg(not(feature = "unchecked"))]
max_string_size: self.limits.max_string_size,
#[cfg(feature = "unchecked")]
max_string_size: None,
next_token_cannot_be_unary: false,
2022-07-25 07:40:23 +02:00
tokenizer_control: buffer,
2021-04-04 07:13:07 +02:00
comment_level: 0,
include_comments: false,
is_within_text_terminated_by: None,
2021-04-04 07:13:07 +02:00
},
pos: Position::new(1, 0),
stream: MultiInputsStream {
buf: None,
2021-11-27 16:04:45 +01:00
streams: input
.into_iter()
.map(|s| s.as_ref().chars().peekable())
.collect(),
2021-04-04 07:13:07 +02:00
index: 0,
},
2021-09-24 12:00:48 +02:00
token_mapper,
},
2021-04-04 07:13:07 +02:00
buffer2,
)
}
}