2020-04-15 16:21:23 +02:00
|
|
|
//! Main module defining the lexer and parser.
|
|
|
|
|
2023-03-15 10:22:11 +01:00
|
|
|
use crate::engine::Precedence;
|
2021-11-13 15:36:23 +01:00
|
|
|
use crate::func::native::OnParseTokenCallback;
|
2022-11-28 16:24:22 +01:00
|
|
|
use crate::{Engine, Identifier, LexError, Position, SmartString, StaticVec, INT, UNSIGNED_INT};
|
2023-02-10 07:58:03 +01:00
|
|
|
use smallvec::SmallVec;
|
2021-04-17 09:15:54 +02:00
|
|
|
#[cfg(feature = "no_std")]
|
|
|
|
use std::prelude::v1::*;
|
|
|
|
use std::{
|
2022-07-25 07:40:23 +02:00
|
|
|
cell::RefCell,
|
2021-04-17 09:15:54 +02:00
|
|
|
char, fmt,
|
2021-04-04 09:06:13 +02:00
|
|
|
iter::{FusedIterator, Peekable},
|
2021-01-06 06:46:53 +01:00
|
|
|
num::NonZeroUsize,
|
2021-04-04 17:08:27 +02:00
|
|
|
rc::Rc,
|
2020-04-15 16:21:23 +02:00
|
|
|
str::{Chars, FromStr},
|
|
|
|
};
|
2020-11-16 16:10:14 +01:00
|
|
|
|
2021-07-25 16:56:05 +02:00
|
|
|
/// _(internals)_ A type containing commands to control the tokenizer.
///
/// A value of this type is shared with the tokenizer through [`TokenizerControl`],
/// which wraps it in `Rc<RefCell<..>>`.
|
2022-08-19 07:21:47 +02:00
|
|
|
#[derive(Debug, Clone, Eq, PartialEq, Default, Hash)]
|
2021-04-04 18:05:56 +02:00
|
|
|
pub struct TokenizerControlBlock {
|
2021-04-04 17:08:27 +02:00
|
|
|
/// Is the current tokenizer position within an interpolated text string?
|
2022-12-22 10:34:58 +01:00
|
|
|
///
|
2021-04-04 17:08:27 +02:00
|
|
|
/// This flag allows switching the tokenizer back to _text_ parsing after an interpolation stream.
|
|
|
|
pub is_within_text: bool,
|
2022-12-02 07:06:31 +01:00
|
|
|
/// Global comments.
///
/// This field only exists when the `metadata` feature is enabled.
|
2022-07-25 07:40:23 +02:00
|
|
|
#[cfg(feature = "metadata")]
|
2022-12-02 07:06:31 +01:00
|
|
|
pub global_comments: String,
|
2022-12-21 06:54:54 +01:00
|
|
|
/// Whitespace-compressed version of the script (if any).
|
2022-12-22 10:34:58 +01:00
|
|
|
///
|
|
|
|
/// Set to `Some` in order to collect a compressed script.
/// [`TokenizerControlBlock::new`] initializes this field to `None`.
|
2022-12-21 06:54:54 +01:00
|
|
|
pub compressed: Option<String>,
|
2021-04-04 17:08:27 +02:00
|
|
|
|
}
|
|
|
|
|
2021-11-07 11:12:37 +01:00
|
|
|
impl TokenizerControlBlock {
|
|
|
|
/// Create a new `TokenizerControlBlock`.
|
2022-09-28 06:06:22 +02:00
|
|
|
#[inline]
|
2021-11-07 11:12:37 +01:00
|
|
|
#[must_use]
|
|
|
|
pub const fn new() -> Self {
|
|
|
|
Self {
|
|
|
|
is_within_text: false,
|
2022-07-25 07:40:23 +02:00
|
|
|
#[cfg(feature = "metadata")]
|
2022-12-02 07:06:31 +01:00
|
|
|
global_comments: String::new(),
|
2022-12-21 06:54:54 +01:00
|
|
|
compressed: None,
|
2021-11-07 11:12:37 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-25 16:56:05 +02:00
|
|
|
/// _(internals)_ A shared object that allows control of the tokenizer from outside.
///
/// This is a reference-counted, interior-mutable handle (`Rc<RefCell<..>>`) to a
/// [`TokenizerControlBlock`].
|
2022-07-25 07:40:23 +02:00
|
|
|
pub type TokenizerControl = Rc<RefCell<TokenizerControlBlock>>;
|
2021-04-04 17:08:27 +02:00
|
|
|
|
2020-04-15 16:21:23 +02:00
|
|
|
/// Short, module-private alias for [`LexError`].
type LERR = LexError;
|
|
|
|
|
2021-02-14 10:34:53 +01:00
|
|
|
/// Separator character (`_`) for numbers.
|
2021-06-28 12:06:05 +02:00
|
|
|
const NUMBER_SEPARATOR: char = '_';
|
2021-02-14 10:34:53 +01:00
|
|
|
|
|
|
|
/// A stream of tokens.
///
/// This is a [`Peekable`] wrapper around a [`TokenIterator`].
|
2021-03-03 15:49:57 +01:00
|
|
|
pub type TokenStream<'a> = Peekable<TokenIterator<'a>>;
|
2020-06-11 12:13:33 +02:00
|
|
|
|
2021-07-25 16:56:05 +02:00
|
|
|
/// _(internals)_ A Rhai language token.
|
2020-07-25 09:52:27 +02:00
|
|
|
/// Exported under the `internals` feature only.
|
2021-02-12 16:07:28 +01:00
|
|
|
#[derive(Debug, PartialEq, Clone, Hash)]
|
2022-04-26 10:36:24 +02:00
|
|
|
#[non_exhaustive]
|
2020-04-15 16:21:23 +02:00
|
|
|
pub enum Token {
|
2020-07-25 09:52:27 +02:00
|
|
|
/// An `INT` constant.
|
2020-04-15 16:21:23 +02:00
|
|
|
IntegerConstant(INT),
|
2020-07-28 13:11:37 +02:00
|
|
|
/// A `FLOAT` constant.
|
2020-07-25 09:52:27 +02:00
|
|
|
///
|
2020-07-28 13:11:37 +02:00
|
|
|
/// Reserved under the `no_float` feature.
|
2020-04-17 14:08:41 +02:00
|
|
|
#[cfg(not(feature = "no_float"))]
|
2022-11-08 16:17:31 +01:00
|
|
|
FloatConstant(crate::types::FloatWrapper<crate::FLOAT>),
|
2021-12-06 13:52:47 +01:00
|
|
|
/// A [`Decimal`][rust_decimal::Decimal] constant.
|
2021-02-13 13:57:56 +01:00
|
|
|
///
|
|
|
|
/// Requires the `decimal` feature.
|
|
|
|
#[cfg(feature = "decimal")]
|
2022-09-25 17:03:18 +02:00
|
|
|
DecimalConstant(Box<rust_decimal::Decimal>),
|
2020-07-25 09:52:27 +02:00
|
|
|
/// An identifier.
|
2022-09-25 17:03:18 +02:00
|
|
|
Identifier(Box<Identifier>),
|
2020-07-25 09:52:27 +02:00
|
|
|
/// A character constant.
|
2020-04-15 16:21:23 +02:00
|
|
|
CharConstant(char),
|
2020-07-25 09:52:27 +02:00
|
|
|
/// A string constant.
|
2022-09-25 17:03:18 +02:00
|
|
|
StringConstant(Box<SmartString>),
|
2021-04-04 07:13:07 +02:00
|
|
|
/// An interpolated string.
|
2022-09-25 17:03:18 +02:00
|
|
|
InterpolatedString(Box<SmartString>),
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `{`
|
2020-04-15 16:21:23 +02:00
|
|
|
LeftBrace,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `}`
|
2020-04-15 16:21:23 +02:00
|
|
|
RightBrace,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `(`
|
2020-04-15 16:21:23 +02:00
|
|
|
LeftParen,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `)`
|
2020-04-15 16:21:23 +02:00
|
|
|
RightParen,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `[`
|
2020-04-15 16:21:23 +02:00
|
|
|
LeftBracket,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `]`
|
2020-04-15 16:21:23 +02:00
|
|
|
RightBracket,
|
2022-04-21 04:04:46 +02:00
|
|
|
/// `()`
|
|
|
|
Unit,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `+`
|
2020-04-15 16:21:23 +02:00
|
|
|
Plus,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `+` (unary)
|
2020-04-15 16:21:23 +02:00
|
|
|
UnaryPlus,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `-`
|
2020-04-15 16:21:23 +02:00
|
|
|
Minus,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `-` (unary)
|
2020-04-15 16:21:23 +02:00
|
|
|
UnaryMinus,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `*`
|
2020-04-15 16:21:23 +02:00
|
|
|
Multiply,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `/`
|
2020-04-15 16:21:23 +02:00
|
|
|
Divide,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `%`
|
2020-04-15 16:21:23 +02:00
|
|
|
Modulo,
|
2021-02-10 05:41:27 +01:00
|
|
|
/// `**`
|
2020-04-15 16:21:23 +02:00
|
|
|
PowerOf,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `<<`
|
2020-04-15 16:21:23 +02:00
|
|
|
LeftShift,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `>>`
|
2020-04-15 16:21:23 +02:00
|
|
|
RightShift,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `;`
|
2020-04-15 16:21:23 +02:00
|
|
|
SemiColon,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `:`
|
2020-04-15 16:21:23 +02:00
|
|
|
Colon,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `::`
|
2020-05-03 19:19:01 +02:00
|
|
|
DoubleColon,
|
2020-11-13 11:32:18 +01:00
|
|
|
/// `=>`
|
|
|
|
DoubleArrow,
|
|
|
|
/// `_`
|
|
|
|
Underscore,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `,`
|
2020-04-15 16:21:23 +02:00
|
|
|
Comma,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `.`
|
2020-04-15 16:21:23 +02:00
|
|
|
Period,
|
2022-06-10 04:26:06 +02:00
|
|
|
/// `?.`
|
2022-06-11 18:32:12 +02:00
|
|
|
///
|
|
|
|
/// Reserved under the `no_object` feature.
|
|
|
|
#[cfg(not(feature = "no_object"))]
|
2022-06-10 04:26:06 +02:00
|
|
|
Elvis,
|
2022-06-10 05:22:33 +02:00
|
|
|
/// `??`
|
|
|
|
DoubleQuestion,
|
2022-06-11 18:32:12 +02:00
|
|
|
/// `?[`
|
|
|
|
///
|
|
|
|
/// Reserved under the `no_object` feature.
|
|
|
|
#[cfg(not(feature = "no_index"))]
|
|
|
|
QuestionBracket,
|
2021-12-15 05:06:17 +01:00
|
|
|
/// `..`
|
|
|
|
ExclusiveRange,
|
|
|
|
/// `..=`
|
|
|
|
InclusiveRange,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `#{`
|
2020-04-15 16:21:23 +02:00
|
|
|
MapStart,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `=`
|
2020-04-15 16:21:23 +02:00
|
|
|
Equals,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `true`
|
2020-04-15 16:21:23 +02:00
|
|
|
True,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `false`
|
2020-04-15 16:21:23 +02:00
|
|
|
False,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `let`
|
2020-04-15 16:21:23 +02:00
|
|
|
Let,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `const`
|
2020-04-15 16:21:23 +02:00
|
|
|
Const,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `if`
|
2020-04-15 16:21:23 +02:00
|
|
|
If,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `else`
|
2020-04-15 16:21:23 +02:00
|
|
|
Else,
|
2020-11-13 11:32:18 +01:00
|
|
|
/// `switch`
|
|
|
|
Switch,
|
2020-11-20 15:23:37 +01:00
|
|
|
/// `do`
|
|
|
|
Do,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `while`
|
2020-04-15 16:21:23 +02:00
|
|
|
While,
|
2020-11-20 15:23:37 +01:00
|
|
|
/// `until`
|
|
|
|
Until,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `loop`
|
2020-04-15 16:21:23 +02:00
|
|
|
Loop,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `for`
|
2020-04-15 16:21:23 +02:00
|
|
|
For,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `in`
|
2020-04-15 16:21:23 +02:00
|
|
|
In,
|
2022-11-30 07:11:09 +01:00
|
|
|
/// `!in`
|
|
|
|
NotIn,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `<`
|
2020-04-15 16:21:23 +02:00
|
|
|
LessThan,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `>`
|
2020-04-15 16:21:23 +02:00
|
|
|
GreaterThan,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `<=`
|
2020-04-15 16:21:23 +02:00
|
|
|
LessThanEqualsTo,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `>=`
|
2020-04-15 16:21:23 +02:00
|
|
|
GreaterThanEqualsTo,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `==`
|
2020-04-15 16:21:23 +02:00
|
|
|
EqualsTo,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `!=`
|
2020-04-15 16:21:23 +02:00
|
|
|
NotEqualsTo,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `!`
|
2020-04-15 16:21:23 +02:00
|
|
|
Bang,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `|`
|
2020-04-15 16:21:23 +02:00
|
|
|
Pipe,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `||`
|
2020-04-15 16:21:23 +02:00
|
|
|
Or,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `^`
|
2020-04-15 16:21:23 +02:00
|
|
|
XOr,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `&`
|
2020-04-15 16:21:23 +02:00
|
|
|
Ampersand,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `&&`
|
2020-04-15 16:21:23 +02:00
|
|
|
And,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `fn`
|
|
|
|
///
|
2020-07-28 13:11:37 +02:00
|
|
|
/// Reserved under the `no_function` feature.
|
2020-06-01 04:58:14 +02:00
|
|
|
#[cfg(not(feature = "no_function"))]
|
2020-04-15 16:21:23 +02:00
|
|
|
Fn,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `continue`
|
2020-04-15 16:21:23 +02:00
|
|
|
Continue,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `break`
|
2020-04-15 16:21:23 +02:00
|
|
|
Break,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `return`
|
2020-04-15 16:21:23 +02:00
|
|
|
Return,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `throw`
|
2020-04-15 16:21:23 +02:00
|
|
|
Throw,
|
2020-10-20 17:16:03 +02:00
|
|
|
/// `try`
|
|
|
|
Try,
|
|
|
|
/// `catch`
|
|
|
|
Catch,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `+=`
|
2020-04-15 16:21:23 +02:00
|
|
|
PlusAssign,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `-=`
|
2020-04-15 16:21:23 +02:00
|
|
|
MinusAssign,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `*=`
|
2020-04-15 16:21:23 +02:00
|
|
|
MultiplyAssign,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `/=`
|
2020-04-15 16:21:23 +02:00
|
|
|
DivideAssign,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `<<=`
|
2020-04-15 16:21:23 +02:00
|
|
|
LeftShiftAssign,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `>>=`
|
2020-04-15 16:21:23 +02:00
|
|
|
RightShiftAssign,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `&=`
|
2020-04-15 16:21:23 +02:00
|
|
|
AndAssign,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `|=`
|
2020-04-15 16:21:23 +02:00
|
|
|
OrAssign,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `^=`
|
2020-04-15 16:21:23 +02:00
|
|
|
XOrAssign,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `%=`
|
2020-04-15 16:21:23 +02:00
|
|
|
ModuloAssign,
|
2021-02-10 05:41:27 +01:00
|
|
|
/// `**=`
|
2020-04-15 16:21:23 +02:00
|
|
|
PowerOfAssign,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `private`
|
|
|
|
///
|
2020-07-28 13:11:37 +02:00
|
|
|
/// Reserved under the `no_function` feature.
|
2020-06-02 07:33:16 +02:00
|
|
|
#[cfg(not(feature = "no_function"))]
|
2020-05-09 05:29:30 +02:00
|
|
|
Private,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `import`
|
|
|
|
///
|
2020-07-28 13:11:37 +02:00
|
|
|
/// Reserved under the `no_module` feature.
|
2020-06-25 05:07:46 +02:00
|
|
|
#[cfg(not(feature = "no_module"))]
|
2020-05-04 11:43:54 +02:00
|
|
|
Import,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `export`
|
|
|
|
///
|
2020-07-28 13:11:37 +02:00
|
|
|
/// Reserved under the `no_module` feature.
|
2020-06-01 04:58:14 +02:00
|
|
|
#[cfg(not(feature = "no_module"))]
|
2020-05-04 11:43:54 +02:00
|
|
|
Export,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// `as`
|
|
|
|
///
|
2020-07-28 13:11:37 +02:00
|
|
|
/// Reserved under the `no_module` feature.
|
2020-06-25 05:07:46 +02:00
|
|
|
#[cfg(not(feature = "no_module"))]
|
2020-05-04 11:43:54 +02:00
|
|
|
As,
|
2020-07-25 09:52:27 +02:00
|
|
|
/// A lexer error.
|
2022-02-26 16:18:47 +01:00
|
|
|
LexError(Box<LexError>),
|
2020-07-25 09:52:27 +02:00
|
|
|
/// A comment block.
|
2022-12-02 07:06:31 +01:00
|
|
|
Comment(Box<String>),
|
2020-07-25 09:52:27 +02:00
|
|
|
/// A reserved symbol.
|
2022-09-25 17:03:18 +02:00
|
|
|
Reserved(Box<SmartString>),
|
2020-07-25 09:52:27 +02:00
|
|
|
/// A custom keyword.
|
2022-07-05 16:59:03 +02:00
|
|
|
///
|
2022-07-26 16:38:40 +02:00
|
|
|
/// Not available under `no_custom_syntax`.
|
2022-07-05 16:59:03 +02:00
|
|
|
#[cfg(not(feature = "no_custom_syntax"))]
|
2022-09-25 17:03:18 +02:00
|
|
|
Custom(Box<SmartString>),
|
2020-07-25 09:52:27 +02:00
|
|
|
/// End of the input stream.
|
2022-11-25 13:42:16 +01:00
|
|
|
/// Used as a placeholder for the end of input.
|
2020-04-17 14:01:41 +02:00
|
|
|
EOF,
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
|
2022-07-28 11:58:22 +02:00
|
|
|
impl fmt::Display for Token {
|
|
|
|
#[inline(always)]
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
2022-11-23 09:14:11 +01:00
|
|
|
#[allow(clippy::enum_glob_use)]
|
2022-11-22 16:30:43 +01:00
|
|
|
use Token::*;
|
|
|
|
|
|
|
|
match self {
|
|
|
|
IntegerConstant(i) => write!(f, "{i}"),
|
|
|
|
#[cfg(not(feature = "no_float"))]
|
|
|
|
FloatConstant(v) => write!(f, "{v}"),
|
|
|
|
#[cfg(feature = "decimal")]
|
|
|
|
DecimalConstant(d) => write!(f, "{d}"),
|
|
|
|
StringConstant(s) => write!(f, r#""{s}""#),
|
|
|
|
InterpolatedString(..) => f.write_str("string"),
|
|
|
|
CharConstant(c) => write!(f, "{c}"),
|
|
|
|
Identifier(s) => f.write_str(s),
|
|
|
|
Reserved(s) => f.write_str(s),
|
|
|
|
#[cfg(not(feature = "no_custom_syntax"))]
|
|
|
|
Custom(s) => f.write_str(s),
|
|
|
|
LexError(err) => write!(f, "{err}"),
|
|
|
|
Comment(s) => f.write_str(s),
|
|
|
|
|
|
|
|
EOF => f.write_str("{EOF}"),
|
|
|
|
|
|
|
|
token => f.write_str(token.literal_syntax()),
|
|
|
|
}
|
2022-07-28 11:58:22 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-16 23:41:44 +01:00
|
|
|
// Table-driven keyword recognizer generated by GNU `gperf` on the file `tools/keywords.txt`.
|
2023-03-15 10:22:11 +01:00
|
|
|
//
|
|
|
|
// When adding new keywords, make sure to update `tools/keywords.txt` and re-generate this.
|
2023-03-15 01:01:44 +01:00
|
|
|
|
|
|
|
/// Length of the shortest non-empty entry in `KEYWORDS_LIST`.
// NOTE(review): presumably used to reject candidates before hashing; confirm against the lookup routine.
const MIN_KEYWORD_LEN: usize = 1;
|
|
|
|
/// Length of the longest entry in `KEYWORDS_LIST` (e.g. `continue`).
const MAX_KEYWORD_LEN: usize = 8;
|
|
|
|
/// Lowest slot of `KEYWORDS_LIST` that holds a keyword (slot 0 is an empty placeholder).
const MIN_KEYWORD_HASH_VALUE: usize = 1;
|
|
|
|
/// Highest slot of `KEYWORDS_LIST`, which has 153 entries.
const MAX_KEYWORD_HASH_VALUE: usize = 152;
|
|
|
|
|
2023-03-15 01:54:34 +01:00
|
|
|
/// Association values emitted by GNU `gperf` for the keyword recognizer.
///
/// The 257 entries are written sixteen per row, so the entry at `row * 16 + column`
/// can be located by eye; the trailing comment on each row gives the index range.
/// The filler value 153 is one more than `MAX_KEYWORD_HASH_VALUE`.
///
/// NOTE(review): the table is presumably indexed by a byte of the candidate symbol
/// (and by that byte plus one, hence 257 entries) - confirm against the lookup routine.
///
/// This table is generated; regenerate it rather than editing individual values.
// Keep the 16-per-row layout: rustfmt would otherwise re-flow the rows.
#[rustfmt::skip]
static KEYWORD_ASSOC_VALUES: [u8; 257] = [
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, // 0x00..=0x0F
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, // 0x10..=0x1F
    153, 115, 153, 100, 153, 110, 105, 40, 80, 2, 20, 25, 125, 95, 15, 40, // 0x20..=0x2F
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 55, 35, 10, 5, 0, 30, // 0x30..=0x3F
    110, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, // 0x40..=0x4F
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 120, 105, 100, 85, 90, // 0x50..=0x5F
    153, 125, 5, 0, 125, 35, 10, 100, 153, 20, 0, 153, 10, 0, 45, 55, // 0x60..=0x6F
    0, 153, 50, 55, 5, 0, 153, 0, 0, 35, 153, 45, 50, 30, 153, 153, // 0x70..=0x7F
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, // 0x80..=0x8F
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, // 0x90..=0x9F
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, // 0xA0..=0xAF
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, // 0xB0..=0xBF
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, // 0xC0..=0xCF
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, // 0xD0..=0xDF
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, // 0xE0..=0xEF
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, // 0xF0..=0xFF
    153, // 0x100
];
|
2023-03-15 01:54:34 +01:00
|
|
|
static KEYWORDS_LIST: [(&str, Token); 153] = [
|
2023-03-15 01:01:44 +01:00
|
|
|
("", Token::EOF),
|
|
|
|
(">", Token::GreaterThan),
|
|
|
|
(">=", Token::GreaterThanEqualsTo),
|
|
|
|
(")", Token::RightParen),
|
|
|
|
("", Token::EOF),
|
|
|
|
("const", Token::Const),
|
|
|
|
("=", Token::Equals),
|
|
|
|
("==", Token::EqualsTo),
|
|
|
|
("continue", Token::Continue),
|
|
|
|
("", Token::EOF),
|
|
|
|
("catch", Token::Catch),
|
|
|
|
("<", Token::LessThan),
|
|
|
|
("<=", Token::LessThanEqualsTo),
|
|
|
|
("for", Token::For),
|
|
|
|
("loop", Token::Loop),
|
|
|
|
("", Token::EOF),
|
|
|
|
(".", Token::Period),
|
|
|
|
("<<", Token::LeftShift),
|
|
|
|
("<<=", Token::LeftShiftAssign),
|
|
|
|
("", Token::EOF),
|
|
|
|
("false", Token::False),
|
|
|
|
("*", Token::Multiply),
|
|
|
|
("*=", Token::MultiplyAssign),
|
|
|
|
("let", Token::Let),
|
|
|
|
("", Token::EOF),
|
|
|
|
("while", Token::While),
|
|
|
|
("+", Token::Plus),
|
|
|
|
("+=", Token::PlusAssign),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("throw", Token::Throw),
|
|
|
|
("}", Token::RightBrace),
|
|
|
|
(">>", Token::RightShift),
|
|
|
|
(">>=", Token::RightShiftAssign),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
(";", Token::SemiColon),
|
|
|
|
("=>", Token::DoubleArrow),
|
|
|
|
("", Token::EOF),
|
|
|
|
("else", Token::Else),
|
|
|
|
("", Token::EOF),
|
|
|
|
("/", Token::Divide),
|
|
|
|
("/=", Token::DivideAssign),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("{", Token::LeftBrace),
|
|
|
|
("**", Token::PowerOf),
|
|
|
|
("**=", Token::PowerOfAssign),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("|", Token::Pipe),
|
|
|
|
("|=", Token::OrAssign),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
(":", Token::Colon),
|
|
|
|
("..", Token::ExclusiveRange),
|
|
|
|
("..=", Token::InclusiveRange),
|
|
|
|
("", Token::EOF),
|
|
|
|
("until", Token::Until),
|
|
|
|
("switch", Token::Switch),
|
|
|
|
#[cfg(not(feature = "no_function"))]
|
|
|
|
("private", Token::Private),
|
|
|
|
#[cfg(feature = "no_function")]
|
|
|
|
("", Token::EOF),
|
|
|
|
("try", Token::Try),
|
|
|
|
("true", Token::True),
|
|
|
|
("break", Token::Break),
|
|
|
|
("return", Token::Return),
|
|
|
|
#[cfg(not(feature = "no_function"))]
|
|
|
|
("fn", Token::Fn),
|
|
|
|
#[cfg(feature = "no_function")]
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
#[cfg(not(feature = "no_module"))]
|
|
|
|
("import", Token::Import),
|
|
|
|
#[cfg(feature = "no_module")]
|
|
|
|
("", Token::EOF),
|
|
|
|
#[cfg(not(feature = "no_object"))]
|
|
|
|
("?.", Token::Elvis),
|
|
|
|
#[cfg(feature = "no_object")]
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
#[cfg(not(feature = "no_module"))]
|
|
|
|
("export", Token::Export),
|
|
|
|
#[cfg(feature = "no_module")]
|
|
|
|
("", Token::EOF),
|
|
|
|
("in", Token::In),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("(", Token::LeftParen),
|
|
|
|
("||", Token::Or),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("^", Token::XOr),
|
|
|
|
("^=", Token::XOrAssign),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("_", Token::Underscore),
|
|
|
|
("::", Token::DoubleColon),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("-", Token::Minus),
|
|
|
|
("-=", Token::MinusAssign),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("]", Token::RightBracket),
|
|
|
|
("()", Token::Unit),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("&", Token::Ampersand),
|
|
|
|
("&=", Token::AndAssign),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("%", Token::Modulo),
|
|
|
|
("%=", Token::ModuloAssign),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("!", Token::Bang),
|
|
|
|
("!=", Token::NotEqualsTo),
|
|
|
|
("!in", Token::NotIn),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("[", Token::LeftBracket),
|
|
|
|
("if", Token::If),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
(",Token::", Token::Comma),
|
|
|
|
("do", Token::Do),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
#[cfg(not(feature = "no_module"))]
|
|
|
|
("as", Token::As),
|
|
|
|
#[cfg(feature = "no_module")]
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
#[cfg(not(feature = "no_index"))]
|
|
|
|
("?[", Token::QuestionBracket),
|
|
|
|
#[cfg(feature = "no_index")]
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("??", Token::DoubleQuestion),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("&&", Token::And),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("", Token::EOF),
|
|
|
|
("#{", Token::MapStart),
|
|
|
|
];
|
|
|
|
|
2023-03-16 23:41:44 +01:00
|
|
|
// Table-driven reserved symbol recognizer generated by GNU `gperf` on the file `tools/reserved.txt`.
|
2023-03-15 10:22:11 +01:00
|
|
|
//
|
|
|
|
// When adding new reserved symbols, make sure to update `tools/reserved.txt` and re-generate this.
|
2023-03-15 01:01:44 +01:00
|
|
|
|
|
|
|
/// Length of the shortest non-empty entry in `RESERVED_LIST` (e.g. `~`).
// NOTE(review): presumably used to reject candidates before hashing; confirm against the lookup routine.
const MIN_RESERVED_LEN: usize = 1;
|
|
|
|
/// Length of the longest entry in `RESERVED_LIST` (e.g. `is_def_var`).
const MAX_RESERVED_LEN: usize = 10;
|
|
|
|
/// Lowest slot of `RESERVED_LIST` that holds a symbol (slot 0 is an empty placeholder).
const MIN_RESERVED_HASH_VALUE: usize = 1;
|
|
|
|
/// Highest slot of `RESERVED_LIST`, which has 113 entries.
const MAX_RESERVED_HASH_VALUE: usize = 112;
|
|
|
|
|
2023-03-15 01:54:34 +01:00
|
|
|
/// Association values emitted by GNU `gperf` for the reserved symbol recognizer.
///
/// The 256 entries are written sixteen per row, so the entry at `row * 16 + column`
/// can be located by eye; the trailing comment on each row gives the index range.
/// The filler value 113 is one more than `MAX_RESERVED_HASH_VALUE`.
///
/// NOTE(review): the table is presumably indexed by a byte of the candidate symbol -
/// confirm against the lookup routine.
///
/// This table is generated; regenerate it rather than editing individual values.
// Keep the 16-per-row layout: rustfmt would otherwise re-flow the rows.
#[rustfmt::skip]
static RESERVED_ASSOC_VALUES: [u8; 256] = [
    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, // 0x00..=0x0F
    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, // 0x10..=0x1F
    113, 35, 113, 45, 25, 113, 113, 113, 60, 55, 50, 50, 113, 15, 0, 113, // 0x20..=0x2F
    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 10, 85, 45, 5, 55, 50, // 0x30..=0x3F
    5, 113, 113, 113, 113, 113, 85, 113, 113, 113, 113, 113, 113, 113, 113, 113, // 0x40..=0x4F
    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 35, 113, 113, 113, 55, // 0x50..=0x5F
    113, 10, 40, 5, 0, 5, 35, 10, 5, 0, 113, 113, 20, 25, 5, 45, // 0x60..=0x6F
    0, 113, 0, 0, 0, 15, 30, 20, 25, 20, 113, 113, 20, 113, 0, 113, // 0x70..=0x7F
    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, // 0x80..=0x8F
    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, // 0x90..=0x9F
    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, // 0xA0..=0xAF
    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, // 0xB0..=0xBF
    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, // 0xC0..=0xCF
    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, // 0xD0..=0xDF
    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, // 0xE0..=0xEF
    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, // 0xF0..=0xFF
];
|
2023-03-15 10:22:11 +01:00
|
|
|
/// Reserved symbol table emitted by GNU `gperf` for the reserved symbol recognizer.
///
/// Each entry is a symbol followed by three flags. Slots that do not correspond to any
/// symbol hold an empty string with all flags cleared.
///
/// NOTE(review): the meaning of the three flags is defined by the code that reads this
/// table, which is outside this block - confirm there before relying on them. For
/// symbols that are active keywords unless a `no_*` feature is set, the first flag is
/// computed with `cfg!` from that feature.
///
/// The position of every entry is significant (presumably it is the gperf hash of the
/// symbol), so entries must never be reordered, inserted or removed by hand.
static RESERVED_LIST: [(&str, bool, bool, bool); 113] = {
    // Placeholder stored in every slot that no reserved symbol occupies.
    const EMPTY: (&str, bool, bool, bool) = ("", false, false, false);

    [
        EMPTY,
        ("~", true, false, false),
        ("is", true, false, false),
        ("...", true, false, false),
        EMPTY,
        ("print", true, true, false),
        ("@", true, false, false),
        ("private", cfg!(feature = "no_function"), false, false),
        EMPTY,
        ("this", true, false, false),
        EMPTY,
        ("thread", true, false, false),
        ("as", cfg!(feature = "no_module"), false, false),
        EMPTY,
        EMPTY,
        ("spawn", true, false, false),
        ("static", true, false, false),
        (":=", true, false, false),
        ("===", true, false, false),
        ("case", true, false, false),
        ("super", true, false, false),
        ("shared", true, false, false),
        ("package", true, false, false),
        ("use", true, false, false),
        ("with", true, false, false),
        ("curry", true, true, true),
        ("$", true, false, false),
        ("type_of", true, true, true),
        ("nil", true, false, false),
        ("sync", true, false, false),
        ("yield", true, false, false),
        ("import", cfg!(feature = "no_module"), false, false),
        ("--", true, false, false),
        ("new", true, false, false),
        ("exit", true, false, false),
        ("async", true, false, false),
        ("export", cfg!(feature = "no_module"), false, false),
        ("!.", true, false, false),
        EMPTY,
        ("call", true, true, true),
        ("match", true, false, false),
        EMPTY,
        ("fn", cfg!(feature = "no_function"), false, false),
        ("var", true, false, false),
        ("null", true, false, false),
        ("await", true, false, false),
        ("#", true, false, false),
        ("default", true, false, false),
        ("!==", true, false, false),
        ("eval", true, true, false),
        ("debug", true, true, false),
        ("?", true, false, false),
        ("?.", cfg!(feature = "no_object"), false, false),
        EMPTY,
        ("protected", true, false, false),
        EMPTY,
        EMPTY,
        ("go", true, false, false),
        EMPTY,
        ("goto", true, false, false),
        EMPTY,
        ("public", true, false, false),
        ("<-", true, false, false),
        EMPTY,
        ("is_def_fn", cfg!(not(feature = "no_function")), true, false),
        ("is_def_var", true, true, false),
        EMPTY,
        ("<|", true, false, false),
        ("::<", true, false, false),
        EMPTY,
        EMPTY,
        EMPTY,
        ("->", true, false, false),
        EMPTY,
        EMPTY,
        EMPTY,
        ("module", true, false, false),
        ("|>", true, false, false),
        EMPTY,
        ("void", true, false, false),
        EMPTY,
        EMPTY,
        ("#!", true, false, false),
        EMPTY,
        EMPTY,
        EMPTY,
        EMPTY,
        ("?[", cfg!(feature = "no_index"), false, false),
        EMPTY,
        EMPTY,
        EMPTY,
        EMPTY,
        ("Fn", true, true, false),
        EMPTY,
        EMPTY,
        EMPTY,
        EMPTY,
        (":;", true, false, false),
        EMPTY,
        EMPTY,
        EMPTY,
        EMPTY,
        ("++", true, false, false),
        EMPTY,
        EMPTY,
        EMPTY,
        EMPTY,
        ("*)", true, false, false),
        EMPTY,
        EMPTY,
        EMPTY,
        EMPTY,
        ("(*", true, false, false),
    ]
};
|
|
|
|
|
2020-04-15 16:21:23 +02:00
|
|
|
impl Token {
|
2022-11-22 16:30:43 +01:00
|
|
|
/// Is the token a literal symbol?
|
|
|
|
#[must_use]
|
|
|
|
pub const fn is_literal(&self) -> bool {
|
2022-11-23 09:14:11 +01:00
|
|
|
#[allow(clippy::enum_glob_use)]
|
2022-11-22 16:30:43 +01:00
|
|
|
use Token::*;
|
|
|
|
|
|
|
|
match self {
|
|
|
|
IntegerConstant(..) => false,
|
|
|
|
#[cfg(not(feature = "no_float"))]
|
|
|
|
FloatConstant(..) => false,
|
|
|
|
#[cfg(feature = "decimal")]
|
|
|
|
DecimalConstant(..) => false,
|
|
|
|
StringConstant(..)
|
|
|
|
| InterpolatedString(..)
|
|
|
|
| CharConstant(..)
|
|
|
|
| Identifier(..)
|
|
|
|
| Reserved(..) => false,
|
|
|
|
#[cfg(not(feature = "no_custom_syntax"))]
|
|
|
|
Custom(..) => false,
|
|
|
|
LexError(..) | Comment(..) => false,
|
|
|
|
|
2023-01-08 14:15:16 +01:00
|
|
|
EOF => false,
|
2022-11-22 16:30:43 +01:00
|
|
|
|
|
|
|
_ => true,
|
|
|
|
}
|
|
|
|
}
|
2021-07-10 09:50:31 +02:00
|
|
|
/// Get the literal syntax of the token.
|
2022-11-22 16:30:43 +01:00
|
|
|
///
|
|
|
|
/// # Panics
|
|
|
|
///
|
|
|
|
/// Panics if the token is not a literal symbol.
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2021-07-10 09:50:31 +02:00
|
|
|
pub const fn literal_syntax(&self) -> &'static str {
|
2022-11-23 09:14:11 +01:00
|
|
|
#[allow(clippy::enum_glob_use)]
|
2021-03-23 13:04:54 +01:00
|
|
|
use Token::*;
|
|
|
|
|
|
|
|
match self {
|
|
|
|
LeftBrace => "{",
|
|
|
|
RightBrace => "}",
|
|
|
|
LeftParen => "(",
|
|
|
|
RightParen => ")",
|
|
|
|
LeftBracket => "[",
|
|
|
|
RightBracket => "]",
|
2022-04-21 04:04:46 +02:00
|
|
|
Unit => "()",
|
2021-03-23 13:04:54 +01:00
|
|
|
Plus => "+",
|
|
|
|
UnaryPlus => "+",
|
|
|
|
Minus => "-",
|
|
|
|
UnaryMinus => "-",
|
|
|
|
Multiply => "*",
|
|
|
|
Divide => "/",
|
|
|
|
SemiColon => ";",
|
|
|
|
Colon => ":",
|
|
|
|
DoubleColon => "::",
|
|
|
|
DoubleArrow => "=>",
|
|
|
|
Underscore => "_",
|
|
|
|
Comma => ",",
|
|
|
|
Period => ".",
|
2022-06-11 18:32:12 +02:00
|
|
|
#[cfg(not(feature = "no_object"))]
|
2022-06-10 04:26:06 +02:00
|
|
|
Elvis => "?.",
|
2022-06-10 05:22:33 +02:00
|
|
|
DoubleQuestion => "??",
|
2022-06-11 18:32:12 +02:00
|
|
|
#[cfg(not(feature = "no_index"))]
|
|
|
|
QuestionBracket => "?[",
|
2021-12-15 05:06:17 +01:00
|
|
|
ExclusiveRange => "..",
|
|
|
|
InclusiveRange => "..=",
|
2021-03-23 13:04:54 +01:00
|
|
|
MapStart => "#{",
|
|
|
|
Equals => "=",
|
|
|
|
True => "true",
|
|
|
|
False => "false",
|
|
|
|
Let => "let",
|
|
|
|
Const => "const",
|
|
|
|
If => "if",
|
|
|
|
Else => "else",
|
|
|
|
Switch => "switch",
|
|
|
|
Do => "do",
|
|
|
|
While => "while",
|
|
|
|
Until => "until",
|
|
|
|
Loop => "loop",
|
|
|
|
For => "for",
|
|
|
|
In => "in",
|
2022-11-30 07:11:09 +01:00
|
|
|
NotIn => "!in",
|
2021-03-23 13:04:54 +01:00
|
|
|
LessThan => "<",
|
|
|
|
GreaterThan => ">",
|
|
|
|
Bang => "!",
|
|
|
|
LessThanEqualsTo => "<=",
|
|
|
|
GreaterThanEqualsTo => ">=",
|
|
|
|
EqualsTo => "==",
|
|
|
|
NotEqualsTo => "!=",
|
|
|
|
Pipe => "|",
|
|
|
|
Or => "||",
|
|
|
|
Ampersand => "&",
|
|
|
|
And => "&&",
|
|
|
|
Continue => "continue",
|
|
|
|
Break => "break",
|
|
|
|
Return => "return",
|
|
|
|
Throw => "throw",
|
|
|
|
Try => "try",
|
|
|
|
Catch => "catch",
|
|
|
|
PlusAssign => "+=",
|
|
|
|
MinusAssign => "-=",
|
|
|
|
MultiplyAssign => "*=",
|
|
|
|
DivideAssign => "/=",
|
|
|
|
LeftShiftAssign => "<<=",
|
|
|
|
RightShiftAssign => ">>=",
|
|
|
|
AndAssign => "&=",
|
|
|
|
OrAssign => "|=",
|
|
|
|
XOrAssign => "^=",
|
|
|
|
LeftShift => "<<",
|
|
|
|
RightShift => ">>",
|
|
|
|
XOr => "^",
|
|
|
|
Modulo => "%",
|
|
|
|
ModuloAssign => "%=",
|
|
|
|
PowerOf => "**",
|
|
|
|
PowerOfAssign => "**=",
|
|
|
|
|
|
|
|
#[cfg(not(feature = "no_function"))]
|
|
|
|
Fn => "fn",
|
|
|
|
#[cfg(not(feature = "no_function"))]
|
|
|
|
Private => "private",
|
|
|
|
|
|
|
|
#[cfg(not(feature = "no_module"))]
|
|
|
|
Import => "import",
|
|
|
|
#[cfg(not(feature = "no_module"))]
|
|
|
|
Export => "export",
|
|
|
|
#[cfg(not(feature = "no_module"))]
|
|
|
|
As => "as",
|
|
|
|
|
2022-11-22 16:30:43 +01:00
|
|
|
_ => panic!("token is not a literal symbol"),
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-24 05:55:40 +02:00
|
|
|
/// Is this token an op-assignment operator?
|
|
|
|
#[inline]
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2021-06-28 12:06:05 +02:00
|
|
|
pub const fn is_op_assignment(&self) -> bool {
|
2022-11-30 07:11:09 +01:00
|
|
|
#[allow(clippy::enum_glob_use)]
|
|
|
|
use Token::*;
|
|
|
|
|
2021-07-24 08:11:16 +02:00
|
|
|
matches!(
|
|
|
|
self,
|
2022-11-30 07:11:09 +01:00
|
|
|
PlusAssign
|
|
|
|
| MinusAssign
|
|
|
|
| MultiplyAssign
|
|
|
|
| DivideAssign
|
|
|
|
| LeftShiftAssign
|
|
|
|
| RightShiftAssign
|
|
|
|
| ModuloAssign
|
|
|
|
| PowerOfAssign
|
|
|
|
| AndAssign
|
|
|
|
| OrAssign
|
|
|
|
| XOrAssign
|
2021-07-24 08:11:16 +02:00
|
|
|
)
|
2021-04-24 05:55:40 +02:00
|
|
|
}
|
|
|
|
|
2021-04-23 17:37:10 +02:00
|
|
|
/// Get the corresponding operator of the token if it is an op-assignment operator.
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2022-01-23 14:09:37 +01:00
|
|
|
pub const fn get_base_op_from_assignment(&self) -> Option<Self> {
|
2022-11-30 07:11:09 +01:00
|
|
|
#[allow(clippy::enum_glob_use)]
|
|
|
|
use Token::*;
|
|
|
|
|
2021-04-23 17:37:10 +02:00
|
|
|
Some(match self {
|
2022-11-30 07:11:09 +01:00
|
|
|
PlusAssign => Plus,
|
|
|
|
MinusAssign => Minus,
|
|
|
|
MultiplyAssign => Multiply,
|
|
|
|
DivideAssign => Divide,
|
|
|
|
LeftShiftAssign => LeftShift,
|
|
|
|
RightShiftAssign => RightShift,
|
|
|
|
ModuloAssign => Modulo,
|
|
|
|
PowerOfAssign => PowerOf,
|
|
|
|
AndAssign => Ampersand,
|
|
|
|
OrAssign => Pipe,
|
|
|
|
XOrAssign => XOr,
|
2021-04-23 17:37:10 +02:00
|
|
|
_ => return None,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2021-04-24 05:55:40 +02:00
|
|
|
/// Has this token a corresponding op-assignment operator?
|
|
|
|
#[inline]
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2021-06-28 12:06:05 +02:00
|
|
|
pub const fn has_op_assignment(&self) -> bool {
|
2022-11-30 07:11:09 +01:00
|
|
|
#[allow(clippy::enum_glob_use)]
|
|
|
|
use Token::*;
|
|
|
|
|
2021-07-24 08:11:16 +02:00
|
|
|
matches!(
|
|
|
|
self,
|
2022-11-30 07:11:09 +01:00
|
|
|
Plus | Minus
|
|
|
|
| Multiply
|
|
|
|
| Divide
|
|
|
|
| LeftShift
|
|
|
|
| RightShift
|
|
|
|
| Modulo
|
|
|
|
| PowerOf
|
|
|
|
| Ampersand
|
|
|
|
| Pipe
|
|
|
|
| XOr
|
2021-07-24 08:11:16 +02:00
|
|
|
)
|
2021-04-24 05:55:40 +02:00
|
|
|
}
|
|
|
|
|
2021-04-23 17:37:10 +02:00
|
|
|
/// Get the corresponding op-assignment operator of the token.
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2022-01-23 14:09:37 +01:00
|
|
|
pub const fn convert_to_op_assignment(&self) -> Option<Self> {
|
2022-11-30 07:11:09 +01:00
|
|
|
#[allow(clippy::enum_glob_use)]
|
|
|
|
use Token::*;
|
|
|
|
|
2021-04-23 17:37:10 +02:00
|
|
|
Some(match self {
|
2022-11-30 07:11:09 +01:00
|
|
|
Plus => PlusAssign,
|
|
|
|
Minus => MinusAssign,
|
|
|
|
Multiply => MultiplyAssign,
|
|
|
|
Divide => DivideAssign,
|
|
|
|
LeftShift => LeftShiftAssign,
|
|
|
|
RightShift => RightShiftAssign,
|
|
|
|
Modulo => ModuloAssign,
|
|
|
|
PowerOf => PowerOfAssign,
|
|
|
|
Ampersand => AndAssign,
|
|
|
|
Pipe => OrAssign,
|
|
|
|
XOr => XOrAssign,
|
2021-04-23 17:37:10 +02:00
|
|
|
_ => return None,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2022-10-30 15:16:09 +01:00
|
|
|
/// Reverse lookup a symbol token from a piece of syntax.
|
2023-03-15 01:01:44 +01:00
|
|
|
#[inline]
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2022-10-30 15:16:09 +01:00
|
|
|
pub fn lookup_symbol_from_syntax(syntax: &str) -> Option<Self> {
|
2023-03-16 06:05:29 +01:00
|
|
|
// This implementation is based upon a pre-calculated table generated
|
2023-03-16 23:41:44 +01:00
|
|
|
// by GNU `gperf` on the list of keywords.
|
2023-03-15 01:01:44 +01:00
|
|
|
let utf8 = syntax.as_bytes();
|
|
|
|
let len = utf8.len();
|
|
|
|
let mut hash_val = len;
|
2020-07-09 13:54:28 +02:00
|
|
|
|
2023-03-15 01:01:44 +01:00
|
|
|
if !(MIN_KEYWORD_LEN..=MAX_KEYWORD_LEN).contains(&len) {
|
|
|
|
return None;
|
|
|
|
}
|
2020-07-17 04:18:07 +02:00
|
|
|
|
2023-03-15 01:01:44 +01:00
|
|
|
match len {
|
|
|
|
1 => (),
|
|
|
|
_ => hash_val += KEYWORD_ASSOC_VALUES[(utf8[1] as usize) + 1] as usize,
|
|
|
|
}
|
|
|
|
hash_val += KEYWORD_ASSOC_VALUES[utf8[0] as usize] as usize;
|
2020-07-17 04:18:07 +02:00
|
|
|
|
2023-03-15 01:01:44 +01:00
|
|
|
if !(MIN_KEYWORD_HASH_VALUE..=MAX_KEYWORD_HASH_VALUE).contains(&hash_val) {
|
|
|
|
return None;
|
|
|
|
}
|
2020-07-17 04:18:07 +02:00
|
|
|
|
2023-03-15 01:01:44 +01:00
|
|
|
match KEYWORDS_LIST[hash_val] {
|
|
|
|
(_, Token::EOF) => None,
|
2023-03-16 23:41:44 +01:00
|
|
|
// Fail early to avoid calling memcmp().
|
|
|
|
// Since we are already working with bytes, mind as well check the first one.
|
2023-03-16 06:05:29 +01:00
|
|
|
(s, ref t) if s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax => {
|
|
|
|
Some(t.clone())
|
|
|
|
}
|
2023-03-15 01:01:44 +01:00
|
|
|
_ => None,
|
|
|
|
}
|
2022-10-30 15:16:09 +01:00
|
|
|
}
|
|
|
|
|
2022-06-05 12:17:44 +02:00
|
|
|
/// If another operator is after these, it's probably a unary operator
|
|
|
|
/// (not sure about `fn` name).
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2021-06-28 12:06:05 +02:00
|
|
|
pub const fn is_next_unary(&self) -> bool {
|
2022-11-23 09:14:11 +01:00
|
|
|
#[allow(clippy::enum_glob_use)]
|
2020-04-15 16:21:23 +02:00
|
|
|
use Token::*;
|
|
|
|
|
|
|
|
match self {
|
2021-11-03 02:11:20 +01:00
|
|
|
SemiColon | // ; - is unary
|
2021-11-13 02:50:49 +01:00
|
|
|
Colon | // #{ foo: - is unary
|
2021-11-03 02:11:20 +01:00
|
|
|
Comma | // ( ... , -expr ) - is unary
|
2022-11-23 04:36:30 +01:00
|
|
|
//Period |
|
|
|
|
//Elvis |
|
|
|
|
DoubleQuestion | // ?? - is unary
|
|
|
|
ExclusiveRange | // .. - is unary
|
2021-12-15 05:06:17 +01:00
|
|
|
InclusiveRange | // ..= - is unary
|
2021-11-03 02:11:20 +01:00
|
|
|
LeftBrace | // { -expr } - is unary
|
2022-11-23 04:36:30 +01:00
|
|
|
// RightBrace | // { expr } - expr not unary & is closing
|
2021-11-03 02:11:20 +01:00
|
|
|
LeftParen | // ( -expr ) - is unary
|
|
|
|
// RightParen | // ( expr ) - expr not unary & is closing
|
|
|
|
LeftBracket | // [ -expr ] - is unary
|
|
|
|
// RightBracket | // [ expr ] - expr not unary & is closing
|
2020-04-15 16:21:23 +02:00
|
|
|
Plus |
|
2021-11-03 02:11:20 +01:00
|
|
|
PlusAssign |
|
2020-04-15 16:21:23 +02:00
|
|
|
UnaryPlus |
|
|
|
|
Minus |
|
2021-11-03 02:11:20 +01:00
|
|
|
MinusAssign |
|
2020-04-15 16:21:23 +02:00
|
|
|
UnaryMinus |
|
|
|
|
Multiply |
|
2021-11-03 02:11:20 +01:00
|
|
|
MultiplyAssign |
|
2020-04-15 16:21:23 +02:00
|
|
|
Divide |
|
2021-11-03 02:11:20 +01:00
|
|
|
DivideAssign |
|
|
|
|
Modulo |
|
|
|
|
ModuloAssign |
|
|
|
|
PowerOf |
|
|
|
|
PowerOfAssign |
|
|
|
|
LeftShift |
|
|
|
|
LeftShiftAssign |
|
|
|
|
RightShift |
|
|
|
|
RightShiftAssign |
|
2020-04-15 16:21:23 +02:00
|
|
|
Equals |
|
2021-11-03 02:11:20 +01:00
|
|
|
EqualsTo |
|
|
|
|
NotEqualsTo |
|
2020-04-15 16:21:23 +02:00
|
|
|
LessThan |
|
|
|
|
GreaterThan |
|
|
|
|
Bang |
|
|
|
|
LessThanEqualsTo |
|
|
|
|
GreaterThanEqualsTo |
|
|
|
|
Pipe |
|
|
|
|
Ampersand |
|
|
|
|
If |
|
2022-11-23 04:36:30 +01:00
|
|
|
//Do |
|
2020-04-15 16:21:23 +02:00
|
|
|
While |
|
2020-11-20 15:23:37 +01:00
|
|
|
Until |
|
2021-11-03 02:11:20 +01:00
|
|
|
In |
|
2022-11-30 07:11:09 +01:00
|
|
|
NotIn |
|
2021-11-03 02:11:20 +01:00
|
|
|
And |
|
2020-04-15 16:21:23 +02:00
|
|
|
AndAssign |
|
2021-11-03 02:11:20 +01:00
|
|
|
Or |
|
2020-04-15 16:21:23 +02:00
|
|
|
OrAssign |
|
|
|
|
XOr |
|
2021-11-03 02:11:20 +01:00
|
|
|
XOrAssign |
|
2020-04-15 16:21:23 +02:00
|
|
|
Return |
|
2022-11-23 04:36:30 +01:00
|
|
|
Throw => true,
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2022-11-23 04:36:30 +01:00
|
|
|
#[cfg(not(feature = "no_index"))]
|
|
|
|
QuestionBracket => true, // ?[ - is unary
|
|
|
|
|
|
|
|
LexError(..) => true,
|
|
|
|
|
|
|
|
_ => false,
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Get the precedence number of the token.
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2021-06-28 12:06:05 +02:00
|
|
|
pub const fn precedence(&self) -> Option<Precedence> {
|
2022-11-23 09:14:11 +01:00
|
|
|
#[allow(clippy::enum_glob_use)]
|
2020-04-15 16:21:23 +02:00
|
|
|
use Token::*;
|
|
|
|
|
2021-03-14 03:47:29 +01:00
|
|
|
Precedence::new(match self {
|
2020-07-05 11:41:45 +02:00
|
|
|
Or | XOr | Pipe => 30,
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-07-05 11:41:45 +02:00
|
|
|
And | Ampersand => 60,
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-07-06 06:24:28 +02:00
|
|
|
EqualsTo | NotEqualsTo => 90,
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2022-11-30 07:11:09 +01:00
|
|
|
In | NotIn => 110,
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-10-13 11:16:19 +02:00
|
|
|
LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo => 130,
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2022-06-10 05:22:33 +02:00
|
|
|
DoubleQuestion => 135,
|
|
|
|
|
2021-12-15 07:18:03 +01:00
|
|
|
ExclusiveRange | InclusiveRange => 140,
|
|
|
|
|
2020-07-06 06:24:28 +02:00
|
|
|
Plus | Minus => 150,
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-10-13 09:49:09 +02:00
|
|
|
Divide | Multiply | Modulo => 180,
|
2020-10-13 03:33:16 +02:00
|
|
|
|
2020-10-13 09:49:09 +02:00
|
|
|
PowerOf => 190,
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-07-06 06:24:28 +02:00
|
|
|
LeftShift | RightShift => 210,
|
2020-04-15 16:21:23 +02:00
|
|
|
|
|
|
|
_ => 0,
|
2021-03-14 03:47:29 +01:00
|
|
|
})
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Does an expression bind to the right (instead of left)?
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2021-06-28 12:06:05 +02:00
|
|
|
pub const fn is_bind_right(&self) -> bool {
|
2022-11-23 09:14:11 +01:00
|
|
|
#[allow(clippy::enum_glob_use)]
|
2020-04-15 16:21:23 +02:00
|
|
|
use Token::*;
|
|
|
|
|
|
|
|
match self {
|
2021-02-10 05:41:27 +01:00
|
|
|
// Exponentiation binds to the right
|
|
|
|
PowerOf => true,
|
|
|
|
|
2020-04-15 16:21:23 +02:00
|
|
|
_ => false,
|
|
|
|
}
|
|
|
|
}
|
2020-07-05 09:23:51 +02:00
|
|
|
|
2021-02-10 05:41:27 +01:00
|
|
|
/// Is this token a standard symbol used in the language?
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2021-07-10 09:50:31 +02:00
|
|
|
pub const fn is_standard_symbol(&self) -> bool {
|
2022-11-23 09:14:11 +01:00
|
|
|
#[allow(clippy::enum_glob_use)]
|
2020-07-05 09:23:51 +02:00
|
|
|
use Token::*;
|
|
|
|
|
|
|
|
match self {
|
|
|
|
LeftBrace | RightBrace | LeftParen | RightParen | LeftBracket | RightBracket | Plus
|
|
|
|
| UnaryPlus | Minus | UnaryMinus | Multiply | Divide | Modulo | PowerOf | LeftShift
|
2022-06-11 18:32:12 +02:00
|
|
|
| RightShift | SemiColon | Colon | DoubleColon | Comma | Period | DoubleQuestion
|
|
|
|
| ExclusiveRange | InclusiveRange | MapStart | Equals | LessThan | GreaterThan
|
|
|
|
| LessThanEqualsTo | GreaterThanEqualsTo | EqualsTo | NotEqualsTo | Bang | Pipe
|
|
|
|
| Or | XOr | Ampersand | And | PlusAssign | MinusAssign | MultiplyAssign
|
|
|
|
| DivideAssign | LeftShiftAssign | RightShiftAssign | AndAssign | OrAssign
|
|
|
|
| XOrAssign | ModuloAssign | PowerOfAssign => true,
|
|
|
|
|
|
|
|
#[cfg(not(feature = "no_object"))]
|
|
|
|
Elvis => true,
|
|
|
|
|
|
|
|
#[cfg(not(feature = "no_index"))]
|
|
|
|
QuestionBracket => true,
|
2020-07-05 09:23:51 +02:00
|
|
|
|
|
|
|
_ => false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-10 09:50:31 +02:00
|
|
|
/// Is this token a standard keyword?
|
2021-07-10 05:06:13 +02:00
|
|
|
#[inline]
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2021-07-10 09:50:31 +02:00
|
|
|
pub const fn is_standard_keyword(&self) -> bool {
|
2022-11-23 09:14:11 +01:00
|
|
|
#[allow(clippy::enum_glob_use)]
|
2020-07-05 09:23:51 +02:00
|
|
|
use Token::*;
|
|
|
|
|
|
|
|
match self {
|
|
|
|
#[cfg(not(feature = "no_function"))]
|
|
|
|
Fn | Private => true,
|
|
|
|
|
|
|
|
#[cfg(not(feature = "no_module"))]
|
|
|
|
Import | Export | As => true,
|
|
|
|
|
2020-11-20 15:23:37 +01:00
|
|
|
True | False | Let | Const | If | Else | Do | While | Until | Loop | For | In
|
|
|
|
| Continue | Break | Return | Throw | Try | Catch => true,
|
2020-07-05 09:23:51 +02:00
|
|
|
|
|
|
|
_ => false,
|
|
|
|
}
|
|
|
|
}
|
2020-07-10 05:41:56 +02:00
|
|
|
|
2021-07-10 09:50:31 +02:00
|
|
|
/// Is this token a reserved keyword or symbol?
|
2020-10-08 16:25:50 +02:00
|
|
|
#[inline(always)]
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2021-06-28 12:06:05 +02:00
|
|
|
pub const fn is_reserved(&self) -> bool {
|
2022-02-08 02:46:14 +01:00
|
|
|
matches!(self, Self::Reserved(..))
|
2020-07-10 05:41:56 +02:00
|
|
|
}
|
|
|
|
|
2020-07-26 16:25:30 +02:00
|
|
|
/// Convert a token into a function name, if possible.
|
2020-08-05 16:53:01 +02:00
|
|
|
#[cfg(not(feature = "no_function"))]
|
2021-07-10 05:06:13 +02:00
|
|
|
#[inline]
|
2022-02-26 10:28:58 +01:00
|
|
|
pub(crate) fn into_function_name_for_override(self) -> Result<SmartString, Self> {
|
2020-07-26 16:25:30 +02:00
|
|
|
match self {
|
2022-07-05 16:59:03 +02:00
|
|
|
#[cfg(not(feature = "no_custom_syntax"))]
|
2022-09-25 17:03:18 +02:00
|
|
|
Self::Custom(s) if is_valid_function_name(&s) => Ok(*s),
|
|
|
|
Self::Identifier(s) if is_valid_function_name(&s) => Ok(*s),
|
2020-07-26 16:25:30 +02:00
|
|
|
_ => Err(self),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-10 05:41:56 +02:00
|
|
|
/// Is this token a custom keyword?
|
2022-07-05 16:59:03 +02:00
|
|
|
#[cfg(not(feature = "no_custom_syntax"))]
|
2020-10-08 16:25:50 +02:00
|
|
|
#[inline(always)]
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2021-06-28 12:06:05 +02:00
|
|
|
pub const fn is_custom(&self) -> bool {
|
2022-02-08 02:46:14 +01:00
|
|
|
matches!(self, Self::Custom(..))
|
2020-07-10 05:41:56 +02:00
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
|
2020-05-04 13:36:58 +02:00
|
|
|
/// Conversion of a [`Token`] into its textual form, as produced by `to_string`.
impl From<Token> for String {
    #[inline(always)]
    fn from(value: Token) -> Self {
        value.to_string()
    }
}
|
|
|
|
|
2021-07-25 16:56:05 +02:00
|
|
|
/// _(internals)_ State of the tokenizer.
|
2020-07-25 09:52:27 +02:00
|
|
|
/// Exported under the `internals` feature only.
|
2022-08-19 07:21:47 +02:00
|
|
|
#[derive(Debug, Clone, Eq, PartialEq, Default)]
|
2020-06-26 13:44:50 +02:00
|
|
|
pub struct TokenizeState {
|
2021-04-05 17:06:48 +02:00
|
|
|
/// Maximum length of a string.
|
2022-11-29 08:50:58 +01:00
|
|
|
pub max_string_len: Option<NonZeroUsize>,
|
2020-04-15 16:21:23 +02:00
|
|
|
/// Can the next token be a unary operator?
|
2021-09-24 16:44:39 +02:00
|
|
|
pub next_token_cannot_be_unary: bool,
|
2022-07-25 07:40:23 +02:00
|
|
|
/// Shared object to allow controlling the tokenizer externally.
|
|
|
|
pub tokenizer_control: TokenizerControl,
|
2020-06-26 13:44:50 +02:00
|
|
|
/// Is the tokenizer currently inside a block comment?
|
2020-06-26 16:03:21 +02:00
|
|
|
pub comment_level: usize,
|
2020-06-26 13:44:50 +02:00
|
|
|
/// Include comments?
|
2020-06-26 16:03:21 +02:00
|
|
|
pub include_comments: bool,
|
2021-04-04 17:23:10 +02:00
|
|
|
/// Is the current tokenizer position within the text stream of an interpolated string?
|
|
|
|
pub is_within_text_terminated_by: Option<char>,
|
2022-12-22 10:34:58 +01:00
|
|
|
/// Textual syntax of the current token, if any.
|
|
|
|
///
|
|
|
|
/// Set to `Some` to begin tracking this information.
|
2022-12-21 06:54:54 +01:00
|
|
|
pub last_token: Option<SmartString>,
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
|
2021-07-25 16:56:05 +02:00
|
|
|
/// _(internals)_ Abstraction over a character input stream that supports one-character
/// look-ahead and pushing a character back.
/// Exported under the `internals` feature only.
pub trait InputStream {
    /// Push a character back into the `InputStream`.
    ///
    /// The next call to [`get_next`][InputStream::get_next] or
    /// [`peek_next`][InputStream::peek_next] will return this character instead.
    fn unget(&mut self, ch: char);
    /// Remove and return the next character from the `InputStream`.
    fn get_next(&mut self) -> Option<char>;
    /// Return the next character in the `InputStream` without consuming it.
    #[must_use]
    fn peek_next(&mut self) -> Option<char>;
}
|
|
|
|
|
2022-11-29 08:50:58 +01:00
|
|
|
/// Return error if the string is longer than the maximum length.
|
|
|
|
#[inline]
|
|
|
|
fn ensure_string_len_within_limit(max: Option<NonZeroUsize>, value: &str) -> Result<(), LexError> {
|
|
|
|
if let Some(max) = max {
|
|
|
|
if value.len() > max.get() {
|
|
|
|
return Err(LexError::StringTooLong(max.get()));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2022-01-01 12:54:46 +01:00
|
|
|
/// _(internals)_ Parse a string literal ended by a specified termination character.
|
2020-07-25 09:52:27 +02:00
|
|
|
/// Exported under the `internals` feature only.
|
|
|
|
///
|
2021-04-04 17:08:27 +02:00
|
|
|
/// Returns the parsed string and a boolean indicating whether the string is
|
|
|
|
/// terminated by an interpolation `${`.
|
|
|
|
///
|
2021-04-10 11:47:44 +02:00
|
|
|
/// # Returns
|
|
|
|
///
|
2022-01-01 12:54:46 +01:00
|
|
|
/// | Type | Return Value |`state.is_within_text_terminated_by`|
|
2021-04-13 08:38:04 +02:00
|
|
|
/// |---------------------------------|:--------------------------:|:----------------------------------:|
|
2021-04-10 11:47:44 +02:00
|
|
|
/// |`"hello"` |`StringConstant("hello")` |`None` |
|
|
|
|
/// |`"hello`_{LF}_ or _{EOF}_ |`LexError` |`None` |
|
|
|
|
/// |`"hello\`_{EOF}_ or _{LF}{EOF}_ |`StringConstant("hello")` |`Some('"')` |
|
|
|
|
/// |`` `hello``_{EOF}_ |`StringConstant("hello")` |``Some('`')`` |
|
|
|
|
/// |`` `hello``_{LF}{EOF}_ |`StringConstant("hello\n")` |``Some('`')`` |
|
|
|
|
/// |`` `hello ${`` |`InterpolatedString("hello ")`<br/>next token is `{`|`None` |
|
2021-04-11 15:49:03 +02:00
|
|
|
/// |`` } hello` `` |`StringConstant(" hello")` |`None` |
|
|
|
|
/// |`} hello`_{EOF}_ |`StringConstant(" hello")` |``Some('`')`` |
|
2021-04-13 08:38:04 +02:00
|
|
|
///
|
|
|
|
/// This function does not throw a `LexError` for the following conditions:
|
|
|
|
///
|
|
|
|
/// * Unterminated literal string at _{EOF}_
|
|
|
|
///
|
|
|
|
/// * Unterminated normal string with continuation at _{EOF}_
|
|
|
|
///
|
|
|
|
/// This is to facilitate using this function to parse a script line-by-line, where the end of the
|
|
|
|
/// line (i.e. _{EOF}_) is not necessarily the end of the script.
|
|
|
|
///
|
|
|
|
/// Any time a [`StringConstant`][`Token::StringConstant`] is returned with
|
|
|
|
/// `state.is_within_text_terminated_by` set to `Some(_)` is one of the above conditions.
|
2020-06-26 13:44:50 +02:00
|
|
|
pub fn parse_string_literal(
|
|
|
|
stream: &mut impl InputStream,
|
|
|
|
state: &mut TokenizeState,
|
|
|
|
pos: &mut Position,
|
2021-03-29 18:21:09 +02:00
|
|
|
termination_char: char,
|
|
|
|
verbatim: bool,
|
2022-01-01 12:54:46 +01:00
|
|
|
allow_line_continuation: bool,
|
2021-04-04 07:13:07 +02:00
|
|
|
allow_interpolation: bool,
|
2022-02-26 10:28:58 +01:00
|
|
|
) -> Result<(SmartString, bool, Position), (LexError, Position)> {
|
2022-03-25 01:52:53 +01:00
|
|
|
let mut result = SmartString::new_const();
|
|
|
|
let mut escape = SmartString::new_const();
|
2020-06-26 13:44:50 +02:00
|
|
|
|
2020-09-22 13:18:06 +02:00
|
|
|
let start = *pos;
|
2022-02-10 07:51:31 +01:00
|
|
|
let mut first_char = Position::NONE;
|
2021-04-04 07:13:07 +02:00
|
|
|
let mut interpolated = false;
|
2021-04-22 17:02:25 +02:00
|
|
|
#[cfg(not(feature = "no_position"))]
|
|
|
|
let mut skip_whitespace_until = 0;
|
2020-09-22 13:18:06 +02:00
|
|
|
|
2021-04-10 04:20:17 +02:00
|
|
|
state.is_within_text_terminated_by = Some(termination_char);
|
2022-12-22 10:34:58 +01:00
|
|
|
if let Some(ref mut last) = state.last_token {
|
2022-12-21 06:54:54 +01:00
|
|
|
last.clear();
|
|
|
|
last.push(termination_char);
|
2022-12-22 10:34:58 +01:00
|
|
|
}
|
2021-04-04 17:23:10 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
loop {
|
2023-02-11 17:13:54 +01:00
|
|
|
debug_assert!(
|
2021-04-11 15:49:03 +02:00
|
|
|
!verbatim || escape.is_empty(),
|
|
|
|
"verbatim strings should not have any escapes"
|
|
|
|
);
|
|
|
|
|
2021-04-10 04:20:17 +02:00
|
|
|
let next_char = match stream.get_next() {
|
|
|
|
Some(ch) => {
|
|
|
|
pos.advance();
|
|
|
|
ch
|
|
|
|
}
|
2021-04-11 15:49:03 +02:00
|
|
|
None if verbatim => {
|
2023-03-23 02:12:48 +01:00
|
|
|
debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
|
2021-04-10 05:11:42 +02:00
|
|
|
pos.advance();
|
2021-04-11 15:49:03 +02:00
|
|
|
break;
|
2021-04-10 05:11:42 +02:00
|
|
|
}
|
2022-01-01 12:54:46 +01:00
|
|
|
None if allow_line_continuation && !escape.is_empty() => {
|
2023-03-23 02:12:48 +01:00
|
|
|
debug_assert_eq!(escape, "\\", "unexpected escape {} at end of line", escape);
|
2021-04-10 04:20:17 +02:00
|
|
|
pos.advance();
|
|
|
|
break;
|
|
|
|
}
|
2021-04-11 15:49:03 +02:00
|
|
|
None => {
|
|
|
|
pos.advance();
|
|
|
|
state.is_within_text_terminated_by = None;
|
|
|
|
return Err((LERR::UnterminatedString, start));
|
|
|
|
}
|
2021-04-10 04:20:17 +02:00
|
|
|
};
|
2020-06-26 13:44:50 +02:00
|
|
|
|
2022-12-22 10:34:58 +01:00
|
|
|
if let Some(ref mut last) = state.last_token {
|
|
|
|
last.push(next_char);
|
|
|
|
}
|
2022-12-21 06:54:54 +01:00
|
|
|
|
2021-04-04 07:13:07 +02:00
|
|
|
// String interpolation?
|
|
|
|
if allow_interpolation
|
|
|
|
&& next_char == '$'
|
|
|
|
&& escape.is_empty()
|
2022-07-27 12:04:59 +02:00
|
|
|
&& stream.peek_next().map_or(false, |ch| ch == '{')
|
2021-04-04 07:13:07 +02:00
|
|
|
{
|
|
|
|
interpolated = true;
|
2021-04-10 04:20:17 +02:00
|
|
|
state.is_within_text_terminated_by = None;
|
2021-04-04 07:13:07 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2022-11-29 08:50:58 +01:00
|
|
|
ensure_string_len_within_limit(state.max_string_len, &result)
|
|
|
|
.map_err(|err| (err, start))?;
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2022-02-10 07:51:31 +01:00
|
|
|
// Close wrapper
|
|
|
|
if termination_char == next_char && escape.is_empty() {
|
2022-02-10 11:24:04 +01:00
|
|
|
// Double wrapper
|
|
|
|
if stream.peek_next().map_or(false, |c| c == termination_char) {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-12-22 10:34:58 +01:00
|
|
|
if let Some(ref mut last) = state.last_token {
|
|
|
|
last.push(termination_char);
|
|
|
|
}
|
2022-02-10 11:24:04 +01:00
|
|
|
} else {
|
|
|
|
state.is_within_text_terminated_by = None;
|
|
|
|
break;
|
|
|
|
}
|
2022-02-10 07:51:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if first_char.is_none() {
|
|
|
|
first_char = *pos;
|
|
|
|
}
|
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
match next_char {
|
2021-03-29 18:21:09 +02:00
|
|
|
// \r - ignore if followed by \n
|
2022-07-27 12:04:59 +02:00
|
|
|
'\r' if stream.peek_next().map_or(false, |ch| ch == '\n') => (),
|
2020-06-26 13:44:50 +02:00
|
|
|
// \...
|
2021-04-11 15:49:03 +02:00
|
|
|
'\\' if !verbatim && escape.is_empty() => {
|
2020-06-26 13:44:50 +02:00
|
|
|
escape.push('\\');
|
|
|
|
}
|
|
|
|
// \\
|
|
|
|
'\\' if !escape.is_empty() => {
|
|
|
|
escape.clear();
|
|
|
|
result.push('\\');
|
|
|
|
}
|
|
|
|
// \t
|
|
|
|
't' if !escape.is_empty() => {
|
|
|
|
escape.clear();
|
|
|
|
result.push('\t');
|
|
|
|
}
|
|
|
|
// \n
|
|
|
|
'n' if !escape.is_empty() => {
|
|
|
|
escape.clear();
|
|
|
|
result.push('\n');
|
|
|
|
}
|
|
|
|
// \r
|
|
|
|
'r' if !escape.is_empty() => {
|
|
|
|
escape.clear();
|
|
|
|
result.push('\r');
|
|
|
|
}
|
|
|
|
// \x??, \u????, \U????????
|
2022-07-27 12:04:59 +02:00
|
|
|
ch @ ('x' | 'u' | 'U') if !escape.is_empty() => {
|
2020-06-26 13:44:50 +02:00
|
|
|
let mut seq = escape.clone();
|
|
|
|
escape.clear();
|
2020-07-29 10:10:06 +02:00
|
|
|
seq.push(ch);
|
2020-06-26 13:44:50 +02:00
|
|
|
|
|
|
|
let mut out_val: u32 = 0;
|
|
|
|
let len = match ch {
|
|
|
|
'x' => 2,
|
|
|
|
'u' => 4,
|
|
|
|
'U' => 8,
|
2021-12-30 05:19:41 +01:00
|
|
|
c => unreachable!("x or u or U expected but gets '{}'", c),
|
2020-06-26 13:44:50 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
for _ in 0..len {
|
2021-04-10 04:20:17 +02:00
|
|
|
let c = stream
|
|
|
|
.get_next()
|
|
|
|
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
|
2020-06-26 13:44:50 +02:00
|
|
|
|
|
|
|
pos.advance();
|
2022-12-22 10:34:58 +01:00
|
|
|
seq.push(c);
|
|
|
|
if let Some(ref mut last) = state.last_token {
|
|
|
|
last.push(c);
|
|
|
|
}
|
2020-06-26 13:44:50 +02:00
|
|
|
|
|
|
|
out_val *= 16;
|
2021-04-10 04:20:17 +02:00
|
|
|
out_val += c
|
|
|
|
.to_digit(16)
|
|
|
|
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2021-04-10 04:20:17 +02:00
|
|
|
result.push(
|
|
|
|
char::from_u32(out_val)
|
2022-02-26 10:28:58 +01:00
|
|
|
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?,
|
2021-04-10 04:20:17 +02:00
|
|
|
);
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
|
|
|
|
2021-03-29 18:21:09 +02:00
|
|
|
// \{termination_char} - escaped
|
|
|
|
_ if termination_char == next_char && !escape.is_empty() => {
|
2020-06-26 13:44:50 +02:00
|
|
|
escape.clear();
|
2022-07-27 12:04:59 +02:00
|
|
|
result.push(next_char);
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
|
|
|
|
2021-04-11 15:49:03 +02:00
|
|
|
// Verbatim
|
|
|
|
'\n' if verbatim => {
|
2023-03-23 02:12:48 +01:00
|
|
|
debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
|
2021-04-11 15:49:03 +02:00
|
|
|
pos.new_line();
|
|
|
|
result.push(next_char);
|
|
|
|
}
|
|
|
|
|
2021-03-29 18:21:09 +02:00
|
|
|
// Line continuation
|
2022-01-01 12:54:46 +01:00
|
|
|
'\n' if allow_line_continuation && !escape.is_empty() => {
|
2023-03-23 02:12:48 +01:00
|
|
|
debug_assert_eq!(escape, "\\", "unexpected escape {} at end of line", escape);
|
2021-03-29 18:21:09 +02:00
|
|
|
escape.clear();
|
|
|
|
pos.new_line();
|
2021-04-22 17:02:25 +02:00
|
|
|
|
|
|
|
#[cfg(not(feature = "no_position"))]
|
|
|
|
{
|
2022-01-06 04:07:52 +01:00
|
|
|
let start_position = start.position().unwrap();
|
2021-05-22 13:14:24 +02:00
|
|
|
skip_whitespace_until = start_position + 1;
|
2021-04-22 17:02:25 +02:00
|
|
|
}
|
2021-03-29 18:21:09 +02:00
|
|
|
}
|
|
|
|
|
2021-04-11 15:49:03 +02:00
|
|
|
// Unterminated string
|
|
|
|
'\n' => {
|
2021-03-29 18:21:09 +02:00
|
|
|
pos.rewind();
|
2021-04-10 05:11:42 +02:00
|
|
|
state.is_within_text_terminated_by = None;
|
2021-04-11 05:19:46 +02:00
|
|
|
return Err((LERR::UnterminatedString, start));
|
2021-03-29 18:21:09 +02:00
|
|
|
}
|
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
// Unknown escape sequence
|
2021-03-29 18:21:09 +02:00
|
|
|
_ if !escape.is_empty() => {
|
|
|
|
escape.push(next_char);
|
2020-09-22 13:18:06 +02:00
|
|
|
|
2022-02-26 10:28:58 +01:00
|
|
|
return Err((LERR::MalformedEscapeSequence(escape.to_string()), *pos));
|
2020-07-29 10:10:06 +02:00
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2021-03-29 18:21:09 +02:00
|
|
|
// Whitespace to skip
|
2021-04-22 17:02:25 +02:00
|
|
|
#[cfg(not(feature = "no_position"))]
|
2022-01-06 04:07:52 +01:00
|
|
|
_ if next_char.is_whitespace() && pos.position().unwrap() < skip_whitespace_until => {}
|
2020-06-14 08:25:47 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
// All other characters
|
2021-03-29 18:21:09 +02:00
|
|
|
_ => {
|
2020-06-26 13:44:50 +02:00
|
|
|
escape.clear();
|
2021-03-29 18:21:09 +02:00
|
|
|
result.push(next_char);
|
2021-04-22 17:02:25 +02:00
|
|
|
|
|
|
|
#[cfg(not(feature = "no_position"))]
|
|
|
|
{
|
|
|
|
skip_whitespace_until = 0;
|
|
|
|
}
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2022-11-29 08:50:58 +01:00
|
|
|
ensure_string_len_within_limit(state.max_string_len, &result).map_err(|err| (err, start))?;
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2022-02-26 10:28:58 +01:00
|
|
|
Ok((result, interpolated, first_char))
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
/// Consume the next character.
|
2020-10-08 16:25:50 +02:00
|
|
|
#[inline(always)]
|
2023-02-10 07:58:03 +01:00
|
|
|
fn eat_next_and_advance(stream: &mut impl InputStream, pos: &mut Position) -> Option<char> {
|
2020-06-26 13:44:50 +02:00
|
|
|
pos.advance();
|
2020-07-09 13:54:28 +02:00
|
|
|
stream.get_next()
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
/// Scan for a block comment until the end.
|
2020-12-18 09:07:19 +01:00
|
|
|
fn scan_block_comment(
|
2020-06-26 13:44:50 +02:00
|
|
|
stream: &mut impl InputStream,
|
2021-08-13 07:42:39 +02:00
|
|
|
level: usize,
|
2020-06-26 13:44:50 +02:00
|
|
|
pos: &mut Position,
|
2022-12-02 07:06:31 +01:00
|
|
|
comment: Option<&mut String>,
|
2020-12-12 13:09:29 +01:00
|
|
|
) -> usize {
|
2021-08-13 07:42:39 +02:00
|
|
|
let mut level = level;
|
|
|
|
let mut comment = comment;
|
2021-05-22 13:14:24 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
while let Some(c) = stream.get_next() {
|
|
|
|
pos.advance();
|
|
|
|
|
2021-07-24 08:11:16 +02:00
|
|
|
if let Some(comment) = comment.as_mut() {
|
|
|
|
comment.push(c);
|
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
match c {
|
|
|
|
'/' => {
|
2021-07-26 16:22:27 +02:00
|
|
|
if let Some(c2) = stream.peek_next().filter(|&c2| c2 == '*') {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2021-07-24 08:11:16 +02:00
|
|
|
if let Some(comment) = comment.as_mut() {
|
|
|
|
comment.push(c2);
|
|
|
|
}
|
2021-05-25 04:54:48 +02:00
|
|
|
level += 1;
|
2021-07-26 16:22:27 +02:00
|
|
|
}
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
|
|
|
'*' => {
|
2021-07-26 16:22:27 +02:00
|
|
|
if let Some(c2) = stream.peek_next().filter(|&c2| c2 == '/') {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2021-07-24 08:11:16 +02:00
|
|
|
if let Some(comment) = comment.as_mut() {
|
|
|
|
comment.push(c2);
|
|
|
|
}
|
2021-05-25 04:54:48 +02:00
|
|
|
level -= 1;
|
2021-07-26 16:22:27 +02:00
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
2020-06-26 13:44:50 +02:00
|
|
|
'\n' => pos.new_line(),
|
|
|
|
_ => (),
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
|
2020-12-12 13:09:29 +01:00
|
|
|
if level == 0 {
|
2020-06-26 13:44:50 +02:00
|
|
|
break;
|
2020-06-14 08:25:47 +02:00
|
|
|
}
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
2020-12-12 13:09:29 +01:00
|
|
|
|
|
|
|
level
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
2020-06-14 08:25:47 +02:00
|
|
|
|
2022-01-01 12:54:46 +01:00
|
|
|
/// _(internals)_ Get the next token from the input stream.
|
2020-07-25 09:52:27 +02:00
|
|
|
/// Exported under the `internals` feature only.
|
2021-07-10 05:06:13 +02:00
|
|
|
#[inline]
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2020-06-26 16:33:27 +02:00
|
|
|
pub fn get_next_token(
|
2020-06-26 16:03:21 +02:00
|
|
|
stream: &mut impl InputStream,
|
|
|
|
state: &mut TokenizeState,
|
|
|
|
pos: &mut Position,
|
|
|
|
) -> Option<(Token, Position)> {
|
|
|
|
let result = get_next_token_inner(stream, state, pos);
|
|
|
|
|
|
|
|
// Save the last token's state
|
2022-02-08 02:02:15 +01:00
|
|
|
if let Some((ref token, ..)) = result {
|
2021-09-24 16:44:39 +02:00
|
|
|
state.next_token_cannot_be_unary = !token.is_next_unary();
|
2020-06-26 16:03:21 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
result
|
|
|
|
}
|
|
|
|
|
2020-07-25 08:32:43 +02:00
|
|
|
/// Test if the given character is an ASCII hexadecimal digit
/// (`0`-`9`, `a`-`f` or `A`-`F`).
#[inline(always)]
const fn is_hex_digit(c: char) -> bool {
    c.is_ascii_hexdigit()
}
|
|
|
|
|
2021-02-11 12:20:30 +01:00
|
|
|
/// Test if the given character is an ASCII decimal digit (`0`-`9`).
#[inline(always)]
const fn is_numeric_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
|
|
|
|
|
2020-12-12 13:09:29 +01:00
|
|
|
/// Test if the comment block is a doc-comment.
///
/// A comment counts as a doc-comment when it opens with exactly `///` or exactly
/// `/**`; one more repetition of the marker character (`////...`, `/***...`)
/// makes it an ordinary comment again.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline]
#[must_use]
pub fn is_doc_comment(comment: &str) -> bool {
    // `marker` must open the comment and must not be followed by `repeated`.
    let opens_with_exactly = |marker: &str, repeated: char| {
        comment.starts_with(marker) && !comment[marker.len()..].starts_with(repeated)
    };

    opens_with_exactly("///", '/') || opens_with_exactly("/**", '*')
}
|
|
|
|
|
2020-06-26 16:03:21 +02:00
|
|
|
/// Get the next token.
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2020-06-26 16:03:21 +02:00
|
|
|
fn get_next_token_inner(
|
2020-06-26 13:44:50 +02:00
|
|
|
stream: &mut impl InputStream,
|
|
|
|
state: &mut TokenizeState,
|
|
|
|
pos: &mut Position,
|
|
|
|
) -> Option<(Token, Position)> {
|
2022-12-22 10:34:58 +01:00
|
|
|
state.last_token.as_mut().map(SmartString::clear);
|
2022-12-21 06:54:54 +01:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
// Still inside a comment?
|
|
|
|
if state.comment_level > 0 {
|
|
|
|
let start_pos = *pos;
|
2023-02-21 11:16:03 +01:00
|
|
|
let mut comment = state.include_comments.then(|| String::new());
|
2020-06-26 13:44:50 +02:00
|
|
|
|
2021-05-22 13:14:24 +02:00
|
|
|
state.comment_level =
|
|
|
|
scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
|
2020-12-18 09:07:19 +01:00
|
|
|
|
2021-05-22 13:14:24 +02:00
|
|
|
let return_comment = state.include_comments;
|
2021-04-09 17:13:33 +02:00
|
|
|
|
|
|
|
#[cfg(not(feature = "no_function"))]
|
|
|
|
#[cfg(feature = "metadata")]
|
2021-11-13 05:23:35 +01:00
|
|
|
let return_comment = return_comment || is_doc_comment(comment.as_ref().expect("`Some`"));
|
2021-05-22 13:14:24 +02:00
|
|
|
|
|
|
|
if return_comment {
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Comment(comment.expect("`Some`").into()), start_pos));
|
2021-05-22 13:14:24 +02:00
|
|
|
}
|
|
|
|
if state.comment_level > 0 {
|
2021-04-09 17:13:33 +02:00
|
|
|
// Reached EOF without ending comment block
|
|
|
|
return None;
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
|
2021-04-04 17:23:10 +02:00
|
|
|
// Within text?
|
2021-04-05 17:06:48 +02:00
|
|
|
if let Some(ch) = state.is_within_text_terminated_by.take() {
|
2022-01-01 12:54:46 +01:00
|
|
|
return parse_string_literal(stream, state, pos, ch, true, false, true).map_or_else(
|
2022-02-26 16:18:47 +01:00
|
|
|
|(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
|
2022-02-10 07:51:31 +01:00
|
|
|
|(result, interpolated, start_pos)| {
|
2021-04-04 17:23:10 +02:00
|
|
|
if interpolated {
|
2022-09-25 17:03:18 +02:00
|
|
|
Some((Token::InterpolatedString(result.into()), start_pos))
|
2021-04-04 17:23:10 +02:00
|
|
|
} else {
|
2022-09-25 17:03:18 +02:00
|
|
|
Some((Token::StringConstant(result.into()), start_pos))
|
2021-04-04 17:23:10 +02:00
|
|
|
}
|
|
|
|
},
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2021-04-22 17:02:25 +02:00
|
|
|
let mut negated: Option<Position> = None;
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
while let Some(c) = stream.get_next() {
|
|
|
|
pos.advance();
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
let start_pos = *pos;
|
2023-03-15 01:01:44 +01:00
|
|
|
let cc = stream.peek_next().unwrap_or('\0');
|
|
|
|
|
|
|
|
// Identifiers and strings that can have non-ASCII characters
|
|
|
|
match (c, cc) {
|
2020-06-26 13:44:50 +02:00
|
|
|
// \n
|
2023-03-16 06:05:29 +01:00
|
|
|
('\n', ..) => pos.new_line(),
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
// digit ...
|
2023-03-16 06:05:29 +01:00
|
|
|
('0'..='9', ..) => {
|
2022-09-14 06:11:18 +02:00
|
|
|
let mut result = SmartString::new_const();
|
2020-06-26 13:44:50 +02:00
|
|
|
let mut radix_base: Option<u32> = None;
|
2021-02-11 12:20:30 +01:00
|
|
|
let mut valid: fn(char) -> bool = is_numeric_digit;
|
2020-06-26 13:44:50 +02:00
|
|
|
result.push(c);
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
while let Some(next_char) = stream.peek_next() {
|
|
|
|
match next_char {
|
2022-09-14 06:11:18 +02:00
|
|
|
NUMBER_SEPARATOR => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-14 06:11:18 +02:00
|
|
|
}
|
|
|
|
ch if valid(ch) => {
|
2020-06-26 13:44:50 +02:00
|
|
|
result.push(next_char);
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
2021-02-13 13:57:56 +01:00
|
|
|
#[cfg(any(not(feature = "no_float"), feature = "decimal"))]
|
2020-06-26 13:44:50 +02:00
|
|
|
'.' => {
|
2022-06-26 12:09:15 +02:00
|
|
|
stream.get_next().unwrap();
|
2020-09-23 16:48:28 +02:00
|
|
|
|
2020-11-23 15:51:21 +01:00
|
|
|
// Check if followed by digits or something that cannot start a property name
|
2020-09-23 16:48:28 +02:00
|
|
|
match stream.peek_next().unwrap_or('\0') {
|
2020-11-23 15:51:21 +01:00
|
|
|
// digits after period - accept the period
|
|
|
|
'0'..='9' => {
|
2020-09-23 16:48:28 +02:00
|
|
|
result.push(next_char);
|
2021-02-11 12:20:30 +01:00
|
|
|
pos.advance();
|
2020-09-23 16:48:28 +02:00
|
|
|
}
|
2020-11-23 15:51:21 +01:00
|
|
|
// _ - cannot follow a decimal point
|
2022-12-22 10:34:58 +01:00
|
|
|
NUMBER_SEPARATOR => {
|
2021-01-15 10:13:04 +01:00
|
|
|
stream.unget(next_char);
|
2020-11-23 15:51:21 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
// .. - reserved symbol, not a floating-point number
|
|
|
|
'.' => {
|
2021-01-15 10:13:04 +01:00
|
|
|
stream.unget(next_char);
|
2020-11-23 15:51:21 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
// symbol after period - probably a float
|
2021-07-24 08:11:16 +02:00
|
|
|
ch if !is_id_first_alphabetic(ch) => {
|
2020-11-23 15:51:21 +01:00
|
|
|
result.push(next_char);
|
|
|
|
pos.advance();
|
|
|
|
result.push('0');
|
|
|
|
}
|
|
|
|
// Not a floating-point number
|
2020-09-23 16:48:28 +02:00
|
|
|
_ => {
|
2021-01-15 10:13:04 +01:00
|
|
|
stream.unget(next_char);
|
2020-09-23 16:48:28 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2021-02-11 12:20:30 +01:00
|
|
|
}
|
|
|
|
#[cfg(not(feature = "no_float"))]
|
|
|
|
'e' => {
|
2021-11-13 05:23:35 +01:00
|
|
|
stream.get_next().expect("`e`");
|
2020-09-23 16:48:28 +02:00
|
|
|
|
2021-02-11 12:20:30 +01:00
|
|
|
// Check if followed by digits or +/-
|
|
|
|
match stream.peek_next().unwrap_or('\0') {
|
|
|
|
// digits after e - accept the e
|
|
|
|
'0'..='9' => {
|
|
|
|
result.push(next_char);
|
|
|
|
pos.advance();
|
|
|
|
}
|
|
|
|
// +/- after e - accept the e and the sign
|
|
|
|
'+' | '-' => {
|
|
|
|
result.push(next_char);
|
|
|
|
pos.advance();
|
2022-06-26 12:09:15 +02:00
|
|
|
result.push(stream.get_next().unwrap());
|
2021-02-11 12:20:30 +01:00
|
|
|
pos.advance();
|
|
|
|
}
|
|
|
|
// Not a floating-point number
|
|
|
|
_ => {
|
|
|
|
stream.unget(next_char);
|
|
|
|
break;
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
}
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
2021-02-11 12:20:30 +01:00
|
|
|
// 0x????, 0o????, 0b???? at beginning
|
2022-07-27 12:04:59 +02:00
|
|
|
ch @ ('x' | 'o' | 'b' | 'X' | 'O' | 'B')
|
2021-02-11 12:20:30 +01:00
|
|
|
if c == '0' && result.len() <= 1 =>
|
2020-07-08 06:09:18 +02:00
|
|
|
{
|
2020-06-26 13:44:50 +02:00
|
|
|
result.push(next_char);
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
|
2021-02-11 12:20:30 +01:00
|
|
|
valid = match ch {
|
|
|
|
'x' | 'X' => is_hex_digit,
|
|
|
|
'o' | 'O' => is_numeric_digit,
|
|
|
|
'b' | 'B' => is_numeric_digit,
|
2021-12-30 05:19:41 +01:00
|
|
|
c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
|
2020-06-26 13:44:50 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
radix_base = Some(match ch {
|
|
|
|
'x' | 'X' => 16,
|
|
|
|
'o' | 'O' => 8,
|
|
|
|
'b' | 'B' => 2,
|
2021-12-30 05:19:41 +01:00
|
|
|
c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
|
2020-06-26 13:44:50 +02:00
|
|
|
});
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
_ => break,
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2021-05-25 04:54:48 +02:00
|
|
|
let num_pos = negated.map_or(start_pos, |negated_pos| {
|
2020-06-26 13:44:50 +02:00
|
|
|
result.insert(0, '-');
|
2021-04-16 15:59:05 +02:00
|
|
|
negated_pos
|
2021-05-25 04:54:48 +02:00
|
|
|
});
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2022-12-22 10:34:58 +01:00
|
|
|
if let Some(ref mut last) = state.last_token {
|
|
|
|
*last = result.clone();
|
|
|
|
}
|
2022-12-21 06:54:54 +01:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
// Parse number
|
2022-11-23 09:14:11 +01:00
|
|
|
let token = radix_base.map_or_else(
|
|
|
|
|| {
|
2022-09-14 06:11:18 +02:00
|
|
|
let num = INT::from_str(&result).map(Token::IntegerConstant);
|
2020-06-26 13:44:50 +02:00
|
|
|
|
2021-04-05 17:06:48 +02:00
|
|
|
// If integer parsing is unnecessary, try float instead
|
|
|
|
#[cfg(not(feature = "no_float"))]
|
2021-12-06 13:52:47 +01:00
|
|
|
let num = num.or_else(|_| {
|
2022-11-08 16:17:31 +01:00
|
|
|
crate::types::FloatWrapper::from_str(&result).map(Token::FloatConstant)
|
2021-12-06 13:52:47 +01:00
|
|
|
});
|
2020-06-26 13:44:50 +02:00
|
|
|
|
2021-04-05 17:06:48 +02:00
|
|
|
// Then try decimal
|
|
|
|
#[cfg(feature = "decimal")]
|
2021-12-06 13:52:47 +01:00
|
|
|
let num = num.or_else(|_| {
|
2022-09-25 17:03:18 +02:00
|
|
|
rust_decimal::Decimal::from_str(&result)
|
|
|
|
.map(Box::new)
|
|
|
|
.map(Token::DecimalConstant)
|
2021-12-06 13:52:47 +01:00
|
|
|
});
|
2021-02-13 13:57:56 +01:00
|
|
|
|
2021-04-05 17:06:48 +02:00
|
|
|
// Then try decimal in scientific notation
|
|
|
|
#[cfg(feature = "decimal")]
|
|
|
|
let num = num.or_else(|_| {
|
2022-09-14 06:11:18 +02:00
|
|
|
rust_decimal::Decimal::from_scientific(&result)
|
2022-09-25 17:03:18 +02:00
|
|
|
.map(Box::new)
|
2022-09-14 06:11:18 +02:00
|
|
|
.map(Token::DecimalConstant)
|
2021-04-05 17:06:48 +02:00
|
|
|
});
|
2021-02-14 10:34:53 +01:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
num.unwrap_or_else(|_| {
|
2022-09-14 06:11:18 +02:00
|
|
|
Token::LexError(LERR::MalformedNumber(result.to_string()).into())
|
2021-04-05 17:06:48 +02:00
|
|
|
})
|
|
|
|
},
|
2022-11-23 09:14:11 +01:00
|
|
|
|radix| {
|
|
|
|
let result = &result[2..];
|
|
|
|
|
|
|
|
UNSIGNED_INT::from_str_radix(result, radix)
|
|
|
|
.map(|v| v as INT)
|
|
|
|
.map_or_else(
|
|
|
|
|_| {
|
|
|
|
Token::LexError(
|
|
|
|
LERR::MalformedNumber(result.to_string()).into(),
|
|
|
|
)
|
|
|
|
},
|
|
|
|
Token::IntegerConstant,
|
|
|
|
)
|
|
|
|
},
|
|
|
|
);
|
|
|
|
|
|
|
|
return Some((token, num_pos));
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
// " - string literal
|
|
|
|
('"', ..) => {
|
|
|
|
return parse_string_literal(stream, state, pos, c, false, true, false)
|
|
|
|
.map_or_else(
|
|
|
|
|(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
|
|
|
|
|(result, ..)| Some((Token::StringConstant(result.into()), start_pos)),
|
|
|
|
);
|
|
|
|
}
|
|
|
|
// ` - string literal
|
|
|
|
('`', ..) => {
|
|
|
|
// Start from the next line if at the end of line
|
|
|
|
match stream.peek_next() {
|
|
|
|
// `\r - start from next line
|
|
|
|
Some('\r') => {
|
|
|
|
eat_next_and_advance(stream, pos);
|
|
|
|
// `\r\n
|
|
|
|
if stream.peek_next() == Some('\n') {
|
|
|
|
eat_next_and_advance(stream, pos);
|
|
|
|
}
|
|
|
|
pos.new_line();
|
|
|
|
}
|
|
|
|
// `\n - start from next line
|
|
|
|
Some('\n') => {
|
|
|
|
eat_next_and_advance(stream, pos);
|
|
|
|
pos.new_line();
|
|
|
|
}
|
|
|
|
_ => (),
|
|
|
|
}
|
|
|
|
|
|
|
|
return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
|
|
|
|
|(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
|
|
|
|
|(result, interpolated, ..)| {
|
|
|
|
if interpolated {
|
|
|
|
Some((Token::InterpolatedString(result.into()), start_pos))
|
|
|
|
} else {
|
|
|
|
Some((Token::StringConstant(result.into()), start_pos))
|
|
|
|
}
|
|
|
|
},
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
// ' - character literal
|
|
|
|
('\'', '\'') => {
|
|
|
|
return Some((
|
|
|
|
Token::LexError(LERR::MalformedChar(String::new()).into()),
|
|
|
|
start_pos,
|
|
|
|
))
|
|
|
|
}
|
|
|
|
('\'', ..) => {
|
|
|
|
return Some(
|
|
|
|
parse_string_literal(stream, state, pos, c, false, false, false).map_or_else(
|
|
|
|
|(err, err_pos)| (Token::LexError(err.into()), err_pos),
|
|
|
|
|(result, ..)| {
|
|
|
|
let mut chars = result.chars();
|
|
|
|
let first = chars.next().unwrap();
|
|
|
|
|
|
|
|
if chars.next().is_some() {
|
|
|
|
(
|
|
|
|
Token::LexError(LERR::MalformedChar(result.to_string()).into()),
|
|
|
|
start_pos,
|
|
|
|
)
|
|
|
|
} else {
|
|
|
|
(Token::CharConstant(first), start_pos)
|
|
|
|
}
|
|
|
|
},
|
|
|
|
),
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
// Braces
|
2023-03-16 06:05:29 +01:00
|
|
|
('{', ..) => return Some((Token::LeftBrace, start_pos)),
|
|
|
|
('}', ..) => return Some((Token::RightBrace, start_pos)),
|
2020-06-26 13:44:50 +02:00
|
|
|
|
2022-04-21 04:04:46 +02:00
|
|
|
// Unit
|
2023-03-16 06:05:29 +01:00
|
|
|
('(', ')') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-04-21 04:04:46 +02:00
|
|
|
return Some((Token::Unit, start_pos));
|
|
|
|
}
|
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
// Parentheses
|
2023-03-16 06:05:29 +01:00
|
|
|
('(', '*') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new("(*".into())), start_pos));
|
2020-07-10 05:41:56 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('(', ..) => return Some((Token::LeftParen, start_pos)),
|
|
|
|
(')', ..) => return Some((Token::RightParen, start_pos)),
|
2020-06-26 13:44:50 +02:00
|
|
|
|
|
|
|
// Indexing
|
2023-03-16 06:05:29 +01:00
|
|
|
('[', ..) => return Some((Token::LeftBracket, start_pos)),
|
|
|
|
(']', ..) => return Some((Token::RightBracket, start_pos)),
|
2020-06-26 13:44:50 +02:00
|
|
|
|
|
|
|
// Map literal
|
|
|
|
#[cfg(not(feature = "no_object"))]
|
2023-03-16 06:05:29 +01:00
|
|
|
('#', '{') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::MapStart, start_pos));
|
|
|
|
}
|
2021-03-28 10:36:56 +02:00
|
|
|
// Shebang
|
2023-03-16 06:05:29 +01:00
|
|
|
('#', '!') => return Some((Token::Reserved(Box::new("#!".into())), start_pos)),
|
2021-03-28 10:36:56 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('#', ' ') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2021-12-16 09:06:44 +01:00
|
|
|
let token = if stream.peek_next() == Some('{') {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2021-12-16 09:06:44 +01:00
|
|
|
"# {"
|
|
|
|
} else {
|
|
|
|
"#"
|
|
|
|
};
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new(token.into())), start_pos));
|
2021-12-16 09:06:44 +01:00
|
|
|
}
|
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('#', ..) => return Some((Token::Reserved(Box::new("#".into())), start_pos)),
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
// Operators
|
2023-03-16 06:05:29 +01:00
|
|
|
('+', '=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::PlusAssign, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('+', '+') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new("++".into())), start_pos));
|
2020-10-10 16:14:10 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('+', ..) if !state.next_token_cannot_be_unary => {
|
2021-09-24 16:44:39 +02:00
|
|
|
return Some((Token::UnaryPlus, start_pos))
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('+', ..) => return Some((Token::Plus, start_pos)),
|
2020-06-26 13:44:50 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos),
|
|
|
|
('-', '0'..='9') => return Some((Token::Minus, start_pos)),
|
|
|
|
('-', '=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::MinusAssign, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('-', '>') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new("->".into())), start_pos));
|
2020-07-10 05:41:56 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('-', '-') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new("--".into())), start_pos));
|
2020-10-10 16:14:10 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('-', ..) if !state.next_token_cannot_be_unary => {
|
2021-09-24 16:44:39 +02:00
|
|
|
return Some((Token::UnaryMinus, start_pos))
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('-', ..) => return Some((Token::Minus, start_pos)),
|
2020-06-26 13:44:50 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('*', ')') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new("*)".into())), start_pos));
|
2020-07-10 05:41:56 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('*', '=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::MultiplyAssign, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('*', '*') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2021-02-10 05:41:27 +01:00
|
|
|
|
|
|
|
return Some((
|
|
|
|
if stream.peek_next() == Some('=') {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2021-02-10 05:41:27 +01:00
|
|
|
Token::PowerOfAssign
|
|
|
|
} else {
|
|
|
|
Token::PowerOf
|
|
|
|
},
|
|
|
|
start_pos,
|
|
|
|
));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('*', ..) => return Some((Token::Multiply, start_pos)),
|
2020-06-26 13:44:50 +02:00
|
|
|
|
|
|
|
// Comments
|
2023-03-16 06:05:29 +01:00
|
|
|
('/', '/') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
|
2022-12-02 07:06:31 +01:00
|
|
|
let mut comment: Option<String> = match stream.peek_next() {
|
2021-04-09 17:13:33 +02:00
|
|
|
#[cfg(not(feature = "no_function"))]
|
|
|
|
#[cfg(feature = "metadata")]
|
2021-05-03 07:07:51 +02:00
|
|
|
Some('/') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-12-20 13:05:23 +01:00
|
|
|
|
|
|
|
// Long streams of `///...` are not doc-comments
|
|
|
|
match stream.peek_next() {
|
|
|
|
Some('/') => None,
|
2022-03-25 01:52:53 +01:00
|
|
|
_ => Some("///".into()),
|
2020-12-20 13:05:23 +01:00
|
|
|
}
|
2020-12-18 09:07:19 +01:00
|
|
|
}
|
2022-07-25 07:40:23 +02:00
|
|
|
#[cfg(feature = "metadata")]
|
|
|
|
Some('!') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-07-25 07:40:23 +02:00
|
|
|
Some("//!".into())
|
|
|
|
}
|
2022-03-25 01:52:53 +01:00
|
|
|
_ if state.include_comments => Some("//".into()),
|
2020-12-18 09:07:19 +01:00
|
|
|
_ => None,
|
2020-06-26 13:44:50 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
while let Some(c) = stream.get_next() {
|
2022-01-17 16:15:51 +01:00
|
|
|
if c == '\r' {
|
|
|
|
// \r\n
|
2022-11-23 09:14:11 +01:00
|
|
|
if stream.peek_next() == Some('\n') {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-01-17 16:15:51 +01:00
|
|
|
}
|
2022-01-24 09:06:41 +01:00
|
|
|
pos.new_line();
|
2022-01-17 16:15:51 +01:00
|
|
|
break;
|
|
|
|
}
|
2020-06-26 13:44:50 +02:00
|
|
|
if c == '\n' {
|
|
|
|
pos.new_line();
|
|
|
|
break;
|
|
|
|
}
|
2021-07-24 08:11:16 +02:00
|
|
|
if let Some(comment) = comment.as_mut() {
|
|
|
|
comment.push(c);
|
|
|
|
}
|
2020-06-26 13:44:50 +02:00
|
|
|
pos.advance();
|
2020-06-14 13:13:11 +02:00
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-12-18 09:07:19 +01:00
|
|
|
if let Some(comment) = comment {
|
2022-07-25 07:40:23 +02:00
|
|
|
match comment {
|
|
|
|
#[cfg(feature = "metadata")]
|
2022-12-02 07:06:31 +01:00
|
|
|
_ if comment.starts_with("//!") => {
|
|
|
|
let g = &mut state.tokenizer_control.borrow_mut().global_comments;
|
|
|
|
if !g.is_empty() {
|
|
|
|
g.push('\n');
|
|
|
|
}
|
|
|
|
g.push_str(&comment);
|
|
|
|
}
|
2022-09-25 17:03:18 +02:00
|
|
|
_ => return Some((Token::Comment(comment.into()), start_pos)),
|
2022-07-25 07:40:23 +02:00
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('/', '*') => {
|
2020-06-26 13:44:50 +02:00
|
|
|
state.comment_level = 1;
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2022-12-02 07:06:31 +01:00
|
|
|
let mut comment: Option<String> = match stream.peek_next() {
|
2021-04-09 17:13:33 +02:00
|
|
|
#[cfg(not(feature = "no_function"))]
|
|
|
|
#[cfg(feature = "metadata")]
|
2021-05-03 07:07:51 +02:00
|
|
|
Some('*') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-12-20 13:05:23 +01:00
|
|
|
|
|
|
|
// Long streams of `/****...` are not doc-comments
|
|
|
|
match stream.peek_next() {
|
|
|
|
Some('*') => None,
|
2022-03-25 01:52:53 +01:00
|
|
|
_ => Some("/**".into()),
|
2020-12-20 13:05:23 +01:00
|
|
|
}
|
2020-12-18 09:07:19 +01:00
|
|
|
}
|
2022-03-25 01:52:53 +01:00
|
|
|
_ if state.include_comments => Some("/*".into()),
|
2020-12-18 09:07:19 +01:00
|
|
|
_ => None,
|
2020-06-26 13:44:50 +02:00
|
|
|
};
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-12-18 09:07:19 +01:00
|
|
|
state.comment_level =
|
2021-05-22 13:14:24 +02:00
|
|
|
scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
|
2020-12-18 09:07:19 +01:00
|
|
|
|
|
|
|
if let Some(comment) = comment {
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Comment(comment.into()), start_pos));
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('/', '=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::DivideAssign, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('/', ..) => return Some((Token::Divide, start_pos)),
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
(';', ..) => return Some((Token::SemiColon, start_pos)),
|
|
|
|
(',', ..) => return Some((Token::Comma, start_pos)),
|
2020-10-10 16:14:10 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('.', '.') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2021-12-15 05:06:17 +01:00
|
|
|
return Some((
|
|
|
|
match stream.peek_next() {
|
|
|
|
Some('.') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
Token::Reserved(Box::new("...".into()))
|
2021-12-15 05:06:17 +01:00
|
|
|
}
|
|
|
|
Some('=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2021-12-15 05:06:17 +01:00
|
|
|
Token::InclusiveRange
|
|
|
|
}
|
|
|
|
_ => Token::ExclusiveRange,
|
|
|
|
},
|
|
|
|
start_pos,
|
|
|
|
));
|
2020-10-10 16:14:10 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('.', ..) => return Some((Token::Period, start_pos)),
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('=', '=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
if stream.peek_next() == Some('=') {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new("===".into())), start_pos));
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::EqualsTo, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('=', '>') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-11-13 11:32:18 +01:00
|
|
|
return Some((Token::DoubleArrow, start_pos));
|
2020-07-10 05:41:56 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('=', ..) => return Some((Token::Equals, start_pos)),
|
2020-06-26 13:44:50 +02:00
|
|
|
|
2021-10-29 11:01:29 +02:00
|
|
|
#[cfg(not(feature = "no_module"))]
|
2023-03-16 06:05:29 +01:00
|
|
|
(':', ':') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-07-08 06:09:18 +02:00
|
|
|
|
|
|
|
if stream.peek_next() == Some('<') {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new("::<".into())), start_pos));
|
2020-07-08 06:09:18 +02:00
|
|
|
}
|
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::DoubleColon, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
(':', '=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new(":=".into())), start_pos));
|
2020-07-10 05:41:56 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
(':', ';') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new(":;".into())), start_pos));
|
2022-01-07 05:19:01 +01:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
(':', ..) => return Some((Token::Colon, start_pos)),
|
2020-06-26 13:44:50 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('<', '=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::LessThanEqualsTo, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('<', '-') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new("<-".into())), start_pos));
|
2020-07-10 05:41:56 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('<', '<') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
|
|
|
|
return Some((
|
|
|
|
if stream.peek_next() == Some('=') {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
Token::LeftShiftAssign
|
|
|
|
} else {
|
|
|
|
Token::LeftShift
|
|
|
|
},
|
|
|
|
start_pos,
|
|
|
|
));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('<', '|') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new("<|".into())), start_pos));
|
2022-08-18 10:59:54 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('<', ..) => return Some((Token::LessThan, start_pos)),
|
2020-04-22 11:36:51 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('>', '=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::GreaterThanEqualsTo, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('>', '>') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((
|
|
|
|
if stream.peek_next() == Some('=') {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
Token::RightShiftAssign
|
|
|
|
} else {
|
|
|
|
Token::RightShift
|
|
|
|
},
|
|
|
|
start_pos,
|
|
|
|
));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('>', ..) => return Some((Token::GreaterThan, start_pos)),
|
2020-05-03 19:19:01 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('!', 'i') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
stream.get_next().unwrap();
|
2022-11-30 07:11:09 +01:00
|
|
|
if stream.peek_next() == Some('n') {
|
2023-02-10 07:58:03 +01:00
|
|
|
stream.get_next().unwrap();
|
|
|
|
match stream.peek_next() {
|
|
|
|
Some(c) if is_id_continue(c) => {
|
|
|
|
stream.unget('n');
|
|
|
|
stream.unget('i');
|
|
|
|
return Some((Token::Bang, start_pos));
|
|
|
|
}
|
|
|
|
_ => {
|
|
|
|
pos.advance();
|
|
|
|
pos.advance();
|
|
|
|
return Some((Token::NotIn, start_pos));
|
|
|
|
}
|
|
|
|
}
|
2022-11-30 07:11:09 +01:00
|
|
|
}
|
2023-02-10 17:17:26 +01:00
|
|
|
|
|
|
|
stream.unget('i');
|
|
|
|
return Some((Token::Bang, start_pos));
|
2022-11-30 07:11:09 +01:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('!', '=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
if stream.peek_next() == Some('=') {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new("!==".into())), start_pos));
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::NotEqualsTo, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('!', '.') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new("!.".into())), start_pos));
|
2022-08-18 10:59:54 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('!', ..) => return Some((Token::Bang, start_pos)),
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('|', '|') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::Or, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('|', '=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::OrAssign, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('|', '>') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-09-25 17:03:18 +02:00
|
|
|
return Some((Token::Reserved(Box::new("|>".into())), start_pos));
|
2022-08-18 10:59:54 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('|', ..) => return Some((Token::Pipe, start_pos)),
|
2020-04-22 11:36:51 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('&', '&') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::And, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('&', '=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::AndAssign, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('&', ..) => return Some((Token::Ampersand, start_pos)),
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('^', '=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::XOrAssign, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('^', ..) => return Some((Token::XOr, start_pos)),
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('~', ..) => return Some((Token::Reserved(Box::new("~".into())), start_pos)),
|
2021-02-10 05:41:27 +01:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('%', '=') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2020-06-26 13:44:50 +02:00
|
|
|
return Some((Token::ModuloAssign, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('%', ..) => return Some((Token::Modulo, start_pos)),
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('@', ..) => return Some((Token::Reserved(Box::new("@".into())), start_pos)),
|
2020-10-05 17:02:50 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('$', ..) => return Some((Token::Reserved(Box::new("$".into())), start_pos)),
|
2020-07-08 06:09:18 +02:00
|
|
|
|
2023-03-16 06:05:29 +01:00
|
|
|
('?', '.') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-06-11 18:32:12 +02:00
|
|
|
return Some((
|
|
|
|
#[cfg(not(feature = "no_object"))]
|
|
|
|
Token::Elvis,
|
|
|
|
#[cfg(feature = "no_object")]
|
2022-09-25 17:03:18 +02:00
|
|
|
Token::Reserved(Box::new("?.".into())),
|
2022-06-11 18:32:12 +02:00
|
|
|
start_pos,
|
|
|
|
));
|
2022-06-10 04:26:06 +02:00
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('?', '?') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-06-10 05:22:33 +02:00
|
|
|
return Some((Token::DoubleQuestion, start_pos));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('?', '[') => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-06-11 18:32:12 +02:00
|
|
|
return Some((
|
|
|
|
#[cfg(not(feature = "no_index"))]
|
|
|
|
Token::QuestionBracket,
|
|
|
|
#[cfg(feature = "no_index")]
|
2022-09-25 17:03:18 +02:00
|
|
|
Token::Reserved(Box::new("?[".into())),
|
2022-06-11 18:32:12 +02:00
|
|
|
start_pos,
|
|
|
|
));
|
|
|
|
}
|
2023-03-16 06:05:29 +01:00
|
|
|
('?', ..) => return Some((Token::Reserved(Box::new("?".into())), start_pos)),
|
|
|
|
|
|
|
|
// letter or underscore ...
|
|
|
|
_ if is_id_first_alphabetic(c) || c == '_' => {
|
|
|
|
return Some(parse_identifier_token(stream, state, pos, start_pos, c));
|
|
|
|
}
|
2022-06-10 02:47:22 +02:00
|
|
|
|
2023-03-15 01:01:44 +01:00
|
|
|
_ if c.is_whitespace() => (),
|
2020-12-29 03:41:20 +01:00
|
|
|
|
2023-03-15 01:01:44 +01:00
|
|
|
_ => {
|
2020-07-08 06:09:18 +02:00
|
|
|
return Some((
|
2023-03-15 01:01:44 +01:00
|
|
|
Token::LexError(LERR::UnexpectedInput(c.to_string()).into()),
|
2020-07-08 06:09:18 +02:00
|
|
|
start_pos,
|
|
|
|
))
|
|
|
|
}
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
|
|
|
}
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
pos.advance();
|
|
|
|
|
2021-04-11 15:49:03 +02:00
|
|
|
Some((Token::EOF, *pos))
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
|
|
|
|
2022-10-30 15:16:09 +01:00
|
|
|
/// Get the next token, parsing it as an identifier.
|
2022-11-29 08:50:58 +01:00
|
|
|
fn parse_identifier_token(
|
2020-07-28 22:26:57 +02:00
|
|
|
stream: &mut impl InputStream,
|
2022-12-21 06:54:54 +01:00
|
|
|
state: &mut TokenizeState,
|
2020-07-28 22:26:57 +02:00
|
|
|
pos: &mut Position,
|
|
|
|
start_pos: Position,
|
|
|
|
first_char: char,
|
2022-12-22 10:34:58 +01:00
|
|
|
) -> (Token, Position) {
|
2022-09-14 06:11:18 +02:00
|
|
|
let mut identifier = SmartString::new_const();
|
|
|
|
identifier.push(first_char);
|
2022-12-22 10:34:58 +01:00
|
|
|
if let Some(ref mut last) = state.last_token {
|
2022-12-21 06:54:54 +01:00
|
|
|
last.clear();
|
|
|
|
last.push(first_char);
|
2022-12-22 10:34:58 +01:00
|
|
|
}
|
2020-07-28 22:26:57 +02:00
|
|
|
|
|
|
|
while let Some(next_char) = stream.peek_next() {
|
|
|
|
match next_char {
|
2020-07-28 23:24:41 +02:00
|
|
|
x if is_id_continue(x) => {
|
2023-02-10 07:58:03 +01:00
|
|
|
eat_next_and_advance(stream, pos);
|
2022-12-21 06:54:54 +01:00
|
|
|
identifier.push(x);
|
2022-12-22 10:34:58 +01:00
|
|
|
if let Some(ref mut last) = state.last_token {
|
|
|
|
last.push(x);
|
|
|
|
}
|
2020-07-28 22:26:57 +02:00
|
|
|
}
|
|
|
|
_ => break,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-30 15:16:09 +01:00
|
|
|
if let Some(token) = Token::lookup_symbol_from_syntax(&identifier) {
|
2022-12-22 10:34:58 +01:00
|
|
|
return (token, start_pos);
|
2022-11-29 08:50:58 +01:00
|
|
|
}
|
2022-12-27 15:06:51 +01:00
|
|
|
|
2023-03-15 10:22:11 +01:00
|
|
|
if is_reserved_keyword_or_symbol(&identifier).0 {
|
2022-12-22 10:34:58 +01:00
|
|
|
return (Token::Reserved(Box::new(identifier)), start_pos);
|
2020-11-13 11:32:18 +01:00
|
|
|
}
|
2020-07-28 22:26:57 +02:00
|
|
|
|
2022-10-30 15:16:09 +01:00
|
|
|
if !is_valid_identifier(&identifier) {
|
2022-12-22 10:34:58 +01:00
|
|
|
return (
|
2022-09-14 06:11:18 +02:00
|
|
|
Token::LexError(LERR::MalformedIdentifier(identifier.to_string()).into()),
|
2020-07-28 22:26:57 +02:00
|
|
|
start_pos,
|
2022-12-22 10:34:58 +01:00
|
|
|
);
|
2020-07-28 22:26:57 +02:00
|
|
|
}
|
|
|
|
|
2022-12-22 10:34:58 +01:00
|
|
|
(Token::Identifier(identifier.into()), start_pos)
|
2020-07-28 22:26:57 +02:00
|
|
|
}
|
2020-07-28 23:24:41 +02:00
|
|
|
|
2022-07-25 16:05:52 +02:00
|
|
|
/// _(internals)_ Is a text string a valid identifier?
|
|
|
|
/// Exported under the `internals` feature only.
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2022-10-30 08:45:25 +01:00
|
|
|
pub fn is_valid_identifier(name: &str) -> bool {
|
2020-07-28 23:24:41 +02:00
|
|
|
let mut first_alphabetic = false;
|
|
|
|
|
2022-10-30 08:45:25 +01:00
|
|
|
for ch in name.chars() {
|
2020-07-28 23:24:41 +02:00
|
|
|
match ch {
|
|
|
|
'_' => (),
|
2020-07-28 23:54:23 +02:00
|
|
|
_ if is_id_first_alphabetic(ch) => first_alphabetic = true,
|
2020-07-28 23:24:41 +02:00
|
|
|
_ if !first_alphabetic => return false,
|
|
|
|
_ if char::is_ascii_alphanumeric(&ch) => (),
|
|
|
|
_ => return false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
first_alphabetic
|
|
|
|
}
|
|
|
|
|
2022-07-25 16:05:52 +02:00
|
|
|
/// _(internals)_ Is a text string a valid script-defined function name?
|
|
|
|
/// Exported under the `internals` feature only.
|
2021-08-30 09:42:47 +02:00
|
|
|
#[inline(always)]
|
|
|
|
#[must_use]
|
2022-01-04 08:22:48 +01:00
|
|
|
pub fn is_valid_function_name(name: &str) -> bool {
|
2022-12-27 15:06:51 +01:00
|
|
|
is_valid_identifier(name)
|
2023-03-15 10:22:11 +01:00
|
|
|
&& !is_reserved_keyword_or_symbol(name).0
|
2022-12-27 15:06:51 +01:00
|
|
|
&& Token::lookup_symbol_from_syntax(name).is_none()
|
2021-08-30 09:42:47 +02:00
|
|
|
}
|
|
|
|
|
2021-02-24 16:23:32 +01:00
|
|
|
/// Is a character valid to start an identifier?
///
/// With the `unicode-xid-ident` feature the decision is delegated to
/// `UnicodeXID::is_xid_start`; otherwise only ASCII letters qualify.
#[inline(always)]
#[must_use]
pub fn is_id_first_alphabetic(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    {
        unicode_xid::UnicodeXID::is_xid_start(x)
    }
    #[cfg(not(feature = "unicode-xid-ident"))]
    {
        x.is_ascii_alphabetic()
    }
}
|
|
|
|
|
2021-02-24 16:23:32 +01:00
|
|
|
/// Is a character valid for an identifier?
///
/// With the `unicode-xid-ident` feature the decision is delegated to
/// `UnicodeXID::is_xid_continue`; otherwise ASCII letters, ASCII digits and
/// the underscore qualify.
#[inline(always)]
#[must_use]
pub fn is_id_continue(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    {
        unicode_xid::UnicodeXID::is_xid_continue(x)
    }
    #[cfg(not(feature = "unicode-xid-ident"))]
    {
        x == '_' || x.is_ascii_alphanumeric()
    }
}
|
|
|
|
|
2023-02-20 06:28:17 +01:00
|
|
|
/// Is a piece of syntax a reserved keyword or reserved symbol?
|
2023-03-15 10:22:11 +01:00
|
|
|
///
|
|
|
|
/// # Return values
|
|
|
|
///
|
|
|
|
/// The first `bool` indicates whether it is a reserved keyword or symbol.
|
|
|
|
///
|
|
|
|
/// The second `bool` indicates whether the keyword can be called normally as a function.
|
|
|
|
///
|
|
|
|
/// The third `bool` indicates whether the keyword can be called in method-call style.
|
2023-03-15 01:54:34 +01:00
|
|
|
#[inline]
|
2022-12-27 15:06:51 +01:00
|
|
|
#[must_use]
|
2023-03-15 10:22:11 +01:00
|
|
|
pub fn is_reserved_keyword_or_symbol(syntax: &str) -> (bool, bool, bool) {
|
2023-03-16 06:05:29 +01:00
|
|
|
// This implementation is based upon a pre-calculated table generated
|
2023-03-16 23:41:44 +01:00
|
|
|
// by GNU `gperf` on the list of keywords.
|
2023-03-15 01:01:44 +01:00
|
|
|
let utf8 = syntax.as_bytes();
|
|
|
|
let len = utf8.len();
|
2023-03-15 01:54:34 +01:00
|
|
|
let rounds = len.min(3);
|
2023-03-15 01:01:44 +01:00
|
|
|
let mut hash_val = len;
|
2022-12-27 15:06:51 +01:00
|
|
|
|
2023-03-15 01:01:44 +01:00
|
|
|
if !(MIN_RESERVED_LEN..=MAX_RESERVED_LEN).contains(&len) {
|
2023-03-15 10:22:11 +01:00
|
|
|
return (false, false, false);
|
2023-03-15 01:01:44 +01:00
|
|
|
}
|
|
|
|
|
2023-03-15 01:54:34 +01:00
|
|
|
for x in 0..rounds {
|
|
|
|
hash_val += RESERVED_ASSOC_VALUES[utf8[rounds - 1 - x] as usize] as usize;
|
2023-03-15 01:01:44 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if !(MIN_RESERVED_HASH_VALUE..=MAX_RESERVED_HASH_VALUE).contains(&hash_val) {
|
2023-03-15 10:22:11 +01:00
|
|
|
return (false, false, false);
|
2023-03-15 01:01:44 +01:00
|
|
|
}
|
2022-12-27 15:06:51 +01:00
|
|
|
|
2023-03-15 01:01:44 +01:00
|
|
|
match RESERVED_LIST[hash_val] {
|
2023-03-15 10:22:11 +01:00
|
|
|
("", ..) => (false, false, false),
|
2023-03-16 06:05:29 +01:00
|
|
|
(s, true, a, b) => (
|
2023-03-16 23:41:44 +01:00
|
|
|
// Fail early to avoid calling memcmp().
|
|
|
|
// Since we are already working with bytes, mind as well check the first one.
|
2023-03-16 06:05:29 +01:00
|
|
|
s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax,
|
|
|
|
a,
|
|
|
|
b,
|
|
|
|
),
|
2023-03-15 10:22:11 +01:00
|
|
|
_ => (false, false, false),
|
2022-12-27 15:06:51 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-24 03:26:35 +02:00
|
|
|
/// _(internals)_ A type that implements the [`InputStream`] trait.
|
|
|
|
/// Exported under the `internals` feature only.
|
|
|
|
///
|
2020-07-09 13:54:28 +02:00
|
|
|
/// Multiple character streams are jointed together to form one single stream.
|
2020-06-26 13:44:50 +02:00
|
|
|
pub struct MultiInputsStream<'a> {
|
2023-02-10 07:58:03 +01:00
|
|
|
/// Buffered characters, if any.
|
|
|
|
pub buf: SmallVec<[char; 2]>,
|
2020-07-09 13:54:28 +02:00
|
|
|
/// The current stream index.
|
2021-09-24 03:26:35 +02:00
|
|
|
pub index: usize,
|
2021-03-12 15:30:08 +01:00
|
|
|
/// The input character streams.
|
2021-09-24 03:26:35 +02:00
|
|
|
pub streams: StaticVec<Peekable<Chars<'a>>>,
|
2020-06-26 13:44:50 +02:00
|
|
|
}
|
2020-04-23 07:24:24 +02:00
|
|
|
|
2020-06-26 13:44:50 +02:00
|
|
|
impl InputStream for MultiInputsStream<'_> {
    /// Push a character back so that it is the next one returned.
    #[inline]
    fn unget(&mut self, ch: char) {
        self.buf.push(ch);
    }
    /// Consume and return the next character, or `None` when every stream is exhausted.
    fn get_next(&mut self) -> Option<char> {
        // Pushed-back characters take priority over the streams.
        if let Some(ch) = self.buf.pop() {
            return Some(ch);
        }

        while let Some(stream) = self.streams.get_mut(self.index) {
            // Next character in current stream
            if let Some(ch) = stream.next() {
                return Some(ch);
            }
            // Current stream is exhausted: jump to the next stream
            self.index += 1;
        }

        // No more streams
        None
    }
    /// Return the next character without consuming it, or `None` when every stream is exhausted.
    ///
    /// Exhausted streams are skipped, so `index` may advance even though no character is consumed.
    fn peek_next(&mut self) -> Option<char> {
        // Pushed-back characters take priority over the streams.
        if let Some(&ch) = self.buf.last() {
            return Some(ch);
        }

        while let Some(stream) = self.streams.get_mut(self.index) {
            // Next character in current stream
            if let Some(&ch) = stream.peek() {
                return Some(ch);
            }
            // Current stream is exhausted: jump to the next stream
            self.index += 1;
        }

        // No more streams
        None
    }
}
|
|
|
|
|
2021-09-24 03:26:35 +02:00
|
|
|
/// _(internals)_ An iterator on a [`Token`] stream.
|
|
|
|
/// Exported under the `internals` feature only.
|
2021-03-03 15:49:57 +01:00
|
|
|
pub struct TokenIterator<'a> {
|
2020-07-05 11:41:45 +02:00
|
|
|
/// Reference to the scripting `Engine`.
|
2021-09-24 03:26:35 +02:00
|
|
|
pub engine: &'a Engine,
|
2020-06-26 13:44:50 +02:00
|
|
|
/// Current state.
|
2021-09-24 03:26:35 +02:00
|
|
|
pub state: TokenizeState,
|
2020-06-26 13:44:50 +02:00
|
|
|
/// Current position.
|
2021-09-24 03:26:35 +02:00
|
|
|
pub pos: Position,
|
2020-06-26 13:44:50 +02:00
|
|
|
/// Input character stream.
|
2021-09-24 03:26:35 +02:00
|
|
|
pub stream: MultiInputsStream<'a>,
|
2020-12-29 03:41:20 +01:00
|
|
|
/// A processor function that maps a token to another.
|
2021-09-24 12:00:48 +02:00
|
|
|
pub token_mapper: Option<&'a OnParseTokenCallback>,
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
|
2021-03-03 15:49:57 +01:00
|
|
|
impl<'a> Iterator for TokenIterator<'a> {
|
2020-04-15 16:21:23 +02:00
|
|
|
type Item = (Token, Position);
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
2022-12-21 06:54:54 +01:00
|
|
|
let (within_interpolated, compress_script) = {
|
2022-07-25 07:40:23 +02:00
|
|
|
let control = &mut *self.state.tokenizer_control.borrow_mut();
|
|
|
|
|
|
|
|
if control.is_within_text {
|
|
|
|
// Switch to text mode terminated by back-tick
|
|
|
|
self.state.is_within_text_terminated_by = Some('`');
|
|
|
|
// Reset it
|
|
|
|
control.is_within_text = false;
|
|
|
|
}
|
2022-12-21 06:54:54 +01:00
|
|
|
|
|
|
|
(
|
|
|
|
self.state.is_within_text_terminated_by.is_some(),
|
|
|
|
control.compressed.is_some(),
|
|
|
|
)
|
|
|
|
};
|
2021-04-04 07:13:07 +02:00
|
|
|
|
2020-12-29 06:12:11 +01:00
|
|
|
let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
|
2020-07-17 08:50:23 +02:00
|
|
|
// {EOF}
|
2020-12-29 06:12:11 +01:00
|
|
|
None => return None,
|
2021-04-13 08:38:04 +02:00
|
|
|
// {EOF} after unterminated string.
|
|
|
|
// The only case where `TokenizeState.is_within_text_terminated_by` is set is when
|
|
|
|
// a verbatim string or a string with continuation encounters {EOF}.
|
|
|
|
// This is necessary to handle such cases for line-by-line parsing, but for an entire
|
|
|
|
// script it is a syntax error.
|
2022-02-08 02:46:14 +01:00
|
|
|
Some((Token::StringConstant(..), pos)) if self.state.is_within_text_terminated_by.is_some() => {
|
2021-04-11 05:19:46 +02:00
|
|
|
self.state.is_within_text_terminated_by = None;
|
2022-02-26 16:18:47 +01:00
|
|
|
return Some((Token::LexError(LERR::UnterminatedString.into()), pos));
|
2021-04-10 04:20:17 +02:00
|
|
|
}
|
2020-07-17 08:50:23 +02:00
|
|
|
// Reserved keyword/symbol
|
2020-12-29 06:12:11 +01:00
|
|
|
Some((Token::Reserved(s), pos)) => (match
|
2022-09-25 17:03:18 +02:00
|
|
|
(s.as_str(),
|
2022-07-05 16:59:03 +02:00
|
|
|
#[cfg(not(feature = "no_custom_syntax"))]
|
2022-11-25 13:42:16 +01:00
|
|
|
self.engine.custom_keywords.as_deref().map_or(false, |m| m.contains_key(&*s)),
|
2022-07-05 16:59:03 +02:00
|
|
|
#[cfg(feature = "no_custom_syntax")]
|
|
|
|
false
|
|
|
|
)
|
2020-07-17 08:50:23 +02:00
|
|
|
{
|
2021-11-11 06:55:52 +01:00
|
|
|
("===", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
|
2020-07-17 08:50:23 +02:00
|
|
|
"'===' is not a valid operator. This is not JavaScript! Should it be '=='?".to_string(),
|
2022-02-26 16:18:47 +01:00
|
|
|
).into()),
|
2021-11-11 06:55:52 +01:00
|
|
|
("!==", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
|
2020-07-17 08:50:23 +02:00
|
|
|
"'!==' is not a valid operator. This is not JavaScript! Should it be '!='?".to_string(),
|
2022-02-26 16:18:47 +01:00
|
|
|
).into()),
|
2021-11-11 06:55:52 +01:00
|
|
|
("->", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
|
2022-02-26 16:18:47 +01:00
|
|
|
"'->' is not a valid symbol. This is not C or C++!".to_string()).into()),
|
2021-11-11 06:55:52 +01:00
|
|
|
("<-", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
|
2020-07-08 06:09:18 +02:00
|
|
|
"'<-' is not a valid symbol. This is not Go! Should it be '<='?".to_string(),
|
2022-02-26 16:18:47 +01:00
|
|
|
).into()),
|
2021-11-11 06:55:52 +01:00
|
|
|
(":=", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
|
2020-11-21 08:44:17 +01:00
|
|
|
"':=' is not a valid assignment operator. This is not Go or Pascal! Should it be simply '='?".to_string(),
|
2022-02-26 16:18:47 +01:00
|
|
|
).into()),
|
2022-01-07 05:19:01 +01:00
|
|
|
(":;", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
|
|
|
|
"':;' is not a valid symbol. Should it be '::'?".to_string(),
|
2022-02-26 16:18:47 +01:00
|
|
|
).into()),
|
2021-11-11 06:55:52 +01:00
|
|
|
("::<", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
|
2020-07-17 08:50:23 +02:00
|
|
|
"'::<>' is not a valid symbol. This is not Rust! Should it be '::'?".to_string(),
|
2022-02-26 16:18:47 +01:00
|
|
|
).into()),
|
2022-07-27 12:04:59 +02:00
|
|
|
("(*" | "*)", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
|
2020-07-17 08:50:23 +02:00
|
|
|
"'(* .. *)' is not a valid comment format. This is not Pascal! Should it be '/* .. */'?".to_string(),
|
2022-02-26 16:18:47 +01:00
|
|
|
).into()),
|
2021-12-16 09:06:44 +01:00
|
|
|
("# {", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
|
2020-07-17 08:50:23 +02:00
|
|
|
"'#' is not a valid symbol. Should it be '#{'?".to_string(),
|
2022-02-26 16:18:47 +01:00
|
|
|
).into()),
|
2020-07-17 08:50:23 +02:00
|
|
|
// Reserved keyword/operator that is custom.
|
2022-07-05 16:59:03 +02:00
|
|
|
#[cfg(not(feature = "no_custom_syntax"))]
|
2022-02-08 02:02:15 +01:00
|
|
|
(.., true) => Token::Custom(s),
|
2022-07-05 16:59:03 +02:00
|
|
|
#[cfg(feature = "no_custom_syntax")]
|
|
|
|
(.., true) => unreachable!("no custom operators"),
|
2020-07-17 08:50:23 +02:00
|
|
|
// Reserved keyword that is not custom and disabled.
|
2022-11-25 13:42:16 +01:00
|
|
|
(token, false) if self.engine.disabled_symbols.as_deref().map_or(false,|m| m.contains(token)) => {
|
2022-10-30 08:45:25 +01:00
|
|
|
let msg = format!("reserved {} '{token}' is disabled", if is_valid_identifier(token) { "keyword"} else {"symbol"});
|
2022-02-26 16:18:47 +01:00
|
|
|
Token::LexError(LERR::ImproperSymbol(s.to_string(), msg).into())
|
2020-11-21 08:08:18 +01:00
|
|
|
},
|
2020-07-17 08:50:23 +02:00
|
|
|
// Reserved keyword/operator that is not custom.
|
2022-02-08 02:02:15 +01:00
|
|
|
(.., false) => Token::Reserved(s),
|
2020-12-29 06:12:11 +01:00
|
|
|
}, pos),
|
2020-07-17 08:50:23 +02:00
|
|
|
// Custom keyword
|
2022-07-05 16:59:03 +02:00
|
|
|
#[cfg(not(feature = "no_custom_syntax"))]
|
2022-11-25 13:42:16 +01:00
|
|
|
Some((Token::Identifier(s), pos)) if self.engine.custom_keywords.as_deref().map_or(false,|m| m.contains_key(&*s)) => {
|
2020-12-29 06:12:11 +01:00
|
|
|
(Token::Custom(s), pos)
|
2020-07-10 05:41:56 +02:00
|
|
|
}
|
2021-12-16 11:01:49 +01:00
|
|
|
// Custom keyword/symbol - must be disabled
|
2022-07-05 16:59:03 +02:00
|
|
|
#[cfg(not(feature = "no_custom_syntax"))]
|
2022-11-25 13:42:16 +01:00
|
|
|
Some((token, pos)) if token.is_literal() && self.engine.custom_keywords.as_deref().map_or(false,|m| m.contains_key(token.literal_syntax())) => {
|
|
|
|
if self.engine.disabled_symbols.as_deref().map_or(false,|m| m.contains(token.literal_syntax())) {
|
2020-12-26 16:21:09 +01:00
|
|
|
// Disabled standard keyword/symbol
|
2022-09-25 17:03:18 +02:00
|
|
|
(Token::Custom(Box::new(token.literal_syntax().into())), pos)
|
2020-07-17 08:50:23 +02:00
|
|
|
} else {
|
|
|
|
// Active standard keyword - should never be a custom keyword!
|
2021-12-30 05:19:41 +01:00
|
|
|
unreachable!("{:?} is an active keyword", token)
|
2020-07-17 08:50:23 +02:00
|
|
|
}
|
2020-07-10 05:41:56 +02:00
|
|
|
}
|
2020-12-26 16:21:09 +01:00
|
|
|
// Disabled symbol
|
2022-11-25 13:42:16 +01:00
|
|
|
Some((token, pos)) if token.is_literal() && self.engine.disabled_symbols.as_deref().map_or(false,|m| m.contains(token.literal_syntax())) => {
|
2022-09-25 17:03:18 +02:00
|
|
|
(Token::Reserved(Box::new(token.literal_syntax().into())), pos)
|
2020-07-05 09:23:51 +02:00
|
|
|
}
|
2020-12-26 16:21:09 +01:00
|
|
|
// Normal symbol
|
2020-12-29 06:12:11 +01:00
|
|
|
Some(r) => r,
|
2020-07-26 16:25:30 +02:00
|
|
|
};
|
|
|
|
|
2020-12-29 06:12:11 +01:00
|
|
|
// Run the mapper, if any
|
2022-12-22 10:34:58 +01:00
|
|
|
let token = if let Some(func) = self.token_mapper {
|
|
|
|
func(token, pos, &self.state)
|
|
|
|
} else {
|
|
|
|
token
|
2020-12-29 06:12:11 +01:00
|
|
|
};
|
|
|
|
|
2022-12-21 06:54:54 +01:00
|
|
|
// Collect the compressed script, if needed
|
|
|
|
if compress_script {
|
|
|
|
let control = &mut *self.state.tokenizer_control.borrow_mut();
|
|
|
|
|
|
|
|
if let Some(ref mut compressed) = control.compressed {
|
|
|
|
if !matches!(token, Token::EOF) {
|
|
|
|
use std::fmt::Write;
|
|
|
|
|
|
|
|
let last_token = self.state.last_token.as_ref().unwrap();
|
|
|
|
let mut buf = SmartString::new_const();
|
|
|
|
|
|
|
|
if last_token.is_empty() {
|
|
|
|
write!(buf, "{token}").unwrap();
|
|
|
|
} else if within_interpolated
|
|
|
|
&& matches!(
|
|
|
|
token,
|
|
|
|
Token::StringConstant(..) | Token::InterpolatedString(..)
|
|
|
|
)
|
|
|
|
{
|
|
|
|
compressed.push_str(&last_token[1..]);
|
|
|
|
} else {
|
|
|
|
buf = last_token.clone();
|
|
|
|
}
|
|
|
|
|
2022-12-22 10:34:58 +01:00
|
|
|
if !buf.is_empty() && !compressed.is_empty() {
|
|
|
|
let cur = buf.chars().next().unwrap();
|
|
|
|
|
|
|
|
if cur == '_' || is_id_first_alphabetic(cur) || is_id_continue(cur) {
|
2022-12-21 06:54:54 +01:00
|
|
|
let prev = compressed.chars().last().unwrap();
|
2022-12-22 10:34:58 +01:00
|
|
|
|
|
|
|
if prev == '_' || is_id_first_alphabetic(prev) || is_id_continue(prev) {
|
2022-12-21 06:54:54 +01:00
|
|
|
compressed.push(' ');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-12-22 10:34:58 +01:00
|
|
|
|
|
|
|
compressed.push_str(&buf);
|
2022-12-21 06:54:54 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-29 06:12:11 +01:00
|
|
|
Some((token, pos))
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-04 07:13:07 +02:00
|
|
|
// Marker impl: declares [`TokenIterator`] to be a fused iterator.
// NOTE(review): this presumes `next` keeps returning `None` once it has done so — confirm against `get_next_token`.
impl FusedIterator for TokenIterator<'_> {}
|
|
|
|
|
2020-10-19 13:11:55 +02:00
|
|
|
impl Engine {
|
2021-07-25 16:56:05 +02:00
|
|
|
/// _(internals)_ Tokenize an input text stream.
|
2021-03-03 15:49:57 +01:00
|
|
|
/// Exported under the `internals` feature only.
|
|
|
|
#[cfg(feature = "internals")]
|
2020-12-29 03:41:20 +01:00
|
|
|
#[inline(always)]
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2021-04-04 07:13:07 +02:00
|
|
|
pub fn lex<'a>(
|
|
|
|
&'a self,
|
2021-11-28 16:06:33 +01:00
|
|
|
input: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
|
2021-04-04 18:05:56 +02:00
|
|
|
) -> (TokenIterator<'a>, TokenizerControl) {
|
2020-12-29 06:12:11 +01:00
|
|
|
self.lex_raw(input, None)
|
2020-12-29 03:41:20 +01:00
|
|
|
}
|
2021-07-25 16:56:05 +02:00
|
|
|
/// _(internals)_ Tokenize an input text stream with a mapping function.
|
2021-03-03 15:49:57 +01:00
|
|
|
/// Exported under the `internals` feature only.
|
|
|
|
#[cfg(feature = "internals")]
|
2020-12-29 06:12:11 +01:00
|
|
|
#[inline(always)]
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2021-03-03 15:49:57 +01:00
|
|
|
pub fn lex_with_map<'a>(
|
|
|
|
&'a self,
|
2021-11-28 16:06:33 +01:00
|
|
|
input: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
|
2021-09-24 12:00:48 +02:00
|
|
|
token_mapper: &'a OnParseTokenCallback,
|
2021-04-04 18:05:56 +02:00
|
|
|
) -> (TokenIterator<'a>, TokenizerControl) {
|
2021-09-24 12:00:48 +02:00
|
|
|
self.lex_raw(input, Some(token_mapper))
|
2020-12-29 06:12:11 +01:00
|
|
|
}
|
|
|
|
/// Tokenize an input text stream with an optional mapping function.
|
2021-07-10 05:06:13 +02:00
|
|
|
#[inline]
|
2021-06-12 16:47:43 +02:00
|
|
|
#[must_use]
|
2021-03-03 15:49:57 +01:00
|
|
|
pub(crate) fn lex_raw<'a>(
|
|
|
|
&'a self,
|
2021-11-27 16:04:45 +01:00
|
|
|
input: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
|
2021-09-24 12:00:48 +02:00
|
|
|
token_mapper: Option<&'a OnParseTokenCallback>,
|
2021-04-04 17:08:27 +02:00
|
|
|
) -> (TokenIterator<'a>, TokenizerControl) {
|
2022-07-25 07:40:23 +02:00
|
|
|
let buffer: TokenizerControl = RefCell::new(TokenizerControlBlock::new()).into();
|
2021-04-04 07:13:07 +02:00
|
|
|
let buffer2 = buffer.clone();
|
|
|
|
|
|
|
|
(
|
|
|
|
TokenIterator {
|
|
|
|
engine: self,
|
|
|
|
state: TokenizeState {
|
2022-11-29 08:50:58 +01:00
|
|
|
max_string_len: NonZeroUsize::new(self.max_string_size()),
|
2021-09-24 16:44:39 +02:00
|
|
|
next_token_cannot_be_unary: false,
|
2022-07-25 07:40:23 +02:00
|
|
|
tokenizer_control: buffer,
|
2021-04-04 07:13:07 +02:00
|
|
|
comment_level: 0,
|
|
|
|
include_comments: false,
|
2021-04-04 17:23:10 +02:00
|
|
|
is_within_text_terminated_by: None,
|
2022-12-21 06:54:54 +01:00
|
|
|
last_token: None,
|
2021-04-04 07:13:07 +02:00
|
|
|
},
|
|
|
|
pos: Position::new(1, 0),
|
|
|
|
stream: MultiInputsStream {
|
2023-02-10 07:58:03 +01:00
|
|
|
buf: SmallVec::new_const(),
|
2021-11-27 16:04:45 +01:00
|
|
|
streams: input
|
|
|
|
.into_iter()
|
|
|
|
.map(|s| s.as_ref().chars().peekable())
|
|
|
|
.collect(),
|
2021-04-04 07:13:07 +02:00
|
|
|
index: 0,
|
|
|
|
},
|
2021-09-24 12:00:48 +02:00
|
|
|
token_mapper,
|
2020-10-19 13:11:55 +02:00
|
|
|
},
|
2021-04-04 07:13:07 +02:00
|
|
|
buffer2,
|
|
|
|
)
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
}
|