diff --git a/.gitignore b/.gitignore index 63a3239a..d2538ea1 100644 --- a/.gitignore +++ b/.gitignore @@ -6,9 +6,9 @@ benches/results clippy.toml Rhai.toml **/*.bat +**/*.exe doc/rhai-sync.json doc/rhai.json -tools/ .idea/ .idea .idea/* diff --git a/CHANGELOG.md b/CHANGELOG.md index ed9ad185..5913e007 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Enhancements * Range cases in `switch` statements now also match floating-point and decimal values. In order to support this, however, small numeric ranges cases are no longer unrolled. * Loading a module via `import` now gives the module access to the current scope, including variables and constants defined inside. * Some very simple operator calls (e.g. integer add) are short-circuited to avoid the overhead of a function call, resulting in a small speed improvement. +* The tokenizer now uses table-driven keyword recognizers generated by GNU gperf. At least _theoretically_ it should be faster... Version 1.12.0 diff --git a/src/README.md b/src/README.md index fc1d7bbf..a22e5cfe 100644 --- a/src/README.md +++ b/src/README.md @@ -28,4 +28,5 @@ Sub-Directories | `func` | Support for function calls | | `eval` | Evaluation engine | | `serde` | Support for [`serde`](https://crates.io/crates/serde) | +| `tools` | External tools needed for building | | `bin` | Pre-built CLI binaries (e.g. 
`rhai-run`, `rhai-repl`) | diff --git a/src/api/custom_syntax.rs b/src/api/custom_syntax.rs index f0368b9a..acd01195 100644 --- a/src/api/custom_syntax.rs +++ b/src/api/custom_syntax.rs @@ -232,7 +232,7 @@ impl Engine { } let token = Token::lookup_symbol_from_syntax(s).or_else(|| { - if is_reserved_keyword_or_symbol(s) { + if is_reserved_keyword_or_symbol(s).0 { Some(Token::Reserved(Box::new(s.into()))) } else { None @@ -296,7 +296,7 @@ impl Engine { // Identifier or symbol in first position _ if segments.is_empty() - && (is_valid_identifier(s) || is_reserved_keyword_or_symbol(s)) => + && (is_valid_identifier(s) || is_reserved_keyword_or_symbol(s).0) => { // Make it a custom keyword/symbol if it is disabled or reserved if self diff --git a/src/api/mod.rs b/src/api/mod.rs index 50fbdaae..a2cf21ce 100644 --- a/src/api/mod.rs +++ b/src/api/mod.rs @@ -54,7 +54,7 @@ impl Engine { #[inline(always)] #[must_use] pub fn module_resolver(&self) -> &dyn crate::ModuleResolver { - const DUMMY_RESOLVER: crate::module::resolvers::DummyModuleResolver = + static DUMMY_RESOLVER: crate::module::resolvers::DummyModuleResolver = crate::module::resolvers::DummyModuleResolver; self.module_resolver.as_deref().unwrap_or(&DUMMY_RESOLVER) diff --git a/src/parser.rs b/src/parser.rs index ac621c25..cbe69827 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -11,7 +11,7 @@ use crate::engine::{Precedence, KEYWORD_THIS, OP_CONTAINS, OP_NOT}; use crate::eval::{Caches, GlobalRuntimeState}; use crate::func::{hashing::get_hasher, StraightHashMap}; use crate::tokenizer::{ - is_keyword_function, is_valid_function_name, is_valid_identifier, Token, TokenStream, + is_reserved_keyword_or_symbol, is_valid_function_name, is_valid_identifier, Token, TokenStream, TokenizerControl, }; use crate::types::dynamic::{AccessMode, Union}; @@ -1665,7 +1665,9 @@ impl Engine { match input.peek().expect(NEVER_ENDS).0 { // Function call is allowed to have reserved keyword - Token::LeftParen | Token::Bang | Token::Unit if 
is_keyword_function(&s).0 => { + Token::LeftParen | Token::Bang | Token::Unit + if is_reserved_keyword_or_symbol(&s).1 => + { Expr::Variable( (None, ns, 0, state.get_interned_string(*s)).into(), None, @@ -1824,7 +1826,7 @@ impl Engine { // Prevents capturing of the object properties as vars: xxx. state.allow_capture = false; } - (Token::Reserved(s), ..) if is_keyword_function(s).1 => (), + (Token::Reserved(s), ..) if is_reserved_keyword_or_symbol(s).2 => (), (Token::Reserved(s), pos) => { return Err(PERR::Reserved(s.to_string()).into_err(*pos)) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 5ed4bb9a..6a15539d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,9 +1,6 @@ //! Main module defining the lexer and parser. -use crate::engine::{ - Precedence, KEYWORD_DEBUG, KEYWORD_EVAL, KEYWORD_FN_PTR, KEYWORD_FN_PTR_CALL, - KEYWORD_FN_PTR_CURRY, KEYWORD_IS_DEF_VAR, KEYWORD_PRINT, KEYWORD_TYPE_OF, -}; +use crate::engine::Precedence; use crate::func::native::OnParseTokenCallback; use crate::{Engine, Identifier, LexError, Position, SmartString, StaticVec, INT, UNSIGNED_INT}; use smallvec::SmallVec; @@ -308,7 +305,9 @@ impl fmt::Display for Token { } } -// Table-driven keyword recognizer generated by GNU gperf. +// Table-driven keyword recognizer generated by GNU gperf on the file `tools/keywords.txt`. +// +// When adding new keywords, make sure to update `tools/keywords.txt` and re-generate this. const MIN_KEYWORD_LEN: usize = 1; const MAX_KEYWORD_LEN: usize = 8; @@ -508,7 +507,9 @@ static KEYWORDS_LIST: [(&str, Token); 153] = [ ("#{", Token::MapStart), ]; -// Table-driven reserved symbol recognizer generated by GNU gperf. +// Table-driven reserved symbol recognizer generated by GNU gperf on the file `tools/reserved.txt`. +// +// When adding new reserved symbols, make sure to update `tools/reserved.txt` and re-generate this. 
const MIN_RESERVED_LEN: usize = 1; const MAX_RESERVED_LEN: usize = 10; @@ -530,120 +531,120 @@ static RESERVED_ASSOC_VALUES: [u8; 256] = [ 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, ]; -static RESERVED_LIST: [(&str, bool); 113] = [ - ("", false), - ("~", true), - ("is", true), - ("...", true), - ("", false), - ("print", true), - ("@", true), - ("private", cfg!(feature = "no_function")), - ("", false), - ("this", true), - ("", false), - ("thread", true), - ("as", cfg!(feature = "no_module")), - ("", false), - ("", false), - ("spawn", true), - ("static", true), - (":=", true), - ("===", true), - ("case", true), - ("super", true), - ("shared", true), - ("package", true), - ("use", true), - ("with", true), - ("curry", true), - ("$", true), - ("type_of", true), - ("nil", true), - ("sync", true), - ("yield", true), - ("import", cfg!(feature = "no_module")), - ("--", true), - ("new", true), - ("exit", true), - ("async", true), - ("export", cfg!(feature = "no_module")), - ("!.", true), - ("", false), - ("call", true), - ("match", true), - ("", false), - ("fn", cfg!(feature = "no_function")), - ("var", true), - ("null", true), - ("await", true), - ("#", true), - ("default", true), - ("!==", true), - ("eval", true), - ("debug", true), - ("?", true), - ("?.", cfg!(feature = "no_object")), - ("", false), - ("protected", true), - ("", false), - ("", false), - ("go", true), - ("", false), - ("goto", true), - ("", false), - ("public", true), - ("<-", true), - ("", false), - ("is_def_fn", cfg!(not(feature = "no_function"))), - ("is_def_var", true), - ("", false), - ("<|", true), - ("::<", true), - ("", false), - ("", false), - ("", false), - ("->", true), - ("", false), - ("", false), - ("", false), - ("module", true), - ("|>", true), - ("", false), - ("void", true), - ("", false), - ("", false), - ("#!", true), - ("", false), - ("", false), 
- ("", false), - ("", false), - ("?[", cfg!(feature = "no_index")), - ("", false), - ("", false), - ("", false), - ("", false), - ("Fn", true), - ("", false), - ("", false), - ("", false), - ("", false), - (":;", true), - ("", false), - ("", false), - ("", false), - ("", false), - ("++", true), - ("", false), - ("", false), - ("", false), - ("", false), - ("*)", true), - ("", false), - ("", false), - ("", false), - ("", false), - ("(*", true), +static RESERVED_LIST: [(&str, bool, bool, bool); 113] = [ + ("", false, false, false), + ("~", true, false, false), + ("is", true, false, false), + ("...", true, false, false), + ("", false, false, false), + ("print", true, true, false), + ("@", true, false, false), + ("private", cfg!(feature = "no_function"), false, false), + ("", false, false, false), + ("this", true, false, false), + ("", false, false, false), + ("thread", true, false, false), + ("as", cfg!(feature = "no_module"), false, false), + ("", false, false, false), + ("", false, false, false), + ("spawn", true, false, false), + ("static", true, false, false), + (":=", true, false, false), + ("===", true, false, false), + ("case", true, false, false), + ("super", true, false, false), + ("shared", true, false, false), + ("package", true, false, false), + ("use", true, false, false), + ("with", true, false, false), + ("curry", true, true, true), + ("$", true, false, false), + ("type_of", true, true, true), + ("nil", true, false, false), + ("sync", true, false, false), + ("yield", true, false, false), + ("import", cfg!(feature = "no_module"), false, false), + ("--", true, false, false), + ("new", true, false, false), + ("exit", true, false, false), + ("async", true, false, false), + ("export", cfg!(feature = "no_module"), false, false), + ("!.", true, false, false), + ("", false, false, false), + ("call", true, true, true), + ("match", true, false, false), + ("", false, false, false), + ("fn", cfg!(feature = "no_function"), false, false), + ("var", true, false, 
false), + ("null", true, false, false), + ("await", true, false, false), + ("#", true, false, false), + ("default", true, false, false), + ("!==", true, false, false), + ("eval", true, true, false), + ("debug", true, true, false), + ("?", true, false, false), + ("?.", cfg!(feature = "no_object"), false, false), + ("", false, false, false), + ("protected", true, false, false), + ("", false, false, false), + ("", false, false, false), + ("go", true, false, false), + ("", false, false, false), + ("goto", true, false, false), + ("", false, false, false), + ("public", true, false, false), + ("<-", true, false, false), + ("", false, false, false), + ("is_def_fn", cfg!(not(feature = "no_function")), true, false), + ("is_def_var", true, true, false), + ("", false, false, false), + ("<|", true, false, false), + ("::<", true, false, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + ("->", true, false, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + ("module", true, false, false), + ("|>", true, false, false), + ("", false, false, false), + ("void", true, false, false), + ("", false, false, false), + ("", false, false, false), + ("#!", true, false, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + ("?[", cfg!(feature = "no_index"), false, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + ("Fn", true, true, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + (":;", true, false, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + ("++", true, false, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + ("*)", true, 
false, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + ("", false, false, false), + ("(*", true, false, false), ]; impl Token { @@ -2250,7 +2251,7 @@ fn parse_identifier_token( return (token, start_pos); } - if is_reserved_keyword_or_symbol(&identifier) { + if is_reserved_keyword_or_symbol(&identifier).0 { return (Token::Reserved(Box::new(identifier)), start_pos); } @@ -2264,30 +2265,6 @@ fn parse_identifier_token( (Token::Identifier(identifier.into()), start_pos) } -/// Can a keyword be called like a function? -/// -/// # Return values -/// -/// The first `bool` indicates whether the keyword can be called normally as a function. -/// -/// The second `bool` indicates whether the keyword can be called in method-call style. -#[inline] -#[must_use] -pub fn is_keyword_function(name: &str) -> (bool, bool) { - match name { - KEYWORD_TYPE_OF | KEYWORD_FN_PTR_CALL | KEYWORD_FN_PTR_CURRY => (true, true), - - KEYWORD_PRINT | KEYWORD_DEBUG | KEYWORD_EVAL | KEYWORD_FN_PTR | KEYWORD_IS_DEF_VAR => { - (true, false) - } - - #[cfg(not(feature = "no_function"))] - crate::engine::KEYWORD_IS_DEF_FN => (true, false), - - _ => (false, false), - } -} - /// _(internals)_ Is a text string a valid identifier? /// Exported under the `internals` feature only. #[must_use] @@ -2313,7 +2290,7 @@ pub fn is_valid_identifier(name: &str) -> bool { #[must_use] pub fn is_valid_function_name(name: &str) -> bool { is_valid_identifier(name) - && !is_reserved_keyword_or_symbol(name) + && !is_reserved_keyword_or_symbol(name).0 && Token::lookup_symbol_from_syntax(name).is_none() } @@ -2350,16 +2327,24 @@ pub const fn is_id_continue(x: char) -> bool { } /// Is a piece of syntax a reserved keyword or reserved symbol? +/// +/// # Return values +/// +/// The first `bool` indicates whether it is a reserved keyword or symbol. +/// +/// The second `bool` indicates whether the keyword can be called normally as a function. 
+/// +/// The third `bool` indicates whether the keyword can be called in method-call style. #[inline] #[must_use] -pub fn is_reserved_keyword_or_symbol(syntax: &str) -> bool { +pub fn is_reserved_keyword_or_symbol(syntax: &str) -> (bool, bool, bool) { let utf8 = syntax.as_bytes(); let len = utf8.len(); let rounds = len.min(3); let mut hash_val = len; if !(MIN_RESERVED_LEN..=MAX_RESERVED_LEN).contains(&len) { - return false; + return (false, false, false); } for x in 0..rounds { @@ -2367,12 +2352,13 @@ pub fn is_reserved_keyword_or_symbol(syntax: &str) -> bool { } if !(MIN_RESERVED_HASH_VALUE..=MAX_RESERVED_HASH_VALUE).contains(&hash_val) { - return false; + return (false, false, false); } match RESERVED_LIST[hash_val] { - (s, t) if s == syntax => t, - _ => false, + ("", ..) => (false, false, false), + (s, true, a, b) if s == syntax => (true, a, b), // flags only apply on an exact match + _ => (false, false, false), } } diff --git a/src/tools/README.md b/src/tools/README.md new file mode 100644 index 00000000..ff8fe844 --- /dev/null +++ b/src/tools/README.md @@ -0,0 +1,7 @@ +Build Tools +=========== + +| File | Description | +| -------------- | ------------------------------------------- | +| `keywords.txt` | Input file for GNU gperf for the tokenizer. | +| `reserved.txt` | Input file for GNU gperf for the tokenizer. | diff --git a/src/tools/keywords.txt b/src/tools/keywords.txt new file mode 100644 index 00000000..33375c57 --- /dev/null +++ b/src/tools/keywords.txt @@ -0,0 +1,102 @@ +// This file holds a list of keywords/symbols for the Rhai language, with mapping to +// an appropriate `Token` variant. +// +// Generate the output table via: +// ```bash +// gperf -t keywords.txt +// ``` +// +// Since GNU gperf does not produce Rust output, the ANSI-C output must be hand-edited and +// manually spliced into `tokenizer.rs`.
+// +// This includes: +// * Rewrite the C hashing program (especially since it uses a `switch` statement with fall-through) +// into equivalent Rust as the function `lookup_symbol_from_syntax`. +// * Update the values for the `???_KEYWORD_???` constants. +// * Copy the `asso_values` array into `KEYWORD_ASSOC_VALUES`. +// * Copy the `wordlist` array into `KEYWORDS_LIST` with the following modifications: +// - Remove the `#line` comments +// - Change the entry wrapping `{ .. }` into tuples `( .. )` +// - Replace all entries `("")` by `("", Token::EOF)` +// - Put feature flags on the appropriate lines, and duplicate each such line, mapping it to `Token::EOF`, +// for the opposite feature flag +// +struct keyword; +%% +"{", Token::LeftBrace +"}", Token::RightBrace +"(", Token::LeftParen +")", Token::RightParen +"[", Token::LeftBracket +"]", Token::RightBracket +"()", Token::Unit +"+", Token::Plus +"-", Token::Minus +"*", Token::Multiply +"/", Token::Divide +";", Token::SemiColon +":", Token::Colon +"::", Token::DoubleColon +"=>", Token::DoubleArrow +"_", Token::Underscore +",", Token::Comma +".", Token::Period +"?.", Token::Elvis +"??", Token::DoubleQuestion +"?[", Token::QuestionBracket +"..", Token::ExclusiveRange +"..=", Token::InclusiveRange +"#{", Token::MapStart +"=", Token::Equals +"true", Token::True +"false", Token::False +"let", Token::Let +"const", Token::Const +"if", Token::If +"else", Token::Else +"switch", Token::Switch +"do", Token::Do +"while", Token::While +"until", Token::Until +"loop", Token::Loop +"for", Token::For +"in", Token::In +"!in", Token::NotIn +"<", Token::LessThan +">", Token::GreaterThan +"<=", Token::LessThanEqualsTo +">=", Token::GreaterThanEqualsTo +"==", Token::EqualsTo +"!=", Token::NotEqualsTo +"!", Token::Bang +"|", Token::Pipe +"||", Token::Or +"&", Token::Ampersand +"&&", Token::And +"continue", Token::Continue +"break", Token::Break +"return", Token::Return +"throw", Token::Throw +"try", Token::Try +"catch", Token::Catch +"+=", 
Token::PlusAssign +"-=", Token::MinusAssign +"*=", Token::MultiplyAssign +"/=", Token::DivideAssign +"<<=", Token::LeftShiftAssign +">>=", Token::RightShiftAssign +"&=", Token::AndAssign +"|=", Token::OrAssign +"^=", Token::XOrAssign +"<<", Token::LeftShift +">>", Token::RightShift +"^", Token::XOr +"%", Token::Modulo +"%=", Token::ModuloAssign +"**", Token::PowerOf +"**=", Token::PowerOfAssign +"fn", Token::Fn +"private", Token::Private +"import", Token::Import +"export", Token::Export +"as", Token::As diff --git a/src/tools/reserved.txt b/src/tools/reserved.txt new file mode 100644 index 00000000..2dbe79cd --- /dev/null +++ b/src/tools/reserved.txt @@ -0,0 +1,93 @@ +// This file holds a list of reserved symbols for the Rhai language. +// +// The mapped attributes are: +// - is this a reserved symbol? (bool) +// - can this keyword be called normally as a function? (bool) +// - can this keyword be called in method-call style? (bool) +// +// Generate the output table via: +// ```bash +// gperf -t reserved.txt +// ``` +// +// Since GNU gperf does not produce Rust output, the ANSI-C output must be hand-edited and +// manually spliced into `tokenizer.rs`. +// +// This includes: +// * Rewrite the C hashing program (especially since it uses a `switch` statement with fall-through) +// into equivalent Rust as the function `is_reserved_keyword_or_symbol`. +// * Update the values for the `???_RESERVED_???` constants. +// * Copy the `asso_values` array into `RESERVED_ASSOC_VALUES`. +// * Copy the `wordlist` array into `RESERVED_LIST` with the following modifications: +// - Remove the `#line` comments +// - Change the entry wrapping `{ .. }` into tuples `( .. 
)` +// - Replace all entries `("")` by `("", false, false, false)` +// - Feature flags can be incorporated directly into the output via the `cfg!` macro +// +struct reserved; +%% +"?.", cfg!(feature = "no_object"), false, false +"?[", cfg!(feature = "no_index"), false, false +"fn", cfg!(feature = "no_function"), false, false +"private", cfg!(feature = "no_function"), false, false +"import", cfg!(feature = "no_module"), false, false +"export", cfg!(feature = "no_module"), false, false +"as", cfg!(feature = "no_module"), false, false +"===", true, false, false +"!==", true, false, false +"->", true, false, false +"<-", true, false, false +"?", true, false, false +":=", true, false, false +":;", true, false, false +"~", true, false, false +"!.", true, false, false +"::<", true, false, false +"(*", true, false, false +"*)", true, false, false +"#", true, false, false +"#!", true, false, false +"@", true, false, false +"$", true, false, false +"++", true, false, false +"--", true, false, false +"...", true, false, false +"<|", true, false, false +"|>", true, false, false +"public", true, false, false +"protected", true, false, false +"super", true, false, false +"new", true, false, false +"use", true, false, false +"module", true, false, false +"package", true, false, false +"var", true, false, false +"static", true, false, false +"shared", true, false, false +"with", true, false, false +"is", true, false, false +"goto", true, false, false +"exit", true, false, false +"match", true, false, false +"case", true, false, false +"default", true, false, false +"void", true, false, false +"null", true, false, false +"nil", true, false, false +"spawn", true, false, false +"thread", true, false, false +"go", true, false, false +"sync", true, false, false +"async", true, false, false +"await", true, false, false +"yield", true, false, false +"print", true, true, false +"debug", true, true, false +"type_of", true, true, true +"eval", true, true, false +"Fn", true, true, false 
+"call", true, true, true +"curry", true, true, true +"this", true, false, false +"is_def_var", true, true, false +"is_def_fn", cfg!(not(feature = "no_function")), true, false diff --git a/src/types/fn_ptr.rs b/src/types/fn_ptr.rs index b73246a7..864aea58 100644 --- a/src/types/fn_ptr.rs +++ b/src/types/fn_ptr.rs @@ -538,7 +538,7 @@ impl TryFrom for FnPtr { #[cfg(not(feature = "no_function"))] fn_def: None, }) - } else if is_reserved_keyword_or_symbol(&value) + } else if is_reserved_keyword_or_symbol(&value).0 || Token::lookup_symbol_from_syntax(&value).is_some() { Err(