diff --git a/.gitignore b/.gitignore index 10e86a86..63a3239a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ Rhai.toml **/*.bat doc/rhai-sync.json doc/rhai.json +tools/ .idea/ .idea .idea/* diff --git a/CHANGELOG.md b/CHANGELOG.md index 812cfa50..ed9ad185 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ Enhancements * The functions `min` and `max` are added for numbers. * Range cases in `switch` statements now also match floating-point and decimal values. In order to support this, however, small numeric ranges cases are no longer unrolled. * Loading a module via `import` now gives the module access to the current scope, including variables and constants defined inside. +* Some very simple operator calls (e.g. integer add) are short-circuited to avoid the overhead of a function call, resulting in a small speed improvement. Version 1.12.0 diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 2d27a948..943d2f29 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -2,7 +2,7 @@ use crate::engine::{ Precedence, KEYWORD_DEBUG, KEYWORD_EVAL, KEYWORD_FN_PTR, KEYWORD_FN_PTR_CALL, - KEYWORD_FN_PTR_CURRY, KEYWORD_IS_DEF_VAR, KEYWORD_PRINT, KEYWORD_THIS, KEYWORD_TYPE_OF, + KEYWORD_FN_PTR_CURRY, KEYWORD_IS_DEF_VAR, KEYWORD_PRINT, KEYWORD_TYPE_OF, }; use crate::func::native::OnParseTokenCallback; use crate::{Engine, Identifier, LexError, Position, SmartString, StaticVec, INT, UNSIGNED_INT}; @@ -308,6 +308,345 @@ impl fmt::Display for Token { } } +// Table-driven keyword recognizer generated by GNU gperf. 
+
+const MIN_KEYWORD_LEN: usize = 1; // shortest entry of `KEYWORDS_LIST`, in bytes
+const MAX_KEYWORD_LEN: usize = 8; // longest entry of `KEYWORDS_LIST`, in bytes ("continue")
+const MIN_KEYWORD_HASH_VALUE: usize = 1; // lowest occupied slot (">")
+const MAX_KEYWORD_HASH_VALUE: usize = 152; // highest occupied slot ("#{")
+
+const KEYWORD_ASSOC_VALUES: [u8; 257] = [
+    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
+    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 115, 153, 100, 153, 110,
+    105, 40, 80, 2, 20, 25, 125, 95, 15, 40, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 55,
+    35, 10, 5, 0, 30, 110, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
+    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 120, 105, 100, 85, 90, 153, 125, 5,
+    0, 125, 35, 10, 100, 153, 20, 0, 153, 10, 0, 45, 55, 0, 153, 50, 55, 5, 0, 153, 0, 0, 35, 153,
+    45, 50, 30, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
+    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
+    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
+    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
+    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
+    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
+    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
+    153,
+]; // 153 exceeds MAX_KEYWORD_HASH_VALUE, so a byte mapped to it pushes the hash out of range
+const KEYWORDS_LIST: [(&str, Token); 153] = [
+    ("", Token::EOF), // slots paired with `Token::EOF` are unused
+    (">", Token::GreaterThan),
+    (">=", Token::GreaterThanEqualsTo),
+    (")", Token::RightParen),
+    ("", Token::EOF),
+    ("const", Token::Const),
+    ("=", Token::Equals),
+    ("==", Token::EqualsTo),
+    ("continue", Token::Continue),
+    ("", Token::EOF),
+    ("catch", Token::Catch),
+    ("<", Token::LessThan),
+    ("<=", Token::LessThanEqualsTo),
+    ("for", Token::For),
+    ("loop", Token::Loop),
+    ("", Token::EOF),
+    (".", Token::Period),
+    ("<<", Token::LeftShift),
+    ("<<=", Token::LeftShiftAssign),
+    ("", Token::EOF),
+    ("false", Token::False),
+    ("*", Token::Multiply),
+    ("*=", Token::MultiplyAssign),
+    ("let", Token::Let),
+    ("", Token::EOF),
+    ("while", Token::While),
+    ("+", Token::Plus),
+    ("+=", Token::PlusAssign),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("throw", Token::Throw),
+    ("}", Token::RightBrace),
+    (">>", Token::RightShift),
+    (">>=", Token::RightShiftAssign),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    (";", Token::SemiColon),
+    ("=>", Token::DoubleArrow),
+    ("", Token::EOF),
+    ("else", Token::Else),
+    ("", Token::EOF),
+    ("/", Token::Divide),
+    ("/=", Token::DivideAssign),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("{", Token::LeftBrace),
+    ("**", Token::PowerOf),
+    ("**=", Token::PowerOfAssign),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("|", Token::Pipe),
+    ("|=", Token::OrAssign),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    (":", Token::Colon),
+    ("..", Token::ExclusiveRange),
+    ("..=", Token::InclusiveRange),
+    ("", Token::EOF),
+    ("until", Token::Until),
+    ("switch", Token::Switch),
+    #[cfg(not(feature = "no_function"))]
+    ("private", Token::Private),
+    #[cfg(feature = "no_function")]
+    ("", Token::EOF),
+    ("try", Token::Try),
+    ("true", Token::True),
+    ("break", Token::Break),
+    ("return", Token::Return),
+    #[cfg(not(feature = "no_function"))]
+    ("fn", Token::Fn),
+    #[cfg(feature = "no_function")]
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    #[cfg(not(feature = "no_module"))]
+    ("import", Token::Import),
+    #[cfg(feature = "no_module")]
+    ("", Token::EOF),
+    #[cfg(not(feature = "no_object"))]
+    ("?.", Token::Elvis),
+    #[cfg(feature = "no_object")]
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    #[cfg(not(feature = "no_module"))]
+    ("export", Token::Export),
+    #[cfg(feature = "no_module")]
+    ("", Token::EOF),
+    ("in", Token::In),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("(", Token::LeftParen),
+    ("||", Token::Or),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("^", Token::XOr),
+    ("^=", Token::XOrAssign),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("_", Token::Underscore),
+    ("::", Token::DoubleColon),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("-", Token::Minus),
+    ("-=", Token::MinusAssign),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("]", Token::RightBracket),
+    ("()", Token::Unit),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("&", Token::Ampersand),
+    ("&=", Token::AndAssign),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("%", Token::Modulo),
+    ("%=", Token::ModuloAssign),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("!", Token::Bang),
+    ("!=", Token::NotEqualsTo),
+    ("!in", Token::NotIn),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("[", Token::LeftBracket),
+    ("if", Token::If),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    (",", Token::Comma), // slot 126 = 1 (length) + 125 (assoc value of b','); text must be exactly ","
+    ("do", Token::Do),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    #[cfg(not(feature = "no_module"))]
+    ("as", Token::As),
+    #[cfg(feature = "no_module")]
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    #[cfg(not(feature = "no_index"))]
+    ("?[", Token::QuestionBracket),
+    #[cfg(feature = "no_index")]
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("??", Token::DoubleQuestion),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("&&", Token::And),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("", Token::EOF),
+    ("#{", Token::MapStart),
+];
+
+// Table-driven reserved symbol recognizer generated by GNU gperf.
+
+const MIN_RESERVED_LEN: usize = 1; // shortest entry of `RESERVED_LIST`, in bytes ("~")
+const MAX_RESERVED_LEN: usize = 10; // longest entry of `RESERVED_LIST`, in bytes ("is_def_var")
+const MIN_RESERVED_HASH_VALUE: usize = 1; // lowest occupied slot ("~")
+const MAX_RESERVED_HASH_VALUE: usize = 112; // highest occupied slot ("(*")
+
+const RESERVED_ASSOC_VALUES: [u8; 256] = [
+    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
+    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 35, 113, 45, 25, 113,
+    113, 113, 60, 55, 50, 50, 113, 15, 0, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
+    10, 85, 45, 5, 55, 50, 5, 113, 113, 113, 113, 113, 85, 113, 113, 113, 113, 113, 113, 113, 113,
+    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 35, 113, 113, 113, 55, 113, 10, 40,
+    5, 0, 5, 35, 10, 5, 0, 113, 113, 20, 25, 5, 45, 0, 113, 0, 0, 0, 15, 30, 20, 25, 20, 113, 113,
+    20, 113, 0, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
+    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
+    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
+    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
+    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
+    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
+    113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
+]; // 113 exceeds MAX_RESERVED_HASH_VALUE, so a byte mapped to it pushes the hash out of range
+
+const RESERVED_LIST: [(&str, bool); 113] = [
+    ("", false), // empty text = unused slot; the flag is what a matching lookup returns
+    ("~", true),
+    ("is", true),
+    ("...", true),
+    ("", false),
+    ("print", true),
+    ("@", true),
+    ("private", cfg!(feature = "no_function")), // reserved only when `no_function` is enabled
+    ("", false),
+    ("this", true),
+    ("", false),
+    ("thread", true),
+    ("as", cfg!(feature = "no_module")), // reserved only when `no_module` is enabled
+    ("", false),
+    ("", false),
+    ("spawn", true),
+    ("static", true),
+    (":=", true),
+    ("===", true),
+    ("case", true),
+    ("super", true),
+    ("shared", true),
+    ("package", true),
+    ("use", true),
+    ("with", true),
+    ("curry", true),
+    ("$", true),
+    ("type_of", true),
+    ("nil", true),
+    ("sync", true),
+    ("yield", true),
+    ("import", cfg!(feature = "no_module")),
+    ("--", true),
+    ("new", true),
+    ("exit", true),
+    ("async", true),
+    ("export", cfg!(feature = "no_module")),
+    ("!.", true),
+    ("", false),
+    ("call", true),
+    ("match", true),
+    ("", false),
+    ("fn", cfg!(feature = "no_function")),
+    ("var", true),
+    ("null", true),
+    ("await", true),
+    ("#", true),
+    ("default", true),
+    ("!==", true),
+    ("eval", true),
+    ("debug", true),
+    ("?", true),
+    ("?.", cfg!(feature = "no_object")), // reserved only when `no_object` is enabled
+    ("", false),
+    ("protected", true),
+    ("", false),
+    ("", false),
+    ("go", true),
+    ("", false),
+    ("goto", true),
+    ("", false),
+    ("public", true),
+    ("<-", true),
+    ("", false),
+    ("is_def_fn", cfg!(not(feature = "no_function"))), // reserved unless `no_function` is enabled
+    ("is_def_var", true),
+    ("", false),
+    ("<|", true),
+    ("::<", true),
+    ("", false),
+    ("", false),
+    ("", false),
+    ("->", true),
+    ("", false),
+    ("", false),
+    ("", false),
+    ("module", true),
+    ("|>", true),
+    ("", false),
+    ("void", true),
+    ("", false),
+    ("", false),
+    ("#!", true),
+    ("", false),
+    ("", false),
+    ("", false),
+    ("", false),
+    ("?[", cfg!(feature = "no_index")), // reserved only when `no_index` is enabled
+    ("", false),
+    ("", false),
+    ("", false),
+    ("", false),
+    ("Fn", true),
+    ("", false),
+    ("", false),
+    ("", false),
+    ("", false),
+    (":;", true),
+    ("", false),
+    ("", false),
+    ("", false),
+    ("", false),
+    ("++", true),
+    ("", false),
+    ("", false),
+    ("", false),
+    ("", false),
+    ("*)", true),
+    ("", false),
+    ("", false),
+    ("", false),
+    ("", false),
+    ("(*", true),
+];
+
 impl Token {
     /// Is the token a literal symbol?
     #[must_use]
@@ -529,101 +868,32 @@ impl Token {
     }
 
     /// Reverse lookup a symbol token from a piece of syntax.
+ #[inline] #[must_use] pub fn lookup_symbol_from_syntax(syntax: &str) -> Option { - #[allow(clippy::enum_glob_use)] - use Token::*; + let utf8 = syntax.as_bytes(); + let len = utf8.len(); + let mut hash_val = len; - Some(match syntax { - "{" => LeftBrace, - "}" => RightBrace, - "(" => LeftParen, - ")" => RightParen, - "[" => LeftBracket, - "]" => RightBracket, - "()" => Unit, - "+" => Plus, - "-" => Minus, - "*" => Multiply, - "/" => Divide, - ";" => SemiColon, - ":" => Colon, - "::" => DoubleColon, - "=>" => DoubleArrow, - "_" => Underscore, - "," => Comma, - "." => Period, - #[cfg(not(feature = "no_object"))] - "?." => Elvis, - "??" => DoubleQuestion, - #[cfg(not(feature = "no_index"))] - "?[" => QuestionBracket, - ".." => ExclusiveRange, - "..=" => InclusiveRange, - "#{" => MapStart, - "=" => Equals, - "true" => True, - "false" => False, - "let" => Let, - "const" => Const, - "if" => If, - "else" => Else, - "switch" => Switch, - "do" => Do, - "while" => While, - "until" => Until, - "loop" => Loop, - "for" => For, - "in" => In, - "!in" => NotIn, - "<" => LessThan, - ">" => GreaterThan, - "!" 
=> Bang, - "<=" => LessThanEqualsTo, - ">=" => GreaterThanEqualsTo, - "==" => EqualsTo, - "!=" => NotEqualsTo, - "|" => Pipe, - "||" => Or, - "&" => Ampersand, - "&&" => And, - "continue" => Continue, - "break" => Break, - "return" => Return, - "throw" => Throw, - "try" => Try, - "catch" => Catch, - "+=" => PlusAssign, - "-=" => MinusAssign, - "*=" => MultiplyAssign, - "/=" => DivideAssign, - "<<=" => LeftShiftAssign, - ">>=" => RightShiftAssign, - "&=" => AndAssign, - "|=" => OrAssign, - "^=" => XOrAssign, - "<<" => LeftShift, - ">>" => RightShift, - "^" => XOr, - "%" => Modulo, - "%=" => ModuloAssign, - "**" => PowerOf, - "**=" => PowerOfAssign, + if !(MIN_KEYWORD_LEN..=MAX_KEYWORD_LEN).contains(&len) { + return None; + } - #[cfg(not(feature = "no_function"))] - "fn" => Fn, - #[cfg(not(feature = "no_function"))] - "private" => Private, + match len { + 1 => (), + _ => hash_val += KEYWORD_ASSOC_VALUES[(utf8[1] as usize) + 1] as usize, + } + hash_val += KEYWORD_ASSOC_VALUES[utf8[0] as usize] as usize; - #[cfg(not(feature = "no_module"))] - "import" => Import, - #[cfg(not(feature = "no_module"))] - "export" => Export, - #[cfg(not(feature = "no_module"))] - "as" => As, + if !(MIN_KEYWORD_HASH_VALUE..=MAX_KEYWORD_HASH_VALUE).contains(&hash_val) { + return None; + } - _ => return None, - }) + match KEYWORDS_LIST[hash_val] { + (_, Token::EOF) => None, + (s, ref t) if s == syntax => Some(t.clone()), + _ => None, + } } /// If another operator is after these, it's probably a unary operator @@ -1269,13 +1539,110 @@ fn get_next_token_inner( pos.advance(); let start_pos = *pos; + let cc = stream.peek_next().unwrap_or('\0'); - match (c, stream.peek_next().unwrap_or('\0')) { + // Identifiers and strings that can have non-ASCII characters + match (c, cc) { + // letter or underscore ... + #[cfg(not(feature = "unicode-xid-ident"))] + ('a'..='z' | '_' | 'A'..='Z', ..) 
=> { + return Some(parse_identifier_token(stream, state, pos, start_pos, c)); + } + #[cfg(feature = "unicode-xid-ident")] + _ if unicode_xid::UnicodeXID::is_xid_start(c) || c == '_' => { + return Some(parse_identifier_token(stream, state, pos, start_pos, c)); + } + // " - string literal + ('"', ..) => { + return parse_string_literal(stream, state, pos, c, false, true, false) + .map_or_else( + |(err, err_pos)| Some((Token::LexError(err.into()), err_pos)), + |(result, ..)| Some((Token::StringConstant(result.into()), start_pos)), + ); + } + // ` - string literal + ('`', ..) => { + // Start from the next line if at the end of line + match stream.peek_next() { + // `\r - start from next line + Some('\r') => { + eat_next_and_advance(stream, pos); + // `\r\n + if stream.peek_next() == Some('\n') { + eat_next_and_advance(stream, pos); + } + pos.new_line(); + } + // `\n - start from next line + Some('\n') => { + eat_next_and_advance(stream, pos); + pos.new_line(); + } + _ => (), + } + + return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else( + |(err, err_pos)| Some((Token::LexError(err.into()), err_pos)), + |(result, interpolated, ..)| { + if interpolated { + Some((Token::InterpolatedString(result.into()), start_pos)) + } else { + Some((Token::StringConstant(result.into()), start_pos)) + } + }, + ); + } + + // ' - character literal + ('\'', '\'') => { + return Some(( + Token::LexError(LERR::MalformedChar(String::new()).into()), + start_pos, + )) + } + ('\'', ..) 
=> { + return Some( + parse_string_literal(stream, state, pos, c, false, false, false).map_or_else( + |(err, err_pos)| (Token::LexError(err.into()), err_pos), + |(result, ..)| { + let mut chars = result.chars(); + let first = chars.next().unwrap(); + + if chars.next().is_some() { + ( + Token::LexError(LERR::MalformedChar(result.to_string()).into()), + start_pos, + ) + } else { + (Token::CharConstant(first), start_pos) + } + }, + ), + ) + } + + _ => (), + } + + // Non-ASCII inputs are not valid here + if !c.is_ascii() || !cc.is_ascii() { + return Some(( + Token::LexError(LERR::UnexpectedInput(c.to_string()).into()), + start_pos, + )); + } + + // Match ASCII byte values (faster?) + let mut buf = [0_u8; 2]; + c.encode_utf8(&mut buf[0..1]); + cc.encode_utf8(&mut buf[1..]); + + match (buf[0], buf[1]) { // \n - ('\n', ..) => pos.new_line(), + (b'\n', ..) => pos.new_line(), // digit ... - ('0'..='9', ..) => { + (b'0'..=b'9', ..) => { let mut result = SmartString::new_const(); let mut radix_base: Option = None; let mut valid: fn(char) -> bool = is_numeric_digit; @@ -1434,117 +1801,38 @@ fn get_next_token_inner( return Some((token, num_pos)); } - // letter or underscore ... - #[cfg(not(feature = "unicode-xid-ident"))] - ('a'..='z' | '_' | 'A'..='Z', ..) => { - return Some(parse_identifier_token(stream, state, pos, start_pos, c)); - } - #[cfg(feature = "unicode-xid-ident")] - (ch, ..) if unicode_xid::UnicodeXID::is_xid_start(ch) || ch == '_' => { - return Some(parse_identifier_token(stream, state, pos, start_pos, c)); - } - - // " - string literal - ('"', ..) => { - return parse_string_literal(stream, state, pos, c, false, true, false) - .map_or_else( - |(err, err_pos)| Some((Token::LexError(err.into()), err_pos)), - |(result, ..)| Some((Token::StringConstant(result.into()), start_pos)), - ); - } - // ` - string literal - ('`', ..) 
=> { - // Start from the next line if at the end of line - match stream.peek_next() { - // `\r - start from next line - Some('\r') => { - eat_next_and_advance(stream, pos); - // `\r\n - if stream.peek_next() == Some('\n') { - eat_next_and_advance(stream, pos); - } - pos.new_line(); - } - // `\n - start from next line - Some('\n') => { - eat_next_and_advance(stream, pos); - pos.new_line(); - } - _ => (), - } - - return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else( - |(err, err_pos)| Some((Token::LexError(err.into()), err_pos)), - |(result, interpolated, ..)| { - if interpolated { - Some((Token::InterpolatedString(result.into()), start_pos)) - } else { - Some((Token::StringConstant(result.into()), start_pos)) - } - }, - ); - } - - // ' - character literal - ('\'', '\'') => { - return Some(( - Token::LexError(LERR::MalformedChar(String::new()).into()), - start_pos, - )) - } - ('\'', ..) => { - return Some( - parse_string_literal(stream, state, pos, c, false, false, false).map_or_else( - |(err, err_pos)| (Token::LexError(err.into()), err_pos), - |(result, ..)| { - let mut chars = result.chars(); - let first = chars.next().unwrap(); - - if chars.next().is_some() { - ( - Token::LexError(LERR::MalformedChar(result.to_string()).into()), - start_pos, - ) - } else { - (Token::CharConstant(first), start_pos) - } - }, - ), - ) - } - // Braces - ('{', ..) => return Some((Token::LeftBrace, start_pos)), - ('}', ..) => return Some((Token::RightBrace, start_pos)), + (b'{', ..) => return Some((Token::LeftBrace, start_pos)), + (b'}', ..) => return Some((Token::RightBrace, start_pos)), // Unit - ('(', ')') => { + (b'(', b')') => { eat_next_and_advance(stream, pos); return Some((Token::Unit, start_pos)); } // Parentheses - ('(', '*') => { + (b'(', b'*') => { eat_next_and_advance(stream, pos); return Some((Token::Reserved(Box::new("(*".into())), start_pos)); } - ('(', ..) => return Some((Token::LeftParen, start_pos)), - (')', ..) 
=> return Some((Token::RightParen, start_pos)), + (b'(', ..) => return Some((Token::LeftParen, start_pos)), + (b')', ..) => return Some((Token::RightParen, start_pos)), // Indexing - ('[', ..) => return Some((Token::LeftBracket, start_pos)), - (']', ..) => return Some((Token::RightBracket, start_pos)), + (b'[', ..) => return Some((Token::LeftBracket, start_pos)), + (b']', ..) => return Some((Token::RightBracket, start_pos)), // Map literal #[cfg(not(feature = "no_object"))] - ('#', '{') => { + (b'#', b'{') => { eat_next_and_advance(stream, pos); return Some((Token::MapStart, start_pos)); } // Shebang - ('#', '!') => return Some((Token::Reserved(Box::new("#!".into())), start_pos)), + (b'#', b'!') => return Some((Token::Reserved(Box::new("#!".into())), start_pos)), - ('#', ' ') => { + (b'#', b' ') => { eat_next_and_advance(stream, pos); let token = if stream.peek_next() == Some('{') { eat_next_and_advance(stream, pos); @@ -1555,50 +1843,50 @@ fn get_next_token_inner( return Some((Token::Reserved(Box::new(token.into())), start_pos)); } - ('#', ..) => return Some((Token::Reserved(Box::new("#".into())), start_pos)), + (b'#', ..) => return Some((Token::Reserved(Box::new("#".into())), start_pos)), // Operators - ('+', '=') => { + (b'+', b'=') => { eat_next_and_advance(stream, pos); return Some((Token::PlusAssign, start_pos)); } - ('+', '+') => { + (b'+', b'+') => { eat_next_and_advance(stream, pos); return Some((Token::Reserved(Box::new("++".into())), start_pos)); } - ('+', ..) if !state.next_token_cannot_be_unary => { + (b'+', ..) if !state.next_token_cannot_be_unary => { return Some((Token::UnaryPlus, start_pos)) } - ('+', ..) => return Some((Token::Plus, start_pos)), + (b'+', ..) 
=> return Some((Token::Plus, start_pos)), - ('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos), - ('-', '0'..='9') => return Some((Token::Minus, start_pos)), - ('-', '=') => { + (b'-', b'0'..=b'9') if !state.next_token_cannot_be_unary => negated = Some(start_pos), + (b'-', b'0'..=b'9') => return Some((Token::Minus, start_pos)), + (b'-', b'=') => { eat_next_and_advance(stream, pos); return Some((Token::MinusAssign, start_pos)); } - ('-', '>') => { + (b'-', b'>') => { eat_next_and_advance(stream, pos); return Some((Token::Reserved(Box::new("->".into())), start_pos)); } - ('-', '-') => { + (b'-', b'-') => { eat_next_and_advance(stream, pos); return Some((Token::Reserved(Box::new("--".into())), start_pos)); } - ('-', ..) if !state.next_token_cannot_be_unary => { + (b'-', ..) if !state.next_token_cannot_be_unary => { return Some((Token::UnaryMinus, start_pos)) } - ('-', ..) => return Some((Token::Minus, start_pos)), + (b'-', ..) => return Some((Token::Minus, start_pos)), - ('*', ')') => { + (b'*', b')') => { eat_next_and_advance(stream, pos); return Some((Token::Reserved(Box::new("*)".into())), start_pos)); } - ('*', '=') => { + (b'*', b'=') => { eat_next_and_advance(stream, pos); return Some((Token::MultiplyAssign, start_pos)); } - ('*', '*') => { + (b'*', b'*') => { eat_next_and_advance(stream, pos); return Some(( @@ -1611,10 +1899,10 @@ fn get_next_token_inner( start_pos, )); } - ('*', ..) => return Some((Token::Multiply, start_pos)), + (b'*', ..) 
=> return Some((Token::Multiply, start_pos)), // Comments - ('/', '/') => { + (b'/', b'/') => { eat_next_and_advance(stream, pos); let mut comment: Option = match stream.peek_next() { @@ -1671,7 +1959,7 @@ fn get_next_token_inner( } } } - ('/', '*') => { + (b'/', b'*') => { state.comment_level = 1; eat_next_and_advance(stream, pos); @@ -1699,16 +1987,16 @@ fn get_next_token_inner( } } - ('/', '=') => { + (b'/', b'=') => { eat_next_and_advance(stream, pos); return Some((Token::DivideAssign, start_pos)); } - ('/', ..) => return Some((Token::Divide, start_pos)), + (b'/', ..) => return Some((Token::Divide, start_pos)), - (';', ..) => return Some((Token::SemiColon, start_pos)), - (',', ..) => return Some((Token::Comma, start_pos)), + (b';', ..) => return Some((Token::SemiColon, start_pos)), + (b',', ..) => return Some((Token::Comma, start_pos)), - ('.', '.') => { + (b'.', b'.') => { eat_next_and_advance(stream, pos); return Some(( match stream.peek_next() { @@ -1725,9 +2013,9 @@ fn get_next_token_inner( start_pos, )); } - ('.', ..) => return Some((Token::Period, start_pos)), + (b'.', ..) => return Some((Token::Period, start_pos)), - ('=', '=') => { + (b'=', b'=') => { eat_next_and_advance(stream, pos); if stream.peek_next() == Some('=') { @@ -1737,14 +2025,14 @@ fn get_next_token_inner( return Some((Token::EqualsTo, start_pos)); } - ('=', '>') => { + (b'=', b'>') => { eat_next_and_advance(stream, pos); return Some((Token::DoubleArrow, start_pos)); } - ('=', ..) => return Some((Token::Equals, start_pos)), + (b'=', ..) 
=> return Some((Token::Equals, start_pos)), #[cfg(not(feature = "no_module"))] - (':', ':') => { + (b':', b':') => { eat_next_and_advance(stream, pos); if stream.peek_next() == Some('<') { @@ -1754,25 +2042,25 @@ fn get_next_token_inner( return Some((Token::DoubleColon, start_pos)); } - (':', '=') => { + (b':', b'=') => { eat_next_and_advance(stream, pos); return Some((Token::Reserved(Box::new(":=".into())), start_pos)); } - (':', ';') => { + (b':', b';') => { eat_next_and_advance(stream, pos); return Some((Token::Reserved(Box::new(":;".into())), start_pos)); } - (':', ..) => return Some((Token::Colon, start_pos)), + (b':', ..) => return Some((Token::Colon, start_pos)), - ('<', '=') => { + (b'<', b'=') => { eat_next_and_advance(stream, pos); return Some((Token::LessThanEqualsTo, start_pos)); } - ('<', '-') => { + (b'<', b'-') => { eat_next_and_advance(stream, pos); return Some((Token::Reserved(Box::new("<-".into())), start_pos)); } - ('<', '<') => { + (b'<', b'<') => { eat_next_and_advance(stream, pos); return Some(( @@ -1785,17 +2073,17 @@ fn get_next_token_inner( start_pos, )); } - ('<', '|') => { + (b'<', b'|') => { eat_next_and_advance(stream, pos); return Some((Token::Reserved(Box::new("<|".into())), start_pos)); } - ('<', ..) => return Some((Token::LessThan, start_pos)), + (b'<', ..) => return Some((Token::LessThan, start_pos)), - ('>', '=') => { + (b'>', b'=') => { eat_next_and_advance(stream, pos); return Some((Token::GreaterThanEqualsTo, start_pos)); } - ('>', '>') => { + (b'>', b'>') => { eat_next_and_advance(stream, pos); return Some(( @@ -1808,9 +2096,9 @@ fn get_next_token_inner( start_pos, )); } - ('>', ..) => return Some((Token::GreaterThan, start_pos)), + (b'>', ..) 
=> return Some((Token::GreaterThan, start_pos)), - ('!', 'i') => { + (b'!', b'i') => { stream.get_next().unwrap(); if stream.peek_next() == Some('n') { stream.get_next().unwrap(); @@ -1831,7 +2119,7 @@ fn get_next_token_inner( stream.unget('i'); return Some((Token::Bang, start_pos)); } - ('!', '=') => { + (b'!', b'=') => { eat_next_and_advance(stream, pos); if stream.peek_next() == Some('=') { @@ -1841,55 +2129,55 @@ fn get_next_token_inner( return Some((Token::NotEqualsTo, start_pos)); } - ('!', '.') => { + (b'!', b'.') => { eat_next_and_advance(stream, pos); return Some((Token::Reserved(Box::new("!.".into())), start_pos)); } - ('!', ..) => return Some((Token::Bang, start_pos)), + (b'!', ..) => return Some((Token::Bang, start_pos)), - ('|', '|') => { + (b'|', b'|') => { eat_next_and_advance(stream, pos); return Some((Token::Or, start_pos)); } - ('|', '=') => { + (b'|', b'=') => { eat_next_and_advance(stream, pos); return Some((Token::OrAssign, start_pos)); } - ('|', '>') => { + (b'|', b'>') => { eat_next_and_advance(stream, pos); return Some((Token::Reserved(Box::new("|>".into())), start_pos)); } - ('|', ..) => return Some((Token::Pipe, start_pos)), + (b'|', ..) => return Some((Token::Pipe, start_pos)), - ('&', '&') => { + (b'&', b'&') => { eat_next_and_advance(stream, pos); return Some((Token::And, start_pos)); } - ('&', '=') => { + (b'&', b'=') => { eat_next_and_advance(stream, pos); return Some((Token::AndAssign, start_pos)); } - ('&', ..) => return Some((Token::Ampersand, start_pos)), + (b'&', ..) => return Some((Token::Ampersand, start_pos)), - ('^', '=') => { + (b'^', b'=') => { eat_next_and_advance(stream, pos); return Some((Token::XOrAssign, start_pos)); } - ('^', ..) => return Some((Token::XOr, start_pos)), + (b'^', ..) => return Some((Token::XOr, start_pos)), - ('~', ..) => return Some((Token::Reserved(Box::new("~".into())), start_pos)), + (b'~', ..) 
=> return Some((Token::Reserved(Box::new("~".into())), start_pos)), - ('%', '=') => { + (b'%', b'=') => { eat_next_and_advance(stream, pos); return Some((Token::ModuloAssign, start_pos)); } - ('%', ..) => return Some((Token::Modulo, start_pos)), + (b'%', ..) => return Some((Token::Modulo, start_pos)), - ('@', ..) => return Some((Token::Reserved(Box::new("@".into())), start_pos)), + (b'@', ..) => return Some((Token::Reserved(Box::new("@".into())), start_pos)), - ('$', ..) => return Some((Token::Reserved(Box::new("$".into())), start_pos)), + (b'$', ..) => return Some((Token::Reserved(Box::new("$".into())), start_pos)), - ('?', '.') => { + (b'?', b'.') => { eat_next_and_advance(stream, pos); return Some(( #[cfg(not(feature = "no_object"))] @@ -1899,11 +2187,11 @@ fn get_next_token_inner( start_pos, )); } - ('?', '?') => { + (b'?', b'?') => { eat_next_and_advance(stream, pos); return Some((Token::DoubleQuestion, start_pos)); } - ('?', '[') => { + (b'?', b'[') => { eat_next_and_advance(stream, pos); return Some(( #[cfg(not(feature = "no_index"))] @@ -1913,13 +2201,13 @@ fn get_next_token_inner( start_pos, )); } - ('?', ..) => return Some((Token::Reserved(Box::new("?".into())), start_pos)), + (b'?', ..) => return Some((Token::Reserved(Box::new("?".into())), start_pos)), - (ch, ..) if ch.is_whitespace() => (), + _ if c.is_whitespace() => (), - (ch, ..) => { + _ => { return Some(( - Token::LexError(LERR::UnexpectedInput(ch.to_string()).into()), + Token::LexError(LERR::UnexpectedInput(c.to_string()).into()), start_pos, )) } @@ -2065,32 +2353,33 @@ pub const fn is_id_continue(x: char) -> bool { /// Is a piece of syntax a reserved keyword or reserved symbol? #[must_use] pub fn is_reserved_keyword_or_symbol(syntax: &str) -> bool { - match syntax { - #[cfg(feature = "no_object")] - "?." 
=> true, - #[cfg(feature = "no_index")] - "?[" => true, - #[cfg(feature = "no_function")] - "fn" | "private" => true, - #[cfg(feature = "no_module")] - "import" | "export" | "as" => true, + let utf8 = syntax.as_bytes(); + let len = utf8.len(); + let mut hash_val = len; - // List of reserved operators - "===" | "!==" | "->" | "<-" | "?" | ":=" | ":;" | "~" | "!." | "::<" | "(*" | "*)" - | "#" | "#!" | "@" | "$" | "++" | "--" | "..." | "<|" | "|>" => true, + if !(MIN_RESERVED_LEN..=MAX_RESERVED_LEN).contains(&len) { + return false; + } - // List of reserved keywords - "public" | "protected" | "super" | "new" | "use" | "module" | "package" | "var" - | "static" | "shared" | "with" | "is" | "goto" | "exit" | "match" | "case" | "default" - | "void" | "null" | "nil" | "spawn" | "thread" | "go" | "sync" | "async" | "await" - | "yield" => true, + match len { + 1 => hash_val += RESERVED_ASSOC_VALUES[utf8[0] as usize] as usize, + 2 => { + hash_val += RESERVED_ASSOC_VALUES[utf8[1] as usize] as usize; + hash_val += RESERVED_ASSOC_VALUES[utf8[0] as usize] as usize; + } + _ => { + hash_val += RESERVED_ASSOC_VALUES[utf8[2] as usize] as usize; + hash_val += RESERVED_ASSOC_VALUES[utf8[1] as usize] as usize; + hash_val += RESERVED_ASSOC_VALUES[utf8[0] as usize] as usize; + } + } - KEYWORD_PRINT | KEYWORD_DEBUG | KEYWORD_TYPE_OF | KEYWORD_EVAL | KEYWORD_FN_PTR - | KEYWORD_FN_PTR_CALL | KEYWORD_FN_PTR_CURRY | KEYWORD_THIS | KEYWORD_IS_DEF_VAR => true, - - #[cfg(not(feature = "no_function"))] - crate::engine::KEYWORD_IS_DEF_FN => true, + if !(MIN_RESERVED_HASH_VALUE..=MAX_RESERVED_HASH_VALUE).contains(&hash_val) { + return false; + } + match RESERVED_LIST[hash_val] { + (s, t) if s == syntax => t, _ => false, } }