From a35518fe498981c8bcd3462f3cb4034610e7b713 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Wed, 15 Apr 2020 22:21:23 +0800 Subject: [PATCH] Split tokenizer into separate file, plus fix no_std feature. --- src/any.rs | 6 +- src/api.rs | 3 +- src/builtin.rs | 118 +++--- src/engine.rs | 3 +- src/error.rs | 2 +- src/fn_call.rs | 2 +- src/fn_register.rs | 2 +- src/lib.rs | 4 +- src/optimize.rs | 3 +- src/parser.rs | 971 +------------------------------------------- src/result.rs | 3 +- src/scope.rs | 10 +- src/stdlib.rs | 2 +- src/token.rs | 982 +++++++++++++++++++++++++++++++++++++++++++++ tests/time.rs | 1 + 15 files changed, 1071 insertions(+), 1041 deletions(-) create mode 100644 src/token.rs diff --git a/src/any.rs b/src/any.rs index a056179b..242969d7 100644 --- a/src/any.rs +++ b/src/any.rs @@ -10,9 +10,12 @@ use crate::stdlib::{ any::{type_name, Any, TypeId}, boxed::Box, fmt, - time::Instant, + string::String, }; +#[cfg(not(feature = "no_std"))] +use crate::stdlib::time::Instant; + /// A trait to represent any type. /// /// Currently, `Variant` is not `Send` nor `Sync`, so it can practically be any type. 
@@ -186,6 +189,7 @@ impl Dynamic { Union::Array(_) => "array", Union::Map(_) => "map", + #[cfg(not(feature = "no_std"))] Union::Variant(value) if value.is::() => "timestamp", Union::Variant(value) => (**value).type_name(), } diff --git a/src/api.rs b/src/api.rs index 837c0bc0..01d0a76f 100644 --- a/src/api.rs +++ b/src/api.rs @@ -6,9 +6,10 @@ use crate::error::ParseError; use crate::fn_call::FuncArgs; use crate::fn_register::RegisterFn; use crate::optimize::{optimize_into_ast, OptimizationLevel}; -use crate::parser::{lex, parse, parse_global_expr, Position, AST}; +use crate::parser::{parse, parse_global_expr, AST}; use crate::result::EvalAltResult; use crate::scope::Scope; +use crate::token::{lex, Position}; use crate::stdlib::{ any::{type_name, TypeId}, diff --git a/src/builtin.rs b/src/builtin.rs index 77abf74c..0fac046f 100644 --- a/src/builtin.rs +++ b/src/builtin.rs @@ -4,8 +4,9 @@ use crate::any::{Dynamic, Variant}; use crate::engine::{Engine, FUNC_TO_STRING, KEYWORD_DEBUG, KEYWORD_PRINT}; use crate::fn_register::{RegisterDynamicFn, RegisterFn, RegisterResultFn}; -use crate::parser::{Position, INT}; +use crate::parser::INT; use crate::result::EvalAltResult; +use crate::token::Position; #[cfg(not(feature = "no_index"))] use crate::engine::Array; @@ -27,11 +28,13 @@ use crate::stdlib::{ format, ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Range, Rem, Shl, Shr, Sub}, string::{String, ToString}, - time::Instant, vec::Vec, {i32, i64, u32}, }; +#[cfg(not(feature = "no_std"))] +use crate::stdlib::time::Instant; + #[cfg(feature = "only_i32")] const MAX_INT: INT = i32::MAX; #[cfg(not(feature = "only_i32"))] @@ -1153,80 +1156,83 @@ impl Engine<'_> { } }); - // Register date/time functions - self.register_fn("timestamp", || Instant::now()); + #[cfg(not(feature = "no_std"))] + { + // Register date/time functions + self.register_fn("timestamp", || Instant::now()); - self.register_result_fn("-", |ts1: Instant, ts2: Instant| { - if ts2 > ts1 { - #[cfg(not(feature = 
"no_float"))] - return Ok(-(ts2 - ts1).as_secs_f64()); + self.register_result_fn("-", |ts1: Instant, ts2: Instant| { + if ts2 > ts1 { + #[cfg(not(feature = "no_float"))] + return Ok(-(ts2 - ts1).as_secs_f64()); - #[cfg(feature = "no_float")] - { - let seconds = (ts2 - ts1).as_secs(); - - #[cfg(not(feature = "unchecked"))] + #[cfg(feature = "no_float")] { - if seconds > (MAX_INT as u64) { - return Err(EvalAltResult::ErrorArithmetic( - format!( - "Integer overflow for timestamp duration: {}", - -(seconds as i64) - ), - Position::none(), - )); + let seconds = (ts2 - ts1).as_secs(); + + #[cfg(not(feature = "unchecked"))] + { + if seconds > (MAX_INT as u64) { + return Err(EvalAltResult::ErrorArithmetic( + format!( + "Integer overflow for timestamp duration: {}", + -(seconds as i64) + ), + Position::none(), + )); + } } + return Ok(-(seconds as INT)); + } + } else { + #[cfg(not(feature = "no_float"))] + return Ok((ts1 - ts2).as_secs_f64()); + + #[cfg(feature = "no_float")] + { + let seconds = (ts1 - ts2).as_secs(); + + #[cfg(not(feature = "unchecked"))] + { + if seconds > (MAX_INT as u64) { + return Err(EvalAltResult::ErrorArithmetic( + format!("Integer overflow for timestamp duration: {}", seconds), + Position::none(), + )); + } + } + return Ok(seconds as INT); } - return Ok(-(seconds as INT)); } - } else { + }); + + reg_cmp!(self, "<", lt, Instant); + reg_cmp!(self, "<=", lte, Instant); + reg_cmp!(self, ">", gt, Instant); + reg_cmp!(self, ">=", gte, Instant); + reg_cmp!(self, "==", eq, Instant); + reg_cmp!(self, "!=", ne, Instant); + + self.register_result_fn("elapsed", |timestamp: Instant| { #[cfg(not(feature = "no_float"))] - return Ok((ts1 - ts2).as_secs_f64()); + return Ok(timestamp.elapsed().as_secs_f64()); #[cfg(feature = "no_float")] { - let seconds = (ts1 - ts2).as_secs(); + let seconds = timestamp.elapsed().as_secs(); #[cfg(not(feature = "unchecked"))] { if seconds > (MAX_INT as u64) { return Err(EvalAltResult::ErrorArithmetic( - format!("Integer overflow for 
timestamp duration: {}", seconds), + format!("Integer overflow for timestamp.elapsed(): {}", seconds), Position::none(), )); } } return Ok(seconds as INT); } - } - }); - - reg_cmp!(self, "<", lt, Instant); - reg_cmp!(self, "<=", lte, Instant); - reg_cmp!(self, ">", gt, Instant); - reg_cmp!(self, ">=", gte, Instant); - reg_cmp!(self, "==", eq, Instant); - reg_cmp!(self, "!=", ne, Instant); - - self.register_result_fn("elapsed", |timestamp: Instant| { - #[cfg(not(feature = "no_float"))] - return Ok(timestamp.elapsed().as_secs_f64()); - - #[cfg(feature = "no_float")] - { - let seconds = timestamp.elapsed().as_secs(); - - #[cfg(not(feature = "unchecked"))] - { - if seconds > (MAX_INT as u64) { - return Err(EvalAltResult::ErrorArithmetic( - format!("Integer overflow for timestamp.elapsed(): {}", seconds), - Position::none(), - )); - } - } - return Ok(seconds as INT); - } - }); + }); + } } } diff --git a/src/engine.rs b/src/engine.rs index ea223926..2dec3e5e 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -3,9 +3,10 @@ use crate::any::{Dynamic, Union}; use crate::error::ParseErrorType; use crate::optimize::OptimizationLevel; -use crate::parser::{Expr, FnDef, Position, ReturnType, Stmt, INT}; +use crate::parser::{Expr, FnDef, ReturnType, Stmt, INT}; use crate::result::EvalAltResult; use crate::scope::{EntryRef as ScopeSource, EntryType as ScopeEntryType, Scope}; +use crate::token::Position; use crate::stdlib::{ any::TypeId, diff --git a/src/error.rs b/src/error.rs index 6ad21bbd..077ebd22 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,6 +1,6 @@ //! Module containing error definitions for the parsing process. 
-use crate::parser::Position; +use crate::token::Position; use crate::stdlib::{char, error::Error, fmt, string::String}; diff --git a/src/fn_call.rs b/src/fn_call.rs index bdb2ec4b..29a9c056 100644 --- a/src/fn_call.rs +++ b/src/fn_call.rs @@ -3,7 +3,7 @@ #![allow(non_snake_case)] use crate::any::{Dynamic, Variant}; -use crate::stdlib::{string::String, vec, vec::Vec}; +use crate::stdlib::vec::Vec; /// Trait that represent arguments to a function call. /// Any data type that can be converted into a `Vec` of `Dynamic` values can be used diff --git a/src/fn_register.rs b/src/fn_register.rs index a85dc094..711c9e4b 100644 --- a/src/fn_register.rs +++ b/src/fn_register.rs @@ -4,8 +4,8 @@ use crate::any::{Dynamic, Variant}; use crate::engine::{Engine, FnCallArgs}; -use crate::parser::Position; use crate::result::EvalAltResult; +use crate::token::Position; use crate::stdlib::{any::TypeId, boxed::Box, string::ToString, vec}; diff --git a/src/lib.rs b/src/lib.rs index 66dc90af..de8e80db 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,15 +82,17 @@ mod parser; mod result; mod scope; mod stdlib; +mod token; pub use any::Dynamic; pub use engine::Engine; pub use error::{ParseError, ParseErrorType}; pub use fn_call::FuncArgs; pub use fn_register::{RegisterDynamicFn, RegisterFn, RegisterResultFn}; -pub use parser::{Position, AST, INT}; +pub use parser::{AST, INT}; pub use result::EvalAltResult; pub use scope::Scope; +pub use token::Position; #[cfg(not(feature = "no_function"))] pub use fn_func::Func; diff --git a/src/optimize.rs b/src/optimize.rs index 9852b649..cd4c30b5 100644 --- a/src/optimize.rs +++ b/src/optimize.rs @@ -3,9 +3,10 @@ use crate::engine::{ Engine, FnAny, FnCallArgs, FnSpec, FunctionsLib, KEYWORD_DEBUG, KEYWORD_EVAL, KEYWORD_PRINT, KEYWORD_TYPE_OF, }; -use crate::parser::{map_dynamic_to_expr, Expr, FnDef, Position, ReturnType, Stmt, AST}; +use crate::parser::{map_dynamic_to_expr, Expr, FnDef, ReturnType, Stmt, AST}; use crate::result::EvalAltResult; use 
crate::scope::{Entry as ScopeEntry, EntryType as ScopeEntryType, Scope}; +use crate::token::Position; use crate::stdlib::{ boxed::Box, diff --git a/src/parser.rs b/src/parser.rs index d57574b4..755609af 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -5,23 +5,21 @@ use crate::engine::{Engine, FunctionsLib}; use crate::error::{LexError, ParseError, ParseErrorType}; use crate::optimize::{optimize_into_ast, OptimizationLevel}; use crate::scope::{EntryType as ScopeEntryType, Scope}; +use crate::token::{Position, Token, TokenIterator}; use crate::stdlib::{ borrow::Cow, boxed::Box, char, collections::HashMap, - fmt, fmt::Display, format, iter::Peekable, ops::Add, rc::Rc, - str::Chars, - str::FromStr, string::{String, ToString}, sync::Arc, - usize, vec, + vec, vec::Vec, }; @@ -42,125 +40,8 @@ pub type INT = i32; /// Not available under the `no_float` feature. pub type FLOAT = f64; -type LERR = LexError; type PERR = ParseErrorType; -/// A location (line number + character position) in the input script. -#[derive(Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)] -pub struct Position { - /// Line number - 0 = none, MAX = EOF - line: usize, - /// Character position - 0 = BOL, MAX = EOF - pos: usize, -} - -impl Position { - /// Create a new `Position`. - pub fn new(line: usize, position: usize) -> Self { - assert!(line != 0, "line cannot be zero"); - assert!( - line != usize::MAX || position != usize::MAX, - "invalid position" - ); - - Self { - line, - pos: position, - } - } - - /// Get the line number (1-based), or `None` if no position or EOF. - pub fn line(&self) -> Option { - if self.is_none() || self.is_eof() { - None - } else { - Some(self.line) - } - } - - /// Get the character position (1-based), or `None` if at beginning of a line. - pub fn position(&self) -> Option { - if self.is_none() || self.is_eof() || self.pos == 0 { - None - } else { - Some(self.pos) - } - } - - /// Advance by one character position. 
- pub(crate) fn advance(&mut self) { - self.pos += 1; - } - - /// Go backwards by one character position. - /// - /// # Panics - /// - /// Panics if already at beginning of a line - cannot rewind to a previous line. - /// - pub(crate) fn rewind(&mut self) { - assert!(self.pos > 0, "cannot rewind at position 0"); - self.pos -= 1; - } - - /// Advance to the next line. - pub(crate) fn new_line(&mut self) { - self.line += 1; - self.pos = 0; - } - - /// Create a `Position` representing no position. - pub(crate) fn none() -> Self { - Self { line: 0, pos: 0 } - } - - /// Create a `Position` at EOF. - pub(crate) fn eof() -> Self { - Self { - line: usize::MAX, - pos: usize::MAX, - } - } - - /// Is there no `Position`? - pub fn is_none(&self) -> bool { - self.line == 0 && self.pos == 0 - } - - /// Is the `Position` at EOF? - pub fn is_eof(&self) -> bool { - self.line == usize::MAX && self.pos == usize::MAX - } -} - -impl Default for Position { - fn default() -> Self { - Self::new(1, 0) - } -} - -impl fmt::Display for Position { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.is_eof() { - write!(f, "EOF") - } else if self.is_none() { - write!(f, "none") - } else { - write!(f, "line {}, position {}", self.line, self.pos) - } - } -} - -impl fmt::Debug for Position { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.is_eof() { - write!(f, "(EOF)") - } else { - write!(f, "({}:{})", self.line, self.pos) - } - } -} - /// Compiled AST (abstract syntax tree) of a Rhai script. /// /// Currently, `AST` is neither `Send` nor `Sync`. Turn on the `sync` feature to make it `Send + Sync`. @@ -591,852 +472,6 @@ impl Expr { } } -/// Tokens. 
-#[derive(Debug, PartialEq, Clone)] -pub enum Token { - IntegerConstant(INT), - FloatConstant(FLOAT), - Identifier(String), - CharConstant(char), - StringConst(String), - LeftBrace, - RightBrace, - LeftParen, - RightParen, - LeftBracket, - RightBracket, - Plus, - UnaryPlus, - Minus, - UnaryMinus, - Multiply, - Divide, - Modulo, - PowerOf, - LeftShift, - RightShift, - SemiColon, - Colon, - Comma, - Period, - #[cfg(not(feature = "no_object"))] - MapStart, - Equals, - True, - False, - Let, - Const, - If, - Else, - While, - Loop, - For, - In, - LessThan, - GreaterThan, - LessThanEqualsTo, - GreaterThanEqualsTo, - EqualsTo, - NotEqualsTo, - Bang, - Pipe, - Or, - XOr, - Ampersand, - And, - #[cfg(not(feature = "no_function"))] - Fn, - Continue, - Break, - Return, - Throw, - PlusAssign, - MinusAssign, - MultiplyAssign, - DivideAssign, - LeftShiftAssign, - RightShiftAssign, - AndAssign, - OrAssign, - XOrAssign, - ModuloAssign, - PowerOfAssign, - LexError(Box), -} - -impl Token { - /// Get the syntax of the token. 
- pub fn syntax(&self) -> Cow { - use Token::*; - - match self { - IntegerConstant(i) => i.to_string().into(), - FloatConstant(f) => f.to_string().into(), - Identifier(s) => s.into(), - CharConstant(c) => c.to_string().into(), - LexError(err) => err.to_string().into(), - - token => (match token { - StringConst(_) => "string", - LeftBrace => "{", - RightBrace => "}", - LeftParen => "(", - RightParen => ")", - LeftBracket => "[", - RightBracket => "]", - Plus => "+", - UnaryPlus => "+", - Minus => "-", - UnaryMinus => "-", - Multiply => "*", - Divide => "/", - SemiColon => ";", - Colon => ":", - Comma => ",", - Period => ".", - #[cfg(not(feature = "no_object"))] - MapStart => "#{", - Equals => "=", - True => "true", - False => "false", - Let => "let", - Const => "const", - If => "if", - Else => "else", - While => "while", - Loop => "loop", - LessThan => "<", - GreaterThan => ">", - Bang => "!", - LessThanEqualsTo => "<=", - GreaterThanEqualsTo => ">=", - EqualsTo => "==", - NotEqualsTo => "!=", - Pipe => "|", - Or => "||", - Ampersand => "&", - And => "&&", - #[cfg(not(feature = "no_function"))] - Fn => "fn", - Continue => "continue", - Break => "break", - Return => "return", - Throw => "throw", - PlusAssign => "+=", - MinusAssign => "-=", - MultiplyAssign => "*=", - DivideAssign => "/=", - LeftShiftAssign => "<<=", - RightShiftAssign => ">>=", - AndAssign => "&=", - OrAssign => "|=", - XOrAssign => "^=", - LeftShift => "<<", - RightShift => ">>", - XOr => "^", - Modulo => "%", - ModuloAssign => "%=", - PowerOf => "~", - PowerOfAssign => "~=", - For => "for", - In => "in", - _ => panic!("operator should be match in outer scope"), - }) - .into(), - } - } - - // If another operator is after these, it's probably an unary operator - // (not sure about fn name). 
- pub fn is_next_unary(&self) -> bool { - use Token::*; - - match self { - LexError(_) | - LeftBrace | // (+expr) - is unary - // RightBrace | {expr} - expr not unary & is closing - LeftParen | // {-expr} - is unary - // RightParen | (expr) - expr not unary & is closing - LeftBracket | // [-expr] - is unary - // RightBracket | [expr] - expr not unary & is closing - Plus | - UnaryPlus | - Minus | - UnaryMinus | - Multiply | - Divide | - Colon | - Comma | - Period | - Equals | - LessThan | - GreaterThan | - Bang | - LessThanEqualsTo | - GreaterThanEqualsTo | - EqualsTo | - NotEqualsTo | - Pipe | - Or | - Ampersand | - And | - If | - While | - PlusAssign | - MinusAssign | - MultiplyAssign | - DivideAssign | - LeftShiftAssign | - RightShiftAssign | - AndAssign | - OrAssign | - XOrAssign | - LeftShift | - RightShift | - XOr | - Modulo | - ModuloAssign | - Return | - Throw | - PowerOf | - In | - PowerOfAssign => true, - - _ => false, - } - } - - /// Get the precedence number of the token. - pub fn precedence(&self) -> u8 { - use Token::*; - - match self { - Equals | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | LeftShiftAssign - | RightShiftAssign | AndAssign | OrAssign | XOrAssign | ModuloAssign - | PowerOfAssign => 10, - - Or | XOr | Pipe => 40, - - And | Ampersand => 50, - - LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo | EqualsTo - | NotEqualsTo => 60, - - In => 70, - - Plus | Minus => 80, - - Divide | Multiply | PowerOf => 90, - - LeftShift | RightShift => 100, - - Modulo => 110, - - Period => 120, - - _ => 0, - } - } - - /// Does an expression bind to the right (instead of left)? 
- pub fn is_bind_right(&self) -> bool { - use Token::*; - - match self { - // Assignments bind to the right - Equals | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | LeftShiftAssign - | RightShiftAssign | AndAssign | OrAssign | XOrAssign | ModuloAssign - | PowerOfAssign => true, - - // Property access binds to the right - Period => true, - - _ => false, - } - } -} - -/// An iterator on a `Token` stream. -pub struct TokenIterator<'a> { - /// Can the next token be a unary operator? - can_be_unary: bool, - /// Current position. - pos: Position, - /// The input character streams. - streams: Vec>>, -} - -impl<'a> TokenIterator<'a> { - /// Consume the next character. - fn eat_next(&mut self) { - self.get_next(); - self.advance(); - } - /// Get the next character - fn get_next(&mut self) -> Option { - loop { - if self.streams.is_empty() { - return None; - } else if let Some(ch) = self.streams[0].next() { - return Some(ch); - } else { - let _ = self.streams.remove(0); - } - } - } - /// Peek the next character - fn peek_next(&mut self) -> Option { - loop { - if self.streams.is_empty() { - return None; - } else if let Some(ch) = self.streams[0].peek() { - return Some(*ch); - } else { - let _ = self.streams.remove(0); - } - } - } - /// Move the current position one character ahead. - fn advance(&mut self) { - self.pos.advance(); - } - /// Move the current position back one character. - /// - /// # Panics - /// - /// Panics if already at the beginning of a line - cannot rewind to the previous line. - fn rewind(&mut self) { - self.pos.rewind(); - } - /// Move the current position to the next line. - fn new_line(&mut self) { - self.pos.new_line() - } - - /// Parse a string literal wrapped by `enclosing_char`. 
- pub fn parse_string_literal( - &mut self, - enclosing_char: char, - ) -> Result { - let mut result = Vec::new(); - let mut escape = String::with_capacity(12); - - loop { - let next_char = self.get_next(); - self.advance(); - - match next_char.ok_or((LERR::UnterminatedString, Position::eof()))? { - // \... - '\\' if escape.is_empty() => { - escape.push('\\'); - } - // \\ - '\\' if !escape.is_empty() => { - escape.clear(); - result.push('\\'); - } - // \t - 't' if !escape.is_empty() => { - escape.clear(); - result.push('\t'); - } - // \n - 'n' if !escape.is_empty() => { - escape.clear(); - result.push('\n'); - } - // \r - 'r' if !escape.is_empty() => { - escape.clear(); - result.push('\r'); - } - // \x??, \u????, \U???????? - ch @ 'x' | ch @ 'u' | ch @ 'U' if !escape.is_empty() => { - let mut seq = escape.clone(); - seq.push(ch); - escape.clear(); - - let mut out_val: u32 = 0; - let len = match ch { - 'x' => 2, - 'u' => 4, - 'U' => 8, - _ => panic!("should be 'x', 'u' or 'U'"), - }; - - for _ in 0..len { - let c = self.get_next().ok_or_else(|| { - (LERR::MalformedEscapeSequence(seq.to_string()), self.pos) - })?; - - seq.push(c); - self.advance(); - - out_val *= 16; - out_val += c.to_digit(16).ok_or_else(|| { - (LERR::MalformedEscapeSequence(seq.to_string()), self.pos) - })?; - } - - result.push( - char::from_u32(out_val) - .ok_or_else(|| (LERR::MalformedEscapeSequence(seq), self.pos))?, - ); - } - - // \{enclosing_char} - escaped - ch if enclosing_char == ch && !escape.is_empty() => { - escape.clear(); - result.push(ch) - } - - // Close wrapper - ch if enclosing_char == ch && escape.is_empty() => break, - - // Unknown escape sequence - _ if !escape.is_empty() => { - return Err((LERR::MalformedEscapeSequence(escape), self.pos)) - } - - // Cannot have new-lines inside string literals - '\n' => { - self.rewind(); - return Err((LERR::UnterminatedString, self.pos)); - } - - // All other characters - ch => { - escape.clear(); - result.push(ch); - } - } - } - - 
Ok(result.iter().collect()) - } - - /// Get the next token. - fn inner_next(&mut self) -> Option<(Token, Position)> { - let mut negated = false; - - while let Some(c) = self.get_next() { - self.advance(); - - let pos = self.pos; - - match (c, self.peek_next().unwrap_or('\0')) { - // \n - ('\n', _) => self.new_line(), - - // digit ... - ('0'..='9', _) => { - let mut result = Vec::new(); - let mut radix_base: Option = None; - result.push(c); - - while let Some(next_char) = self.peek_next() { - match next_char { - '0'..='9' | '_' => { - result.push(next_char); - self.eat_next(); - } - #[cfg(not(feature = "no_float"))] - '.' => { - result.push(next_char); - self.eat_next(); - while let Some(next_char_in_float) = self.peek_next() { - match next_char_in_float { - '0'..='9' | '_' => { - result.push(next_char_in_float); - self.eat_next(); - } - _ => break, - } - } - } - // 0x????, 0o????, 0b???? - ch @ 'x' | ch @ 'X' | ch @ 'o' | ch @ 'O' | ch @ 'b' | ch @ 'B' - if c == '0' => - { - result.push(next_char); - self.eat_next(); - - let valid = match ch { - 'x' | 'X' => [ - 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_', - ], - 'o' | 'O' => [ - '0', '1', '2', '3', '4', '5', '6', '7', '_', '_', '_', '_', - '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', - ], - 'b' | 'B' => [ - '0', '1', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', - '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', - ], - _ => panic!("unexpected character {}", ch), - }; - - radix_base = Some(match ch { - 'x' | 'X' => 16, - 'o' | 'O' => 8, - 'b' | 'B' => 2, - _ => panic!("unexpected character {}", ch), - }); - - while let Some(next_char_in_hex) = self.peek_next() { - if !valid.contains(&next_char_in_hex) { - break; - } - - result.push(next_char_in_hex); - self.eat_next(); - } - } - - _ => break, - } - } - - if negated { - result.insert(0, '-'); - } - - // Parse number - if let Some(radix) = radix_base { - let out: String = 
result.iter().skip(2).filter(|&&c| c != '_').collect(); - - return Some(( - INT::from_str_radix(&out, radix) - .map(Token::IntegerConstant) - .unwrap_or_else(|_| { - Token::LexError(Box::new(LERR::MalformedNumber( - result.iter().collect(), - ))) - }), - pos, - )); - } else { - let out: String = result.iter().filter(|&&c| c != '_').collect(); - let num = INT::from_str(&out).map(Token::IntegerConstant); - - // If integer parsing is unnecessary, try float instead - #[cfg(not(feature = "no_float"))] - let num = num.or_else(|_| FLOAT::from_str(&out).map(Token::FloatConstant)); - - return Some(( - num.unwrap_or_else(|_| { - Token::LexError(Box::new(LERR::MalformedNumber( - result.iter().collect(), - ))) - }), - pos, - )); - } - } - - // letter or underscore ... - ('A'..='Z', _) | ('a'..='z', _) | ('_', _) => { - let mut result = Vec::new(); - result.push(c); - - while let Some(next_char) = self.peek_next() { - match next_char { - x if x.is_ascii_alphanumeric() || x == '_' => { - result.push(x); - self.eat_next(); - } - _ => break, - } - } - - let is_valid_identifier = result - .iter() - .find(|&ch| char::is_ascii_alphanumeric(ch)) // first alpha-numeric character - .map(char::is_ascii_alphabetic) // is a letter - .unwrap_or(false); // if no alpha-numeric at all - syntax error - - let identifier: String = result.iter().collect(); - - if !is_valid_identifier { - return Some(( - Token::LexError(Box::new(LERR::MalformedIdentifier(identifier))), - pos, - )); - } - - return Some(( - match identifier.as_str() { - "true" => Token::True, - "false" => Token::False, - "let" => Token::Let, - "const" => Token::Const, - "if" => Token::If, - "else" => Token::Else, - "while" => Token::While, - "loop" => Token::Loop, - "continue" => Token::Continue, - "break" => Token::Break, - "return" => Token::Return, - "throw" => Token::Throw, - "for" => Token::For, - "in" => Token::In, - - #[cfg(not(feature = "no_function"))] - "fn" => Token::Fn, - - _ => Token::Identifier(identifier), - }, - pos, 
- )); - } - - // " - string literal - ('"', _) => { - return self.parse_string_literal('"').map_or_else( - |err| Some((Token::LexError(Box::new(err.0)), err.1)), - |out| Some((Token::StringConst(out), pos)), - ); - } - - // ' - character literal - ('\'', '\'') => { - return Some(( - Token::LexError(Box::new(LERR::MalformedChar("".to_string()))), - pos, - )); - } - ('\'', _) => { - return Some(self.parse_string_literal('\'').map_or_else( - |err| (Token::LexError(Box::new(err.0)), err.1), - |result| { - let mut chars = result.chars(); - let first = chars.next(); - - if chars.next().is_some() { - (Token::LexError(Box::new(LERR::MalformedChar(result))), pos) - } else { - (Token::CharConstant(first.expect("should be Some")), pos) - } - }, - )); - } - - // Braces - ('{', _) => return Some((Token::LeftBrace, pos)), - ('}', _) => return Some((Token::RightBrace, pos)), - - // Parentheses - ('(', _) => return Some((Token::LeftParen, pos)), - (')', _) => return Some((Token::RightParen, pos)), - - // Indexing - ('[', _) => return Some((Token::LeftBracket, pos)), - (']', _) => return Some((Token::RightBracket, pos)), - - // Map literal - #[cfg(not(feature = "no_object"))] - ('#', '{') => { - self.eat_next(); - return Some((Token::MapStart, pos)); - } - - // Operators - ('+', '=') => { - self.eat_next(); - return Some((Token::PlusAssign, pos)); - } - ('+', _) if self.can_be_unary => return Some((Token::UnaryPlus, pos)), - ('+', _) => return Some((Token::Plus, pos)), - - ('-', '0'..='9') if self.can_be_unary => negated = true, - ('-', '0'..='9') => return Some((Token::Minus, pos)), - ('-', '=') => { - self.eat_next(); - return Some((Token::MinusAssign, pos)); - } - ('-', _) if self.can_be_unary => return Some((Token::UnaryMinus, pos)), - ('-', _) => return Some((Token::Minus, pos)), - - ('*', '=') => { - self.eat_next(); - return Some((Token::MultiplyAssign, pos)); - } - ('*', _) => return Some((Token::Multiply, pos)), - - // Comments - ('/', '/') => { - self.eat_next(); - - 
while let Some(c) = self.get_next() { - if c == '\n' { - self.new_line(); - break; - } - - self.advance(); - } - } - ('/', '*') => { - let mut level = 1; - - self.eat_next(); - - while let Some(c) = self.get_next() { - self.advance(); - - match c { - '/' => { - if self.get_next() == Some('*') { - level += 1; - } - self.advance(); - } - '*' => { - if self.get_next() == Some('/') { - level -= 1; - } - self.advance(); - } - '\n' => self.new_line(), - _ => (), - } - - if level == 0 { - break; - } - } - } - - ('/', '=') => { - self.eat_next(); - return Some((Token::DivideAssign, pos)); - } - ('/', _) => return Some((Token::Divide, pos)), - - (';', _) => return Some((Token::SemiColon, pos)), - (':', _) => return Some((Token::Colon, pos)), - (',', _) => return Some((Token::Comma, pos)), - ('.', _) => return Some((Token::Period, pos)), - - ('=', '=') => { - self.eat_next(); - return Some((Token::EqualsTo, pos)); - } - ('=', _) => return Some((Token::Equals, pos)), - - ('<', '=') => { - self.eat_next(); - return Some((Token::LessThanEqualsTo, pos)); - } - ('<', '<') => { - self.eat_next(); - - return Some(( - if self.peek_next() == Some('=') { - self.eat_next(); - Token::LeftShiftAssign - } else { - Token::LeftShift - }, - pos, - )); - } - ('<', _) => return Some((Token::LessThan, pos)), - - ('>', '=') => { - self.eat_next(); - return Some((Token::GreaterThanEqualsTo, pos)); - } - ('>', '>') => { - self.eat_next(); - - return Some(( - if self.peek_next() == Some('=') { - self.eat_next(); - Token::RightShiftAssign - } else { - Token::RightShift - }, - pos, - )); - } - ('>', _) => return Some((Token::GreaterThan, pos)), - - ('!', '=') => { - self.eat_next(); - return Some((Token::NotEqualsTo, pos)); - } - ('!', _) => return Some((Token::Bang, pos)), - - ('|', '|') => { - self.eat_next(); - return Some((Token::Or, pos)); - } - ('|', '=') => { - self.eat_next(); - return Some((Token::OrAssign, pos)); - } - ('|', _) => return Some((Token::Pipe, pos)), - - ('&', '&') => { - 
self.eat_next(); - return Some((Token::And, pos)); - } - ('&', '=') => { - self.eat_next(); - return Some((Token::AndAssign, pos)); - } - ('&', _) => return Some((Token::Ampersand, pos)), - - ('^', '=') => { - self.eat_next(); - return Some((Token::XOrAssign, pos)); - } - ('^', _) => return Some((Token::XOr, pos)), - - ('%', '=') => { - self.eat_next(); - return Some((Token::ModuloAssign, pos)); - } - ('%', _) => return Some((Token::Modulo, pos)), - - ('~', '=') => { - self.eat_next(); - return Some((Token::PowerOfAssign, pos)); - } - ('~', _) => return Some((Token::PowerOf, pos)), - - (ch, _) if ch.is_whitespace() => (), - (ch, _) => return Some((Token::LexError(Box::new(LERR::UnexpectedChar(ch))), pos)), - } - } - - None - } -} - -impl<'a> Iterator for TokenIterator<'a> { - type Item = (Token, Position); - - fn next(&mut self) -> Option { - self.inner_next().map(|x| { - // Save the last token - self.can_be_unary = x.0.is_next_unary(); - x - }) - } -} - -/// Tokenize an input text stream. -pub fn lex<'a>(input: &'a [&'a str]) -> TokenIterator<'a> { - TokenIterator { - can_be_unary: true, - pos: Position::new(1, 0), - streams: input.iter().map(|s| s.chars().peekable()).collect(), - } -} - /// Consume a particular token, checking that it is the expected one. 
fn eat_token(input: &mut Peekable, token: Token) { if let Some((t, pos)) = input.next() { @@ -1932,7 +967,7 @@ fn parse_unary<'a>( } }) .ok_or_else(|| { - PERR::BadInput(LERR::MalformedNumber(format!("-{}", i)).to_string()) + PERR::BadInput(LexError::MalformedNumber(format!("-{}", i)).to_string()) .into_err(pos) }), diff --git a/src/result.rs b/src/result.rs index ddb56f0b..9d443f82 100644 --- a/src/result.rs +++ b/src/result.rs @@ -2,7 +2,8 @@ use crate::any::Dynamic; use crate::error::ParseError; -use crate::parser::{Position, INT}; +use crate::parser::INT; +use crate::token::Position; use crate::stdlib::{ error::Error, diff --git a/src/scope.rs b/src/scope.rs index 203d2402..3da54404 100644 --- a/src/scope.rs +++ b/src/scope.rs @@ -1,14 +1,10 @@ //! Module that defines the `Scope` type representing a function call-stack scope. use crate::any::{Dynamic, Variant}; -use crate::parser::{map_dynamic_to_expr, Expr, Position}; +use crate::parser::{map_dynamic_to_expr, Expr}; +use crate::token::Position; -use crate::stdlib::{ - borrow::Cow, - iter, - string::{String, ToString}, - vec::Vec, -}; +use crate::stdlib::{borrow::Cow, iter, vec::Vec}; /// Type of an entry in the Scope. #[derive(Debug, Eq, PartialEq, Hash, Copy, Clone)] diff --git a/src/stdlib.rs b/src/stdlib.rs index 4ec71d21..1d1397d5 100644 --- a/src/stdlib.rs +++ b/src/stdlib.rs @@ -8,7 +8,7 @@ mod inner { panic, pin, prelude, ptr, result, slice, str, task, time, u128, u16, u32, u64, u8, usize, }; - pub use alloc::{borrow, boxed, format, string, sync, vec}; + pub use alloc::{borrow, boxed, format, rc, string, sync, vec}; pub use core_error as error; diff --git a/src/token.rs b/src/token.rs new file mode 100644 index 00000000..55563d0f --- /dev/null +++ b/src/token.rs @@ -0,0 +1,982 @@ +//! Main module defining the lexer (tokenizer). 
+ +use crate::error::LexError; +use crate::parser::INT; + +#[cfg(not(feature = "no_float"))] +use crate::parser::FLOAT; + +use crate::stdlib::{ + borrow::Cow, + boxed::Box, + char, fmt, + iter::Peekable, + str::{Chars, FromStr}, + string::{String, ToString}, + usize, + vec::Vec, +}; + +type LERR = LexError; + +/// A location (line number + character position) in the input script. +#[derive(Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)] +pub struct Position { + /// Line number - 0 = none, MAX = EOF + line: usize, + /// Character position - 0 = BOL, MAX = EOF + pos: usize, +} + +impl Position { + /// Create a new `Position`. + pub fn new(line: usize, position: usize) -> Self { + assert!(line != 0, "line cannot be zero"); + assert!( + line != usize::MAX || position != usize::MAX, + "invalid position" + ); + + Self { + line, + pos: position, + } + } + + /// Get the line number (1-based), or `None` if no position or EOF. + pub fn line(&self) -> Option { + if self.is_none() || self.is_eof() { + None + } else { + Some(self.line) + } + } + + /// Get the character position (1-based), or `None` if at beginning of a line. + pub fn position(&self) -> Option { + if self.is_none() || self.is_eof() || self.pos == 0 { + None + } else { + Some(self.pos) + } + } + + /// Advance by one character position. + pub(crate) fn advance(&mut self) { + self.pos += 1; + } + + /// Go backwards by one character position. + /// + /// # Panics + /// + /// Panics if already at beginning of a line - cannot rewind to a previous line. + /// + pub(crate) fn rewind(&mut self) { + assert!(self.pos > 0, "cannot rewind at position 0"); + self.pos -= 1; + } + + /// Advance to the next line. + pub(crate) fn new_line(&mut self) { + self.line += 1; + self.pos = 0; + } + + /// Create a `Position` representing no position. + pub(crate) fn none() -> Self { + Self { line: 0, pos: 0 } + } + + /// Create a `Position` at EOF. 
+ pub(crate) fn eof() -> Self { + Self { + line: usize::MAX, + pos: usize::MAX, + } + } + + /// Is there no `Position`? + pub fn is_none(&self) -> bool { + self.line == 0 && self.pos == 0 + } + + /// Is the `Position` at EOF? + pub fn is_eof(&self) -> bool { + self.line == usize::MAX && self.pos == usize::MAX + } +} + +impl Default for Position { + fn default() -> Self { + Self::new(1, 0) + } +} + +impl fmt::Display for Position { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.is_eof() { + write!(f, "EOF") + } else if self.is_none() { + write!(f, "none") + } else { + write!(f, "line {}, position {}", self.line, self.pos) + } + } +} + +impl fmt::Debug for Position { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.is_eof() { + write!(f, "(EOF)") + } else { + write!(f, "({}:{})", self.line, self.pos) + } + } +} + +/// Tokens. +#[derive(Debug, PartialEq, Clone)] +pub enum Token { + IntegerConstant(INT), + FloatConstant(FLOAT), + Identifier(String), + CharConstant(char), + StringConst(String), + LeftBrace, + RightBrace, + LeftParen, + RightParen, + LeftBracket, + RightBracket, + Plus, + UnaryPlus, + Minus, + UnaryMinus, + Multiply, + Divide, + Modulo, + PowerOf, + LeftShift, + RightShift, + SemiColon, + Colon, + Comma, + Period, + #[cfg(not(feature = "no_object"))] + MapStart, + Equals, + True, + False, + Let, + Const, + If, + Else, + While, + Loop, + For, + In, + LessThan, + GreaterThan, + LessThanEqualsTo, + GreaterThanEqualsTo, + EqualsTo, + NotEqualsTo, + Bang, + Pipe, + Or, + XOr, + Ampersand, + And, + #[cfg(not(feature = "no_function"))] + Fn, + Continue, + Break, + Return, + Throw, + PlusAssign, + MinusAssign, + MultiplyAssign, + DivideAssign, + LeftShiftAssign, + RightShiftAssign, + AndAssign, + OrAssign, + XOrAssign, + ModuloAssign, + PowerOfAssign, + LexError(Box), +} + +impl Token { + /// Get the syntax of the token. 
+ pub fn syntax(&self) -> Cow { + use Token::*; + + match self { + IntegerConstant(i) => i.to_string().into(), + FloatConstant(f) => f.to_string().into(), + Identifier(s) => s.into(), + CharConstant(c) => c.to_string().into(), + LexError(err) => err.to_string().into(), + + token => (match token { + StringConst(_) => "string", + LeftBrace => "{", + RightBrace => "}", + LeftParen => "(", + RightParen => ")", + LeftBracket => "[", + RightBracket => "]", + Plus => "+", + UnaryPlus => "+", + Minus => "-", + UnaryMinus => "-", + Multiply => "*", + Divide => "/", + SemiColon => ";", + Colon => ":", + Comma => ",", + Period => ".", + #[cfg(not(feature = "no_object"))] + MapStart => "#{", + Equals => "=", + True => "true", + False => "false", + Let => "let", + Const => "const", + If => "if", + Else => "else", + While => "while", + Loop => "loop", + LessThan => "<", + GreaterThan => ">", + Bang => "!", + LessThanEqualsTo => "<=", + GreaterThanEqualsTo => ">=", + EqualsTo => "==", + NotEqualsTo => "!=", + Pipe => "|", + Or => "||", + Ampersand => "&", + And => "&&", + #[cfg(not(feature = "no_function"))] + Fn => "fn", + Continue => "continue", + Break => "break", + Return => "return", + Throw => "throw", + PlusAssign => "+=", + MinusAssign => "-=", + MultiplyAssign => "*=", + DivideAssign => "/=", + LeftShiftAssign => "<<=", + RightShiftAssign => ">>=", + AndAssign => "&=", + OrAssign => "|=", + XOrAssign => "^=", + LeftShift => "<<", + RightShift => ">>", + XOr => "^", + Modulo => "%", + ModuloAssign => "%=", + PowerOf => "~", + PowerOfAssign => "~=", + For => "for", + In => "in", + _ => panic!("operator should be match in outer scope"), + }) + .into(), + } + } + + // If another operator is after these, it's probably an unary operator + // (not sure about fn name). 
+ pub fn is_next_unary(&self) -> bool { + use Token::*; + + match self { + LexError(_) | + LeftBrace | // {+expr} - is unary + // RightBrace | {expr} - expr not unary & is closing + LeftParen | // (-expr) - is unary + // RightParen | (expr) - expr not unary & is closing + LeftBracket | // [-expr] - is unary + // RightBracket | [expr] - expr not unary & is closing + Plus | + UnaryPlus | + Minus | + UnaryMinus | + Multiply | + Divide | + Colon | + Comma | + Period | + Equals | + LessThan | + GreaterThan | + Bang | + LessThanEqualsTo | + GreaterThanEqualsTo | + EqualsTo | + NotEqualsTo | + Pipe | + Or | + Ampersand | + And | + If | + While | + PlusAssign | + MinusAssign | + MultiplyAssign | + DivideAssign | + LeftShiftAssign | + RightShiftAssign | + AndAssign | + OrAssign | + XOrAssign | + LeftShift | + RightShift | + XOr | + Modulo | + ModuloAssign | + Return | + Throw | + PowerOf | + In | + PowerOfAssign => true, + + _ => false, + } + } + + /// Get the precedence number of the token. + pub fn precedence(&self) -> u8 { + use Token::*; + + match self { + Equals | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | LeftShiftAssign + | RightShiftAssign | AndAssign | OrAssign | XOrAssign | ModuloAssign + | PowerOfAssign => 10, + + Or | XOr | Pipe => 40, + + And | Ampersand => 50, + + LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo | EqualsTo + | NotEqualsTo => 60, + + In => 70, + + Plus | Minus => 80, + + Divide | Multiply | PowerOf => 90, + + LeftShift | RightShift => 100, + + Modulo => 110, + + Period => 120, + + _ => 0, + } + } + + /// Does an expression bind to the right (instead of left)? 
+ pub fn is_bind_right(&self) -> bool { + use Token::*; + + match self { + // Assignments bind to the right + Equals | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | LeftShiftAssign + | RightShiftAssign | AndAssign | OrAssign | XOrAssign | ModuloAssign + | PowerOfAssign => true, + + // Property access binds to the right + Period => true, + + _ => false, + } + } +} + +/// An iterator on a `Token` stream. +pub struct TokenIterator<'a> { + /// Can the next token be a unary operator? + can_be_unary: bool, + /// Current position. + pos: Position, + /// The input character streams. + streams: Vec>>, +} + +impl<'a> TokenIterator<'a> { + /// Consume the next character. + fn eat_next(&mut self) { + self.get_next(); + self.advance(); + } + /// Get the next character + fn get_next(&mut self) -> Option { + loop { + if self.streams.is_empty() { + return None; + } else if let Some(ch) = self.streams[0].next() { + return Some(ch); + } else { + let _ = self.streams.remove(0); + } + } + } + /// Peek the next character + fn peek_next(&mut self) -> Option { + loop { + if self.streams.is_empty() { + return None; + } else if let Some(ch) = self.streams[0].peek() { + return Some(*ch); + } else { + let _ = self.streams.remove(0); + } + } + } + /// Move the current position one character ahead. + fn advance(&mut self) { + self.pos.advance(); + } + /// Move the current position back one character. + /// + /// # Panics + /// + /// Panics if already at the beginning of a line - cannot rewind to the previous line. + fn rewind(&mut self) { + self.pos.rewind(); + } + /// Move the current position to the next line. + fn new_line(&mut self) { + self.pos.new_line() + } + + /// Parse a string literal wrapped by `enclosing_char`. 
+ pub fn parse_string_literal( + &mut self, + enclosing_char: char, + ) -> Result { + let mut result = Vec::new(); + let mut escape = String::with_capacity(12); + + loop { + let next_char = self.get_next(); + self.advance(); + + match next_char.ok_or((LERR::UnterminatedString, Position::eof()))? { + // \... + '\\' if escape.is_empty() => { + escape.push('\\'); + } + // \\ + '\\' if !escape.is_empty() => { + escape.clear(); + result.push('\\'); + } + // \t + 't' if !escape.is_empty() => { + escape.clear(); + result.push('\t'); + } + // \n + 'n' if !escape.is_empty() => { + escape.clear(); + result.push('\n'); + } + // \r + 'r' if !escape.is_empty() => { + escape.clear(); + result.push('\r'); + } + // \x??, \u????, \U???????? + ch @ 'x' | ch @ 'u' | ch @ 'U' if !escape.is_empty() => { + let mut seq = escape.clone(); + seq.push(ch); + escape.clear(); + + let mut out_val: u32 = 0; + let len = match ch { + 'x' => 2, + 'u' => 4, + 'U' => 8, + _ => panic!("should be 'x', 'u' or 'U'"), + }; + + for _ in 0..len { + let c = self.get_next().ok_or_else(|| { + (LERR::MalformedEscapeSequence(seq.to_string()), self.pos) + })?; + + seq.push(c); + self.advance(); + + out_val *= 16; + out_val += c.to_digit(16).ok_or_else(|| { + (LERR::MalformedEscapeSequence(seq.to_string()), self.pos) + })?; + } + + result.push( + char::from_u32(out_val) + .ok_or_else(|| (LERR::MalformedEscapeSequence(seq), self.pos))?, + ); + } + + // \{enclosing_char} - escaped + ch if enclosing_char == ch && !escape.is_empty() => { + escape.clear(); + result.push(ch) + } + + // Close wrapper + ch if enclosing_char == ch && escape.is_empty() => break, + + // Unknown escape sequence + _ if !escape.is_empty() => { + return Err((LERR::MalformedEscapeSequence(escape), self.pos)) + } + + // Cannot have new-lines inside string literals + '\n' => { + self.rewind(); + return Err((LERR::UnterminatedString, self.pos)); + } + + // All other characters + ch => { + escape.clear(); + result.push(ch); + } + } + } + + 
Ok(result.iter().collect()) + } + + /// Get the next token. + fn inner_next(&mut self) -> Option<(Token, Position)> { + let mut negated = false; + + while let Some(c) = self.get_next() { + self.advance(); + + let pos = self.pos; + + match (c, self.peek_next().unwrap_or('\0')) { + // \n + ('\n', _) => self.new_line(), + + // digit ... + ('0'..='9', _) => { + let mut result = Vec::new(); + let mut radix_base: Option = None; + result.push(c); + + while let Some(next_char) = self.peek_next() { + match next_char { + '0'..='9' | '_' => { + result.push(next_char); + self.eat_next(); + } + #[cfg(not(feature = "no_float"))] + '.' => { + result.push(next_char); + self.eat_next(); + while let Some(next_char_in_float) = self.peek_next() { + match next_char_in_float { + '0'..='9' | '_' => { + result.push(next_char_in_float); + self.eat_next(); + } + _ => break, + } + } + } + // 0x????, 0o????, 0b???? + ch @ 'x' | ch @ 'X' | ch @ 'o' | ch @ 'O' | ch @ 'b' | ch @ 'B' + if c == '0' => + { + result.push(next_char); + self.eat_next(); + + let valid = match ch { + 'x' | 'X' => [ + 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_', + ], + 'o' | 'O' => [ + '0', '1', '2', '3', '4', '5', '6', '7', '_', '_', '_', '_', + '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', + ], + 'b' | 'B' => [ + '0', '1', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', + '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', + ], + _ => panic!("unexpected character {}", ch), + }; + + radix_base = Some(match ch { + 'x' | 'X' => 16, + 'o' | 'O' => 8, + 'b' | 'B' => 2, + _ => panic!("unexpected character {}", ch), + }); + + while let Some(next_char_in_hex) = self.peek_next() { + if !valid.contains(&next_char_in_hex) { + break; + } + + result.push(next_char_in_hex); + self.eat_next(); + } + } + + _ => break, + } + } + + if negated { + result.insert(0, '-'); + } + + // Parse number + if let Some(radix) = radix_base { + let out: String = 
result.iter().skip(2).filter(|&&c| c != '_').collect(); + + return Some(( + INT::from_str_radix(&out, radix) + .map(Token::IntegerConstant) + .unwrap_or_else(|_| { + Token::LexError(Box::new(LERR::MalformedNumber( + result.iter().collect(), + ))) + }), + pos, + )); + } else { + let out: String = result.iter().filter(|&&c| c != '_').collect(); + let num = INT::from_str(&out).map(Token::IntegerConstant); + + // If integer parsing is unsuccessful, try float instead + #[cfg(not(feature = "no_float"))] + let num = num.or_else(|_| FLOAT::from_str(&out).map(Token::FloatConstant)); + + return Some(( + num.unwrap_or_else(|_| { + Token::LexError(Box::new(LERR::MalformedNumber( + result.iter().collect(), + ))) + }), + pos, + )); + } + } + + // letter or underscore ... + ('A'..='Z', _) | ('a'..='z', _) | ('_', _) => { + let mut result = Vec::new(); + result.push(c); + + while let Some(next_char) = self.peek_next() { + match next_char { + x if x.is_ascii_alphanumeric() || x == '_' => { + result.push(x); + self.eat_next(); + } + _ => break, + } + } + + let is_valid_identifier = result + .iter() + .find(|&ch| char::is_ascii_alphanumeric(ch)) // first alpha-numeric character + .map(char::is_ascii_alphabetic) // is a letter + .unwrap_or(false); // if no alpha-numeric at all - syntax error + + let identifier: String = result.iter().collect(); + + if !is_valid_identifier { + return Some(( + Token::LexError(Box::new(LERR::MalformedIdentifier(identifier))), + pos, + )); + } + + return Some(( + match identifier.as_str() { + "true" => Token::True, + "false" => Token::False, + "let" => Token::Let, + "const" => Token::Const, + "if" => Token::If, + "else" => Token::Else, + "while" => Token::While, + "loop" => Token::Loop, + "continue" => Token::Continue, + "break" => Token::Break, + "return" => Token::Return, + "throw" => Token::Throw, + "for" => Token::For, + "in" => Token::In, + + #[cfg(not(feature = "no_function"))] + "fn" => Token::Fn, + + _ => Token::Identifier(identifier), + }, + pos, 
+ )); + } + + // " - string literal + ('"', _) => { + return self.parse_string_literal('"').map_or_else( + |err| Some((Token::LexError(Box::new(err.0)), err.1)), + |out| Some((Token::StringConst(out), pos)), + ); + } + + // ' - character literal + ('\'', '\'') => { + return Some(( + Token::LexError(Box::new(LERR::MalformedChar("".to_string()))), + pos, + )); + } + ('\'', _) => { + return Some(self.parse_string_literal('\'').map_or_else( + |err| (Token::LexError(Box::new(err.0)), err.1), + |result| { + let mut chars = result.chars(); + let first = chars.next(); + + if chars.next().is_some() { + (Token::LexError(Box::new(LERR::MalformedChar(result))), pos) + } else { + (Token::CharConstant(first.expect("should be Some")), pos) + } + }, + )); + } + + // Braces + ('{', _) => return Some((Token::LeftBrace, pos)), + ('}', _) => return Some((Token::RightBrace, pos)), + + // Parentheses + ('(', _) => return Some((Token::LeftParen, pos)), + (')', _) => return Some((Token::RightParen, pos)), + + // Indexing + ('[', _) => return Some((Token::LeftBracket, pos)), + (']', _) => return Some((Token::RightBracket, pos)), + + // Map literal + #[cfg(not(feature = "no_object"))] + ('#', '{') => { + self.eat_next(); + return Some((Token::MapStart, pos)); + } + + // Operators + ('+', '=') => { + self.eat_next(); + return Some((Token::PlusAssign, pos)); + } + ('+', _) if self.can_be_unary => return Some((Token::UnaryPlus, pos)), + ('+', _) => return Some((Token::Plus, pos)), + + ('-', '0'..='9') if self.can_be_unary => negated = true, + ('-', '0'..='9') => return Some((Token::Minus, pos)), + ('-', '=') => { + self.eat_next(); + return Some((Token::MinusAssign, pos)); + } + ('-', _) if self.can_be_unary => return Some((Token::UnaryMinus, pos)), + ('-', _) => return Some((Token::Minus, pos)), + + ('*', '=') => { + self.eat_next(); + return Some((Token::MultiplyAssign, pos)); + } + ('*', _) => return Some((Token::Multiply, pos)), + + // Comments + ('/', '/') => { + self.eat_next(); + + 
while let Some(c) = self.get_next() { + if c == '\n' { + self.new_line(); + break; + } + + self.advance(); + } + } + ('/', '*') => { + let mut level = 1; + + self.eat_next(); + + while let Some(c) = self.get_next() { + self.advance(); + + match c { + '/' => { + if self.get_next() == Some('*') { + level += 1; + } + self.advance(); + } + '*' => { + if self.get_next() == Some('/') { + level -= 1; + } + self.advance(); + } + '\n' => self.new_line(), + _ => (), + } + + if level == 0 { + break; + } + } + } + + ('/', '=') => { + self.eat_next(); + return Some((Token::DivideAssign, pos)); + } + ('/', _) => return Some((Token::Divide, pos)), + + (';', _) => return Some((Token::SemiColon, pos)), + (':', _) => return Some((Token::Colon, pos)), + (',', _) => return Some((Token::Comma, pos)), + ('.', _) => return Some((Token::Period, pos)), + + ('=', '=') => { + self.eat_next(); + return Some((Token::EqualsTo, pos)); + } + ('=', _) => return Some((Token::Equals, pos)), + + ('<', '=') => { + self.eat_next(); + return Some((Token::LessThanEqualsTo, pos)); + } + ('<', '<') => { + self.eat_next(); + + return Some(( + if self.peek_next() == Some('=') { + self.eat_next(); + Token::LeftShiftAssign + } else { + Token::LeftShift + }, + pos, + )); + } + ('<', _) => return Some((Token::LessThan, pos)), + + ('>', '=') => { + self.eat_next(); + return Some((Token::GreaterThanEqualsTo, pos)); + } + ('>', '>') => { + self.eat_next(); + + return Some(( + if self.peek_next() == Some('=') { + self.eat_next(); + Token::RightShiftAssign + } else { + Token::RightShift + }, + pos, + )); + } + ('>', _) => return Some((Token::GreaterThan, pos)), + + ('!', '=') => { + self.eat_next(); + return Some((Token::NotEqualsTo, pos)); + } + ('!', _) => return Some((Token::Bang, pos)), + + ('|', '|') => { + self.eat_next(); + return Some((Token::Or, pos)); + } + ('|', '=') => { + self.eat_next(); + return Some((Token::OrAssign, pos)); + } + ('|', _) => return Some((Token::Pipe, pos)), + + ('&', '&') => { + 
self.eat_next(); + return Some((Token::And, pos)); + } + ('&', '=') => { + self.eat_next(); + return Some((Token::AndAssign, pos)); + } + ('&', _) => return Some((Token::Ampersand, pos)), + + ('^', '=') => { + self.eat_next(); + return Some((Token::XOrAssign, pos)); + } + ('^', _) => return Some((Token::XOr, pos)), + + ('%', '=') => { + self.eat_next(); + return Some((Token::ModuloAssign, pos)); + } + ('%', _) => return Some((Token::Modulo, pos)), + + ('~', '=') => { + self.eat_next(); + return Some((Token::PowerOfAssign, pos)); + } + ('~', _) => return Some((Token::PowerOf, pos)), + + (ch, _) if ch.is_whitespace() => (), + (ch, _) => return Some((Token::LexError(Box::new(LERR::UnexpectedChar(ch))), pos)), + } + } + + None + } +} + +impl<'a> Iterator for TokenIterator<'a> { + type Item = (Token, Position); + + fn next(&mut self) -> Option { + self.inner_next().map(|x| { + // Save the last token + self.can_be_unary = x.0.is_next_unary(); + x + }) + } +} + +/// Tokenize an input text stream. +pub fn lex<'a>(input: &'a [&'a str]) -> TokenIterator<'a> { + TokenIterator { + can_be_unary: true, + pos: Position::new(1, 0), + streams: input.iter().map(|s| s.chars().peekable()).collect(), + } +} diff --git a/tests/time.rs b/tests/time.rs index 10b232fe..eddb652d 100644 --- a/tests/time.rs +++ b/tests/time.rs @@ -1,4 +1,5 @@ #![cfg(not(feature = "no_stdlib"))] +#![cfg(not(feature = "no_std"))] use rhai::{Engine, EvalAltResult, INT};