diff --git a/README.md b/README.md index bb4a9256..a88c574d 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ Features * Some support for [object-oriented programming (OOP)](https://schungx.github.io/rhai/language/oop.html). * Organize code base with dynamically-loadable [modules](https://schungx.github.io/rhai/language/modules.html). * Serialization/deserialization support via [serde](https://crates.io/crates/serde) (requires the `serde` feature). +* Surgically disable keywords and operators to restrict the language. * Scripts are [optimized](https://schungx.github.io/rhai/engine/optimize.html) (useful for template-based machine-generated scripts) for repeated evaluations. * Support for [minimal builds](https://schungx.github.io/rhai/start/builds/minimal.html) by excluding unneeded language [features](https://schungx.github.io/rhai/start/features.html). diff --git a/RELEASES.md b/RELEASES.md index 0a1ead2b..4306c015 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -4,7 +4,10 @@ Rhai Release Notes Version 0.17.0 ============== -This version adds [`serde`](https://crates.io/crates/serde) support for working with `Dynamic` values (particularly _object maps_). +This version adds: + +* [`serde`](https://crates.io/crates/serde) support for working with `Dynamic` values (particularly _object maps_) +* Ability to surgically disable keywords and/or operators in the language Breaking changes ---------------- @@ -16,6 +19,7 @@ New features * New `serde` feature to allow serializating/deserializating to/from `Dynamic` values using [`serde`](https://crates.io/crates/serde). This is particularly useful when converting a Rust `struct` to a `Dynamic` _object map_ and back. +* `Engine::disable_symbol` to surgically disable keywords and/or operators. Version 0.16.1 diff --git a/doc/src/SUMMARY.md b/doc/src/SUMMARY.md index a7c3946b..39c9fb68 100644 --- a/doc/src/SUMMARY.md +++ b/doc/src/SUMMARY.md @@ -95,17 +95,18 @@ The Rhai Scripting Language 8. [Maximum Call Stack Depth](safety/max-call-stack.md) 9. [Maximum Statement Depth](safety/max-stmt-depth.md) 8. [Advanced Topics](advanced.md) - 1. [Object-Oriented Programming (OOP)](language/oop.md) - 2. [Serialization/Deserialization of `Dynamic` with `serde`](rust/serde.md) - 3. [Script Optimization](engine/optimize/index.md) + 1. [Disable Keywords and/or Operators](engine/disable.md) + 2. [Object-Oriented Programming (OOP)](language/oop.md) + 3. [Serialization/Deserialization of `Dynamic` with `serde`](rust/serde.md) + 4. [Script Optimization](engine/optimize/index.md) 1. [Optimization Levels](engine/optimize/optimize-levels.md) 2. [Re-Optimize an AST](engine/optimize/reoptimize.md) 3. [Eager Function Evaluation](engine/optimize/eager.md) 4. [Side-Effect Considerations](engine/optimize/side-effects.md) 5. [Volatility Considerations](engine/optimize/volatility.md) 6. [Subtle Semantic Changes](engine/optimize/semantics.md) - 4. [Eval Statement](language/eval.md) -9. [Appendix](appendix/index.md) - 1. [Keywords](appendix/keywords.md) - 2. [Operators](appendix/operators.md) - 3. [Literals](appendix/literals.md) + 5. [Eval Statement](language/eval.md) +9. [Appendix](appendix/index.md) + 6. [Keywords](appendix/keywords.md) + 7. [Operators](appendix/operators.md) + 8. [Literals](appendix/literals.md) diff --git a/doc/src/about/features.md b/doc/src/about/features.md index 7d3ff3cd..28f20aef 100644 --- a/doc/src/about/features.md +++ b/doc/src/about/features.md @@ -64,3 +64,5 @@ Flexible * Support for [minimal builds] by excluding unneeded language [features]. * Supports [most build targets](targets.md) including `no-std` and [WASM]. + +* Surgically [disable keywords and operators] to restrict the language. diff --git a/doc/src/engine/disable.md b/doc/src/engine/disable.md new file mode 100644 index 00000000..f34e763c --- /dev/null +++ b/doc/src/engine/disable.md @@ -0,0 +1,28 @@ +Disable Certain Keywords and/or Operators +======================================== + +{{#include ../links.md}} + +For certain embedded usage, it is sometimes necessary to restrict the language to a strict subset of Rhai +to prevent usage of certain language features. + +Rhai supports surgically disabling a keyword or operator via the `Engine::disable_symbol` method. + +```rust +use rhai::Engine; + +let mut engine = Engine::new(); + +engine.disable_symbol("if"); // disable the 'if' keyword +engine.disable_symbol("+="); // disable the '+=' operator + +// The following all return parse errors. + +engine.compile("let x = if true { 42 } else { 0 };")?; +// ^ missing ';' after statement end +// ^ 'if' is parsed as a variable name + +engine.compile("let x = 40 + 2; x += 1;")?; +// ^ '+=' is not recognized as an operator +// ^ other operators are not affected +``` diff --git a/doc/src/links.md b/doc/src/links.md index cf367206..7ef02a10 100644 --- a/doc/src/links.md +++ b/doc/src/links.md @@ -102,3 +102,5 @@ [`OptimizationLevel::Full`]: {{rootUrl}}/engine/optimize/optimize-levels.md [`OptimizationLevel::Simple`]: {{rootUrl}}/engine/optimize/optimize-levels.md [`OptimizationLevel::None`]: {{rootUrl}}/engine/optimize/optimize-levels.md + +[disable keywords and operators]: {{rootUrl}}/engine/disable.md diff --git a/src/api.rs b/src/api.rs index 49c7b486..5b6acdb5 100644 --- a/src/api.rs +++ b/src/api.rs @@ -118,8 +118,13 @@ impl Engine { /// ``` #[cfg(not(feature = "no_object"))] pub fn register_type_with_name(&mut self, name: &str) { + if self.type_names.is_none() { + self.type_names = Some(Default::default()); + } // Add the pretty-print type name into the map self.type_names + .as_mut() + .unwrap() .insert(type_name::().to_string(), name.to_string()); } @@ -548,7 +553,7 @@ impl Engine { scripts: &[&str], optimization_level: OptimizationLevel, ) -> Result { - let stream = lex(scripts, self.max_string_size); + let stream = lex(scripts, self.max_string_size, self.disable_tokens.as_ref()); self.parse(&mut stream.peekable(), scope, optimization_level) } @@ -673,7 +678,7 @@ impl Engine { // Trims the JSON string and add a '#' in front let scripts = ["#", json.trim()]; - let stream = lex(&scripts, self.max_string_size); + let stream = lex(&scripts, self.max_string_size, self.disable_tokens.as_ref()); let ast = self.parse_global_expr(&mut stream.peekable(), &scope, OptimizationLevel::None)?; @@ -754,7 +759,7 @@ impl Engine { script: &str, ) -> Result { let scripts = [script]; - let stream = lex(&scripts, self.max_string_size); + let stream = lex(&scripts, self.max_string_size, self.disable_tokens.as_ref()); { let mut peekable = stream.peekable(); self.parse_global_expr(&mut peekable, scope, self.optimization_level) @@ -909,7 +914,7 @@ impl Engine { script: &str, ) -> Result> { let scripts = [script]; - let stream = lex(&scripts, self.max_string_size); + let stream = lex(&scripts, self.max_string_size, self.disable_tokens.as_ref()); // No need to optimize a lone expression let ast = self.parse_global_expr(&mut stream.peekable(), scope, OptimizationLevel::None)?; @@ -1042,7 +1047,7 @@ impl Engine { script: &str, ) -> Result<(), Box> { let scripts = [script]; - let stream = lex(&scripts, self.max_string_size); + let stream = lex(&scripts, self.max_string_size, self.disable_tokens.as_ref()); let ast = self.parse(&mut stream.peekable(), scope, self.optimization_level)?; self.consume_ast_with_scope(scope, &ast) } diff --git a/src/engine.rs b/src/engine.rs index 2bea2ab7..9dff48e4 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -6,7 +6,7 @@ use crate::error::ParseErrorType; use crate::fn_native::{CallableFunction, Callback, FnCallArgs, FnPtr}; use crate::module::{resolvers, Module, ModuleRef, ModuleResolver}; use crate::optimize::OptimizationLevel; -use crate::packages::{Package, PackageLibrary, PackagesCollection, StandardPackage}; +use crate::packages::{Package, PackagesCollection, StandardPackage}; use crate::parser::{Expr, FnAccess, ImmutableString, ReturnType, ScriptFnDef, Stmt, AST, INT}; use crate::r#unsafe::unsafe_cast_var_name_to_lifetime; use crate::result::EvalAltResult; @@ -21,7 +21,7 @@ use crate::stdlib::{ any::{type_name, TypeId}, borrow::Cow, boxed::Box, - collections::HashMap, + collections::{HashMap, HashSet}, format, iter::{empty, once}, mem, @@ -266,7 +266,10 @@ pub struct Engine { pub(crate) module_resolver: Option>, /// A hashmap mapping type names to pretty-print names. - pub(crate) type_names: HashMap, + pub(crate) type_names: Option>, + + /// A hash-set containing tokens to disable. + pub(crate) disable_tokens: Option>, /// Callback closure for implementing the `print` command. pub(crate) print: Callback, @@ -313,7 +316,8 @@ impl Default for Engine { #[cfg(any(feature = "no_module", feature = "no_std", target_arch = "wasm32",))] module_resolver: None, - type_names: Default::default(), + type_names: None, + disable_tokens: None, // default print/debug implementations print: Box::new(default_print), @@ -492,7 +496,9 @@ impl Engine { global_module: Default::default(), module_resolver: None, - type_names: Default::default(), + type_names: None, + disable_tokens: None, + print: Box::new(|_| {}), debug: Box::new(|_| {}), progress: None, @@ -514,158 +520,6 @@ impl Engine { } } - /// Load a new package into the `Engine`. - /// - /// When searching for functions, packages loaded later are preferred. - /// In other words, loaded packages are searched in reverse order. - pub fn load_package(&mut self, package: PackageLibrary) { - // Push the package to the top - packages are searched in reverse order - self.packages.push(package); - } - - /// Load a new package into the `Engine`. - /// - /// When searching for functions, packages loaded later are preferred. - /// In other words, loaded packages are searched in reverse order. - pub fn load_packages(&mut self, package: PackageLibrary) { - // Push the package to the top - packages are searched in reverse order - self.packages.push(package); - } - - /// Control whether and how the `Engine` will optimize an AST after compilation. - /// - /// Not available under the `no_optimize` feature. - #[cfg(not(feature = "no_optimize"))] - pub fn set_optimization_level(&mut self, optimization_level: OptimizationLevel) { - self.optimization_level = optimization_level - } - - /// The current optimization level. - /// It controls whether and how the `Engine` will optimize an AST after compilation. - /// - /// Not available under the `no_optimize` feature. - #[cfg(not(feature = "no_optimize"))] - pub fn optimization_level(&self) -> OptimizationLevel { - self.optimization_level - } - - /// Set the maximum levels of function calls allowed for a script in order to avoid - /// infinite recursion and stack overflows. - #[cfg(not(feature = "unchecked"))] - pub fn set_max_call_levels(&mut self, levels: usize) { - self.max_call_stack_depth = levels - } - - /// The maximum levels of function calls allowed for a script. - #[cfg(not(feature = "unchecked"))] - pub fn max_call_levels(&self) -> usize { - self.max_call_stack_depth - } - - /// Set the maximum number of operations allowed for a script to run to avoid - /// consuming too much resources (0 for unlimited). - #[cfg(not(feature = "unchecked"))] - pub fn set_max_operations(&mut self, operations: u64) { - self.max_operations = if operations == u64::MAX { - 0 - } else { - operations - }; - } - - /// The maximum number of operations allowed for a script to run (0 for unlimited). - #[cfg(not(feature = "unchecked"))] - pub fn max_operations(&self) -> u64 { - self.max_operations - } - - /// Set the maximum number of imported modules allowed for a script. - #[cfg(not(feature = "unchecked"))] - pub fn set_max_modules(&mut self, modules: usize) { - self.max_modules = modules; - } - - /// The maximum number of imported modules allowed for a script. - #[cfg(not(feature = "unchecked"))] - pub fn max_modules(&self) -> usize { - self.max_modules - } - - /// Set the depth limits for expressions (0 for unlimited). - #[cfg(not(feature = "unchecked"))] - pub fn set_max_expr_depths(&mut self, max_expr_depth: usize, max_function_expr_depth: usize) { - self.max_expr_depth = if max_expr_depth == usize::MAX { - 0 - } else { - max_expr_depth - }; - self.max_function_expr_depth = if max_function_expr_depth == usize::MAX { - 0 - } else { - max_function_expr_depth - }; - } - - /// The depth limit for expressions (0 for unlimited). - #[cfg(not(feature = "unchecked"))] - pub fn max_expr_depth(&self) -> usize { - self.max_expr_depth - } - - /// The depth limit for expressions in functions (0 for unlimited). - #[cfg(not(feature = "unchecked"))] - pub fn max_function_expr_depth(&self) -> usize { - self.max_function_expr_depth - } - - /// Set the maximum length of strings (0 for unlimited). - #[cfg(not(feature = "unchecked"))] - pub fn set_max_string_size(&mut self, max_size: usize) { - self.max_string_size = if max_size == usize::MAX { 0 } else { max_size }; - } - - /// The maximum length of strings (0 for unlimited). - #[cfg(not(feature = "unchecked"))] - pub fn max_string_size(&self) -> usize { - self.max_string_size - } - - /// Set the maximum length of arrays (0 for unlimited). - #[cfg(not(feature = "unchecked"))] - #[cfg(not(feature = "no_index"))] - pub fn set_max_array_size(&mut self, max_size: usize) { - self.max_array_size = if max_size == usize::MAX { 0 } else { max_size }; - } - - /// The maximum length of arrays (0 for unlimited). - #[cfg(not(feature = "unchecked"))] - #[cfg(not(feature = "no_index"))] - pub fn max_array_size(&self) -> usize { - self.max_array_size - } - - /// Set the maximum length of object maps (0 for unlimited). - #[cfg(not(feature = "unchecked"))] - #[cfg(not(feature = "no_object"))] - pub fn set_max_map_size(&mut self, max_size: usize) { - self.max_map_size = if max_size == usize::MAX { 0 } else { max_size }; - } - - /// The maximum length of object maps (0 for unlimited). - #[cfg(not(feature = "unchecked"))] - #[cfg(not(feature = "no_object"))] - pub fn max_map_size(&self) -> usize { - self.max_map_size - } - - /// Set the module resolution service used by the `Engine`. - /// - /// Not available under the `no_module` feature. - #[cfg(not(feature = "no_module"))] - pub fn set_module_resolver(&mut self, resolver: Option) { - self.module_resolver = resolver.map(|f| Box::new(f) as Box); - } - /// Universal method for calling functions either registered with the `Engine` or written in Rhai. /// Position in `EvalAltResult` is None and must be set afterwards. /// @@ -2520,8 +2374,8 @@ impl Engine { /// Map a type_name into a pretty-print name pub(crate) fn map_type_name<'a>(&'a self, name: &'a str) -> &'a str { self.type_names - .get(name) - .map(String::as_str) + .as_ref() + .and_then(|t| t.get(name).map(String::as_str)) .unwrap_or(map_std_type_name(name)) } } diff --git a/src/error.rs b/src/error.rs index 61f2c58f..6b3308ba 100644 --- a/src/error.rs +++ b/src/error.rs @@ -5,7 +5,6 @@ use crate::token::Position; use crate::stdlib::{ boxed::Box, - char, error::Error, fmt, string::{String, ToString}, @@ -15,8 +14,8 @@ use crate::stdlib::{ #[derive(Debug, Eq, PartialEq, Clone, Hash)] #[non_exhaustive] pub enum LexError { - /// An unexpected character is encountered when tokenizing the script text. - UnexpectedChar(char), + /// An unexpected symbol is encountered when tokenizing the script text. + UnexpectedInput(String), /// A string literal is not terminated before a new-line or EOF. UnterminatedString, /// An identifier is in an invalid format. @@ -38,7 +37,7 @@ impl Error for LexError {} impl fmt::Display for LexError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::UnexpectedChar(c) => write!(f, "Unexpected '{}'", c), + Self::UnexpectedInput(s) => write!(f, "Unexpected '{}'", s), Self::MalformedEscapeSequence(s) => write!(f, "Invalid escape sequence: '{}'", s), Self::MalformedNumber(s) => write!(f, "Invalid number: '{}'", s), Self::MalformedChar(s) => write!(f, "Invalid character: '{}'", s), diff --git a/src/lib.rs b/src/lib.rs index 0b825113..c3563e38 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -89,6 +89,7 @@ mod result; mod scope; #[cfg(feature = "serde")] mod serde; +mod settings; mod stdlib; mod token; mod r#unsafe; diff --git a/src/parser.rs b/src/parser.rs index 37743e0f..797e28f4 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1272,7 +1272,7 @@ fn parse_map_literal( _ => { let (name, pos) = match input.next().unwrap() { (Token::Identifier(s), pos) => (s, pos), - (Token::StringConst(s), pos) => (s, pos), + (Token::StringConstant(s), pos) => (s, pos), (Token::LexError(err), pos) => return Err(err.into_err(pos)), (_, pos) if map.is_empty() => { return Err(PERR::MissingToken( @@ -1380,7 +1380,7 @@ fn parse_primary( #[cfg(not(feature = "no_float"))] Token::FloatConstant(x) => Expr::FloatConstant(Box::new((x, settings.pos))), Token::CharConstant(c) => Expr::CharConstant(Box::new((c, settings.pos))), - Token::StringConst(s) => Expr::StringConstant(Box::new((s.into(), settings.pos))), + Token::StringConstant(s) => Expr::StringConstant(Box::new((s.into(), settings.pos))), Token::Identifier(s) => { let index = state.find_var(&s); Expr::Variable(Box::new(((s, settings.pos), None, 0, index))) diff --git a/src/settings.rs b/src/settings.rs new file mode 100644 index 00000000..a4379d79 --- /dev/null +++ b/src/settings.rs @@ -0,0 +1,203 @@ +use crate::engine::Engine; +use crate::module::ModuleResolver; +use crate::optimize::OptimizationLevel; +use crate::packages::PackageLibrary; + +impl Engine { + /// Load a new package into the `Engine`. + /// + /// When searching for functions, packages loaded later are preferred. + /// In other words, loaded packages are searched in reverse order. + pub fn load_package(&mut self, package: PackageLibrary) { + // Push the package to the top - packages are searched in reverse order + self.packages.push(package); + } + + /// Load a new package into the `Engine`. + /// + /// When searching for functions, packages loaded later are preferred. + /// In other words, loaded packages are searched in reverse order. + pub fn load_packages(&mut self, package: PackageLibrary) { + // Push the package to the top - packages are searched in reverse order + self.packages.push(package); + } + + /// Control whether and how the `Engine` will optimize an AST after compilation. + /// + /// Not available under the `no_optimize` feature. + #[cfg(not(feature = "no_optimize"))] + pub fn set_optimization_level(&mut self, optimization_level: OptimizationLevel) { + self.optimization_level = optimization_level + } + + /// The current optimization level. + /// It controls whether and how the `Engine` will optimize an AST after compilation. + /// + /// Not available under the `no_optimize` feature. + #[cfg(not(feature = "no_optimize"))] + pub fn optimization_level(&self) -> OptimizationLevel { + self.optimization_level + } + + /// Set the maximum levels of function calls allowed for a script in order to avoid + /// infinite recursion and stack overflows. + #[cfg(not(feature = "unchecked"))] + pub fn set_max_call_levels(&mut self, levels: usize) { + self.max_call_stack_depth = levels + } + + /// The maximum levels of function calls allowed for a script. + #[cfg(not(feature = "unchecked"))] + pub fn max_call_levels(&self) -> usize { + self.max_call_stack_depth + } + + /// Set the maximum number of operations allowed for a script to run to avoid + /// consuming too much resources (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + pub fn set_max_operations(&mut self, operations: u64) { + self.max_operations = if operations == u64::MAX { + 0 + } else { + operations + }; + } + + /// The maximum number of operations allowed for a script to run (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + pub fn max_operations(&self) -> u64 { + self.max_operations + } + + /// Set the maximum number of imported modules allowed for a script. + #[cfg(not(feature = "unchecked"))] + pub fn set_max_modules(&mut self, modules: usize) { + self.max_modules = modules; + } + + /// The maximum number of imported modules allowed for a script. + #[cfg(not(feature = "unchecked"))] + pub fn max_modules(&self) -> usize { + self.max_modules + } + + /// Set the depth limits for expressions (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + pub fn set_max_expr_depths(&mut self, max_expr_depth: usize, max_function_expr_depth: usize) { + self.max_expr_depth = if max_expr_depth == usize::MAX { + 0 + } else { + max_expr_depth + }; + self.max_function_expr_depth = if max_function_expr_depth == usize::MAX { + 0 + } else { + max_function_expr_depth + }; + } + + /// The depth limit for expressions (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + pub fn max_expr_depth(&self) -> usize { + self.max_expr_depth + } + + /// The depth limit for expressions in functions (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + pub fn max_function_expr_depth(&self) -> usize { + self.max_function_expr_depth + } + + /// Set the maximum length of strings (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + pub fn set_max_string_size(&mut self, max_size: usize) { + self.max_string_size = if max_size == usize::MAX { 0 } else { max_size }; + } + + /// The maximum length of strings (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + pub fn max_string_size(&self) -> usize { + self.max_string_size + } + + /// Set the maximum length of arrays (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_index"))] + pub fn set_max_array_size(&mut self, max_size: usize) { + self.max_array_size = if max_size == usize::MAX { 0 } else { max_size }; + } + + /// The maximum length of arrays (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_index"))] + pub fn max_array_size(&self) -> usize { + self.max_array_size + } + + /// Set the maximum length of object maps (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_object"))] + pub fn set_max_map_size(&mut self, max_size: usize) { + self.max_map_size = if max_size == usize::MAX { 0 } else { max_size }; + } + + /// The maximum length of object maps (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_object"))] + pub fn max_map_size(&self) -> usize { + self.max_map_size + } + + /// Set the module resolution service used by the `Engine`. + /// + /// Not available under the `no_module` feature. + #[cfg(not(feature = "no_module"))] + pub fn set_module_resolver(&mut self, resolver: Option) { + self.module_resolver = resolver.map(|f| Box::new(f) as Box); + } + + /// Disable a particular keyword or operator in the language. + /// + /// # Examples + /// + /// The following will raise an error during parsing because the `if` keyword is disabled + /// and is recognized as a variable name! + /// + /// ```rust,should_panic + /// # fn main() -> Result<(), rhai::ParseError> { + /// use rhai::Engine; + /// + /// let mut engine = Engine::new(); + /// + /// engine.disable_symbol("if"); // disable the 'if' keyword + /// + /// engine.compile("let x = if true { 42 } else { 0 };")?; + /// // ^ 'if' is parsed as a variable name + /// // ^ missing ';' after statement end + /// # Ok(()) + /// # } + /// ``` + /// + /// The following will raise an error during parsing because the `+=` operator is disabled. + /// + /// ```rust,should_panic + /// # fn main() -> Result<(), rhai::ParseError> { + /// use rhai::Engine; + /// + /// let mut engine = Engine::new(); + /// + /// engine.disable_symbol("+="); // disable the '+=' operator + /// + /// engine.compile("let x = 42; x += 1;")?; + /// // ^ unknown operator + /// # Ok(()) + /// # } + /// ``` + pub fn disable_symbol(&mut self, symbol: &str) { + if self.disable_tokens.is_none() { + self.disable_tokens = Some(Default::default()); + } + + self.disable_tokens.as_mut().unwrap().insert(symbol.into()); + } +} diff --git a/src/token.rs b/src/token.rs index 4b19ab49..fc527917 100644 --- a/src/token.rs +++ b/src/token.rs @@ -10,7 +10,9 @@ use crate::parser::FLOAT; use crate::stdlib::{ borrow::Cow, boxed::Box, - char, fmt, + char, + collections::HashSet, + fmt, iter::Peekable, str::{Chars, FromStr}, string::{String, ToString}, @@ -19,7 +21,7 @@ use crate::stdlib::{ type LERR = LexError; -pub type TokenStream<'a> = Peekable>; +pub type TokenStream<'a, 't> = Peekable>; /// A location (line number + character position) in the input script. /// @@ -137,7 +139,7 @@ pub enum Token { FloatConstant(FLOAT), Identifier(String), CharConstant(char), - StringConst(String), + StringConstant(String), LeftBrace, RightBrace, LeftParen, @@ -226,8 +228,8 @@ impl Token { CharConstant(c) => c.to_string().into(), LexError(err) => err.to_string().into(), - token => (match token { - StringConst(_) => "string", + token => match token { + StringConstant(_) => "string", LeftBrace => "{", RightBrace => "}", LeftParen => "(", @@ -292,13 +294,15 @@ impl Token { PowerOfAssign => "~=", #[cfg(not(feature = "no_function"))] Private => "private", + #[cfg(not(feature = "no_module"))] Import => "import", #[cfg(not(feature = "no_module"))] Export => "export", + #[cfg(not(feature = "no_module"))] As => "as", EOF => "{EOF}", _ => unreachable!("operator should be match in outer scope"), - }) + } .into(), } } @@ -422,6 +426,41 @@ impl Token { _ => false, } } + + /// Is this token an operator? + pub fn is_operator(&self) -> bool { + use Token::*; + + match self { + LeftBrace | RightBrace | LeftParen | RightParen | LeftBracket | RightBracket | Plus + | UnaryPlus | Minus | UnaryMinus | Multiply | Divide | Modulo | PowerOf | LeftShift + | RightShift | SemiColon | Colon | DoubleColon | Comma | Period | MapStart | Equals + | LessThan | GreaterThan | LessThanEqualsTo | GreaterThanEqualsTo | EqualsTo + | NotEqualsTo | Bang | Pipe | Or | XOr | Ampersand | And | PlusAssign | MinusAssign + | MultiplyAssign | DivideAssign | LeftShiftAssign | RightShiftAssign | AndAssign + | OrAssign | XOrAssign | ModuloAssign | PowerOfAssign => true, + + _ => false, + } + } + + /// Is this token a keyword? + pub fn is_keyword(&self) -> bool { + use Token::*; + + match self { + #[cfg(not(feature = "no_function"))] + Fn | Private => true, + + #[cfg(not(feature = "no_module"))] + Import | Export | As => true, + + True | False | Let | Const | If | Else | While | Loop | For | In | Continue | Break + | Return | Throw => true, + + _ => false, + } + } } impl From for String { @@ -431,7 +470,7 @@ impl From for String { } /// State of the tokenizer. -#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Default)] +#[derive(Debug, Clone, Eq, PartialEq, Default)] pub struct TokenizeState { /// Maximum length of a string (0 = unlimited). pub max_string_size: usize, @@ -644,7 +683,7 @@ pub fn get_next_token( let result = get_next_token_inner(stream, state, pos); // Save the last token's state - if let Some((token, _)) = &result { + if let Some((ref token, _)) = result { state.non_unary = !token.is_next_unary(); } @@ -848,7 +887,7 @@ fn get_next_token_inner( ('"', _) => return parse_string_literal(stream, state, pos, '"') .map_or_else( |err| Some((Token::LexError(Box::new(err.0)), err.1)), - |out| Some((Token::StringConst(out), start_pos)), + |out| Some((Token::StringConstant(out), start_pos)), ), // ' - character literal @@ -1118,7 +1157,7 @@ fn get_next_token_inner( ('\0', _) => unreachable!(), (ch, _) if ch.is_whitespace() => (), - (ch, _) => return Some((Token::LexError(Box::new(LERR::UnexpectedChar(ch))), start_pos)), + (ch, _) => return Some((Token::LexError(Box::new(LERR::UnexpectedInput(ch.to_string()))), start_pos)), } } @@ -1172,7 +1211,9 @@ impl InputStream for MultiInputsStream<'_> { } /// An iterator on a `Token` stream. -pub struct TokenIterator<'a> { +pub struct TokenIterator<'a, 't> { + /// Disable certain tokens. + pub disable_tokens: Option<&'t HashSet>, /// Current state. state: TokenizeState, /// Current position. @@ -1181,17 +1222,49 @@ pub struct TokenIterator<'a> { stream: MultiInputsStream<'a>, } -impl<'a> Iterator for TokenIterator<'a> { +impl<'a> Iterator for TokenIterator<'a, '_> { type Item = (Token, Position); fn next(&mut self) -> Option { - get_next_token(&mut self.stream, &mut self.state, &mut self.pos) + match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) { + None => None, + r @ Some(_) if self.disable_tokens.is_none() => r, + Some((token, pos)) + if token.is_operator() + && self + .disable_tokens + .unwrap() + .contains(token.syntax().as_ref()) => + { + // Convert disallowed operators into lex errors + Some(( + Token::LexError(Box::new(LexError::UnexpectedInput(token.syntax().into()))), + pos, + )) + } + Some((token, pos)) + if token.is_keyword() + && self + .disable_tokens + .unwrap() + .contains(token.syntax().as_ref()) => + { + // Convert disallowed keywords into identifiers + Some((Token::Identifier(token.syntax().into()), pos)) + } + r => r, + } } } /// Tokenize an input text stream. -pub fn lex<'a>(input: &'a [&'a str], max_string_size: usize) -> TokenIterator<'a> { +pub fn lex<'a, 't>( + input: &'a [&'a str], + max_string_size: usize, + disable_tokens: Option<&'t HashSet>, +) -> TokenIterator<'a, 't> { TokenIterator { + disable_tokens, state: TokenizeState { max_string_size, non_unary: false, diff --git a/tests/tokens.rs b/tests/tokens.rs new file mode 100644 index 00000000..5fa0b9b6 --- /dev/null +++ b/tests/tokens.rs @@ -0,0 +1,20 @@ +use rhai::{Engine, ParseErrorType}; + +#[test] +fn test_tokens_disabled() { + let mut engine = Engine::new(); + + engine.disable_symbol("if"); // disable the 'if' keyword + + assert!(matches!( + *engine.compile("let x = if true { 42 } else { 0 };").expect_err("should error").0, + ParseErrorType::MissingToken(ref token, _) if token == ";" + )); + + engine.disable_symbol("+="); // disable the '+=' operator + + assert!(matches!( + *engine.compile("let x = 40 + 2; x += 1;").expect_err("should error").0, + ParseErrorType::BadInput(ref s) if s == "Unexpected '+='" + )); +}