From bb691a03138b0a9f920e765650836c9ec2d70d7c Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Sun, 14 Jun 2020 14:25:47 +0800 Subject: [PATCH 1/5] Add maximum data size limits. --- README.md | 122 ++++++++++++++++++--- RELEASES.md | 1 + src/api.rs | 10 +- src/engine.rs | 162 ++++++++++++++++++++++++---- src/error.rs | 17 ++- src/parser.rs | 254 ++++++++++++++++++++++++++------------------ src/result.rs | 8 ++ src/token.rs | 58 ++++++---- tests/data_size.rs | 234 ++++++++++++++++++++++++++++++++++++++++ tests/modules.rs | 2 +- tests/operations.rs | 17 +++ 11 files changed, 725 insertions(+), 160 deletions(-) create mode 100644 tests/data_size.rs diff --git a/README.md b/README.md index 57c56afa..c0931fa6 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ to add scripting to any application. Features -------- -* Easy-to-use language similar to JS+Rust with dynamic typing but _no_ garbage collector. +* Easy-to-use language similar to JS+Rust with dynamic typing. * Tight integration with native Rust [functions](#working-with-functions) and [types](#custom-types-and-methods), including [getters/setters](#getters-and-setters), [methods](#members-and-methods) and [indexers](#indexers). * Freely pass Rust variables/constants into a script via an external [`Scope`]. @@ -25,7 +25,7 @@ Features one single source file, all with names starting with `"unsafe_"`). * Re-entrant scripting [`Engine`] can be made `Send + Sync` (via the [`sync`] feature). * Sand-boxed - the scripting [`Engine`], if declared immutable, cannot mutate the containing environment unless explicitly permitted (e.g. via a `RefCell`). -* Rugged (protection against [stack-overflow](#maximum-call-stack-depth) and [runaway scripts](#maximum-number-of-operations) etc.). +* Rugged - protection against malicious attacks (such as [stack-overflow](#maximum-call-stack-depth), [over-sized data](#maximum-length-of-strings), and [runaway scripts](#maximum-number-of-operations) etc.) that may come from untrusted third-party user-land scripts. * Track script evaluation [progress](#tracking-progress-and-force-terminate-script-run) and manually terminate a script run. * [`no-std`](#optional-features) support. * [Function overloading](#function-overloading). @@ -1191,13 +1191,16 @@ fn main() -> Result<(), Box> Engine configuration options --------------------------- -| Method | Description | -| ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------- | -| `set_optimization_level` | Set the amount of script _optimizations_ performed. See [script optimization]. | -| `set_max_expr_depths` | Set the maximum nesting levels of an expression/statement. See [maximum statement depth](#maximum-statement-depth). | -| `set_max_call_levels` | Set the maximum number of function call levels (default 50) to avoid infinite recursion. See [maximum call stack depth](#maximum-call-stack-depth). | -| `set_max_operations` | Set the maximum number of _operations_ that a script is allowed to consume. See [maximum number of operations](#maximum-number-of-operations). | -| `set_max_modules` | Set the maximum number of [modules] that a script is allowed to load. See [maximum number of modules](#maximum-number-of-modules). | +| Method | Not available under | Description | +| ------------------------ | ---------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- | +| `set_optimization_level` | [`no_optimize`] | Set the amount of script _optimizations_ performed. See [script optimization]. | +| `set_max_expr_depths` | [`unchecked`] | Set the maximum nesting levels of an expression/statement. See [maximum statement depth](#maximum-statement-depth). | +| `set_max_call_levels` | [`unchecked`] | Set the maximum number of function call levels (default 50) to avoid infinite recursion. See [maximum call stack depth](#maximum-call-stack-depth). | +| `set_max_operations` | [`unchecked`] | Set the maximum number of _operations_ that a script is allowed to consume. See [maximum number of operations](#maximum-number-of-operations). | +| `set_max_modules` | [`unchecked`] | Set the maximum number of [modules] that a script is allowed to load. See [maximum number of modules](#maximum-number-of-modules). | +| `set_max_string_size` | [`unchecked`] | Set the maximum length (in UTF-8 bytes) for [strings]. See [maximum length of strings](#maximum-length-of-strings). | +| `set_max_array_size` | [`unchecked`], [`no_index`] | Set the maximum size for [arrays]. See [maximum size of arrays](#maximum-size-of-arrays). | +| `set_max_map_size` | [`unchecked`], [`no_object`] | Set the maximum number of properties for [object maps]. See [maximum size of object maps](#maximum-size-of-object-maps). | ------- @@ -1498,6 +1501,9 @@ record == "Bob X. Davis: age 42 ❤\n"; 'C' in record == false; ``` +The maximum allowed length of a string can be controlled via `Engine::set_max_string_size` +(see [maximum length of strings](#maximum-length-of-strings)). + ### Built-in functions The following standard methods (mostly defined in the [`MoreStringPackage`](#packages) but excluded if using a [raw `Engine`]) operate on strings: @@ -1673,6 +1679,9 @@ y.len == 0; engine.register_fn("push", |list: &mut Array, item: MyType| list.push(Box::new(item)) ); ``` +The maximum allowed size of an array can be controlled via `Engine::set_max_array_size` +(see [maximum size of arrays](#maximum-size-of-arrays)). + Object maps ----------- @@ -1776,6 +1785,9 @@ y.clear(); // empty the object map y.len() == 0; ``` +The maximum allowed size of an object map can be controlled via `Engine::set_max_map_size` +(see [maximum size of object maps](#maximum-size-of-object-maps)). + ### Parsing from JSON The syntax for an object map is extremely similar to JSON, with the exception of `null` values which can @@ -2439,7 +2451,7 @@ a script so that it does not consume more resources that it is allowed to. The most important resources to watch out for are: -* **Memory**: A malicous script may continuously grow an [array] or [object map] until all memory is consumed. +* **Memory**: A malicous script may continuously grow a [string], an [array] or [object map] until all memory is consumed. It may also create a large [array] or [object map] literal that exhausts all memory during parsing. * **CPU**: A malicous script may run an infinite tight loop that consumes all CPU cycles. * **Time**: A malicous script may run indefinitely, thereby blocking the calling system which is waiting for a result. @@ -2455,6 +2467,89 @@ The most important resources to watch out for are: * **Data**: A malicous script may attempt to read from and/or write to data that it does not own. If this happens, it is a severe security breach and may put the entire system at risk. +### Maximum length of strings + +Rhai by default does not limit how long a [string] can be. +This can be changed via the `Engine::set_max_string_size` method, with zero being unlimited (the default). + +```rust +let mut engine = Engine::new(); + +engine.set_max_string_size(500); // allow strings only up to 500 bytes long (in UTF-8 format) + +engine.set_max_string_size(0); // allow unlimited string length +``` + +A script attempting to create a string literal longer than the maximum will terminate with a parse error. +Any script operation that produces a string longer than the maximum also terminates the script with an error result. +This check can be disabled via the [`unchecked`] feature for higher performance +(but higher risks as well). + +Be conservative when setting a maximum limit and always consider the fact that a registered function may grow +a string's length without Rhai noticing until the very end. For instance, the built-in '`+`' operator for strings +concatenates two strings together to form one longer string; if both strings are _slightly_ below the maximum +length limit, the resultant string may be almost _twice_ the maximum length. The '`pad`' function grows a string +to a specified length which may be longer than the allowed maximum +(to trap this risk, register a custom '`pad`' function that checks the arguments). + +### Maximum size of arrays + +Rhai by default does not limit how large an [array] can be. +This can be changed via the `Engine::set_max_array_size` method, with zero being unlimited (the default). + +```rust +let mut engine = Engine::new(); + +engine.set_max_array_size(500); // allow arrays only up to 500 items + +engine.set_max_array_size(0); // allow unlimited arrays +``` + +A script attempting to create an array literal larger than the maximum will terminate with a parse error. +Any script operation that produces an array larger than the maximum also terminates the script with an error result. +This check can be disabled via the [`unchecked`] feature for higher performance +(but higher risks as well). + +Be conservative when setting a maximum limit and always consider the fact that a registered function may grow +an array's size without Rhai noticing until the very end. +For instance, the built-in '`+`' operator for arrays concatenates two arrays together to form one larger array; +if both arrays are _slightly_ below the maximum size limit, the resultant array may be almost _twice_ the maximum size. +The '`pad`' function grows an array to a specified size which may be larger than the allowed maximum +(to trap this risk, register a custom '`pad`' function that checks the arguments). + +As a malicious script may create a deeply-nested array which consumes huge amounts of memory while each individual +array still stays under the maximum size limit, Rhai also recursively adds up the sizes of all strings, arrays +and object maps contained within each array to make sure that the _aggregate_ sizes of none of these data structures +exceed their respective maximum size limits (if any). + +### Maximum size of object maps + +Rhai by default does not limit how large (i.e. the number of properties) an [object map] can be. +This can be changed via the `Engine::set_max_map_size` method, with zero being unlimited (the default). + +```rust +let mut engine = Engine::new(); + +engine.set_max_map_size(500); // allow object maps with only up to 500 properties + +engine.set_max_map_size(0); // allow unlimited object maps +``` + +A script attempting to create an object map literal with more properties than the maximum will terminate with a parse error. +Any script operation that produces an object map with more properties than the maximum also terminates the script with an error result. +This check can be disabled via the [`unchecked`] feature for higher performance +(but higher risks as well). + +Be conservative when setting a maximum limit and always consider the fact that a registered function may grow +an object map's size without Rhai noticing until the very end. For instance, the built-in '`+`' operator for object maps +concatenates two object maps together to form one larger object map; if both object maps are _slightly_ below the maximum +size limit, the resultant object map may be almost _twice_ the maximum size. + +As a malicious script may create a deeply-nested object map which consumes huge amounts of memory while each individual +object map still stays under the maximum size limit, Rhai also recursively adds up the sizes of all strings, arrays +and object maps contained within each object map to make sure that the _aggregate_ sizes of none of these data structures +exceed their respective maximum size limits (if any). + ### Maximum number of operations Rhai by default does not limit how much time or CPU a script consumes. @@ -2516,14 +2611,17 @@ total number of operations for a typical run. ### Maximum number of modules Rhai by default does not limit how many [modules] can be loaded via [`import`] statements. -This can be changed via the `Engine::set_max_modules` method, with zero being unlimited (the default). +This can be changed via the `Engine::set_max_modules` method. Notice that setting the maximum number +of modules to zero does _not_ indicate unlimited modules, but disallows loading any module altogether. ```rust let mut engine = Engine::new(); engine.set_max_modules(5); // allow loading only up to 5 modules -engine.set_max_modules(0); // allow unlimited modules +engine.set_max_modules(0); // disallow loading any module (maximum = zero) + +engine.set_max_modules(1000); // set to a large number for effectively unlimited modules ``` A script attempting to load more than the maximum number of modules will terminate with an error result. diff --git a/RELEASES.md b/RELEASES.md index 6667774c..b19caf44 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -21,6 +21,7 @@ New features * Indexers are now split into getters and setters (which now support updates). The API is split into `Engine::register_indexer_get` and `Engine::register_indexer_set` with `Engine::register_indexer_get_set` being a shorthand. Similarly, `Module::set_indexer_get_fn` and `Module::set_indexer_set_fn` are added. * `Engine:register_fn` and `Engine:register_result_fn` accepts functions that take parameters of type `&str` (immutable string slice), which maps directly to `ImmutableString`. This is to avoid needing wrappers for functions taking string parameters. +* Set maximum limit on data sizes: `Engine::set_max_string_size`, `Engine::set_max_array_size` and `Engine::set_max_map_size`. Version 0.15.0 diff --git a/src/api.rs b/src/api.rs index 8e2af013..56adc08c 100644 --- a/src/api.rs +++ b/src/api.rs @@ -547,7 +547,7 @@ impl Engine { scripts: &[&str], optimization_level: OptimizationLevel, ) -> Result { - let stream = lex(scripts); + let stream = lex(scripts, self.max_string_size); self.parse(&mut stream.peekable(), scope, optimization_level) } @@ -669,7 +669,7 @@ impl Engine { // Trims the JSON string and add a '#' in front let scripts = ["#", json.trim()]; - let stream = lex(&scripts); + let stream = lex(&scripts, self.max_string_size); let ast = self.parse_global_expr(&mut stream.peekable(), &scope, OptimizationLevel::None)?; @@ -750,7 +750,7 @@ impl Engine { script: &str, ) -> Result { let scripts = [script]; - let stream = lex(&scripts); + let stream = lex(&scripts, self.max_string_size); { let mut peekable = stream.peekable(); @@ -904,7 +904,7 @@ impl Engine { script: &str, ) -> Result> { let scripts = [script]; - let stream = lex(&scripts); + let stream = lex(&scripts, self.max_string_size); let ast = self.parse_global_expr( &mut stream.peekable(), @@ -1034,7 +1034,7 @@ impl Engine { script: &str, ) -> Result<(), Box> { let scripts = [script]; - let stream = lex(&scripts); + let stream = lex(&scripts, self.max_string_size); let ast = self.parse(&mut stream.peekable(), scope, self.optimization_level)?; self.consume_ast_with_scope(scope, &ast) diff --git a/src/engine.rs b/src/engine.rs index b3d0536d..83878c3c 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -182,7 +182,7 @@ pub struct State { /// Number of operations performed. pub operations: u64, /// Number of modules loaded. - pub modules: u64, + pub modules: usize, } impl State { @@ -261,7 +261,13 @@ pub struct Engine { /// Maximum number of operations allowed to run. pub(crate) max_operations: u64, /// Maximum number of modules allowed to load. - pub(crate) max_modules: u64, + pub(crate) max_modules: usize, + /// Maximum length of a string. + pub(crate) max_string_size: usize, + /// Maximum length of an array. + pub(crate) max_array_size: usize, + /// Maximum number of properties in a map. + pub(crate) max_map_size: usize, } impl Default for Engine { @@ -296,8 +302,11 @@ impl Default for Engine { max_call_stack_depth: MAX_CALL_STACK_DEPTH, max_expr_depth: MAX_EXPR_DEPTH, max_function_expr_depth: MAX_FUNCTION_EXPR_DEPTH, - max_operations: u64::MAX, - max_modules: u64::MAX, + max_operations: 0, + max_modules: usize::MAX, + max_string_size: 0, + max_array_size: 0, + max_map_size: 0, }; engine.load_package(StandardPackage::new().get()); @@ -440,8 +449,11 @@ impl Engine { max_call_stack_depth: MAX_CALL_STACK_DEPTH, max_expr_depth: MAX_EXPR_DEPTH, max_function_expr_depth: MAX_FUNCTION_EXPR_DEPTH, - max_operations: u64::MAX, - max_modules: u64::MAX, + max_operations: 0, + max_modules: usize::MAX, + max_string_size: 0, + max_array_size: 0, + max_map_size: 0, } } @@ -482,26 +494,42 @@ impl Engine { /// consuming too much resources (0 for unlimited). #[cfg(not(feature = "unchecked"))] pub fn set_max_operations(&mut self, operations: u64) { - self.max_operations = if operations == 0 { - u64::MAX - } else { - operations - }; + self.max_operations = operations; } - /// Set the maximum number of imported modules allowed for a script (0 for unlimited). + /// Set the maximum number of imported modules allowed for a script. #[cfg(not(feature = "unchecked"))] - pub fn set_max_modules(&mut self, modules: u64) { - self.max_modules = if modules == 0 { u64::MAX } else { modules }; + pub fn set_max_modules(&mut self, modules: usize) { + self.max_modules = modules; } - /// Set the depth limits for expressions/statements. + /// Set the depth limits for expressions/statements (0 for unlimited). #[cfg(not(feature = "unchecked"))] pub fn set_max_expr_depths(&mut self, max_expr_depth: usize, max_function_expr_depth: usize) { self.max_expr_depth = max_expr_depth; self.max_function_expr_depth = max_function_expr_depth; } + /// Set the maximum length of strings (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + pub fn set_max_string_size(&mut self, max_size: usize) { + self.max_string_size = max_size; + } + + /// Set the maximum length of arrays (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_index"))] + pub fn set_max_array_size(&mut self, max_size: usize) { + self.max_array_size = max_size; + } + + /// Set the maximum length of object maps (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_object"))] + pub fn set_max_map_size(&mut self, max_size: usize) { + self.max_map_size = max_size; + } + /// Set the module resolution service used by the `Engine`. /// /// Not available under the `no_module` feature. @@ -1395,7 +1423,7 @@ impl Engine { self.inc_operations(state) .map_err(|err| EvalAltResult::new_position(err, expr.position()))?; - match expr { + let result = match expr { Expr::Expr(x) => self.eval_expr(scope, state, lib, x.as_ref(), level), Expr::IntegerConstant(x) => Ok(x.0.into()), @@ -1731,7 +1759,13 @@ impl Engine { Expr::Unit(_) => Ok(().into()), _ => unreachable!(), + }; + + if let Ok(val) = &result { + self.check_data_size(val)?; } + + result } /// Evaluate a statement @@ -1746,7 +1780,7 @@ impl Engine { self.inc_operations(state) .map_err(|err| EvalAltResult::new_position(err, stmt.position()))?; - match stmt { + let result = match stmt { // No-op Stmt::Noop(_) => Ok(Default::default()), @@ -1998,6 +2032,98 @@ impl Engine { } Ok(Default::default()) } + }; + + if let Ok(val) = &result { + self.check_data_size(val)?; + } + + result + } + + /// Check a `Dynamic` value to ensure that its size is within allowable limit. + fn check_data_size(&self, value: &Dynamic) -> Result<(), Box> { + #[cfg(feature = "unchecked")] + return Ok(()); + + if self.max_string_size + self.max_array_size + self.max_map_size == 0 { + return Ok(()); + } + + // Recursively calculate the size of a value (especially `Array` and `Map`) + fn calc_size(value: &Dynamic) -> (usize, usize, usize) { + match value { + #[cfg(not(feature = "no_index"))] + Dynamic(Union::Array(arr)) => { + let mut arrays = 0; + let mut maps = 0; + + arr.iter().for_each(|value| match value { + Dynamic(Union::Array(_)) | Dynamic(Union::Map(_)) => { + let (a, m, _) = calc_size(value); + arrays += a; + maps += m; + } + _ => arrays += 1, + }); + + (arrays, maps, 0) + } + #[cfg(not(feature = "no_object"))] + Dynamic(Union::Map(map)) => { + let mut arrays = 0; + let mut maps = 0; + + map.values().for_each(|value| match value { + Dynamic(Union::Array(_)) | Dynamic(Union::Map(_)) => { + let (a, m, _) = calc_size(value); + arrays += a; + maps += m; + } + _ => maps += 1, + }); + + (arrays, maps, 0) + } + Dynamic(Union::Str(s)) => (0, 0, s.len()), + _ => (0, 0, 0), + } + } + + match value { + Dynamic(Union::Str(_)) if self.max_string_size > 0 => (), + #[cfg(not(feature = "no_index"))] + Dynamic(Union::Array(_)) if self.max_array_size > 0 => (), + #[cfg(not(feature = "no_object"))] + Dynamic(Union::Map(_)) if self.max_map_size > 0 => (), + _ => return Ok(()), + }; + + let (arr, map, s) = calc_size(value); + + if s > self.max_string_size { + Err(Box::new(EvalAltResult::ErrorDataTooLarge( + "Length of string".to_string(), + self.max_string_size, + s, + Position::none(), + ))) + } else if arr > self.max_array_size { + Err(Box::new(EvalAltResult::ErrorDataTooLarge( + "Length of array".to_string(), + self.max_array_size, + arr, + Position::none(), + ))) + } else if map > self.max_map_size { + Err(Box::new(EvalAltResult::ErrorDataTooLarge( + "Number of properties in object map".to_string(), + self.max_map_size, + map, + Position::none(), + ))) + } else { + Ok(()) } } @@ -2009,7 +2135,7 @@ impl Engine { #[cfg(not(feature = "unchecked"))] { // Guard against too many operations - if state.operations > self.max_operations { + if self.max_operations > 0 && state.operations > self.max_operations { return Err(Box::new(EvalAltResult::ErrorTooManyOperations( Position::none(), ))); diff --git a/src/error.rs b/src/error.rs index dd1be4ef..3d4c47c7 100644 --- a/src/error.rs +++ b/src/error.rs @@ -12,6 +12,8 @@ pub enum LexError { UnexpectedChar(char), /// A string literal is not terminated before a new-line or EOF. UnterminatedString, + /// An identifier is in an invalid format. + StringTooLong(usize), /// An string/character/numeric escape sequence is in an invalid format. MalformedEscapeSequence(String), /// An numeric literal is in an invalid format. @@ -35,6 +37,11 @@ impl fmt::Display for LexError { Self::MalformedChar(s) => write!(f, "Invalid character: '{}'", s), Self::MalformedIdentifier(s) => write!(f, "Variable name is not proper: '{}'", s), Self::UnterminatedString => write!(f, "Open string is not terminated"), + Self::StringTooLong(max) => write!( + f, + "Length of string literal exceeds the maximum limit ({})", + max + ), Self::ImproperKeyword(s) => write!(f, "{}", s), } } @@ -109,12 +116,16 @@ pub enum ParseErrorType { WrongExport, /// Assignment to a copy of a value. AssignmentToCopy, - /// Assignment to an a constant variable. + /// Assignment to an a constant variable. Wrapped value is the constant variable name. AssignmentToConstant(String), /// Expression exceeding the maximum levels of complexity. /// /// Never appears under the `unchecked` feature. ExprTooDeep, + /// Literal exceeding the maximum size. Wrapped values are the data type name and the maximum size. + /// + /// Never appears under the `unchecked` feature. + LiteralTooLarge(String, usize), /// Break statement not inside a loop. LoopBreak, } @@ -149,6 +160,7 @@ impl ParseErrorType { Self::AssignmentToCopy => "Only a copy of the value is change with this assignment", Self::AssignmentToConstant(_) => "Cannot assign to a constant value", Self::ExprTooDeep => "Expression exceeds maximum complexity", + Self::LiteralTooLarge(_, _) => "Literal exceeds maximum limit", Self::LoopBreak => "Break statement should only be used inside a loop" } } @@ -197,6 +209,9 @@ impl fmt::Display for ParseErrorType { Self::AssignmentToConstant(s) if s.is_empty() => write!(f, "{}", self.desc()), Self::AssignmentToConstant(s) => write!(f, "Cannot assign to constant '{}'", s), + Self::LiteralTooLarge(typ, max) => { + write!(f, "{} exceeds the maximum limit ({})", typ, max) + } _ => write!(f, "{}", self.desc()), } } diff --git a/src/parser.rs b/src/parser.rs index 2eed69b1..ec99adaf 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -201,13 +201,27 @@ struct ParseState { stack: Vec<(String, ScopeEntryType)>, /// Maximum levels of expression nesting. max_expr_depth: usize, + /// Maximum length of a string. + pub max_string_size: usize, + /// Maximum length of an array. + pub max_array_size: usize, + /// Maximum number of properties in a map. + pub max_map_size: usize, } impl ParseState { /// Create a new `ParseState`. - pub fn new(max_expr_depth: usize) -> Self { + pub fn new( + max_expr_depth: usize, + max_string_size: usize, + max_array_size: usize, + max_map_size: usize, + ) -> Self { Self { max_expr_depth, + max_string_size, + max_array_size, + max_map_size, ..Default::default() } } @@ -284,7 +298,9 @@ impl ParseSettings { } /// Make sure that the current level of expression nesting is within the maximum limit. pub fn ensure_level_within_max_limit(&self, limit: usize) -> Result<(), ParseError> { - if self.level > limit { + if limit == 0 { + Ok(()) + } else if self.level > limit { Err(PERR::ExprTooDeep.into_err(self.pos)) } else { Ok(()) @@ -610,7 +626,7 @@ impl Expr { Self::Variable(_) => true, - expr => expr.is_constant(), + _ => self.is_constant(), } } @@ -1070,6 +1086,14 @@ fn parse_array_literal( if !match_token(input, Token::RightBracket)? { while !input.peek().unwrap().0.is_eof() { + if state.max_array_size > 0 && arr.len() >= state.max_array_size { + return Err(PERR::LiteralTooLarge( + "Size of array literal".to_string(), + state.max_array_size, + ) + .into_err(input.peek().unwrap().1)); + } + let expr = parse_expr(input, state, settings.level_up())?; arr.push(expr); @@ -1155,8 +1179,15 @@ fn parse_map_literal( } }; - let expr = parse_expr(input, state, settings.level_up())?; + if state.max_map_size > 0 && map.len() >= state.max_map_size { + return Err(PERR::LiteralTooLarge( + "Number of properties in object map literal".to_string(), + state.max_map_size, + ) + .into_err(input.peek().unwrap().1)); + } + let expr = parse_expr(input, state, settings.level_up())?; map.push(((name, pos), expr)); match input.peek().unwrap() { @@ -1239,7 +1270,7 @@ fn parse_primary( Token::True => Expr::True(settings.pos), Token::False => Expr::False(settings.pos), Token::LexError(err) => return Err(PERR::BadInput(err.to_string()).into_err(settings.pos)), - token => { + _ => { return Err( PERR::BadInput(format!("Unexpected '{}'", token.syntax())).into_err(settings.pos) ) @@ -1817,7 +1848,7 @@ fn parse_binary_op( make_dot_expr(current_lhs, rhs, pos)? } - token => return Err(PERR::UnknownOperator(token.into()).into_err(pos)), + op_token => return Err(PERR::UnknownOperator(op_token.into()).into_err(pos)), }; } } @@ -2408,102 +2439,6 @@ fn parse_fn( }) } -/// Parse the global level statements. -fn parse_global_level( - input: &mut TokenStream, - max_expr_depth: usize, - max_function_expr_depth: usize, -) -> Result<(Vec, Vec), ParseError> { - let mut statements = Vec::::new(); - let mut functions = HashMap::::with_hasher(StraightHasherBuilder); - let mut state = ParseState::new(max_expr_depth); - - while !input.peek().unwrap().0.is_eof() { - // Collect all the function definitions - #[cfg(not(feature = "no_function"))] - { - let (access, must_be_fn) = if match_token(input, Token::Private)? { - (FnAccess::Private, true) - } else { - (FnAccess::Public, false) - }; - - match input.peek().unwrap() { - #[cfg(not(feature = "no_function"))] - (Token::Fn, pos) => { - let mut state = ParseState::new(max_function_expr_depth); - let settings = ParseSettings { - allow_if_expr: true, - allow_stmt_expr: true, - is_global: false, - is_breakable: false, - level: 0, - pos: *pos, - }; - let func = parse_fn(input, &mut state, access, settings)?; - - // Qualifiers (none) + function name + number of arguments. - let hash = calc_fn_hash(empty(), &func.name, func.params.len(), empty()); - - functions.insert(hash, func); - continue; - } - (_, pos) if must_be_fn => { - return Err(PERR::MissingToken( - Token::Fn.into(), - format!("following '{}'", Token::Private.syntax()), - ) - .into_err(*pos)) - } - _ => (), - } - } - - // Actual statement - let settings = ParseSettings { - allow_if_expr: true, - allow_stmt_expr: true, - is_global: true, - is_breakable: false, - level: 0, - pos: Position::none(), - }; - let stmt = parse_stmt(input, &mut state, settings)?; - - let need_semicolon = !stmt.is_self_terminated(); - - statements.push(stmt); - - match input.peek().unwrap() { - // EOF - (Token::EOF, _) => break, - // stmt ; - (Token::SemiColon, _) if need_semicolon => { - eat_token(input, Token::SemiColon); - } - // stmt ; - (Token::SemiColon, _) if !need_semicolon => (), - // { stmt } ??? - (_, _) if !need_semicolon => (), - // stmt - (Token::LexError(err), pos) => { - return Err(PERR::BadInput(err.to_string()).into_err(*pos)) - } - // stmt ??? - (_, pos) => { - // Semicolons are not optional between statements - return Err(PERR::MissingToken( - Token::SemiColon.into(), - "to terminate this statement".into(), - ) - .into_err(*pos)); - } - } - } - - Ok((statements, functions.into_iter().map(|(_, v)| v).collect())) -} - impl Engine { pub(crate) fn parse_global_expr( &self, @@ -2511,7 +2446,12 @@ impl Engine { scope: &Scope, optimization_level: OptimizationLevel, ) -> Result { - let mut state = ParseState::new(self.max_expr_depth); + let mut state = ParseState::new( + self.max_expr_depth, + self.max_string_size, + self.max_array_size, + self.max_map_size, + ); let settings = ParseSettings { allow_if_expr: false, allow_stmt_expr: false, @@ -2540,6 +2480,111 @@ impl Engine { ) } + /// Parse the global level statements. + fn parse_global_level( + &self, + input: &mut TokenStream, + ) -> Result<(Vec, Vec), ParseError> { + let mut statements = Vec::::new(); + let mut functions = HashMap::::with_hasher(StraightHasherBuilder); + let mut state = ParseState::new( + self.max_expr_depth, + self.max_string_size, + self.max_array_size, + self.max_map_size, + ); + + while !input.peek().unwrap().0.is_eof() { + // Collect all the function definitions + #[cfg(not(feature = "no_function"))] + { + let (access, must_be_fn) = if match_token(input, Token::Private)? { + (FnAccess::Private, true) + } else { + (FnAccess::Public, false) + }; + + match input.peek().unwrap() { + #[cfg(not(feature = "no_function"))] + (Token::Fn, pos) => { + let mut state = ParseState::new( + self.max_function_expr_depth, + self.max_string_size, + self.max_array_size, + self.max_map_size, + ); + let settings = ParseSettings { + allow_if_expr: true, + allow_stmt_expr: true, + is_global: false, + is_breakable: false, + level: 0, + pos: *pos, + }; + let func = parse_fn(input, &mut state, access, settings)?; + + // Qualifiers (none) + function name + number of arguments. + let hash = calc_fn_hash(empty(), &func.name, func.params.len(), empty()); + + functions.insert(hash, func); + continue; + } + (_, pos) if must_be_fn => { + return Err(PERR::MissingToken( + Token::Fn.into(), + format!("following '{}'", Token::Private.syntax()), + ) + .into_err(*pos)) + } + _ => (), + } + } + + // Actual statement + let settings = ParseSettings { + allow_if_expr: true, + allow_stmt_expr: true, + is_global: true, + is_breakable: false, + level: 0, + pos: Position::none(), + }; + let stmt = parse_stmt(input, &mut state, settings)?; + + let need_semicolon = !stmt.is_self_terminated(); + + statements.push(stmt); + + match input.peek().unwrap() { + // EOF + (Token::EOF, _) => break, + // stmt ; + (Token::SemiColon, _) if need_semicolon => { + eat_token(input, Token::SemiColon); + } + // stmt ; + (Token::SemiColon, _) if !need_semicolon => (), + // { stmt } ??? + (_, _) if !need_semicolon => (), + // stmt + (Token::LexError(err), pos) => { + return Err(PERR::BadInput(err.to_string()).into_err(*pos)) + } + // stmt ??? + (_, pos) => { + // Semicolons are not optional between statements + return Err(PERR::MissingToken( + Token::SemiColon.into(), + "to terminate this statement".into(), + ) + .into_err(*pos)); + } + } + } + + Ok((statements, functions.into_iter().map(|(_, v)| v).collect())) + } + /// Run the parser on an input stream, returning an AST. pub(crate) fn parse( &self, @@ -2547,8 +2592,7 @@ impl Engine { scope: &Scope, optimization_level: OptimizationLevel, ) -> Result { - let (statements, lib) = - parse_global_level(input, self.max_expr_depth, self.max_function_expr_depth)?; + let (statements, lib) = self.parse_global_level(input)?; Ok( // Optimize AST diff --git a/src/result.rs b/src/result.rs index 36bc18f6..83943df2 100644 --- a/src/result.rs +++ b/src/result.rs @@ -81,6 +81,8 @@ pub enum EvalAltResult { ErrorTooManyModules(Position), /// Call stack over maximum limit. ErrorStackOverflow(Position), + /// Data value over maximum size limit. Wrapped values are the data type, maximum size and current size. + ErrorDataTooLarge(String, usize, usize, Position), /// The script is prematurely terminated. ErrorTerminated(Position), /// Run-time error encountered. Wrapped value is the error message. @@ -139,6 +141,7 @@ impl EvalAltResult { Self::ErrorTooManyOperations(_) => "Too many operations", Self::ErrorTooManyModules(_) => "Too many modules imported", Self::ErrorStackOverflow(_) => "Stack overflow", + Self::ErrorDataTooLarge(_, _, _, _) => "Data size exceeds maximum limit", Self::ErrorTerminated(_) => "Script terminated.", Self::ErrorRuntime(_, _) => "Runtime error", Self::ErrorLoopBreak(true, _) => "Break statement not inside a loop", @@ -228,6 +231,9 @@ impl fmt::Display for EvalAltResult { "String index {} is out of bounds: only {} characters in the string", index, max )?, + Self::ErrorDataTooLarge(typ, max, size, _) => { + write!(f, "{} ({}) exceeds the maximum limit ({})", typ, size, max)? + } } // Do not write any position if None @@ -279,6 +285,7 @@ impl EvalAltResult { | Self::ErrorTooManyOperations(pos) | Self::ErrorTooManyModules(pos) | Self::ErrorStackOverflow(pos) + | Self::ErrorDataTooLarge(_, _, _, pos) | Self::ErrorTerminated(pos) | Self::ErrorRuntime(_, pos) | Self::ErrorLoopBreak(_, pos) @@ -316,6 +323,7 @@ impl EvalAltResult { | Self::ErrorTooManyOperations(pos) | Self::ErrorTooManyModules(pos) | Self::ErrorStackOverflow(pos) + | Self::ErrorDataTooLarge(_, _, _, pos) | Self::ErrorTerminated(pos) | Self::ErrorRuntime(_, pos) | Self::ErrorLoopBreak(_, pos) diff --git a/src/token.rs b/src/token.rs index d6799a6f..614783b4 100644 --- a/src/token.rs +++ b/src/token.rs @@ -429,6 +429,8 @@ impl From for String { /// An iterator on a `Token` stream. pub struct TokenIterator<'a> { + /// Maximum length of a string (0 = unlimited). + max_string_size: usize, /// Can the next token be a unary operator? can_be_unary: bool, /// Current position. @@ -494,6 +496,7 @@ impl<'a> TokenIterator<'a> { pub fn parse_string_literal( &mut self, enclosing_char: char, + max_length: usize, ) -> Result { let mut result = Vec::new(); let mut escape = String::with_capacity(12); @@ -505,6 +508,10 @@ impl<'a> TokenIterator<'a> { self.advance(); + if max_length > 0 && result.len() > max_length { + return Err((LexError::StringTooLong(max_length), self.pos)); + } + match next_char { // \... '\\' if escape.is_empty() => { @@ -592,7 +599,13 @@ impl<'a> TokenIterator<'a> { } } - Ok(result.iter().collect()) + let s = result.iter().collect::(); + + if max_length > 0 && s.len() > max_length { + return Err((LexError::StringTooLong(max_length), self.pos)); + } + + Ok(s) } /// Get the next token. @@ -779,10 +792,12 @@ impl<'a> TokenIterator<'a> { // " - string literal ('"', _) => { - return self.parse_string_literal('"').map_or_else( - |err| Some((Token::LexError(Box::new(err.0)), err.1)), - |out| Some((Token::StringConst(out), pos)), - ); + return self + .parse_string_literal('"', self.max_string_size) + .map_or_else( + |err| Some((Token::LexError(Box::new(err.0)), err.1)), + |out| Some((Token::StringConst(out), pos)), + ); } // ' - character literal @@ -793,19 +808,25 @@ impl<'a> TokenIterator<'a> { )); } ('\'', _) => { - return Some(self.parse_string_literal('\'').map_or_else( - |err| (Token::LexError(Box::new(err.0)), err.1), - |result| { - let mut chars = result.chars(); - let first = chars.next(); + return Some( + self.parse_string_literal('\'', self.max_string_size) + .map_or_else( + |err| (Token::LexError(Box::new(err.0)), err.1), + |result| { + let mut chars = result.chars(); + let first = chars.next(); - if chars.next().is_some() { - (Token::LexError(Box::new(LERR::MalformedChar(result))), pos) - } else { - (Token::CharConstant(first.expect("should be Some")), pos) - } - }, - )); + if chars.next().is_some() { + ( + Token::LexError(Box::new(LERR::MalformedChar(result))), + pos, + ) + } else { + (Token::CharConstant(first.expect("should be Some")), pos) + } + }, + ), + ); } // Braces @@ -1047,8 +1068,9 @@ impl<'a> Iterator for TokenIterator<'a> { } /// Tokenize an input text stream. -pub fn lex<'a>(input: &'a [&'a str]) -> TokenIterator<'a> { +pub fn lex<'a>(input: &'a [&'a str], max_string_size: usize) -> TokenIterator<'a> { TokenIterator { + max_string_size, can_be_unary: true, pos: Position::new(1, 0), streams: input.iter().map(|s| s.chars().peekable()).collect(), diff --git a/tests/data_size.rs b/tests/data_size.rs new file mode 100644 index 00000000..77e660fa --- /dev/null +++ b/tests/data_size.rs @@ -0,0 +1,234 @@ +#![cfg(not(feature = "unchecked"))] +use rhai::{Engine, EvalAltResult, ParseError, ParseErrorType}; + +#[cfg(not(feature = "no_index"))] +use rhai::Array; + +#[cfg(not(feature = "no_object"))] +use rhai::Map; + +#[test] +fn test_max_string_size() -> Result<(), Box> { + let mut engine = Engine::new(); + engine.set_max_string_size(10); + + assert!(matches!( + engine.compile(r#"let x = "hello, world!";"#).expect_err("should error"), + ParseError(x, _) if *x == ParseErrorType::BadInput("Length of string literal exceeds the maximum limit (10)".to_string()) + )); + + assert!(matches!( + engine.compile(r#"let x = "朝に紅顔、暮に白骨";"#).expect_err("should error"), + ParseError(x, _) if *x == ParseErrorType::BadInput("Length of string literal exceeds the maximum limit (10)".to_string()) + )); + + assert!(matches!( + *engine + .eval::( + r#" + let x = "hello, "; + let y = "world!"; + x + y + "# + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 13, _) + )); + + engine.set_max_string_size(0); + + assert_eq!( + engine.eval::( + r#" + let x = "hello, "; + let y = "world!"; + x + y + "# + )?, + "hello, world!" + ); + + Ok(()) +} + +#[test] +#[cfg(not(feature = "no_index"))] +fn test_max_array_size() -> Result<(), Box> { + let mut engine = Engine::new(); + engine.set_max_array_size(10); + + #[cfg(not(feature = "no_object"))] + engine.set_max_map_size(10); + + assert!(matches!( + engine + .compile("let x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15];") + .expect_err("should error"), + ParseError(x, _) if *x == ParseErrorType::LiteralTooLarge("Size of array literal".to_string(), 10) + )); + + assert!(matches!( + *engine + .eval::( + r" + let x = [1,2,3,4,5,6]; + let y = [7,8,9,10,11,12]; + x + y + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + assert!(matches!( + *engine + .eval::( + r" + let x = [1,2,3]; + [x, x, x, x] + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + #[cfg(not(feature = "no_object"))] + assert!(matches!( + *engine + .eval::( + r" + let x = #{a:1, b:2, c:3}; + [x, x, x, x] + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + assert!(matches!( + *engine + .eval::( + r" + let x = [1]; + let y = [x, x]; + let z = [y, y]; + [z, z, z] + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + engine.set_max_array_size(0); + + assert_eq!( + engine + .eval::( + r" + let x = [1,2,3,4,5,6]; + let y = [7,8,9,10,11,12]; + x + y + " + )? + .len(), + 12 + ); + + assert_eq!( + engine + .eval::( + r" + let x = [1,2,3]; + [x, x, x, x] + " + )? + .len(), + 4 + ); + + Ok(()) +} + +#[test] +#[cfg(not(feature = "no_object"))] +fn test_max_map_size() -> Result<(), Box> { + let mut engine = Engine::new(); + engine.set_max_map_size(10); + + #[cfg(not(feature = "no_index"))] + engine.set_max_array_size(10); + + assert!(matches!( + engine + .compile("let x = #{a:1,b:2,c:3,d:4,e:5,f:6,g:7,h:8,i:9,j:10,k:11,l:12,m:13,n:14,o:15};") + .expect_err("should error"), + ParseError(x, _) if *x == ParseErrorType::LiteralTooLarge("Number of properties in object map literal".to_string(), 10) + )); + + assert!(matches!( + *engine + .eval::( + r" + let x = #{a:1,b:2,c:3,d:4,e:5,f:6}; + let y = #{g:7,h:8,i:9,j:10,k:11,l:12}; + x + y + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + assert!(matches!( + *engine + .eval::( + r" + let x = #{a:1,b:2,c:3}; + #{u:x, v:x, w:x, z:x} + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + #[cfg(not(feature = "no_index"))] + assert!(matches!( + *engine + .eval::( + r" + let x = [1, 2, 3]; + #{u:x, v:x, w:x, z:x} + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + engine.set_max_map_size(0); + + assert_eq!( + engine + .eval::( + r" + let x = #{a:1,b:2,c:3,d:4,e:5,f:6}; + let y = #{g:7,h:8,i:9,j:10,k:11,l:12}; + x + y + " + )? + .len(), + 12 + ); + + assert_eq!( + engine + .eval::( + r" + let x = #{a:1,b:2,c:3}; + #{u:x, v:x, w:x, z:x} + " + )? + .len(), + 4 + ); + + Ok(()) +} diff --git a/tests/modules.rs b/tests/modules.rs index 10c138ad..60d02247 100644 --- a/tests/modules.rs +++ b/tests/modules.rs @@ -130,7 +130,7 @@ fn test_module_resolver() -> Result<(), Box> { EvalAltResult::ErrorInFunctionCall(fn_name, _, _) if fn_name == "foo" )); - engine.set_max_modules(0); + engine.set_max_modules(1000); #[cfg(not(feature = "no_function"))] engine.eval::<()>( diff --git a/tests/operations.rs b/tests/operations.rs index 91df26c9..dae539f0 100644 --- a/tests/operations.rs +++ b/tests/operations.rs @@ -111,3 +111,20 @@ fn test_max_operations_eval() -> Result<(), Box> { Ok(()) } + +#[test] +fn test_max_operations_progress() -> Result<(), Box> { + let mut engine = Engine::new(); + engine.set_max_operations(500); + + engine.on_progress(|&count| count < 100); + + assert!(matches!( + *engine + .eval::<()>("for x in range(0, 500) {}") + .expect_err("should error"), + EvalAltResult::ErrorTerminated(_) + )); + + Ok(()) +} From 0c6a939c666934275bdb3aa18dfb599354f1b79c Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Sun, 14 Jun 2020 16:56:36 +0800 Subject: [PATCH 2/5] Better convert LexError to ParseError. --- README.md | 2 +- src/error.rs | 24 ++++++++++++++--- src/parser.rs | 65 ++++++++++++++-------------------------------- tests/data_size.rs | 4 +-- 4 files changed, 44 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index c0931fa6..7bea89bb 100644 --- a/README.md +++ b/README.md @@ -2480,7 +2480,7 @@ engine.set_max_string_size(500); // allow strings only up to 500 byte engine.set_max_string_size(0); // allow unlimited string length ``` -A script attempting to create a string literal longer than the maximum will terminate with a parse error. +A script attempting to create a string literal longer than the maximum length will terminate with a parse error. Any script operation that produces a string longer than the maximum also terminates the script with an error result. This check can be disabled via the [`unchecked`] feature for higher performance (but higher risks as well). diff --git a/src/error.rs b/src/error.rs index 3d4c47c7..ce8615ed 100644 --- a/src/error.rs +++ b/src/error.rs @@ -22,8 +22,8 @@ pub enum LexError { MalformedChar(String), /// An identifier is in an invalid format. MalformedIdentifier(String), - /// Bad keyword encountered when tokenizing the script text. - ImproperKeyword(String), + /// Bad symbol encountered when tokenizing the script text. + ImproperSymbol(String), } impl Error for LexError {} @@ -42,11 +42,18 @@ impl fmt::Display for LexError { "Length of string literal exceeds the maximum limit ({})", max ), - Self::ImproperKeyword(s) => write!(f, "{}", s), + Self::ImproperSymbol(s) => write!(f, "{}", s), } } } +impl LexError { + /// Convert a `LexError` into a `ParseError`. + pub fn into_err(&self, pos: Position) -> ParseError { + ParseError(Box::new(self.into()), pos) + } +} + /// Type of error encountered when parsing a script. /// /// Some errors never appear when certain features are turned on. @@ -217,6 +224,17 @@ impl fmt::Display for ParseErrorType { } } +impl From<&LexError> for ParseErrorType { + fn from(err: &LexError) -> Self { + match err { + LexError::StringTooLong(max) => { + Self::LiteralTooLarge("Length of string literal".to_string(), *max) + } + _ => Self::BadInput(err.to_string()), + } + } +} + /// Error when parsing a script. #[derive(Debug, Eq, PartialEq, Clone, Hash)] pub struct ParseError(pub Box, pub Position); diff --git a/src/parser.rs b/src/parser.rs index ec99adaf..acbb59e5 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -767,7 +767,7 @@ fn parse_paren_expr( // ( xxx ) (Token::RightParen, _) => Ok(expr), // ( - (Token::LexError(err), pos) => return Err(PERR::BadInput(err.to_string()).into_err(pos)), + (Token::LexError(err), pos) => return Err(err.into_err(pos)), // ( xxx ??? (_, pos) => Err(PERR::MissingToken( Token::RightParen.into(), @@ -800,7 +800,7 @@ fn parse_call_expr( .into_err(settings.pos)) } // id - Token::LexError(err) => return Err(PERR::BadInput(err.to_string()).into_err(settings.pos)), + Token::LexError(err) => return Err(err.into_err(settings.pos)), // id() Token::RightParen => { eat_token(input, Token::RightParen); @@ -880,9 +880,7 @@ fn parse_call_expr( .into_err(*pos)) } // id(...args - (Token::LexError(err), pos) => { - return Err(PERR::BadInput(err.to_string()).into_err(*pos)) - } + (Token::LexError(err), pos) => return Err(err.into_err(*pos)), // id(...args ??? (_, pos) => { return Err(PERR::MissingToken( @@ -1065,7 +1063,7 @@ fn parse_index_chain( } } } - (Token::LexError(err), pos) => return Err(PERR::BadInput(err.to_string()).into_err(*pos)), + (Token::LexError(err), pos) => return Err(err.into_err(*pos)), (_, pos) => Err(PERR::MissingToken( Token::RightBracket.into(), "for a matching [ in this index expression".into(), @@ -1110,9 +1108,7 @@ fn parse_array_literal( ) .into_err(*pos)) } - (Token::LexError(err), pos) => { - return Err(PERR::BadInput(err.to_string()).into_err(*pos)) - } + (Token::LexError(err), pos) => return Err(err.into_err(*pos)), (_, pos) => { return Err(PERR::MissingToken( Token::Comma.into(), @@ -1144,9 +1140,7 @@ fn parse_map_literal( let (name, pos) = match input.next().unwrap() { (Token::Identifier(s), pos) => (s, pos), (Token::StringConst(s), pos) => (s, pos), - (Token::LexError(err), pos) => { - return Err(PERR::BadInput(err.to_string()).into_err(pos)) - } + (Token::LexError(err), pos) => return Err(err.into_err(pos)), (_, pos) if map.is_empty() => { return Err( PERR::MissingToken(Token::RightBrace.into(), MISSING_RBRACE.into()) @@ -1164,9 +1158,7 @@ fn parse_map_literal( match input.next().unwrap() { (Token::Colon, _) => (), - (Token::LexError(err), pos) => { - return Err(PERR::BadInput(err.to_string()).into_err(pos)) - } + (Token::LexError(err), pos) => return Err(err.into_err(pos)), (_, pos) => { return Err(PERR::MissingToken( Token::Colon.into(), @@ -1205,9 +1197,7 @@ fn parse_map_literal( ) .into_err(*pos)) } - (Token::LexError(err), pos) => { - return Err(PERR::BadInput(err.to_string()).into_err(*pos)) - } + (Token::LexError(err), pos) => return Err(err.into_err(*pos)), (_, pos) => { return Err( PERR::MissingToken(Token::RightBrace.into(), MISSING_RBRACE.into()) @@ -1269,7 +1259,7 @@ fn parse_primary( Token::MapStart => parse_map_literal(input, state, settings.level_up())?, Token::True => Expr::True(settings.pos), Token::False => Expr::False(settings.pos), - Token::LexError(err) => return Err(PERR::BadInput(err.to_string()).into_err(settings.pos)), + Token::LexError(err) => return Err(err.into_err(settings.pos)), _ => { return Err( PERR::BadInput(format!("Unexpected '{}'", token.syntax())).into_err(settings.pos) @@ -1380,12 +1370,7 @@ fn parse_unary( None } }) - .ok_or_else(|| { - PERR::BadInput( - LexError::MalformedNumber(format!("-{}", x.0)).to_string(), - ) - .into_err(pos) - }) + .ok_or_else(|| LexError::MalformedNumber(format!("-{}", x.0)).into_err(pos)) } // Negative float @@ -1990,7 +1975,7 @@ fn parse_for( // Variable name (Token::Identifier(s), _) => s, // Bad identifier - (Token::LexError(err), pos) => return Err(PERR::BadInput(err.to_string()).into_err(pos)), + (Token::LexError(err), pos) => return Err(err.into_err(pos)), // EOF (Token::EOF, pos) => return Err(PERR::VariableExpected.into_err(pos)), // Not a variable name @@ -2000,7 +1985,7 @@ fn parse_for( // for name in ... match input.next().unwrap() { (Token::In, _) => (), - (Token::LexError(err), pos) => return Err(PERR::BadInput(err.to_string()).into_err(pos)), + (Token::LexError(err), pos) => return Err(err.into_err(pos)), (_, pos) => { return Err( PERR::MissingToken(Token::In.into(), "after the iteration variable".into()) @@ -2038,7 +2023,7 @@ fn parse_let( // let name ... let (name, pos) = match input.next().unwrap() { (Token::Identifier(s), pos) => (s, pos), - (Token::LexError(err), pos) => return Err(PERR::BadInput(err.to_string()).into_err(pos)), + (Token::LexError(err), pos) => return Err(err.into_err(pos)), (_, pos) => return Err(PERR::VariableExpected.into_err(pos)), }; @@ -2109,7 +2094,7 @@ fn parse_import( // import expr as name ... let (name, _) = match input.next().unwrap() { (Token::Identifier(s), pos) => (s, pos), - (Token::LexError(err), pos) => return Err(PERR::BadInput(err.to_string()).into_err(pos)), + (Token::LexError(err), pos) => return Err(err.into_err(pos)), (_, pos) => return Err(PERR::VariableExpected.into_err(pos)), }; @@ -2132,9 +2117,7 @@ fn parse_export( loop { let (id, id_pos) = match input.next().unwrap() { (Token::Identifier(s), pos) => (s.clone(), pos), - (Token::LexError(err), pos) => { - return Err(PERR::BadInput(err.to_string()).into_err(pos)) - } + (Token::LexError(err), pos) => return Err(err.into_err(pos)), (_, pos) => return Err(PERR::VariableExpected.into_err(pos)), }; @@ -2189,7 +2172,7 @@ fn parse_block( // Must start with { settings.pos = match input.next().unwrap() { (Token::LeftBrace, pos) => pos, - (Token::LexError(err), pos) => return Err(PERR::BadInput(err.to_string()).into_err(pos)), + (Token::LexError(err), pos) => return Err(err.into_err(pos)), (_, pos) => { return Err(PERR::MissingToken( Token::LeftBrace.into(), @@ -2229,9 +2212,7 @@ fn parse_block( // { ... { stmt } ??? (_, _) if !need_semicolon => (), // { ... stmt - (Token::LexError(err), pos) => { - return Err(PERR::BadInput(err.to_string()).into_err(*pos)) - } + (Token::LexError(err), pos) => return Err(err.into_err(*pos)), // { ... stmt ??? (_, pos) => { // Semicolons are not optional between statements @@ -2380,9 +2361,7 @@ fn parse_fn( state.push((s.clone(), ScopeEntryType::Normal)); params.push((s, pos)) } - (Token::LexError(err), pos) => { - return Err(PERR::BadInput(err.to_string()).into_err(pos)) - } + (Token::LexError(err), pos) => return Err(err.into_err(pos)), (_, pos) => { return Err(PERR::MissingToken(Token::RightParen.into(), end_err).into_err(pos)) } @@ -2394,9 +2373,7 @@ fn parse_fn( (Token::Identifier(_), pos) => { return Err(PERR::MissingToken(Token::Comma.into(), sep_err).into_err(pos)) } - (Token::LexError(err), pos) => { - return Err(PERR::BadInput(err.to_string()).into_err(pos)) - } + (Token::LexError(err), pos) => return Err(err.into_err(pos)), (_, pos) => { return Err(PERR::MissingToken(Token::Comma.into(), sep_err).into_err(pos)) } @@ -2567,9 +2544,7 @@ impl Engine { // { stmt } ??? (_, _) if !need_semicolon => (), // stmt - (Token::LexError(err), pos) => { - return Err(PERR::BadInput(err.to_string()).into_err(*pos)) - } + (Token::LexError(err), pos) => return Err(err.into_err(*pos)), // stmt ??? (_, pos) => { // Semicolons are not optional between statements diff --git a/tests/data_size.rs b/tests/data_size.rs index 77e660fa..7884971f 100644 --- a/tests/data_size.rs +++ b/tests/data_size.rs @@ -14,12 +14,12 @@ fn test_max_string_size() -> Result<(), Box> { assert!(matches!( engine.compile(r#"let x = "hello, world!";"#).expect_err("should error"), - ParseError(x, _) if *x == ParseErrorType::BadInput("Length of string literal exceeds the maximum limit (10)".to_string()) + ParseError(x, _) if *x == ParseErrorType::LiteralTooLarge("Length of string literal".to_string(), 10) )); assert!(matches!( engine.compile(r#"let x = "朝に紅顔、暮に白骨";"#).expect_err("should error"), - ParseError(x, _) if *x == ParseErrorType::BadInput("Length of string literal exceeds the maximum limit (10)".to_string()) + ParseError(x, _) if *x == ParseErrorType::LiteralTooLarge("Length of string literal".to_string(), 10) )); assert!(matches!( From f26c12b8ea9e95ad023f45f3fae56297d865e6c7 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Sun, 14 Jun 2020 19:13:11 +0800 Subject: [PATCH 3/5] Better error messages for unrecognized tokens. --- src/token.rs | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/src/token.rs b/src/token.rs index 614783b4..0bb0cbe3 100644 --- a/src/token.rs +++ b/src/token.rs @@ -862,6 +862,14 @@ impl<'a> TokenIterator<'a> { self.eat_next(); return Some((Token::MinusAssign, pos)); } + ('-', '>') => { + return Some(( + Token::LexError(Box::new(LERR::ImproperSymbol( + "'->' is not a valid symbol. This is not C or C++!".to_string(), + ))), + pos, + )) + } ('-', _) if self.can_be_unary => return Some((Token::UnaryMinus, pos)), ('-', _) => return Some((Token::Minus, pos)), @@ -931,7 +939,7 @@ impl<'a> TokenIterator<'a> { // Warn against `===` if self.peek_next() == Some('=') { return Some(( - Token::LexError(Box::new(LERR::ImproperKeyword( + Token::LexError(Box::new(LERR::ImproperSymbol( "'===' is not a valid operator. This is not JavaScript! Should it be '=='?" .to_string(), ))), @@ -941,18 +949,44 @@ impl<'a> TokenIterator<'a> { return Some((Token::EqualsTo, pos)); } + ('=', '>') => { + return Some(( + Token::LexError(Box::new(LERR::ImproperSymbol( + "'=>' is not a valid symbol. This is not Rust! Should it be '>='?" + .to_string(), + ))), + pos, + )) + } ('=', _) => return Some((Token::Equals, pos)), (':', ':') => { self.eat_next(); return Some((Token::DoubleColon, pos)); } + (':', '=') => { + return Some(( + Token::LexError(Box::new(LERR::ImproperSymbol( + "':=' is not a valid assignment operator. This is not Pascal! Should it be simply '='?" + .to_string(), + ))), + pos, + )) + } (':', _) => return Some((Token::Colon, pos)), ('<', '=') => { self.eat_next(); return Some((Token::LessThanEqualsTo, pos)); } + ('<', '-') => { + return Some(( + Token::LexError(Box::new(LERR::ImproperSymbol( + "'<-' is not a valid symbol. Should it be '<='?".to_string(), + ))), + pos, + )) + } ('<', '<') => { self.eat_next(); @@ -993,7 +1027,7 @@ impl<'a> TokenIterator<'a> { // Warn against `!==` if self.peek_next() == Some('=') { return Some(( - Token::LexError(Box::new(LERR::ImproperKeyword( + Token::LexError(Box::new(LERR::ImproperSymbol( "'!==' is not a valid operator. This is not JavaScript! Should it be '!='?" .to_string(), ))), From 31d2fa410bc943150b0e1ecf28893563931fa8a5 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Sun, 14 Jun 2020 22:44:59 +0800 Subject: [PATCH 4/5] Streamline code. --- README.md | 2 +- src/engine.rs | 48 +++++++++++++++++++++++++----------------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 7bea89bb..642b660d 100644 --- a/README.md +++ b/README.md @@ -2207,7 +2207,7 @@ let a = new_ts(); // constructor function a.field = 500; // property setter a.update(); // method call, 'a' can be modified -update(a); // <- this de-sugars to 'a.update()' this if 'a' is a simple variable +update(a); // <- this de-sugars to 'a.update()' thus if 'a' is a simple variable // unlike scripted functions, 'a' can be modified and is not a copy let array = [ a ]; diff --git a/src/engine.rs b/src/engine.rs index 83878c3c..7ce521ea 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -63,9 +63,9 @@ pub const MAX_FUNCTION_EXPR_DEPTH: usize = 32; #[cfg(feature = "unchecked")] pub const MAX_CALL_STACK_DEPTH: usize = usize::MAX; #[cfg(feature = "unchecked")] -pub const MAX_EXPR_DEPTH: usize = usize::MAX; +pub const MAX_EXPR_DEPTH: usize = 0; #[cfg(feature = "unchecked")] -pub const MAX_FUNCTION_EXPR_DEPTH: usize = usize::MAX; +pub const MAX_FUNCTION_EXPR_DEPTH: usize = 0; pub const KEYWORD_PRINT: &str = "print"; pub const KEYWORD_DEBUG: &str = "debug"; @@ -1761,11 +1761,7 @@ impl Engine { _ => unreachable!(), }; - if let Ok(val) = &result { - self.check_data_size(val)?; - } - - result + self.check_data_size(result) } /// Evaluate a statement @@ -2034,20 +2030,20 @@ impl Engine { } }; - if let Ok(val) = &result { - self.check_data_size(val)?; - } - - result + self.check_data_size(result) } - /// Check a `Dynamic` value to ensure that its size is within allowable limit. - fn check_data_size(&self, value: &Dynamic) -> Result<(), Box> { + /// Check a result to ensure that the data size is within allowable limit. + fn check_data_size( + &self, + result: Result>, + ) -> Result> { #[cfg(feature = "unchecked")] - return Ok(()); + return result; + // If no data size limits, just return if self.max_string_size + self.max_array_size + self.max_map_size == 0 { - return Ok(()); + return result; } // Recursively calculate the size of a value (especially `Array` and `Map`) @@ -2090,16 +2086,22 @@ impl Engine { } } - match value { - Dynamic(Union::Str(_)) if self.max_string_size > 0 => (), + match result { + // Simply return all errors + Err(_) => return result, + // String with limit + Ok(Dynamic(Union::Str(_))) if self.max_string_size > 0 => (), + // Array with limit #[cfg(not(feature = "no_index"))] - Dynamic(Union::Array(_)) if self.max_array_size > 0 => (), + Ok(Dynamic(Union::Array(_))) if self.max_array_size > 0 => (), + // Map with limit #[cfg(not(feature = "no_object"))] - Dynamic(Union::Map(_)) if self.max_map_size > 0 => (), - _ => return Ok(()), + Ok(Dynamic(Union::Map(_))) if self.max_map_size > 0 => (), + // Everything else is simply returned + Ok(_) => return result, }; - let (arr, map, s) = calc_size(value); + let (arr, map, s) = calc_size(result.as_ref().unwrap()); if s > self.max_string_size { Err(Box::new(EvalAltResult::ErrorDataTooLarge( @@ -2123,7 +2125,7 @@ impl Engine { Position::none(), ))) } else { - Ok(()) + result } } From a417bdd8e3ecf4fdfc74d224af2b8c3a36972710 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Mon, 15 Jun 2020 21:49:02 +0800 Subject: [PATCH 5/5] Support registering functions with a reference to the scripting engine. --- README.md | 6 +---- src/api.rs | 10 +++----- src/engine.rs | 10 ++++---- src/fn_native.rs | 6 +++-- src/fn_register.rs | 3 +-- src/module.rs | 48 +++++++++++++++++++++++++++---------- src/packages/array_basic.rs | 41 +++++++++++++++++++++++++------ src/packages/string_more.rs | 43 ++++++++++++++++++++++++++++----- tests/data_size.rs | 25 +++++++++++++++++++ tests/modules.rs | 5 +++- 10 files changed, 151 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 642b660d..78fbae94 100644 --- a/README.md +++ b/README.md @@ -2488,9 +2488,7 @@ This check can be disabled via the [`unchecked`] feature for higher performance Be conservative when setting a maximum limit and always consider the fact that a registered function may grow a string's length without Rhai noticing until the very end. For instance, the built-in '`+`' operator for strings concatenates two strings together to form one longer string; if both strings are _slightly_ below the maximum -length limit, the resultant string may be almost _twice_ the maximum length. The '`pad`' function grows a string -to a specified length which may be longer than the allowed maximum -(to trap this risk, register a custom '`pad`' function that checks the arguments). +length limit, the resultant string may be almost _twice_ the maximum length. ### Maximum size of arrays @@ -2514,8 +2512,6 @@ Be conservative when setting a maximum limit and always consider the fact that a an array's size without Rhai noticing until the very end. For instance, the built-in '`+`' operator for arrays concatenates two arrays together to form one larger array; if both arrays are _slightly_ below the maximum size limit, the resultant array may be almost _twice_ the maximum size. -The '`pad`' function grows an array to a specified size which may be larger than the allowed maximum -(to trap this risk, register a custom '`pad`' function that checks the arguments). As a malicious script may create a deeply-nested array which consumes huge amounts of memory while each individual array still stays under the maximum size limit, Rhai also recursively adds up the sizes of all strings, arrays diff --git a/src/api.rs b/src/api.rs index 56adc08c..6a6dbf5c 100644 --- a/src/api.rs +++ b/src/api.rs @@ -751,7 +751,6 @@ impl Engine { ) -> Result { let scripts = [script]; let stream = lex(&scripts, self.max_string_size); - { let mut peekable = stream.peekable(); self.parse_global_expr(&mut peekable, scope, self.optimization_level) @@ -906,11 +905,8 @@ impl Engine { let scripts = [script]; let stream = lex(&scripts, self.max_string_size); - let ast = self.parse_global_expr( - &mut stream.peekable(), - scope, - OptimizationLevel::None, // No need to optimize a lone expression - )?; + // No need to optimize a lone expression + let ast = self.parse_global_expr(&mut stream.peekable(), scope, OptimizationLevel::None)?; self.eval_ast_with_scope(scope, &ast) } @@ -983,6 +979,7 @@ impl Engine { }); } + /// Evaluate an `AST` with own scope. pub(crate) fn eval_ast_with_scope_raw( &self, scope: &mut Scope, @@ -1035,7 +1032,6 @@ impl Engine { ) -> Result<(), Box> { let scripts = [script]; let stream = lex(&scripts, self.max_string_size); - let ast = self.parse(&mut stream.peekable(), scope, self.optimization_level)?; self.consume_ast_with_scope(scope, &ast) } diff --git a/src/engine.rs b/src/engine.rs index 7ce521ea..8ca14356 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -645,7 +645,7 @@ impl Engine { return Ok((result, false)); } else { // Run external function - let result = func.get_native_fn()(args)?; + let result = func.get_native_fn()(self, args)?; // Restore the original reference restore_first_arg(old_this_ptr, args); @@ -1474,7 +1474,7 @@ impl Engine { .or_else(|| self.packages.get_fn(hash_fn)) { // Overriding exact implementation - func(&mut [lhs_ptr, &mut rhs_val])?; + func(self, &mut [lhs_ptr, &mut rhs_val])?; } else if run_builtin_op_assignment(op, lhs_ptr, &rhs_val)?.is_none() { // Not built in, map to `var = var op rhs` let op = &op[..op.len() - 1]; // extract operator without = @@ -1705,7 +1705,9 @@ impl Engine { self.call_script_fn(&mut scope, state, lib, name, fn_def, args, level) .map_err(|err| EvalAltResult::new_position(err, *pos)) } - Ok(f) => f.get_native_fn()(args.as_mut()).map_err(|err| err.new_position(*pos)), + Ok(f) => { + f.get_native_fn()(self, args.as_mut()).map_err(|err| err.new_position(*pos)) + } Err(err) if def_val.is_some() && matches!(*err, EvalAltResult::ErrorFunctionNotFound(_, _)) => @@ -2112,7 +2114,7 @@ impl Engine { ))) } else if arr > self.max_array_size { Err(Box::new(EvalAltResult::ErrorDataTooLarge( - "Length of array".to_string(), + "Size of array".to_string(), self.max_array_size, arr, Position::none(), diff --git a/src/fn_native.rs b/src/fn_native.rs index fc478512..b7805ff5 100644 --- a/src/fn_native.rs +++ b/src/fn_native.rs @@ -1,4 +1,5 @@ use crate::any::Dynamic; +use crate::engine::Engine; use crate::parser::ScriptFnDef; use crate::result::EvalAltResult; @@ -51,9 +52,10 @@ pub fn shared_take(value: Shared) -> T { pub type FnCallArgs<'a> = [&'a mut Dynamic]; #[cfg(not(feature = "sync"))] -pub type FnAny = dyn Fn(&mut FnCallArgs) -> Result>; +pub type FnAny = dyn Fn(&Engine, &mut FnCallArgs) -> Result>; #[cfg(feature = "sync")] -pub type FnAny = dyn Fn(&mut FnCallArgs) -> Result> + Send + Sync; +pub type FnAny = + dyn Fn(&Engine, &mut FnCallArgs) -> Result> + Send + Sync; pub type IteratorFn = fn(Dynamic) -> Box>; diff --git a/src/fn_register.rs b/src/fn_register.rs index 86e7ada9..1b07cbe8 100644 --- a/src/fn_register.rs +++ b/src/fn_register.rs @@ -1,5 +1,4 @@ //! Module which defines the function registration mechanism. - #![allow(non_snake_case)] use crate::any::{Dynamic, Variant}; @@ -120,7 +119,7 @@ macro_rules! make_func { // ^ function parameter generic type name (A, B, C etc.) // ^ dereferencing function - Box::new(move |args: &mut FnCallArgs| { + Box::new(move |_: &Engine, args: &mut FnCallArgs| { // The arguments are assumed to be of the correct number and types! #[allow(unused_variables, unused_mut)] diff --git a/src/module.rs b/src/module.rs index 91fa3388..d1206b5d 100644 --- a/src/module.rs +++ b/src/module.rs @@ -305,6 +305,29 @@ impl Module { hash_fn } + /// Set a Rust function taking a reference to the scripting `Engine`, plus a list of + /// mutable `Dynamic` references into the module, returning a hash key. + /// A list of `TypeId`'s is taken as the argument types. + /// + /// Use this to register a built-in function which must reference settings on the scripting + /// `Engine` (e.g. to prevent growing an array beyond the allowed maximum size). + /// + /// If there is a similar existing Rust function, it is replaced. + pub(crate) fn set_fn_var_args( + &mut self, + name: impl Into, + args: &[TypeId], + func: impl Fn(&Engine, &mut [&mut Dynamic]) -> FuncReturn + SendSync + 'static, + ) -> u64 { + let f = move |engine: &Engine, args: &mut FnCallArgs| func(engine, args).map(Dynamic::from); + self.set_fn( + name, + Public, + args, + CallableFunction::from_method(Box::new(f)), + ) + } + /// Set a Rust function taking no parameters into the module, returning a hash key. /// /// If there is a similar existing Rust function, it is replaced. @@ -323,7 +346,7 @@ impl Module { name: impl Into, func: impl Fn() -> FuncReturn + SendSync + 'static, ) -> u64 { - let f = move |_: &mut FnCallArgs| func().map(Dynamic::from); + let f = move |_: &Engine, _: &mut FnCallArgs| func().map(Dynamic::from); let args = []; self.set_fn( name, @@ -351,8 +374,9 @@ impl Module { name: impl Into, func: impl Fn(A) -> FuncReturn + SendSync + 'static, ) -> u64 { - let f = - move |args: &mut FnCallArgs| func(mem::take(args[0]).cast::()).map(Dynamic::from); + let f = move |_: &Engine, args: &mut FnCallArgs| { + func(mem::take(args[0]).cast::()).map(Dynamic::from) + }; let args = [TypeId::of::()]; self.set_fn( name, @@ -380,7 +404,7 @@ impl Module { name: impl Into, func: impl Fn(&mut A) -> FuncReturn + SendSync + 'static, ) -> u64 { - let f = move |args: &mut FnCallArgs| { + let f = move |_: &Engine, args: &mut FnCallArgs| { func(args[0].downcast_mut::().unwrap()).map(Dynamic::from) }; let args = [TypeId::of::()]; @@ -434,7 +458,7 @@ impl Module { name: impl Into, func: impl Fn(A, B) -> FuncReturn + SendSync + 'static, ) -> u64 { - let f = move |args: &mut FnCallArgs| { + let f = move |_: &Engine, args: &mut FnCallArgs| { let a = mem::take(args[0]).cast::(); let b = mem::take(args[1]).cast::(); @@ -470,7 +494,7 @@ impl Module { name: impl Into, func: impl Fn(&mut A, B) -> FuncReturn + SendSync + 'static, ) -> u64 { - let f = move |args: &mut FnCallArgs| { + let f = move |_: &Engine, args: &mut FnCallArgs| { let b = mem::take(args[1]).cast::(); let a = args[0].downcast_mut::().unwrap(); @@ -561,7 +585,7 @@ impl Module { name: impl Into, func: impl Fn(A, B, C) -> FuncReturn + SendSync + 'static, ) -> u64 { - let f = move |args: &mut FnCallArgs| { + let f = move |_: &Engine, args: &mut FnCallArgs| { let a = mem::take(args[0]).cast::(); let b = mem::take(args[1]).cast::(); let c = mem::take(args[2]).cast::(); @@ -603,7 +627,7 @@ impl Module { name: impl Into, func: impl Fn(&mut A, B, C) -> FuncReturn + SendSync + 'static, ) -> u64 { - let f = move |args: &mut FnCallArgs| { + let f = move |_: &Engine, args: &mut FnCallArgs| { let b = mem::take(args[1]).cast::(); let c = mem::take(args[2]).cast::(); let a = args[0].downcast_mut::().unwrap(); @@ -640,7 +664,7 @@ impl Module { &mut self, func: impl Fn(&mut A, B, A) -> FuncReturn<()> + SendSync + 'static, ) -> u64 { - let f = move |args: &mut FnCallArgs| { + let f = move |_: &Engine, args: &mut FnCallArgs| { let b = mem::take(args[1]).cast::(); let c = mem::take(args[2]).cast::(); let a = args[0].downcast_mut::().unwrap(); @@ -682,7 +706,7 @@ impl Module { name: impl Into, func: impl Fn(A, B, C, D) -> FuncReturn + SendSync + 'static, ) -> u64 { - let f = move |args: &mut FnCallArgs| { + let f = move |_: &Engine, args: &mut FnCallArgs| { let a = mem::take(args[0]).cast::(); let b = mem::take(args[1]).cast::(); let c = mem::take(args[2]).cast::(); @@ -731,7 +755,7 @@ impl Module { name: impl Into, func: impl Fn(&mut A, B, C, D) -> FuncReturn + SendSync + 'static, ) -> u64 { - let f = move |args: &mut FnCallArgs| { + let f = move |_: &Engine, args: &mut FnCallArgs| { let b = mem::take(args[1]).cast::(); let c = mem::take(args[2]).cast::(); let d = mem::take(args[3]).cast::(); @@ -1019,7 +1043,7 @@ pub trait ModuleResolver: SendSync { /// Resolve a module based on a path string. fn resolve( &self, - engine: &Engine, + _: &Engine, scope: Scope, path: &str, pos: Position, diff --git a/src/packages/array_basic.rs b/src/packages/array_basic.rs index 19206e36..c8615865 100644 --- a/src/packages/array_basic.rs +++ b/src/packages/array_basic.rs @@ -2,9 +2,11 @@ use crate::any::{Dynamic, Variant}; use crate::def_package; -use crate::engine::Array; +use crate::engine::{Array, Engine}; use crate::module::FuncReturn; use crate::parser::{ImmutableString, INT}; +use crate::result::EvalAltResult; +use crate::token::Position; use crate::stdlib::{any::TypeId, boxed::Box}; @@ -23,13 +25,28 @@ fn ins(list: &mut Array, position: INT, item: T) -> FuncRetu } Ok(()) } -fn pad(list: &mut Array, len: INT, item: T) -> FuncReturn<()> { - if len >= 0 { +fn pad(engine: &Engine, args: &mut [&mut Dynamic]) -> FuncReturn<()> { + let len = *args[1].downcast_ref::().unwrap(); + + // Check if array will be over max size limit + if engine.max_array_size > 0 && len > 0 && (len as usize) > engine.max_array_size { + Err(Box::new(EvalAltResult::ErrorDataTooLarge( + "Size of array".to_string(), + engine.max_array_size, + len as usize, + Position::none(), + ))) + } else if len >= 0 { + let item = args[2].downcast_ref::().unwrap().clone(); + let list = args[0].downcast_mut::().unwrap(); + while list.len() < len as usize { push(list, item.clone())?; } + Ok(()) + } else { + Ok(()) } - Ok(()) } macro_rules! reg_op { @@ -42,11 +59,21 @@ macro_rules! reg_tri { $( $lib.set_fn_3_mut($op, $func::<$par>); )* }; } +macro_rules! reg_pad { + ($lib:expr, $op:expr, $func:ident, $($par:ty),*) => { + $({ + $lib.set_fn_var_args($op, + &[TypeId::of::(), TypeId::of::(), TypeId::of::<$par>()], + $func::<$par> + ); + })* + }; +} #[cfg(not(feature = "no_index"))] def_package!(crate:BasicArrayPackage:"Basic array utilities.", lib, { reg_op!(lib, "push", push, INT, bool, char, ImmutableString, Array, ()); - reg_tri!(lib, "pad", pad, INT, bool, char, ImmutableString, Array, ()); + reg_pad!(lib, "pad", pad, INT, bool, char, ImmutableString, Array, ()); reg_tri!(lib, "insert", ins, INT, bool, char, ImmutableString, Array, ()); lib.set_fn_2_mut("append", |x: &mut Array, y: Array| { @@ -69,14 +96,14 @@ def_package!(crate:BasicArrayPackage:"Basic array utilities.", lib, { #[cfg(not(feature = "only_i64"))] { reg_op!(lib, "push", push, i8, u8, i16, u16, i32, i64, u32, u64, i128, u128); - reg_tri!(lib, "pad", pad, i8, u8, i16, u16, i32, u32, i64, u64, i128, u128); + reg_pad!(lib, "pad", pad, i8, u8, i16, u16, i32, u32, i64, u64, i128, u128); reg_tri!(lib, "insert", ins, i8, u8, i16, u16, i32, i64, u32, u64, i128, u128); } #[cfg(not(feature = "no_float"))] { reg_op!(lib, "push", push, f32, f64); - reg_tri!(lib, "pad", pad, f32, f64); + reg_pad!(lib, "pad", pad, f32, f64); reg_tri!(lib, "insert", ins, f32, f64); } diff --git a/src/packages/string_more.rs b/src/packages/string_more.rs index e86f5fb1..6b5928c1 100644 --- a/src/packages/string_more.rs +++ b/src/packages/string_more.rs @@ -1,12 +1,17 @@ +use crate::any::Dynamic; use crate::def_package; +use crate::engine::Engine; use crate::module::FuncReturn; use crate::parser::{ImmutableString, INT}; +use crate::result::EvalAltResult; +use crate::token::Position; use crate::utils::StaticVec; #[cfg(not(feature = "no_index"))] use crate::engine::Array; use crate::stdlib::{ + any::TypeId, fmt::Display, format, string::{String, ToString}, @@ -210,14 +215,40 @@ def_package!(crate:MoreStringPackage:"Additional string utilities, including str Ok(()) }, ); - lib.set_fn_3_mut( + lib.set_fn_var_args( "pad", - |s: &mut ImmutableString, len: INT, ch: char| { - let copy = s.make_mut(); - for _ in 0..copy.chars().count() - len as usize { - copy.push(ch); + &[TypeId::of::(), TypeId::of::(), TypeId::of::()], + |engine: &Engine, args: &mut [&mut Dynamic]| { + let len = *args[1].downcast_ref::< INT>().unwrap(); + + // Check if string will be over max size limit + if engine.max_string_size > 0 && len > 0 && (len as usize) > engine.max_string_size { + Err(Box::new(EvalAltResult::ErrorDataTooLarge( + "Length of string".to_string(), + engine.max_string_size, + len as usize, + Position::none(), + ))) + } else { + let ch = *args[2].downcast_ref::< char>().unwrap(); + let s = args[0].downcast_mut::().unwrap(); + + let copy = s.make_mut(); + for _ in 0..copy.chars().count() - len as usize { + copy.push(ch); + } + + if engine.max_string_size > 0 && copy.len() > engine.max_string_size { + Err(Box::new(EvalAltResult::ErrorDataTooLarge( + "Length of string".to_string(), + engine.max_string_size, + copy.len(), + Position::none(), + ))) + } else { + Ok(()) + } } - Ok(()) }, ); lib.set_fn_3_mut( diff --git a/tests/data_size.rs b/tests/data_size.rs index 7884971f..e073d443 100644 --- a/tests/data_size.rs +++ b/tests/data_size.rs @@ -35,6 +35,19 @@ fn test_max_string_size() -> Result<(), Box> { EvalAltResult::ErrorDataTooLarge(_, 10, 13, _) )); + assert!(matches!( + *engine + .eval::( + r#" + let x = "hello"; + x.pad(100, '!'); + x + "# + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 100, _) + )); + engine.set_max_string_size(0); assert_eq!( @@ -79,6 +92,18 @@ fn test_max_array_size() -> Result<(), Box> { .expect_err("should error"), EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) )); + assert!(matches!( + *engine + .eval::( + r" + let x = [1,2,3,4,5,6]; + x.pad(100, 42); + x + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 100, _) + )); assert!(matches!( *engine diff --git a/tests/modules.rs b/tests/modules.rs index 60d02247..771aa490 100644 --- a/tests/modules.rs +++ b/tests/modules.rs @@ -1,7 +1,9 @@ #![cfg(not(feature = "no_module"))] use rhai::{ - module_resolvers, Engine, EvalAltResult, Module, ParseError, ParseErrorType, Scope, INT, + module_resolvers, Dynamic, Engine, EvalAltResult, Module, ParseError, ParseErrorType, Scope, + INT, }; +use std::any::TypeId; #[test] fn test_module() { @@ -20,6 +22,7 @@ fn test_module_sub_module() -> Result<(), Box> { let mut sub_module2 = Module::new(); sub_module2.set_var("answer", 41 as INT); + let hash_inc = sub_module2.set_fn_1("inc", |x: INT| Ok(x + 1)); sub_module.set_sub_module("universe", sub_module2);