From bb691a03138b0a9f920e765650836c9ec2d70d7c Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Sun, 14 Jun 2020 14:25:47 +0800 Subject: [PATCH] Add maximum data size limits. --- README.md | 122 ++++++++++++++++++--- RELEASES.md | 1 + src/api.rs | 10 +- src/engine.rs | 162 ++++++++++++++++++++++++---- src/error.rs | 17 ++- src/parser.rs | 254 ++++++++++++++++++++++++++------------------ src/result.rs | 8 ++ src/token.rs | 58 ++++++---- tests/data_size.rs | 234 ++++++++++++++++++++++++++++++++++++++++ tests/modules.rs | 2 +- tests/operations.rs | 17 +++ 11 files changed, 725 insertions(+), 160 deletions(-) create mode 100644 tests/data_size.rs diff --git a/README.md b/README.md index 57c56afa..c0931fa6 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ to add scripting to any application. Features -------- -* Easy-to-use language similar to JS+Rust with dynamic typing but _no_ garbage collector. +* Easy-to-use language similar to JS+Rust with dynamic typing. * Tight integration with native Rust [functions](#working-with-functions) and [types](#custom-types-and-methods), including [getters/setters](#getters-and-setters), [methods](#members-and-methods) and [indexers](#indexers). * Freely pass Rust variables/constants into a script via an external [`Scope`]. @@ -25,7 +25,7 @@ Features one single source file, all with names starting with `"unsafe_"`). * Re-entrant scripting [`Engine`] can be made `Send + Sync` (via the [`sync`] feature). * Sand-boxed - the scripting [`Engine`], if declared immutable, cannot mutate the containing environment unless explicitly permitted (e.g. via a `RefCell`). -* Rugged (protection against [stack-overflow](#maximum-call-stack-depth) and [runaway scripts](#maximum-number-of-operations) etc.). +* Rugged - protection against malicious attacks (such as [stack-overflow](#maximum-call-stack-depth), [over-sized data](#maximum-length-of-strings), and [runaway scripts](#maximum-number-of-operations) etc.) 
that may come from untrusted third-party user-land scripts. * Track script evaluation [progress](#tracking-progress-and-force-terminate-script-run) and manually terminate a script run. * [`no-std`](#optional-features) support. * [Function overloading](#function-overloading). @@ -1191,13 +1191,16 @@ fn main() -> Result<(), Box> Engine configuration options --------------------------- -| Method | Description | -| ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------- | -| `set_optimization_level` | Set the amount of script _optimizations_ performed. See [script optimization]. | -| `set_max_expr_depths` | Set the maximum nesting levels of an expression/statement. See [maximum statement depth](#maximum-statement-depth). | -| `set_max_call_levels` | Set the maximum number of function call levels (default 50) to avoid infinite recursion. See [maximum call stack depth](#maximum-call-stack-depth). | -| `set_max_operations` | Set the maximum number of _operations_ that a script is allowed to consume. See [maximum number of operations](#maximum-number-of-operations). | -| `set_max_modules` | Set the maximum number of [modules] that a script is allowed to load. See [maximum number of modules](#maximum-number-of-modules). | +| Method | Not available under | Description | +| ------------------------ | ---------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- | +| `set_optimization_level` | [`no_optimize`] | Set the amount of script _optimizations_ performed. See [script optimization]. | +| `set_max_expr_depths` | [`unchecked`] | Set the maximum nesting levels of an expression/statement. See [maximum statement depth](#maximum-statement-depth). 
| +| `set_max_call_levels` | [`unchecked`] | Set the maximum number of function call levels (default 50) to avoid infinite recursion. See [maximum call stack depth](#maximum-call-stack-depth). | +| `set_max_operations` | [`unchecked`] | Set the maximum number of _operations_ that a script is allowed to consume. See [maximum number of operations](#maximum-number-of-operations). | +| `set_max_modules` | [`unchecked`] | Set the maximum number of [modules] that a script is allowed to load. See [maximum number of modules](#maximum-number-of-modules). | +| `set_max_string_size` | [`unchecked`] | Set the maximum length (in UTF-8 bytes) for [strings]. See [maximum length of strings](#maximum-length-of-strings). | +| `set_max_array_size` | [`unchecked`], [`no_index`] | Set the maximum size for [arrays]. See [maximum size of arrays](#maximum-size-of-arrays). | +| `set_max_map_size` | [`unchecked`], [`no_object`] | Set the maximum number of properties for [object maps]. See [maximum size of object maps](#maximum-size-of-object-maps). | ------- @@ -1498,6 +1501,9 @@ record == "Bob X. Davis: age 42 ❤\n"; 'C' in record == false; ``` +The maximum allowed length of a string can be controlled via `Engine::set_max_string_size` +(see [maximum length of strings](#maximum-length-of-strings)). + ### Built-in functions The following standard methods (mostly defined in the [`MoreStringPackage`](#packages) but excluded if using a [raw `Engine`]) operate on strings: @@ -1673,6 +1679,9 @@ y.len == 0; engine.register_fn("push", |list: &mut Array, item: MyType| list.push(Box::new(item)) ); ``` +The maximum allowed size of an array can be controlled via `Engine::set_max_array_size` +(see [maximum size of arrays](#maximum-size-of-arrays)). + Object maps ----------- @@ -1776,6 +1785,9 @@ y.clear(); // empty the object map y.len() == 0; ``` +The maximum allowed size of an object map can be controlled via `Engine::set_max_map_size` +(see [maximum size of object maps](#maximum-size-of-object-maps)). 
+ ### Parsing from JSON The syntax for an object map is extremely similar to JSON, with the exception of `null` values which can @@ -2439,7 +2451,7 @@ a script so that it does not consume more resources that it is allowed to. The most important resources to watch out for are: -* **Memory**: A malicous script may continuously grow an [array] or [object map] until all memory is consumed. +* **Memory**: A malicious script may continuously grow a [string], an [array] or [object map] until all memory is consumed. It may also create a large [array] or [object map] literal that exhausts all memory during parsing. * **CPU**: A malicous script may run an infinite tight loop that consumes all CPU cycles. * **Time**: A malicous script may run indefinitely, thereby blocking the calling system which is waiting for a result. @@ -2455,6 +2467,89 @@ The most important resources to watch out for are: * **Data**: A malicous script may attempt to read from and/or write to data that it does not own. If this happens, it is a severe security breach and may put the entire system at risk. +### Maximum length of strings + +Rhai by default does not limit how long a [string] can be. +This can be changed via the `Engine::set_max_string_size` method, with zero being unlimited (the default). + +```rust +let mut engine = Engine::new(); + +engine.set_max_string_size(500); // allow strings only up to 500 bytes long (in UTF-8 format) + +engine.set_max_string_size(0); // allow unlimited string length +``` + +A script attempting to create a string literal longer than the maximum will terminate with a parse error. +Any script operation that produces a string longer than the maximum also terminates the script with an error result. +This check can be disabled via the [`unchecked`] feature for higher performance +(but higher risks as well). 
+ +Be conservative when setting a maximum limit and always consider the fact that a registered function may grow +a string's length without Rhai noticing until the very end. For instance, the built-in '`+`' operator for strings +concatenates two strings together to form one longer string; if both strings are _slightly_ below the maximum +length limit, the resultant string may be almost _twice_ the maximum length. The '`pad`' function grows a string +to a specified length which may be longer than the allowed maximum +(to trap this risk, register a custom '`pad`' function that checks the arguments). + +### Maximum size of arrays + +Rhai by default does not limit how large an [array] can be. +This can be changed via the `Engine::set_max_array_size` method, with zero being unlimited (the default). + +```rust +let mut engine = Engine::new(); + +engine.set_max_array_size(500); // allow arrays only up to 500 items + +engine.set_max_array_size(0); // allow unlimited arrays +``` + +A script attempting to create an array literal larger than the maximum will terminate with a parse error. +Any script operation that produces an array larger than the maximum also terminates the script with an error result. +This check can be disabled via the [`unchecked`] feature for higher performance +(but higher risks as well). + +Be conservative when setting a maximum limit and always consider the fact that a registered function may grow +an array's size without Rhai noticing until the very end. +For instance, the built-in '`+`' operator for arrays concatenates two arrays together to form one larger array; +if both arrays are _slightly_ below the maximum size limit, the resultant array may be almost _twice_ the maximum size. +The '`pad`' function grows an array to a specified size which may be larger than the allowed maximum +(to trap this risk, register a custom '`pad`' function that checks the arguments). 
+ +As a malicious script may create a deeply-nested array which consumes huge amounts of memory while each individual +array still stays under the maximum size limit, Rhai also recursively adds up the sizes of all strings, arrays +and object maps contained within each array to make sure that the _aggregate_ sizes of none of these data structures +exceed their respective maximum size limits (if any). + +### Maximum size of object maps + +Rhai by default does not limit how large (i.e. the number of properties) an [object map] can be. +This can be changed via the `Engine::set_max_map_size` method, with zero being unlimited (the default). + +```rust +let mut engine = Engine::new(); + +engine.set_max_map_size(500); // allow object maps with only up to 500 properties + +engine.set_max_map_size(0); // allow unlimited object maps +``` + +A script attempting to create an object map literal with more properties than the maximum will terminate with a parse error. +Any script operation that produces an object map with more properties than the maximum also terminates the script with an error result. +This check can be disabled via the [`unchecked`] feature for higher performance +(but higher risks as well). + +Be conservative when setting a maximum limit and always consider the fact that a registered function may grow +an object map's size without Rhai noticing until the very end. For instance, the built-in '`+`' operator for object maps +concatenates two object maps together to form one larger object map; if both object maps are _slightly_ below the maximum +size limit, the resultant object map may be almost _twice_ the maximum size. 
+ +As a malicious script may create a deeply-nested object map which consumes huge amounts of memory while each individual +object map still stays under the maximum size limit, Rhai also recursively adds up the sizes of all strings, arrays +and object maps contained within each object map to make sure that the _aggregate_ sizes of none of these data structures +exceed their respective maximum size limits (if any). + ### Maximum number of operations Rhai by default does not limit how much time or CPU a script consumes. @@ -2516,14 +2611,17 @@ total number of operations for a typical run. ### Maximum number of modules Rhai by default does not limit how many [modules] can be loaded via [`import`] statements. -This can be changed via the `Engine::set_max_modules` method, with zero being unlimited (the default). +This can be changed via the `Engine::set_max_modules` method. Notice that setting the maximum number +of modules to zero does _not_ indicate unlimited modules, but disallows loading any module altogether. ```rust let mut engine = Engine::new(); engine.set_max_modules(5); // allow loading only up to 5 modules -engine.set_max_modules(0); // allow unlimited modules +engine.set_max_modules(0); // disallow loading any module (maximum = zero) + +engine.set_max_modules(1000); // set to a large number for effectively unlimited modules ``` A script attempting to load more than the maximum number of modules will terminate with an error result. diff --git a/RELEASES.md b/RELEASES.md index 6667774c..b19caf44 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -21,6 +21,7 @@ New features * Indexers are now split into getters and setters (which now support updates). The API is split into `Engine::register_indexer_get` and `Engine::register_indexer_set` with `Engine::register_indexer_get_set` being a shorthand. Similarly, `Module::set_indexer_get_fn` and `Module::set_indexer_set_fn` are added. 
* `Engine:register_fn` and `Engine:register_result_fn` accepts functions that take parameters of type `&str` (immutable string slice), which maps directly to `ImmutableString`. This is to avoid needing wrappers for functions taking string parameters. +* Set maximum limit on data sizes: `Engine::set_max_string_size`, `Engine::set_max_array_size` and `Engine::set_max_map_size`. Version 0.15.0 diff --git a/src/api.rs b/src/api.rs index 8e2af013..56adc08c 100644 --- a/src/api.rs +++ b/src/api.rs @@ -547,7 +547,7 @@ impl Engine { scripts: &[&str], optimization_level: OptimizationLevel, ) -> Result { - let stream = lex(scripts); + let stream = lex(scripts, self.max_string_size); self.parse(&mut stream.peekable(), scope, optimization_level) } @@ -669,7 +669,7 @@ impl Engine { // Trims the JSON string and add a '#' in front let scripts = ["#", json.trim()]; - let stream = lex(&scripts); + let stream = lex(&scripts, self.max_string_size); let ast = self.parse_global_expr(&mut stream.peekable(), &scope, OptimizationLevel::None)?; @@ -750,7 +750,7 @@ impl Engine { script: &str, ) -> Result { let scripts = [script]; - let stream = lex(&scripts); + let stream = lex(&scripts, self.max_string_size); { let mut peekable = stream.peekable(); @@ -904,7 +904,7 @@ impl Engine { script: &str, ) -> Result> { let scripts = [script]; - let stream = lex(&scripts); + let stream = lex(&scripts, self.max_string_size); let ast = self.parse_global_expr( &mut stream.peekable(), @@ -1034,7 +1034,7 @@ impl Engine { script: &str, ) -> Result<(), Box> { let scripts = [script]; - let stream = lex(&scripts); + let stream = lex(&scripts, self.max_string_size); let ast = self.parse(&mut stream.peekable(), scope, self.optimization_level)?; self.consume_ast_with_scope(scope, &ast) diff --git a/src/engine.rs b/src/engine.rs index b3d0536d..83878c3c 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -182,7 +182,7 @@ pub struct State { /// Number of operations performed. 
pub operations: u64, /// Number of modules loaded. - pub modules: u64, + pub modules: usize, } impl State { @@ -261,7 +261,13 @@ pub struct Engine { /// Maximum number of operations allowed to run. pub(crate) max_operations: u64, /// Maximum number of modules allowed to load. - pub(crate) max_modules: u64, + pub(crate) max_modules: usize, + /// Maximum length of a string. + pub(crate) max_string_size: usize, + /// Maximum length of an array. + pub(crate) max_array_size: usize, + /// Maximum number of properties in a map. + pub(crate) max_map_size: usize, } impl Default for Engine { @@ -296,8 +302,11 @@ impl Default for Engine { max_call_stack_depth: MAX_CALL_STACK_DEPTH, max_expr_depth: MAX_EXPR_DEPTH, max_function_expr_depth: MAX_FUNCTION_EXPR_DEPTH, - max_operations: u64::MAX, - max_modules: u64::MAX, + max_operations: 0, + max_modules: usize::MAX, + max_string_size: 0, + max_array_size: 0, + max_map_size: 0, }; engine.load_package(StandardPackage::new().get()); @@ -440,8 +449,11 @@ impl Engine { max_call_stack_depth: MAX_CALL_STACK_DEPTH, max_expr_depth: MAX_EXPR_DEPTH, max_function_expr_depth: MAX_FUNCTION_EXPR_DEPTH, - max_operations: u64::MAX, - max_modules: u64::MAX, + max_operations: 0, + max_modules: usize::MAX, + max_string_size: 0, + max_array_size: 0, + max_map_size: 0, } } @@ -482,26 +494,42 @@ impl Engine { /// consuming too much resources (0 for unlimited). #[cfg(not(feature = "unchecked"))] pub fn set_max_operations(&mut self, operations: u64) { - self.max_operations = if operations == 0 { - u64::MAX - } else { - operations - }; + self.max_operations = operations; } - /// Set the maximum number of imported modules allowed for a script (0 for unlimited). + /// Set the maximum number of imported modules allowed for a script. 
#[cfg(not(feature = "unchecked"))] - pub fn set_max_modules(&mut self, modules: u64) { - self.max_modules = if modules == 0 { u64::MAX } else { modules }; + pub fn set_max_modules(&mut self, modules: usize) { + self.max_modules = modules; } - /// Set the depth limits for expressions/statements. + /// Set the depth limits for expressions/statements (0 for unlimited). #[cfg(not(feature = "unchecked"))] pub fn set_max_expr_depths(&mut self, max_expr_depth: usize, max_function_expr_depth: usize) { self.max_expr_depth = max_expr_depth; self.max_function_expr_depth = max_function_expr_depth; } + /// Set the maximum length of strings (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + pub fn set_max_string_size(&mut self, max_size: usize) { + self.max_string_size = max_size; + } + + /// Set the maximum length of arrays (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_index"))] + pub fn set_max_array_size(&mut self, max_size: usize) { + self.max_array_size = max_size; + } + + /// Set the maximum length of object maps (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_object"))] + pub fn set_max_map_size(&mut self, max_size: usize) { + self.max_map_size = max_size; + } + /// Set the module resolution service used by the `Engine`. /// /// Not available under the `no_module` feature. 
@@ -1395,7 +1423,7 @@ impl Engine { self.inc_operations(state) .map_err(|err| EvalAltResult::new_position(err, expr.position()))?; - match expr { + let result = match expr { Expr::Expr(x) => self.eval_expr(scope, state, lib, x.as_ref(), level), Expr::IntegerConstant(x) => Ok(x.0.into()), @@ -1731,7 +1759,13 @@ impl Engine { Expr::Unit(_) => Ok(().into()), _ => unreachable!(), + }; + + if let Ok(val) = &result { + self.check_data_size(val)?; } + + result } /// Evaluate a statement @@ -1746,7 +1780,7 @@ impl Engine { self.inc_operations(state) .map_err(|err| EvalAltResult::new_position(err, stmt.position()))?; - match stmt { + let result = match stmt { // No-op Stmt::Noop(_) => Ok(Default::default()), @@ -1998,6 +2032,98 @@ impl Engine { } Ok(Default::default()) } + }; + + if let Ok(val) = &result { + self.check_data_size(val)?; + } + + result + } + + /// Check a `Dynamic` value to ensure that its size is within allowable limit. + fn check_data_size(&self, value: &Dynamic) -> Result<(), Box> { + #[cfg(feature = "unchecked")] + return Ok(()); + + if self.max_string_size + self.max_array_size + self.max_map_size == 0 { + return Ok(()); + } + + // Recursively calculate the size of a value (especially `Array` and `Map`) + fn calc_size(value: &Dynamic) -> (usize, usize, usize) { + match value { + #[cfg(not(feature = "no_index"))] + Dynamic(Union::Array(arr)) => { + let mut arrays = 0; + let mut maps = 0; + + arr.iter().for_each(|value| match value { + Dynamic(Union::Array(_)) | Dynamic(Union::Map(_)) => { + let (a, m, _) = calc_size(value); + arrays += a; + maps += m; + } + _ => arrays += 1, + }); + + (arrays, maps, 0) + } + #[cfg(not(feature = "no_object"))] + Dynamic(Union::Map(map)) => { + let mut arrays = 0; + let mut maps = 0; + + map.values().for_each(|value| match value { + Dynamic(Union::Array(_)) | Dynamic(Union::Map(_)) => { + let (a, m, _) = calc_size(value); + arrays += a; + maps += m; + } + _ => maps += 1, + }); + + (arrays, maps, 0) + } + 
Dynamic(Union::Str(s)) => (0, 0, s.len()), + _ => (0, 0, 0), + } + } + + match value { + Dynamic(Union::Str(_)) if self.max_string_size > 0 => (), + #[cfg(not(feature = "no_index"))] + Dynamic(Union::Array(_)) if self.max_array_size > 0 => (), + #[cfg(not(feature = "no_object"))] + Dynamic(Union::Map(_)) if self.max_map_size > 0 => (), + _ => return Ok(()), + }; + + let (arr, map, s) = calc_size(value); + + if s > self.max_string_size { + Err(Box::new(EvalAltResult::ErrorDataTooLarge( + "Length of string".to_string(), + self.max_string_size, + s, + Position::none(), + ))) + } else if arr > self.max_array_size { + Err(Box::new(EvalAltResult::ErrorDataTooLarge( + "Length of array".to_string(), + self.max_array_size, + arr, + Position::none(), + ))) + } else if map > self.max_map_size { + Err(Box::new(EvalAltResult::ErrorDataTooLarge( + "Number of properties in object map".to_string(), + self.max_map_size, + map, + Position::none(), + ))) + } else { + Ok(()) } } @@ -2009,7 +2135,7 @@ impl Engine { #[cfg(not(feature = "unchecked"))] { // Guard against too many operations - if state.operations > self.max_operations { + if self.max_operations > 0 && state.operations > self.max_operations { return Err(Box::new(EvalAltResult::ErrorTooManyOperations( Position::none(), ))); diff --git a/src/error.rs b/src/error.rs index dd1be4ef..3d4c47c7 100644 --- a/src/error.rs +++ b/src/error.rs @@ -12,6 +12,8 @@ pub enum LexError { UnexpectedChar(char), /// A string literal is not terminated before a new-line or EOF. UnterminatedString, + /// A string literal exceeds the maximum allowed length. Wrapped value is the maximum length. + StringTooLong(usize), /// An string/character/numeric escape sequence is in an invalid format. MalformedEscapeSequence(String), /// An numeric literal is in an invalid format. 
@@ -35,6 +37,11 @@ impl fmt::Display for LexError { Self::MalformedChar(s) => write!(f, "Invalid character: '{}'", s), Self::MalformedIdentifier(s) => write!(f, "Variable name is not proper: '{}'", s), Self::UnterminatedString => write!(f, "Open string is not terminated"), + Self::StringTooLong(max) => write!( + f, + "Length of string literal exceeds the maximum limit ({})", + max + ), Self::ImproperKeyword(s) => write!(f, "{}", s), } } @@ -109,12 +116,16 @@ pub enum ParseErrorType { WrongExport, /// Assignment to a copy of a value. AssignmentToCopy, - /// Assignment to an a constant variable. + /// Assignment to a constant variable. Wrapped value is the constant variable name. AssignmentToConstant(String), /// Expression exceeding the maximum levels of complexity. /// /// Never appears under the `unchecked` feature. ExprTooDeep, + /// Literal exceeding the maximum size. Wrapped values are the data type name and the maximum size. + /// + /// Never appears under the `unchecked` feature. + LiteralTooLarge(String, usize), /// Break statement not inside a loop. 
LoopBreak, } @@ -149,6 +160,7 @@ impl ParseErrorType { Self::AssignmentToCopy => "Only a copy of the value is change with this assignment", Self::AssignmentToConstant(_) => "Cannot assign to a constant value", Self::ExprTooDeep => "Expression exceeds maximum complexity", + Self::LiteralTooLarge(_, _) => "Literal exceeds maximum limit", Self::LoopBreak => "Break statement should only be used inside a loop" } } @@ -197,6 +209,9 @@ impl fmt::Display for ParseErrorType { Self::AssignmentToConstant(s) if s.is_empty() => write!(f, "{}", self.desc()), Self::AssignmentToConstant(s) => write!(f, "Cannot assign to constant '{}'", s), + Self::LiteralTooLarge(typ, max) => { + write!(f, "{} exceeds the maximum limit ({})", typ, max) + } _ => write!(f, "{}", self.desc()), } } diff --git a/src/parser.rs b/src/parser.rs index 2eed69b1..ec99adaf 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -201,13 +201,27 @@ struct ParseState { stack: Vec<(String, ScopeEntryType)>, /// Maximum levels of expression nesting. max_expr_depth: usize, + /// Maximum length of a string. + pub max_string_size: usize, + /// Maximum length of an array. + pub max_array_size: usize, + /// Maximum number of properties in a map. + pub max_map_size: usize, } impl ParseState { /// Create a new `ParseState`. - pub fn new(max_expr_depth: usize) -> Self { + pub fn new( + max_expr_depth: usize, + max_string_size: usize, + max_array_size: usize, + max_map_size: usize, + ) -> Self { Self { max_expr_depth, + max_string_size, + max_array_size, + max_map_size, ..Default::default() } } @@ -284,7 +298,9 @@ impl ParseSettings { } /// Make sure that the current level of expression nesting is within the maximum limit. 
pub fn ensure_level_within_max_limit(&self, limit: usize) -> Result<(), ParseError> { - if self.level > limit { + if limit == 0 { + Ok(()) + } else if self.level > limit { Err(PERR::ExprTooDeep.into_err(self.pos)) } else { Ok(()) @@ -610,7 +626,7 @@ impl Expr { Self::Variable(_) => true, - expr => expr.is_constant(), + _ => self.is_constant(), } } @@ -1070,6 +1086,14 @@ fn parse_array_literal( if !match_token(input, Token::RightBracket)? { while !input.peek().unwrap().0.is_eof() { + if state.max_array_size > 0 && arr.len() >= state.max_array_size { + return Err(PERR::LiteralTooLarge( + "Size of array literal".to_string(), + state.max_array_size, + ) + .into_err(input.peek().unwrap().1)); + } + let expr = parse_expr(input, state, settings.level_up())?; arr.push(expr); @@ -1155,8 +1179,15 @@ fn parse_map_literal( } }; - let expr = parse_expr(input, state, settings.level_up())?; + if state.max_map_size > 0 && map.len() >= state.max_map_size { + return Err(PERR::LiteralTooLarge( + "Number of properties in object map literal".to_string(), + state.max_map_size, + ) + .into_err(input.peek().unwrap().1)); + } + let expr = parse_expr(input, state, settings.level_up())?; map.push(((name, pos), expr)); match input.peek().unwrap() { @@ -1239,7 +1270,7 @@ fn parse_primary( Token::True => Expr::True(settings.pos), Token::False => Expr::False(settings.pos), Token::LexError(err) => return Err(PERR::BadInput(err.to_string()).into_err(settings.pos)), - token => { + _ => { return Err( PERR::BadInput(format!("Unexpected '{}'", token.syntax())).into_err(settings.pos) ) @@ -1817,7 +1848,7 @@ fn parse_binary_op( make_dot_expr(current_lhs, rhs, pos)? } - token => return Err(PERR::UnknownOperator(token.into()).into_err(pos)), + op_token => return Err(PERR::UnknownOperator(op_token.into()).into_err(pos)), }; } } @@ -2408,102 +2439,6 @@ fn parse_fn( }) } -/// Parse the global level statements. 
-fn parse_global_level( - input: &mut TokenStream, - max_expr_depth: usize, - max_function_expr_depth: usize, -) -> Result<(Vec, Vec), ParseError> { - let mut statements = Vec::::new(); - let mut functions = HashMap::::with_hasher(StraightHasherBuilder); - let mut state = ParseState::new(max_expr_depth); - - while !input.peek().unwrap().0.is_eof() { - // Collect all the function definitions - #[cfg(not(feature = "no_function"))] - { - let (access, must_be_fn) = if match_token(input, Token::Private)? { - (FnAccess::Private, true) - } else { - (FnAccess::Public, false) - }; - - match input.peek().unwrap() { - #[cfg(not(feature = "no_function"))] - (Token::Fn, pos) => { - let mut state = ParseState::new(max_function_expr_depth); - let settings = ParseSettings { - allow_if_expr: true, - allow_stmt_expr: true, - is_global: false, - is_breakable: false, - level: 0, - pos: *pos, - }; - let func = parse_fn(input, &mut state, access, settings)?; - - // Qualifiers (none) + function name + number of arguments. - let hash = calc_fn_hash(empty(), &func.name, func.params.len(), empty()); - - functions.insert(hash, func); - continue; - } - (_, pos) if must_be_fn => { - return Err(PERR::MissingToken( - Token::Fn.into(), - format!("following '{}'", Token::Private.syntax()), - ) - .into_err(*pos)) - } - _ => (), - } - } - - // Actual statement - let settings = ParseSettings { - allow_if_expr: true, - allow_stmt_expr: true, - is_global: true, - is_breakable: false, - level: 0, - pos: Position::none(), - }; - let stmt = parse_stmt(input, &mut state, settings)?; - - let need_semicolon = !stmt.is_self_terminated(); - - statements.push(stmt); - - match input.peek().unwrap() { - // EOF - (Token::EOF, _) => break, - // stmt ; - (Token::SemiColon, _) if need_semicolon => { - eat_token(input, Token::SemiColon); - } - // stmt ; - (Token::SemiColon, _) if !need_semicolon => (), - // { stmt } ??? 
- (_, _) if !need_semicolon => (), - // stmt - (Token::LexError(err), pos) => { - return Err(PERR::BadInput(err.to_string()).into_err(*pos)) - } - // stmt ??? - (_, pos) => { - // Semicolons are not optional between statements - return Err(PERR::MissingToken( - Token::SemiColon.into(), - "to terminate this statement".into(), - ) - .into_err(*pos)); - } - } - } - - Ok((statements, functions.into_iter().map(|(_, v)| v).collect())) -} - impl Engine { pub(crate) fn parse_global_expr( &self, @@ -2511,7 +2446,12 @@ impl Engine { scope: &Scope, optimization_level: OptimizationLevel, ) -> Result { - let mut state = ParseState::new(self.max_expr_depth); + let mut state = ParseState::new( + self.max_expr_depth, + self.max_string_size, + self.max_array_size, + self.max_map_size, + ); let settings = ParseSettings { allow_if_expr: false, allow_stmt_expr: false, @@ -2540,6 +2480,111 @@ impl Engine { ) } + /// Parse the global level statements. + fn parse_global_level( + &self, + input: &mut TokenStream, + ) -> Result<(Vec, Vec), ParseError> { + let mut statements = Vec::::new(); + let mut functions = HashMap::::with_hasher(StraightHasherBuilder); + let mut state = ParseState::new( + self.max_expr_depth, + self.max_string_size, + self.max_array_size, + self.max_map_size, + ); + + while !input.peek().unwrap().0.is_eof() { + // Collect all the function definitions + #[cfg(not(feature = "no_function"))] + { + let (access, must_be_fn) = if match_token(input, Token::Private)? 
{ + (FnAccess::Private, true) + } else { + (FnAccess::Public, false) + }; + + match input.peek().unwrap() { + #[cfg(not(feature = "no_function"))] + (Token::Fn, pos) => { + let mut state = ParseState::new( + self.max_function_expr_depth, + self.max_string_size, + self.max_array_size, + self.max_map_size, + ); + let settings = ParseSettings { + allow_if_expr: true, + allow_stmt_expr: true, + is_global: false, + is_breakable: false, + level: 0, + pos: *pos, + }; + let func = parse_fn(input, &mut state, access, settings)?; + + // Qualifiers (none) + function name + number of arguments. + let hash = calc_fn_hash(empty(), &func.name, func.params.len(), empty()); + + functions.insert(hash, func); + continue; + } + (_, pos) if must_be_fn => { + return Err(PERR::MissingToken( + Token::Fn.into(), + format!("following '{}'", Token::Private.syntax()), + ) + .into_err(*pos)) + } + _ => (), + } + } + + // Actual statement + let settings = ParseSettings { + allow_if_expr: true, + allow_stmt_expr: true, + is_global: true, + is_breakable: false, + level: 0, + pos: Position::none(), + }; + let stmt = parse_stmt(input, &mut state, settings)?; + + let need_semicolon = !stmt.is_self_terminated(); + + statements.push(stmt); + + match input.peek().unwrap() { + // EOF + (Token::EOF, _) => break, + // stmt ; + (Token::SemiColon, _) if need_semicolon => { + eat_token(input, Token::SemiColon); + } + // stmt ; + (Token::SemiColon, _) if !need_semicolon => (), + // { stmt } ??? + (_, _) if !need_semicolon => (), + // stmt + (Token::LexError(err), pos) => { + return Err(PERR::BadInput(err.to_string()).into_err(*pos)) + } + // stmt ??? + (_, pos) => { + // Semicolons are not optional between statements + return Err(PERR::MissingToken( + Token::SemiColon.into(), + "to terminate this statement".into(), + ) + .into_err(*pos)); + } + } + } + + Ok((statements, functions.into_iter().map(|(_, v)| v).collect())) + } + /// Run the parser on an input stream, returning an AST. 
pub(crate) fn parse( &self, @@ -2547,8 +2592,7 @@ impl Engine { scope: &Scope, optimization_level: OptimizationLevel, ) -> Result { - let (statements, lib) = - parse_global_level(input, self.max_expr_depth, self.max_function_expr_depth)?; + let (statements, lib) = self.parse_global_level(input)?; Ok( // Optimize AST diff --git a/src/result.rs b/src/result.rs index 36bc18f6..83943df2 100644 --- a/src/result.rs +++ b/src/result.rs @@ -81,6 +81,8 @@ pub enum EvalAltResult { ErrorTooManyModules(Position), /// Call stack over maximum limit. ErrorStackOverflow(Position), + /// Data value over maximum size limit. Wrapped values are the data type, maximum size and current size. + ErrorDataTooLarge(String, usize, usize, Position), /// The script is prematurely terminated. ErrorTerminated(Position), /// Run-time error encountered. Wrapped value is the error message. @@ -139,6 +141,7 @@ impl EvalAltResult { Self::ErrorTooManyOperations(_) => "Too many operations", Self::ErrorTooManyModules(_) => "Too many modules imported", Self::ErrorStackOverflow(_) => "Stack overflow", + Self::ErrorDataTooLarge(_, _, _, _) => "Data size exceeds maximum limit", Self::ErrorTerminated(_) => "Script terminated.", Self::ErrorRuntime(_, _) => "Runtime error", Self::ErrorLoopBreak(true, _) => "Break statement not inside a loop", @@ -228,6 +231,9 @@ impl fmt::Display for EvalAltResult { "String index {} is out of bounds: only {} characters in the string", index, max )?, + Self::ErrorDataTooLarge(typ, max, size, _) => { + write!(f, "{} ({}) exceeds the maximum limit ({})", typ, size, max)? 
+ } } // Do not write any position if None @@ -279,6 +285,7 @@ impl EvalAltResult { | Self::ErrorTooManyOperations(pos) | Self::ErrorTooManyModules(pos) | Self::ErrorStackOverflow(pos) + | Self::ErrorDataTooLarge(_, _, _, pos) | Self::ErrorTerminated(pos) | Self::ErrorRuntime(_, pos) | Self::ErrorLoopBreak(_, pos) @@ -316,6 +323,7 @@ impl EvalAltResult { | Self::ErrorTooManyOperations(pos) | Self::ErrorTooManyModules(pos) | Self::ErrorStackOverflow(pos) + | Self::ErrorDataTooLarge(_, _, _, pos) | Self::ErrorTerminated(pos) | Self::ErrorRuntime(_, pos) | Self::ErrorLoopBreak(_, pos) diff --git a/src/token.rs b/src/token.rs index d6799a6f..614783b4 100644 --- a/src/token.rs +++ b/src/token.rs @@ -429,6 +429,8 @@ impl From for String { /// An iterator on a `Token` stream. pub struct TokenIterator<'a> { + /// Maximum length of a string (0 = unlimited). + max_string_size: usize, /// Can the next token be a unary operator? can_be_unary: bool, /// Current position. @@ -494,6 +496,7 @@ impl<'a> TokenIterator<'a> { pub fn parse_string_literal( &mut self, enclosing_char: char, + max_length: usize, ) -> Result { let mut result = Vec::new(); let mut escape = String::with_capacity(12); @@ -505,6 +508,10 @@ impl<'a> TokenIterator<'a> { self.advance(); + if max_length > 0 && result.len() > max_length { + return Err((LexError::StringTooLong(max_length), self.pos)); + } + match next_char { // \... '\\' if escape.is_empty() => { @@ -592,7 +599,13 @@ impl<'a> TokenIterator<'a> { } } - Ok(result.iter().collect()) + let s = result.iter().collect::(); + + if max_length > 0 && s.len() > max_length { + return Err((LexError::StringTooLong(max_length), self.pos)); + } + + Ok(s) } /// Get the next token. 
@@ -779,10 +792,12 @@ impl<'a> TokenIterator<'a> { // " - string literal ('"', _) => { - return self.parse_string_literal('"').map_or_else( - |err| Some((Token::LexError(Box::new(err.0)), err.1)), - |out| Some((Token::StringConst(out), pos)), - ); + return self + .parse_string_literal('"', self.max_string_size) + .map_or_else( + |err| Some((Token::LexError(Box::new(err.0)), err.1)), + |out| Some((Token::StringConst(out), pos)), + ); } // ' - character literal @@ -793,19 +808,25 @@ impl<'a> TokenIterator<'a> { )); } ('\'', _) => { - return Some(self.parse_string_literal('\'').map_or_else( - |err| (Token::LexError(Box::new(err.0)), err.1), - |result| { - let mut chars = result.chars(); - let first = chars.next(); + return Some( + self.parse_string_literal('\'', self.max_string_size) + .map_or_else( + |err| (Token::LexError(Box::new(err.0)), err.1), + |result| { + let mut chars = result.chars(); + let first = chars.next(); - if chars.next().is_some() { - (Token::LexError(Box::new(LERR::MalformedChar(result))), pos) - } else { - (Token::CharConstant(first.expect("should be Some")), pos) - } - }, - )); + if chars.next().is_some() { + ( + Token::LexError(Box::new(LERR::MalformedChar(result))), + pos, + ) + } else { + (Token::CharConstant(first.expect("should be Some")), pos) + } + }, + ), + ); } // Braces @@ -1047,8 +1068,9 @@ impl<'a> Iterator for TokenIterator<'a> { } /// Tokenize an input text stream. 
-pub fn lex<'a>(input: &'a [&'a str]) -> TokenIterator<'a> { +pub fn lex<'a>(input: &'a [&'a str], max_string_size: usize) -> TokenIterator<'a> { TokenIterator { + max_string_size, can_be_unary: true, pos: Position::new(1, 0), streams: input.iter().map(|s| s.chars().peekable()).collect(), diff --git a/tests/data_size.rs b/tests/data_size.rs new file mode 100644 index 00000000..77e660fa --- /dev/null +++ b/tests/data_size.rs @@ -0,0 +1,234 @@ +#![cfg(not(feature = "unchecked"))] +use rhai::{Engine, EvalAltResult, ParseError, ParseErrorType}; + +#[cfg(not(feature = "no_index"))] +use rhai::Array; + +#[cfg(not(feature = "no_object"))] +use rhai::Map; + +#[test] +fn test_max_string_size() -> Result<(), Box> { + let mut engine = Engine::new(); + engine.set_max_string_size(10); + + assert!(matches!( + engine.compile(r#"let x = "hello, world!";"#).expect_err("should error"), + ParseError(x, _) if *x == ParseErrorType::BadInput("Length of string literal exceeds the maximum limit (10)".to_string()) + )); + + assert!(matches!( + engine.compile(r#"let x = "朝に紅顔、暮に白骨";"#).expect_err("should error"), + ParseError(x, _) if *x == ParseErrorType::BadInput("Length of string literal exceeds the maximum limit (10)".to_string()) + )); + + assert!(matches!( + *engine + .eval::( + r#" + let x = "hello, "; + let y = "world!"; + x + y + "# + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 13, _) + )); + + engine.set_max_string_size(0); + + assert_eq!( + engine.eval::( + r#" + let x = "hello, "; + let y = "world!"; + x + y + "# + )?, + "hello, world!" 
+ ); + + Ok(()) +} + +#[test] +#[cfg(not(feature = "no_index"))] +fn test_max_array_size() -> Result<(), Box> { + let mut engine = Engine::new(); + engine.set_max_array_size(10); + + #[cfg(not(feature = "no_object"))] + engine.set_max_map_size(10); + + assert!(matches!( + engine + .compile("let x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15];") + .expect_err("should error"), + ParseError(x, _) if *x == ParseErrorType::LiteralTooLarge("Size of array literal".to_string(), 10) + )); + + assert!(matches!( + *engine + .eval::( + r" + let x = [1,2,3,4,5,6]; + let y = [7,8,9,10,11,12]; + x + y + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + assert!(matches!( + *engine + .eval::( + r" + let x = [1,2,3]; + [x, x, x, x] + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + #[cfg(not(feature = "no_object"))] + assert!(matches!( + *engine + .eval::( + r" + let x = #{a:1, b:2, c:3}; + [x, x, x, x] + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + assert!(matches!( + *engine + .eval::( + r" + let x = [1]; + let y = [x, x]; + let z = [y, y]; + [z, z, z] + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + engine.set_max_array_size(0); + + assert_eq!( + engine + .eval::( + r" + let x = [1,2,3,4,5,6]; + let y = [7,8,9,10,11,12]; + x + y + " + )? + .len(), + 12 + ); + + assert_eq!( + engine + .eval::( + r" + let x = [1,2,3]; + [x, x, x, x] + " + )? 
+ .len(), + 4 + ); + + Ok(()) +} + +#[test] +#[cfg(not(feature = "no_object"))] +fn test_max_map_size() -> Result<(), Box> { + let mut engine = Engine::new(); + engine.set_max_map_size(10); + + #[cfg(not(feature = "no_index"))] + engine.set_max_array_size(10); + + assert!(matches!( + engine + .compile("let x = #{a:1,b:2,c:3,d:4,e:5,f:6,g:7,h:8,i:9,j:10,k:11,l:12,m:13,n:14,o:15};") + .expect_err("should error"), + ParseError(x, _) if *x == ParseErrorType::LiteralTooLarge("Number of properties in object map literal".to_string(), 10) + )); + + assert!(matches!( + *engine + .eval::( + r" + let x = #{a:1,b:2,c:3,d:4,e:5,f:6}; + let y = #{g:7,h:8,i:9,j:10,k:11,l:12}; + x + y + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + assert!(matches!( + *engine + .eval::( + r" + let x = #{a:1,b:2,c:3}; + #{u:x, v:x, w:x, z:x} + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + #[cfg(not(feature = "no_index"))] + assert!(matches!( + *engine + .eval::( + r" + let x = [1, 2, 3]; + #{u:x, v:x, w:x, z:x} + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + engine.set_max_map_size(0); + + assert_eq!( + engine + .eval::( + r" + let x = #{a:1,b:2,c:3,d:4,e:5,f:6}; + let y = #{g:7,h:8,i:9,j:10,k:11,l:12}; + x + y + " + )? + .len(), + 12 + ); + + assert_eq!( + engine + .eval::( + r" + let x = #{a:1,b:2,c:3}; + #{u:x, v:x, w:x, z:x} + " + )? 
+ .len(), + 4 + ); + + Ok(()) +} diff --git a/tests/modules.rs b/tests/modules.rs index 10c138ad..60d02247 100644 --- a/tests/modules.rs +++ b/tests/modules.rs @@ -130,7 +130,7 @@ fn test_module_resolver() -> Result<(), Box> { EvalAltResult::ErrorInFunctionCall(fn_name, _, _) if fn_name == "foo" )); - engine.set_max_modules(0); + engine.set_max_modules(1000); #[cfg(not(feature = "no_function"))] engine.eval::<()>( diff --git a/tests/operations.rs b/tests/operations.rs index 91df26c9..dae539f0 100644 --- a/tests/operations.rs +++ b/tests/operations.rs @@ -111,3 +111,20 @@ fn test_max_operations_eval() -> Result<(), Box> { Ok(()) } + +#[test] +fn test_max_operations_progress() -> Result<(), Box> { + let mut engine = Engine::new(); + engine.set_max_operations(500); + + engine.on_progress(|&count| count < 100); + + assert!(matches!( + *engine + .eval::<()>("for x in range(0, 500) {}") + .expect_err("should error"), + EvalAltResult::ErrorTerminated(_) + )); + + Ok(()) +}