From d2ded7733a99c73b4a501f62695826754df0d108 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Tue, 30 Mar 2021 00:21:09 +0800 Subject: [PATCH] Add support for line continuation and multi-line string literals. --- CHANGELOG.md | 18 ++++++++-- src/token.rs | 95 +++++++++++++++++++++++++++++++------------------ tests/string.rs | 8 +++++ 3 files changed, 84 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50b54db4..9b4bbbc5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,14 @@ an object map is small. `HashMap` and `BTreeMap` have almost identical public API's so this change is unlikely to break existing code. -Im addition, all function signature/metadata methods are now grouped under the umbrella `metadata` feature. +[`SmartString`](https://crates.io/crates/smartstring) is used to store identifiers (which tend to +be short, fewer than 23 characters, and ASCII-based) because they can usually be stored inline. +`Map` keys now also use [`SmartString`](https://crates.io/crates/smartstring). + +In addition, there is now support for line continuation in strings (put `\` at the end of a line) as +well as multi-line literal strings (wrapped by back-ticks: \`...\`). + +Finally, all function signature/metadata methods are now grouped under the umbrella `metadata` feature. This avoids spending precious resources maintaining metadata for functions for the vast majority of use cases where such information is not required. @@ -24,7 +31,6 @@ use cases where such information is not required. Breaking changes ---------------- -* `Map` is now an alias to `BTreeMap` instead of `HashMap` because most object maps hold few properties. * The traits `RegisterFn` and `RegisterResultFn` are removed. `Engine::register_fn` and `Engine::register_result_fn` are now implemented directly on `Engine`. * `FnPtr::call_dynamic` now takes `&NativeCallContext` instead of consuming it.
* All `Module::set_fn_XXX` methods are removed, in favor of `Module::set_native_fn`. @@ -35,6 +41,13 @@ Breaking changes * The shebang `#!` is now a reserved symbol. * Shebangs at the very beginning of script files are skipped when loading them. * [`smartstring`](https://crates.io/crates/smartstring) is used for identifiers by default. Currently, a PR branch is pulled because it breaks on `no-std` builds. The official crate will be used once `smartstring` is fixed to support `no-std`. +* `Map` is now an alias to `BTreeMap` instead of `HashMap` because most object maps hold few properties. + +New features +------------ + +* Support for line continuation (via `\`) and multi-line literal strings (wrapped with \`) is added. +* Rhai scripts can now start with a shebang `#!` which is ignored. Enhancements ------------ @@ -42,7 +55,6 @@ Enhancements * Replaced all `HashMap` usage with `BTreeMap` for better performance because collections in Rhai are tiny. * `Engine::register_result_fn` no longer requires the successful return type to be `Dynamic`. It can now be any clonable type. * `#[rhai_fn(return_raw)]` can now return `Result>` where `T` is any clonable type instead of `Result>`. -* Rhai scripts can now start with a shebang `#!`. Version 0.19.14 diff --git a/src/token.rs b/src/token.rs index 16087267..ecd4d661 100644 --- a/src/token.rs +++ b/src/token.rs @@ -842,7 +842,7 @@ pub trait InputStream { fn peek_next(&mut self) -> Option; } -/// _(INTERNALS)_ Parse a string literal wrapped by `enclosing_char`. +/// _(INTERNALS)_ Parse a string literal ended by `termination_char`. /// Exported under the `internals` feature only.
/// /// # Volatile API @@ -852,12 +852,15 @@ pub fn parse_string_literal( stream: &mut impl InputStream, state: &mut TokenizeState, pos: &mut Position, - enclosing_char: char, + termination_char: char, + continuation: bool, + verbatim: bool, ) -> Result { let mut result: smallvec::SmallVec<[char; 16]> = Default::default(); let mut escape: smallvec::SmallVec<[char; 12]> = Default::default(); let start = *pos; + let mut skip_whitespace_until = 0; loop { let next_char = stream.get_next().ok_or((LERR::UnterminatedString, start))?; @@ -871,8 +874,10 @@ pub fn parse_string_literal( } match next_char { + // \r - ignore if followed by \n + '\r' if stream.peek_next().unwrap_or('\0') == '\n' => {} // \... - '\\' if escape.is_empty() => { + '\\' if escape.is_empty() && !verbatim => { escape.push('\\'); } // \\ @@ -937,18 +942,37 @@ pub fn parse_string_literal( })?); } - // \{enclosing_char} - escaped - ch if enclosing_char == ch && !escape.is_empty() => { + // \{termination_char} - escaped + _ if termination_char == next_char && !escape.is_empty() => { escape.clear(); - result.push(ch) + result.push(next_char) } // Close wrapper - ch if enclosing_char == ch && escape.is_empty() => break, + _ if termination_char == next_char && escape.is_empty() => break, + + // Line continuation + '\n' if continuation && !escape.is_empty() => { + escape.clear(); + pos.new_line(); + skip_whitespace_until = start.position().unwrap() + 1; + } + + // New-line cannot be escaped + // Cannot have new-lines inside non-multi-line string literals + '\n' if !escape.is_empty() || !verbatim => { + pos.rewind(); + return Err((LERR::UnterminatedString, start)); + } + + '\n' => { + pos.new_line(); + result.push(next_char); + } // Unknown escape sequence - ch if !escape.is_empty() => { - escape.push(ch); + _ if !escape.is_empty() => { + escape.push(next_char); return Err(( LERR::MalformedEscapeSequence(escape.into_iter().collect()), @@ -956,16 +980,14 @@ pub fn parse_string_literal( )); } - // Cannot have 
new-lines inside string literals - '\n' => { - pos.rewind(); - return Err((LERR::UnterminatedString, start)); - } + // Whitespace to skip + _ if next_char.is_whitespace() && pos.position().unwrap() < skip_whitespace_until => {} // All other characters - ch => { + _ => { escape.clear(); - result.push(ch); + result.push(next_char); + skip_whitespace_until = 0; } } } @@ -1272,12 +1294,15 @@ fn get_next_token_inner( return get_identifier(stream, pos, start_pos, c); } - // " - string literal - ('"', _) => { - return parse_string_literal(stream, state, pos, '"').map_or_else( - |err| Some((Token::LexError(err.0), err.1)), - |out| Some((Token::StringConstant(out), start_pos)), - ) + // " or ` - string literal + ('"', _) | ('`', _) => { + let multi_line = c == '`'; + + return parse_string_literal(stream, state, pos, c, !multi_line, multi_line) + .map_or_else( + |err| Some((Token::LexError(err.0), err.1)), + |out| Some((Token::StringConstant(out), start_pos)), + ); } // ' - character literal @@ -1288,19 +1313,21 @@ fn get_next_token_inner( )) } ('\'', _) => { - return Some(parse_string_literal(stream, state, pos, '\'').map_or_else( - |err| (Token::LexError(err.0), err.1), - |result| { - let mut chars = result.chars(); - let first = chars.next().unwrap(); + return Some( + parse_string_literal(stream, state, pos, c, false, false).map_or_else( + |err| (Token::LexError(err.0), err.1), + |result| { + let mut chars = result.chars(); + let first = chars.next().unwrap(); - if chars.next().is_some() { - (Token::LexError(LERR::MalformedChar(result)), start_pos) - } else { - (Token::CharConstant(first), start_pos) - } - }, - )) + if chars.next().is_some() { + (Token::LexError(LERR::MalformedChar(result)), start_pos) + } else { + (Token::CharConstant(first), start_pos) + } + }, + ), + ) } // Braces diff --git a/tests/string.rs b/tests/string.rs index ff787116..d6885a34 100644 --- a/tests/string.rs +++ b/tests/string.rs @@ -8,6 +8,14 @@ fn test_string() -> Result<(), Box> { 
engine.eval::(r#""Test string: \u2764""#)?, "Test string: ❤" ); + assert_eq!( + engine.eval::(" \"Test string: \\u2764\\\n hello, world!\"")?, + "Test string: ❤ hello, world!" + ); + assert_eq!( + engine.eval::(" `Test string: \\u2764\nhello,\\nworld!`")?, + "Test string: \\u2764\nhello,\\nworld!" + ); assert_eq!( engine.eval::(r#""Test string: \x58""#)?, "Test string: X"