Add support for line continuation and multi-line string literals.

This commit is contained in:
Stephen Chung 2021-03-30 00:21:09 +08:00
parent 3a6e6848fd
commit d2ded7733a
3 changed files with 84 additions and 37 deletions

View File

@ -16,7 +16,14 @@ an object map is small.
`HashMap` and `BTreeMap` have almost identical public APIs so this change is unlikely to break `HashMap` and `BTreeMap` have almost identical public APIs so this change is unlikely to break
existing code. existing code.
Im addition, all function signature/metadata methods are now grouped under the umbrella `metadata` feature. [`SmartString`](https://crates.io/crates/smartstring) is used to store identifiers (which tend to
be short, fewer than 23 characters, and ASCII-based) because they can usually be stored inline.
`Map` keys now also use [`SmartString`](https://crates.io/crates/smartstring).
In addition, there is now support for line continuation in strings (put `\` at the end of a line) as
well as multi-line literal strings (wrapped by back-ticks: <code>\`...\`</code>).
Finally, all function signature/metadata methods are now grouped under the umbrella `metadata` feature.
This avoids spending precious resources maintaining metadata for functions for the vast majority of This avoids spending precious resources maintaining metadata for functions for the vast majority of
use cases where such information is not required. use cases where such information is not required.
@ -24,7 +31,6 @@ use cases where such information is not required.
Breaking changes Breaking changes
---------------- ----------------
* `Map` is now an alias to `BTreeMap` instead of `HashMap` because most object maps hold few properties.
* The traits `RegisterFn` and `RegisterResultFn` are removed. `Engine::register_fn` and `Engine::register_result_fn` are now implemented directly on `Engine`. * The traits `RegisterFn` and `RegisterResultFn` are removed. `Engine::register_fn` and `Engine::register_result_fn` are now implemented directly on `Engine`.
* `FnPtr::call_dynamic` now takes `&NativeCallContext` instead of consuming it. * `FnPtr::call_dynamic` now takes `&NativeCallContext` instead of consuming it.
* All `Module::set_fn_XXX` methods are removed, in favor of `Module::set_native_fn`. * All `Module::set_fn_XXX` methods are removed, in favor of `Module::set_native_fn`.
@ -35,6 +41,13 @@ Breaking changes
* The shebang `#!` is now a reserved symbol. * The shebang `#!` is now a reserved symbol.
* Shebangs at the very beginning of script files are skipped when loading them. * Shebangs at the very beginning of script files are skipped when loading them.
* [`smartstring`](https://crates.io/crates/smartstring) is used for identifiers by default. Currently, a PR branch is pulled because it breaks on `no-std` builds. The official crate will be used once `smartstring` is fixed to support `no-std`. * [`smartstring`](https://crates.io/crates/smartstring) is used for identifiers by default. Currently, a PR branch is pulled because it breaks on `no-std` builds. The official crate will be used once `smartstring` is fixed to support `no-std`.
* `Map` is now an alias to `BTreeMap<SmartString, Dynamic>` instead of `HashMap` because most object maps hold few properties.
New features
------------
* Support for line continuation (via `\`) and multi-line literal strings (wrapped with <code>\`</code>) is added.
* Rhai scripts can now start with a shebang `#!` which is ignored.
Enhancements Enhancements
------------ ------------
@ -42,7 +55,6 @@ Enhancements
* Replaced all `HashMap` usage with `BTreeMap` for better performance because collections in Rhai are tiny. * Replaced all `HashMap` usage with `BTreeMap` for better performance because collections in Rhai are tiny.
* `Engine::register_result_fn` no longer requires the successful return type to be `Dynamic`. It can now be any clonable type. * `Engine::register_result_fn` no longer requires the successful return type to be `Dynamic`. It can now be any clonable type.
* `#[rhai_fn(return_raw)]` can now return `Result<T, Box<EvalAltResult>>` where `T` is any clonable type instead of `Result<Dynamic, Box<EvalAltResult>>`. * `#[rhai_fn(return_raw)]` can now return `Result<T, Box<EvalAltResult>>` where `T` is any clonable type instead of `Result<Dynamic, Box<EvalAltResult>>`.
* Rhai scripts can now start with a shebang `#!`.
Version 0.19.14 Version 0.19.14

View File

@ -842,7 +842,7 @@ pub trait InputStream {
fn peek_next(&mut self) -> Option<char>; fn peek_next(&mut self) -> Option<char>;
} }
/// _(INTERNALS)_ Parse a string literal wrapped by `enclosing_char`. /// _(INTERNALS)_ Parse a string literal ended by `termination_char`.
/// Exported under the `internals` feature only. /// Exported under the `internals` feature only.
/// ///
/// # Volatile API /// # Volatile API
@ -852,12 +852,15 @@ pub fn parse_string_literal(
stream: &mut impl InputStream, stream: &mut impl InputStream,
state: &mut TokenizeState, state: &mut TokenizeState,
pos: &mut Position, pos: &mut Position,
enclosing_char: char, termination_char: char,
continuation: bool,
verbatim: bool,
) -> Result<String, (LexError, Position)> { ) -> Result<String, (LexError, Position)> {
let mut result: smallvec::SmallVec<[char; 16]> = Default::default(); let mut result: smallvec::SmallVec<[char; 16]> = Default::default();
let mut escape: smallvec::SmallVec<[char; 12]> = Default::default(); let mut escape: smallvec::SmallVec<[char; 12]> = Default::default();
let start = *pos; let start = *pos;
let mut skip_whitespace_until = 0;
loop { loop {
let next_char = stream.get_next().ok_or((LERR::UnterminatedString, start))?; let next_char = stream.get_next().ok_or((LERR::UnterminatedString, start))?;
@ -871,8 +874,10 @@ pub fn parse_string_literal(
} }
match next_char { match next_char {
// \r - ignore if followed by \n
'\r' if stream.peek_next().unwrap_or('\0') == '\n' => {}
// \... // \...
'\\' if escape.is_empty() => { '\\' if escape.is_empty() && !verbatim => {
escape.push('\\'); escape.push('\\');
} }
// \\ // \\
@ -937,18 +942,37 @@ pub fn parse_string_literal(
})?); })?);
} }
// \{enclosing_char} - escaped // \{termination_char} - escaped
ch if enclosing_char == ch && !escape.is_empty() => { _ if termination_char == next_char && !escape.is_empty() => {
escape.clear(); escape.clear();
result.push(ch) result.push(next_char)
} }
// Close wrapper // Close wrapper
ch if enclosing_char == ch && escape.is_empty() => break, _ if termination_char == next_char && escape.is_empty() => break,
// Line continuation
'\n' if continuation && !escape.is_empty() => {
escape.clear();
pos.new_line();
skip_whitespace_until = start.position().unwrap() + 1;
}
// New-line cannot be escaped
// Cannot have new-lines inside non-multi-line string literals
'\n' if !escape.is_empty() || !verbatim => {
pos.rewind();
return Err((LERR::UnterminatedString, start));
}
'\n' => {
pos.new_line();
result.push(next_char);
}
// Unknown escape sequence // Unknown escape sequence
ch if !escape.is_empty() => { _ if !escape.is_empty() => {
escape.push(ch); escape.push(next_char);
return Err(( return Err((
LERR::MalformedEscapeSequence(escape.into_iter().collect()), LERR::MalformedEscapeSequence(escape.into_iter().collect()),
@ -956,16 +980,14 @@ pub fn parse_string_literal(
)); ));
} }
// Cannot have new-lines inside string literals // Whitespace to skip
'\n' => { _ if next_char.is_whitespace() && pos.position().unwrap() < skip_whitespace_until => {}
pos.rewind();
return Err((LERR::UnterminatedString, start));
}
// All other characters // All other characters
ch => { _ => {
escape.clear(); escape.clear();
result.push(ch); result.push(next_char);
skip_whitespace_until = 0;
} }
} }
} }
@ -1272,12 +1294,15 @@ fn get_next_token_inner(
return get_identifier(stream, pos, start_pos, c); return get_identifier(stream, pos, start_pos, c);
} }
// " - string literal // " or ` - string literal
('"', _) => { ('"', _) | ('`', _) => {
return parse_string_literal(stream, state, pos, '"').map_or_else( let multi_line = c == '`';
return parse_string_literal(stream, state, pos, c, !multi_line, multi_line)
.map_or_else(
|err| Some((Token::LexError(err.0), err.1)), |err| Some((Token::LexError(err.0), err.1)),
|out| Some((Token::StringConstant(out), start_pos)), |out| Some((Token::StringConstant(out), start_pos)),
) );
} }
// ' - character literal // ' - character literal
@ -1288,7 +1313,8 @@ fn get_next_token_inner(
)) ))
} }
('\'', _) => { ('\'', _) => {
return Some(parse_string_literal(stream, state, pos, '\'').map_or_else( return Some(
parse_string_literal(stream, state, pos, c, false, false).map_or_else(
|err| (Token::LexError(err.0), err.1), |err| (Token::LexError(err.0), err.1),
|result| { |result| {
let mut chars = result.chars(); let mut chars = result.chars();
@ -1300,7 +1326,8 @@ fn get_next_token_inner(
(Token::CharConstant(first), start_pos) (Token::CharConstant(first), start_pos)
} }
}, },
)) ),
)
} }
// Braces // Braces

View File

@ -8,6 +8,14 @@ fn test_string() -> Result<(), Box<EvalAltResult>> {
engine.eval::<String>(r#""Test string: \u2764""#)?, engine.eval::<String>(r#""Test string: \u2764""#)?,
"Test string: ❤" "Test string: ❤"
); );
assert_eq!(
engine.eval::<String>(" \"Test string: \\u2764\\\n hello, world!\"")?,
"Test string: ❤ hello, world!"
);
assert_eq!(
engine.eval::<String>(" `Test string: \\u2764\nhello,\\nworld!`")?,
"Test string: \\u2764\nhello,\\nworld!"
);
assert_eq!( assert_eq!(
engine.eval::<String>(r#""Test string: \x58""#)?, engine.eval::<String>(r#""Test string: \x58""#)?,
"Test string: X" "Test string: X"