Add support for line continuation and multi-line string literals.

This commit is contained in:
Stephen Chung 2021-03-30 00:21:09 +08:00
parent 3a6e6848fd
commit d2ded7733a
3 changed files with 84 additions and 37 deletions

View File

@ -16,7 +16,14 @@ an object map is small.
`HashMap` and `BTreeMap` have almost identical public APIs, so this change is unlikely to break
existing code.
In addition, all function signature/metadata methods are now grouped under the umbrella `metadata` feature.
[`SmartString`](https://crates.io/crates/smartstring) is used to store identifiers (which tend to
be short, fewer than 23 characters, and ASCII-based) because they can usually be stored inline.
`Map` keys now also use [`SmartString`](https://crates.io/crates/smartstring).
In addition, there is now support for line continuation in strings (put `\` at the end of a line) as
well as multi-line literal strings (wrapped by back-ticks: <code>\`...\`</code>).
Finally, all function signature/metadata methods are now grouped under the umbrella `metadata` feature.
This avoids spending precious resources maintaining metadata for functions for the vast majority of
use cases where such information is not required.
@ -24,7 +31,6 @@ use cases where such information is not required.
Breaking changes
----------------
* `Map` is now an alias to `BTreeMap` instead of `HashMap` because most object maps hold few properties.
* The traits `RegisterFn` and `RegisterResultFn` are removed. `Engine::register_fn` and `Engine::register_result_fn` are now implemented directly on `Engine`.
* `FnPtr::call_dynamic` now takes `&NativeCallContext` instead of consuming it.
* All `Module::set_fn_XXX` methods are removed, in favor of `Module::set_native_fn`.
@ -35,6 +41,13 @@ Breaking changes
* The shebang `#!` is now a reserved symbol.
* Shebangs at the very beginning of script files are skipped when loading them.
* [`smartstring`](https://crates.io/crates/smartstring) is used for identifiers by default. Currently, a PR branch is pulled because the official crate breaks on `no-std` builds. The official crate will be used once `smartstring` is fixed to support `no-std`.
* `Map` is now an alias to `BTreeMap<SmartString, Dynamic>` instead of `HashMap` because most object maps hold few properties.
New features
------------
* Support is added for line continuation (via `\`) and multi-line literal strings (wrapped with <code>\`</code>).
* Rhai scripts can now start with a shebang `#!` which is ignored.
Enhancements
------------
@ -42,7 +55,6 @@ Enhancements
* Replaced all `HashMap` usage with `BTreeMap` for better performance because collections in Rhai are tiny.
* `Engine::register_result_fn` no longer requires the successful return type to be `Dynamic`. It can now be any clonable type.
* `#[rhai_fn(return_raw)]` can now return `Result<T, Box<EvalAltResult>>` where `T` is any clonable type instead of `Result<Dynamic, Box<EvalAltResult>>`.
* Rhai scripts can now start with a shebang `#!`.
Version 0.19.14

View File

@ -842,7 +842,7 @@ pub trait InputStream {
fn peek_next(&mut self) -> Option<char>;
}
/// _(INTERNALS)_ Parse a string literal wrapped by `enclosing_char`.
/// _(INTERNALS)_ Parse a string literal ended by `termination_char`.
/// Exported under the `internals` feature only.
///
/// # Volatile API
@ -852,12 +852,15 @@ pub fn parse_string_literal(
stream: &mut impl InputStream,
state: &mut TokenizeState,
pos: &mut Position,
enclosing_char: char,
termination_char: char,
continuation: bool,
verbatim: bool,
) -> Result<String, (LexError, Position)> {
let mut result: smallvec::SmallVec<[char; 16]> = Default::default();
let mut escape: smallvec::SmallVec<[char; 12]> = Default::default();
let start = *pos;
let mut skip_whitespace_until = 0;
loop {
let next_char = stream.get_next().ok_or((LERR::UnterminatedString, start))?;
@ -871,8 +874,10 @@ pub fn parse_string_literal(
}
match next_char {
// \r - ignore if followed by \n
'\r' if stream.peek_next().unwrap_or('\0') == '\n' => {}
// \...
'\\' if escape.is_empty() => {
'\\' if escape.is_empty() && !verbatim => {
escape.push('\\');
}
// \\
@ -937,18 +942,37 @@ pub fn parse_string_literal(
})?);
}
// \{enclosing_char} - escaped
ch if enclosing_char == ch && !escape.is_empty() => {
// \{termination_char} - escaped
_ if termination_char == next_char && !escape.is_empty() => {
escape.clear();
result.push(ch)
result.push(next_char)
}
// Close wrapper
ch if enclosing_char == ch && escape.is_empty() => break,
_ if termination_char == next_char && escape.is_empty() => break,
// Line continuation
'\n' if continuation && !escape.is_empty() => {
escape.clear();
pos.new_line();
skip_whitespace_until = start.position().unwrap() + 1;
}
// New-line cannot be escaped
// Cannot have new-lines inside non-multi-line string literals
'\n' if !escape.is_empty() || !verbatim => {
pos.rewind();
return Err((LERR::UnterminatedString, start));
}
'\n' => {
pos.new_line();
result.push(next_char);
}
// Unknown escape sequence
ch if !escape.is_empty() => {
escape.push(ch);
_ if !escape.is_empty() => {
escape.push(next_char);
return Err((
LERR::MalformedEscapeSequence(escape.into_iter().collect()),
@ -956,16 +980,14 @@ pub fn parse_string_literal(
));
}
// Cannot have new-lines inside string literals
'\n' => {
pos.rewind();
return Err((LERR::UnterminatedString, start));
}
// Whitespace to skip
_ if next_char.is_whitespace() && pos.position().unwrap() < skip_whitespace_until => {}
// All other characters
ch => {
_ => {
escape.clear();
result.push(ch);
result.push(next_char);
skip_whitespace_until = 0;
}
}
}
@ -1272,12 +1294,15 @@ fn get_next_token_inner(
return get_identifier(stream, pos, start_pos, c);
}
// " - string literal
('"', _) => {
return parse_string_literal(stream, state, pos, '"').map_or_else(
// " or ` - string literal
('"', _) | ('`', _) => {
let multi_line = c == '`';
return parse_string_literal(stream, state, pos, c, !multi_line, multi_line)
.map_or_else(
|err| Some((Token::LexError(err.0), err.1)),
|out| Some((Token::StringConstant(out), start_pos)),
)
);
}
// ' - character literal
@ -1288,7 +1313,8 @@ fn get_next_token_inner(
))
}
('\'', _) => {
return Some(parse_string_literal(stream, state, pos, '\'').map_or_else(
return Some(
parse_string_literal(stream, state, pos, c, false, false).map_or_else(
|err| (Token::LexError(err.0), err.1),
|result| {
let mut chars = result.chars();
@ -1300,7 +1326,8 @@ fn get_next_token_inner(
(Token::CharConstant(first), start_pos)
}
},
))
),
)
}
// Braces

View File

@ -8,6 +8,14 @@ fn test_string() -> Result<(), Box<EvalAltResult>> {
engine.eval::<String>(r#""Test string: \u2764""#)?,
"Test string: ❤"
);
assert_eq!(
engine.eval::<String>(" \"Test string: \\u2764\\\n hello, world!\"")?,
"Test string: ❤ hello, world!"
);
assert_eq!(
engine.eval::<String>(" `Test string: \\u2764\nhello,\\nworld!`")?,
"Test string: \\u2764\nhello,\\nworld!"
);
assert_eq!(
engine.eval::<String>(r#""Test string: \x58""#)?,
"Test string: X"