Merge pull request #391 from schungx/master

Fix string parsing.
This commit is contained in:
Stephen Chung 2021-04-10 10:47:14 +08:00 committed by GitHub
commit 30cd7a7c7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 182 additions and 108 deletions

View File

@ -17,6 +17,7 @@ Breaking changes
* `ModuleResolver` trait methods take an additional parameter `source_path` that contains the path of the current environment. This is to facilitate loading other script files always from the current directory.
* `FileModuleResolver` now resolves relative paths under the source path if there is no base path set.
* `FileModuleResolver::base_path` now returns `Option<&str>` which is `None` if there is no base path set.
* Doc-comments now require the `metadata` feature.
New features
------------

View File

@ -5,5 +5,4 @@ print(x[1]);
x[1] = 5;
print("x[1] should be 5:");
print(x[1]);
print(`x[1] should be 5: ${x[1]}`);

View File

@ -1,4 +1,5 @@
print("x should be 78:");
let x = 78;
print(x);

View File

@ -8,4 +8,4 @@ let /* I am a spy in a variable declaration! */ x = 5;
/* look /* at /* that, /* multi-line */ comments */ can be */ nested */
/* surrounded by */ x // comments
/* surrounded by */ this_is_not_a_comment = true // comments

View File

@ -4,6 +4,6 @@ fn bob() {
return 3;
}
print("bob() should be 3:");
let result = bob();
print(bob());
print(`bob() should be 3: ${result}`);

View File

@ -7,10 +7,8 @@ fn addme(a, b) {
a + b; // notice that the last value is returned even if terminated by a semicolon
}
print("addme(a, 4) should be 46:");
let result = addme(a, 4);
print(addme(a, 4));
print(`addme(a, 4) should be 46: ${result}`);
print("a should still be 3:");
print(a); // should print 3 - 'a' is never changed
print(`a should still be 3: ${a}`); // should print 3 - 'a' is never changed

View File

@ -11,4 +11,4 @@ if a > b {
print(x); // should print 0
} else {
print("Oops! a == b");
}
}

View File

@ -10,3 +10,5 @@ loop {
if x <= 0 { break; }
}
export x as foo;

View File

@ -12,11 +12,11 @@ fn new_mat(x, y) {
fn mat_gen(n) {
let m = new_mat(n, n);
let tmp = 1.0 / n.to_float() / n.to_float();
let tmp = 1.0 / n / n;
for i in range(0, n) {
for j in range(0, n) {
m[i][j] = tmp * (i.to_float() - j.to_float()) * (i.to_float() + j.to_float());
m[i][j] = tmp * (i - j) * (i + j);
}
}

View File

@ -1,3 +1,3 @@
import "loop";
import "loop" as x;
print("Module test!");
print(`Module test! foo = ${x::foo}`);

View File

@ -1,4 +1,5 @@
print("The result should be 182:");
let x = 12 + 34 * 5;
print(x);

View File

@ -1,4 +1,5 @@
print("The result should be 230:");
let x = (12 + 34) * 5;
print(x);

View File

@ -30,4 +30,4 @@ print(`Run time = ${now.elapsed} seconds.`);
if total_primes_found != 78_498 {
print("The answer is WRONG! Should be 78,498!");
}
}

View File

@ -34,4 +34,13 @@ made using multi-line literal
print(s);
// Interpolation
let s = `This is interpolation ${
let x = `within ${let y = "yet another level \
of interpolation!"; y} interpolation`;
x
} within literal string.`;
print(s);
print(">>> END <<<");

View File

@ -11,7 +11,6 @@ use crate::stdlib::{
iter::empty,
num::{NonZeroU8, NonZeroUsize},
ops::{Add, AddAssign},
string::String,
vec,
vec::Vec,
};
@ -65,7 +64,9 @@ pub struct ScriptFnDef {
#[cfg(not(feature = "no_closure"))]
pub externals: crate::stdlib::collections::BTreeSet<Identifier>,
/// Function doc-comments (if any).
pub comments: StaticVec<String>,
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
pub comments: StaticVec<crate::stdlib::string::String>,
}
impl fmt::Display for ScriptFnDef {
@ -103,6 +104,8 @@ pub struct ScriptFnMetadata<'a> {
///
/// Leading white-spaces are stripped, and each string slice always starts with the corresponding
/// doc-comment leader: `///` or `/**`.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
pub comments: Vec<&'a str>,
/// Function access mode.
pub access: FnAccess,
@ -134,6 +137,8 @@ impl<'a> Into<ScriptFnMetadata<'a>> for &'a ScriptFnDef {
#[inline(always)]
fn into(self) -> ScriptFnMetadata<'a> {
ScriptFnMetadata {
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
comments: self.comments.iter().map(|s| s.as_str()).collect(),
access: self.access,
name: &self.name,

View File

@ -758,6 +758,8 @@ pub struct Engine {
pub(crate) limits: Limits,
/// Disable doc-comments?
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
pub(crate) disable_doc_comments: bool,
}
@ -874,6 +876,8 @@ impl Engine {
max_map_size: None,
},
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
disable_doc_comments: false,
};
@ -930,6 +934,8 @@ impl Engine {
max_map_size: None,
},
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
disable_doc_comments: false,
}
}

View File

@ -33,7 +33,11 @@ impl Engine {
pub fn optimization_level(&self) -> crate::OptimizationLevel {
self.optimization_level
}
/// Enable/disable doc-comments.
/// _(METADATA)_ Enable/disable doc-comments for functions.
/// Exported under the `metadata` feature only.
/// Not available under `no_function`.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline(always)]
pub fn enable_doc_comments(&mut self, enable: bool) -> &mut Self {
self.disable_doc_comments = !enable;

View File

@ -1002,6 +1002,8 @@ pub fn optimize_into_ast(
lib: None,
#[cfg(not(feature = "no_module"))]
mods: Default::default(),
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
comments: Default::default(),
})
.for_each(|fn_def| {

View File

@ -16,7 +16,7 @@ use crate::stdlib::{
hash::{Hash, Hasher},
iter::empty,
num::{NonZeroU8, NonZeroUsize},
string::{String, ToString},
string::ToString,
vec,
vec::Vec,
};
@ -2483,10 +2483,10 @@ fn parse_stmt(
) -> Result<Stmt, ParseError> {
use AccessMode::{ReadOnly, ReadWrite};
let mut _comments: StaticVec<String> = Default::default();
#[cfg(not(feature = "no_function"))]
{
#[cfg(feature = "metadata")]
let comments = {
let mut comments: StaticVec<crate::stdlib::string::String> = Default::default();
let mut comments_pos = Position::NONE;
// Handle doc-comments.
@ -2505,7 +2505,7 @@ fn parse_stmt(
match input.next().unwrap().0 {
Token::Comment(comment) => {
_comments.push(comment);
comments.push(comment);
match input.peek().unwrap() {
(Token::Fn, _) | (Token::Private, _) => break,
@ -2516,7 +2516,9 @@ fn parse_stmt(
_ => unreachable!(),
}
}
}
comments
};
let (token, token_pos) = match input.peek().unwrap() {
(Token::EOF, pos) => return Ok(Stmt::Noop(*pos)),
@ -2572,7 +2574,17 @@ fn parse_stmt(
pos: pos,
};
let func = parse_fn(input, &mut new_state, lib, access, settings, _comments)?;
let func = parse_fn(
input,
&mut new_state,
lib,
access,
settings,
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
comments,
)?;
let hash = calc_fn_hash(empty(), &func.name, func.params.len());
if lib.contains_key(&hash) {
@ -2727,7 +2739,9 @@ fn parse_fn(
lib: &mut FunctionsLib,
access: FnAccess,
mut settings: ParseSettings,
comments: StaticVec<String>,
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
comments: StaticVec<crate::stdlib::string::String>,
) -> Result<ScriptFnDef, ParseError> {
#[cfg(not(feature = "unchecked"))]
settings.ensure_level_within_max_limit(state.max_expr_depth)?;
@ -2814,6 +2828,8 @@ fn parse_fn(
lib: None,
#[cfg(not(feature = "no_module"))]
mods: Default::default(),
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
comments,
})
}
@ -2967,6 +2983,8 @@ fn parse_anon_fn(
lib: None,
#[cfg(not(feature = "no_module"))]
mods: Default::default(),
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
comments: Default::default(),
};

View File

@ -838,6 +838,8 @@ pub struct TokenizeState {
/// Include comments?
pub include_comments: bool,
/// Disable doc-comments?
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
pub disable_doc_comments: bool,
/// Is the current tokenizer position within the text stream of an interpolated string?
pub is_within_text_terminated_by: Option<char>,
@ -876,39 +878,31 @@ pub fn parse_string_literal(
termination_char: char,
continuation: bool,
verbatim: bool,
skip_first_new_line: bool,
allow_interpolation: bool,
) -> Result<(String, bool), (LexError, Position)> {
let mut result: smallvec::SmallVec<[char; 16]> = Default::default();
let mut escape: smallvec::SmallVec<[char; 12]> = Default::default();
let mut result = String::with_capacity(12);
let mut escape = String::with_capacity(12);
let start = *pos;
let mut skip_whitespace_until = 0;
let mut interpolated = false;
if skip_first_new_line {
// Start from the next line if at the end of line
match stream.peek_next() {
// `\r - start from next line
Some('\r') => {
eat_next(stream, pos);
// `\r\n
if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) {
eat_next(stream, pos);
}
}
// `\n - start from next line
Some('\n') => {
eat_next(stream, pos);
}
_ => (),
}
}
state.is_within_text_terminated_by = Some(termination_char);
loop {
let next_char = stream.get_next().ok_or((LERR::UnterminatedString, start))?;
pos.advance();
let next_char = match stream.get_next() {
Some(ch) => {
pos.advance();
ch
}
None => {
if !continuation || escape != "\\" {
result += &escape;
}
pos.advance();
break;
}
};
// String interpolation?
if allow_interpolation
@ -917,6 +911,7 @@ pub fn parse_string_literal(
&& stream.peek_next().map(|ch| ch == '{').unwrap_or(false)
{
interpolated = true;
state.is_within_text_terminated_by = None;
break;
}
@ -968,31 +963,23 @@ pub fn parse_string_literal(
};
for _ in 0..len {
let c = stream.get_next().ok_or_else(|| {
(
LERR::MalformedEscapeSequence(seq.iter().cloned().collect()),
*pos,
)
})?;
let c = stream
.get_next()
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
seq.push(c);
pos.advance();
out_val *= 16;
out_val += c.to_digit(16).ok_or_else(|| {
(
LERR::MalformedEscapeSequence(seq.iter().cloned().collect()),
*pos,
)
})?;
out_val += c
.to_digit(16)
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
}
result.push(char::from_u32(out_val).ok_or_else(|| {
(
LERR::MalformedEscapeSequence(seq.into_iter().collect()),
*pos,
)
})?);
result.push(
char::from_u32(out_val)
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq), *pos))?,
);
}
// \{termination_char} - escaped
@ -1002,7 +989,10 @@ pub fn parse_string_literal(
}
// Close wrapper
_ if termination_char == next_char && escape.is_empty() => break,
_ if termination_char == next_char && escape.is_empty() => {
state.is_within_text_terminated_by = None;
break;
}
// Line continuation
'\n' if continuation && !escape.is_empty() => {
@ -1015,7 +1005,7 @@ pub fn parse_string_literal(
// Cannot have new-lines inside non-multi-line string literals
'\n' if !escape.is_empty() || !verbatim => {
pos.rewind();
return Err((LERR::UnterminatedString, start));
return Err((LERR::UnterminatedString, *pos));
}
'\n' => {
@ -1027,10 +1017,7 @@ pub fn parse_string_literal(
_ if !escape.is_empty() => {
escape.push(next_char);
return Err((
LERR::MalformedEscapeSequence(escape.into_iter().collect()),
*pos,
));
return Err((LERR::MalformedEscapeSequence(escape), *pos));
}
// Whitespace to skip
@ -1045,15 +1032,13 @@ pub fn parse_string_literal(
}
}
let s = result.iter().collect::<String>();
if let Some(max) = state.max_string_size {
if s.len() > max.get() {
if result.len() > max.get() {
return Err((LexError::StringTooLong(max.get()), *pos));
}
}
Ok((s, interpolated))
Ok((result, interpolated))
}
/// Consume the next character.
@ -1155,6 +1140,8 @@ fn is_numeric_digit(c: char) -> bool {
}
/// Test if the comment block is a doc-comment.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline(always)]
pub fn is_doc_comment(comment: &str) -> bool {
(comment.starts_with("///") && !comment.starts_with("////"))
@ -1178,10 +1165,22 @@ fn get_next_token_inner(
state.comment_level = scan_block_comment(stream, state.comment_level, pos, &mut comment);
if state.include_comments
|| (!state.disable_doc_comments && is_doc_comment(comment.as_ref().unwrap()))
{
let include_comments = state.include_comments;
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
let include_comments =
if !state.disable_doc_comments && is_doc_comment(comment.as_ref().unwrap()) {
true
} else {
include_comments
};
if include_comments {
return Some((Token::Comment(comment.unwrap()), start_pos));
} else if state.comment_level > 0 {
// Reached EOF without ending comment block
return None;
}
}
@ -1189,7 +1188,7 @@ fn get_next_token_inner(
if let Some(ch) = state.is_within_text_terminated_by.take() {
let start_pos = *pos;
return parse_string_literal(stream, state, pos, ch, false, true, true, true).map_or_else(
return parse_string_literal(stream, state, pos, ch, false, true, true).map_or_else(
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|(result, interpolated)| {
if interpolated {
@ -1365,7 +1364,7 @@ fn get_next_token_inner(
// " - string literal
('"', _) => {
return parse_string_literal(stream, state, pos, c, true, false, false, false)
return parse_string_literal(stream, state, pos, c, true, false, false)
.map_or_else(
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|(result, _)| Some((Token::StringConstant(result), start_pos)),
@ -1373,17 +1372,35 @@ fn get_next_token_inner(
}
// ` - string literal
('`', _) => {
return parse_string_literal(stream, state, pos, c, false, true, true, true)
.map_or_else(
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|(result, interpolated)| {
if interpolated {
Some((Token::InterpolatedString(result), start_pos))
} else {
Some((Token::StringConstant(result), start_pos))
}
},
);
// Start from the next line if at the end of line
match stream.peek_next() {
// `\r - start from next line
Some('\r') => {
eat_next(stream, pos);
pos.new_line();
// `\r\n
if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) {
eat_next(stream, pos);
}
}
// `\n - start from next line
Some('\n') => {
eat_next(stream, pos);
pos.new_line();
}
_ => (),
}
return parse_string_literal(stream, state, pos, c, false, true, true).map_or_else(
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|(result, interpolated)| {
if interpolated {
Some((Token::InterpolatedString(result), start_pos))
} else {
Some((Token::StringConstant(result), start_pos))
}
},
);
}
// ' - character literal
@ -1395,20 +1412,19 @@ fn get_next_token_inner(
}
('\'', _) => {
return Some(
parse_string_literal(stream, state, pos, c, false, false, false, false)
.map_or_else(
|(err, err_pos)| (Token::LexError(err), err_pos),
|(result, _)| {
let mut chars = result.chars();
let first = chars.next().unwrap();
parse_string_literal(stream, state, pos, c, false, false, false).map_or_else(
|(err, err_pos)| (Token::LexError(err), err_pos),
|(result, _)| {
let mut chars = result.chars();
let first = chars.next().unwrap();
if chars.next().is_some() {
(Token::LexError(LERR::MalformedChar(result)), start_pos)
} else {
(Token::CharConstant(first), start_pos)
}
},
),
if chars.next().is_some() {
(Token::LexError(LERR::MalformedChar(result)), start_pos)
} else {
(Token::CharConstant(first), start_pos)
}
},
),
)
}
@ -1496,6 +1512,8 @@ fn get_next_token_inner(
eat_next(stream, pos);
let mut comment = match stream.peek_next() {
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
Some('/') if !state.disable_doc_comments => {
eat_next(stream, pos);
@ -1529,6 +1547,8 @@ fn get_next_token_inner(
eat_next(stream, pos);
let mut comment = match stream.peek_next() {
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
Some('*') if !state.disable_doc_comments => {
eat_next(stream, pos);
@ -1903,6 +1923,10 @@ impl<'a> Iterator for TokenIterator<'a> {
let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
// {EOF}
None => return None,
// Unterminated string at EOF
Some((Token::StringConstant(_), _)) if self.state.is_within_text_terminated_by.is_some() => {
return Some((Token::LexError(LERR::UnterminatedString), self.pos));
}
// Reserved keyword/symbol
Some((Token::Reserved(s), pos)) => (match
(s.as_str(), self.engine.custom_keywords.contains_key(s.as_str()))
@ -2024,6 +2048,8 @@ impl Engine {
comment_level: 0,
end_with_none: false,
include_comments: false,
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
disable_doc_comments: self.disable_doc_comments,
is_within_text_terminated_by: None,
},

View File

@ -27,6 +27,7 @@ fn test_comments() -> Result<(), Box<EvalAltResult>> {
}
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[test]
fn test_comments_doc() -> Result<(), Box<EvalAltResult>> {
let mut engine = Engine::new();