Merge pull request #391 from schungx/master

Fix string parsing.
This commit is contained in:
Stephen Chung 2021-04-10 10:47:14 +08:00 committed by GitHub
commit 30cd7a7c7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 182 additions and 108 deletions

View File

@ -17,6 +17,7 @@ Breaking changes
* `ModuleResolver` trait methods take an additional parameter `source_path` that contains the path of the current environment. This is to facilitate loading other script files always from the current directory. * `ModuleResolver` trait methods take an additional parameter `source_path` that contains the path of the current environment. This is to facilitate loading other script files always from the current directory.
* `FileModuleResolver` now resolves relative paths under the source path if there is no base path set. * `FileModuleResolver` now resolves relative paths under the source path if there is no base path set.
* `FileModuleResolver::base_path` now returns `Option<&str>` which is `None` if there is no base path set. * `FileModuleResolver::base_path` now returns `Option<&str>` which is `None` if there is no base path set.
* Doc-comments now require the `metadata` feature.
New features New features
------------ ------------

View File

@ -5,5 +5,4 @@ print(x[1]);
x[1] = 5; x[1] = 5;
print("x[1] should be 5:"); print(`x[1] should be 5: ${x[1]}`);
print(x[1]);

View File

@ -1,4 +1,5 @@
print("x should be 78:"); print("x should be 78:");
let x = 78; let x = 78;
print(x); print(x);

View File

@ -8,4 +8,4 @@ let /* I am a spy in a variable declaration! */ x = 5;
/* look /* at /* that, /* multi-line */ comments */ can be */ nested */ /* look /* at /* that, /* multi-line */ comments */ can be */ nested */
/* surrounded by */ x // comments /* surrounded by */ this_is_not_a_comment = true // comments

View File

@ -4,6 +4,6 @@ fn bob() {
return 3; return 3;
} }
print("bob() should be 3:"); let result = bob();
print(bob()); print(`bob() should be 3: ${result}`);

View File

@ -7,10 +7,8 @@ fn addme(a, b) {
a + b; // notice that the last value is returned even if terminated by a semicolon a + b; // notice that the last value is returned even if terminated by a semicolon
} }
print("addme(a, 4) should be 46:"); let result = addme(a, 4);
print(addme(a, 4)); print(`addme(a, 4) should be 46: ${result}`);
print("a should still be 3:"); print(`a should still be 3: ${a}`); // should print 3 - 'a' is never changed
print(a); // should print 3 - 'a' is never changed

View File

@ -10,3 +10,5 @@ loop {
if x <= 0 { break; } if x <= 0 { break; }
} }
export x as foo;

View File

@ -12,11 +12,11 @@ fn new_mat(x, y) {
fn mat_gen(n) { fn mat_gen(n) {
let m = new_mat(n, n); let m = new_mat(n, n);
let tmp = 1.0 / n.to_float() / n.to_float(); let tmp = 1.0 / n / n;
for i in range(0, n) { for i in range(0, n) {
for j in range(0, n) { for j in range(0, n) {
m[i][j] = tmp * (i.to_float() - j.to_float()) * (i.to_float() + j.to_float()); m[i][j] = tmp * (i - j) * (i + j);
} }
} }

View File

@ -1,3 +1,3 @@
import "loop"; import "loop" as x;
print("Module test!"); print(`Module test! foo = ${x::foo}`);

View File

@ -1,4 +1,5 @@
print("The result should be 182:"); print("The result should be 182:");
let x = 12 + 34 * 5; let x = 12 + 34 * 5;
print(x); print(x);

View File

@ -1,4 +1,5 @@
print("The result should be 230:"); print("The result should be 230:");
let x = (12 + 34) * 5; let x = (12 + 34) * 5;
print(x); print(x);

View File

@ -34,4 +34,13 @@ made using multi-line literal
print(s); print(s);
// Interpolation
let s = `This is interpolation ${
let x = `within ${let y = "yet another level \
of interpolation!"; y} interpolation`;
x
} within literal string.`;
print(s);
print(">>> END <<<"); print(">>> END <<<");

View File

@ -11,7 +11,6 @@ use crate::stdlib::{
iter::empty, iter::empty,
num::{NonZeroU8, NonZeroUsize}, num::{NonZeroU8, NonZeroUsize},
ops::{Add, AddAssign}, ops::{Add, AddAssign},
string::String,
vec, vec,
vec::Vec, vec::Vec,
}; };
@ -65,7 +64,9 @@ pub struct ScriptFnDef {
#[cfg(not(feature = "no_closure"))] #[cfg(not(feature = "no_closure"))]
pub externals: crate::stdlib::collections::BTreeSet<Identifier>, pub externals: crate::stdlib::collections::BTreeSet<Identifier>,
/// Function doc-comments (if any). /// Function doc-comments (if any).
pub comments: StaticVec<String>, #[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
pub comments: StaticVec<crate::stdlib::string::String>,
} }
impl fmt::Display for ScriptFnDef { impl fmt::Display for ScriptFnDef {
@ -103,6 +104,8 @@ pub struct ScriptFnMetadata<'a> {
/// ///
/// Leading white-spaces are stripped, and each string slice always starts with the corresponding /// Leading white-spaces are stripped, and each string slice always starts with the corresponding
/// doc-comment leader: `///` or `/**`. /// doc-comment leader: `///` or `/**`.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
pub comments: Vec<&'a str>, pub comments: Vec<&'a str>,
/// Function access mode. /// Function access mode.
pub access: FnAccess, pub access: FnAccess,
@ -134,6 +137,8 @@ impl<'a> Into<ScriptFnMetadata<'a>> for &'a ScriptFnDef {
#[inline(always)] #[inline(always)]
fn into(self) -> ScriptFnMetadata<'a> { fn into(self) -> ScriptFnMetadata<'a> {
ScriptFnMetadata { ScriptFnMetadata {
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
comments: self.comments.iter().map(|s| s.as_str()).collect(), comments: self.comments.iter().map(|s| s.as_str()).collect(),
access: self.access, access: self.access,
name: &self.name, name: &self.name,

View File

@ -758,6 +758,8 @@ pub struct Engine {
pub(crate) limits: Limits, pub(crate) limits: Limits,
/// Disable doc-comments? /// Disable doc-comments?
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
pub(crate) disable_doc_comments: bool, pub(crate) disable_doc_comments: bool,
} }
@ -874,6 +876,8 @@ impl Engine {
max_map_size: None, max_map_size: None,
}, },
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
disable_doc_comments: false, disable_doc_comments: false,
}; };
@ -930,6 +934,8 @@ impl Engine {
max_map_size: None, max_map_size: None,
}, },
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
disable_doc_comments: false, disable_doc_comments: false,
} }
} }

View File

@ -33,7 +33,11 @@ impl Engine {
pub fn optimization_level(&self) -> crate::OptimizationLevel { pub fn optimization_level(&self) -> crate::OptimizationLevel {
self.optimization_level self.optimization_level
} }
/// Enable/disable doc-comments. /// _(METADATA)_ Enable/disable doc-comments for functions.
/// Exported under the `metadata` feature only.
/// Not available under `no_function`.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline(always)] #[inline(always)]
pub fn enable_doc_comments(&mut self, enable: bool) -> &mut Self { pub fn enable_doc_comments(&mut self, enable: bool) -> &mut Self {
self.disable_doc_comments = !enable; self.disable_doc_comments = !enable;

View File

@ -1002,6 +1002,8 @@ pub fn optimize_into_ast(
lib: None, lib: None,
#[cfg(not(feature = "no_module"))] #[cfg(not(feature = "no_module"))]
mods: Default::default(), mods: Default::default(),
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
comments: Default::default(), comments: Default::default(),
}) })
.for_each(|fn_def| { .for_each(|fn_def| {

View File

@ -16,7 +16,7 @@ use crate::stdlib::{
hash::{Hash, Hasher}, hash::{Hash, Hasher},
iter::empty, iter::empty,
num::{NonZeroU8, NonZeroUsize}, num::{NonZeroU8, NonZeroUsize},
string::{String, ToString}, string::ToString,
vec, vec,
vec::Vec, vec::Vec,
}; };
@ -2483,10 +2483,10 @@ fn parse_stmt(
) -> Result<Stmt, ParseError> { ) -> Result<Stmt, ParseError> {
use AccessMode::{ReadOnly, ReadWrite}; use AccessMode::{ReadOnly, ReadWrite};
let mut _comments: StaticVec<String> = Default::default();
#[cfg(not(feature = "no_function"))] #[cfg(not(feature = "no_function"))]
{ #[cfg(feature = "metadata")]
let comments = {
let mut comments: StaticVec<crate::stdlib::string::String> = Default::default();
let mut comments_pos = Position::NONE; let mut comments_pos = Position::NONE;
// Handle doc-comments. // Handle doc-comments.
@ -2505,7 +2505,7 @@ fn parse_stmt(
match input.next().unwrap().0 { match input.next().unwrap().0 {
Token::Comment(comment) => { Token::Comment(comment) => {
_comments.push(comment); comments.push(comment);
match input.peek().unwrap() { match input.peek().unwrap() {
(Token::Fn, _) | (Token::Private, _) => break, (Token::Fn, _) | (Token::Private, _) => break,
@ -2516,7 +2516,9 @@ fn parse_stmt(
_ => unreachable!(), _ => unreachable!(),
} }
} }
}
comments
};
let (token, token_pos) = match input.peek().unwrap() { let (token, token_pos) = match input.peek().unwrap() {
(Token::EOF, pos) => return Ok(Stmt::Noop(*pos)), (Token::EOF, pos) => return Ok(Stmt::Noop(*pos)),
@ -2572,7 +2574,17 @@ fn parse_stmt(
pos: pos, pos: pos,
}; };
let func = parse_fn(input, &mut new_state, lib, access, settings, _comments)?; let func = parse_fn(
input,
&mut new_state,
lib,
access,
settings,
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
comments,
)?;
let hash = calc_fn_hash(empty(), &func.name, func.params.len()); let hash = calc_fn_hash(empty(), &func.name, func.params.len());
if lib.contains_key(&hash) { if lib.contains_key(&hash) {
@ -2727,7 +2739,9 @@ fn parse_fn(
lib: &mut FunctionsLib, lib: &mut FunctionsLib,
access: FnAccess, access: FnAccess,
mut settings: ParseSettings, mut settings: ParseSettings,
comments: StaticVec<String>, #[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
comments: StaticVec<crate::stdlib::string::String>,
) -> Result<ScriptFnDef, ParseError> { ) -> Result<ScriptFnDef, ParseError> {
#[cfg(not(feature = "unchecked"))] #[cfg(not(feature = "unchecked"))]
settings.ensure_level_within_max_limit(state.max_expr_depth)?; settings.ensure_level_within_max_limit(state.max_expr_depth)?;
@ -2814,6 +2828,8 @@ fn parse_fn(
lib: None, lib: None,
#[cfg(not(feature = "no_module"))] #[cfg(not(feature = "no_module"))]
mods: Default::default(), mods: Default::default(),
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
comments, comments,
}) })
} }
@ -2967,6 +2983,8 @@ fn parse_anon_fn(
lib: None, lib: None,
#[cfg(not(feature = "no_module"))] #[cfg(not(feature = "no_module"))]
mods: Default::default(), mods: Default::default(),
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
comments: Default::default(), comments: Default::default(),
}; };

View File

@ -838,6 +838,8 @@ pub struct TokenizeState {
/// Include comments? /// Include comments?
pub include_comments: bool, pub include_comments: bool,
/// Disable doc-comments? /// Disable doc-comments?
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
pub disable_doc_comments: bool, pub disable_doc_comments: bool,
/// Is the current tokenizer position within the text stream of an interpolated string? /// Is the current tokenizer position within the text stream of an interpolated string?
pub is_within_text_terminated_by: Option<char>, pub is_within_text_terminated_by: Option<char>,
@ -876,39 +878,31 @@ pub fn parse_string_literal(
termination_char: char, termination_char: char,
continuation: bool, continuation: bool,
verbatim: bool, verbatim: bool,
skip_first_new_line: bool,
allow_interpolation: bool, allow_interpolation: bool,
) -> Result<(String, bool), (LexError, Position)> { ) -> Result<(String, bool), (LexError, Position)> {
let mut result: smallvec::SmallVec<[char; 16]> = Default::default(); let mut result = String::with_capacity(12);
let mut escape: smallvec::SmallVec<[char; 12]> = Default::default(); let mut escape = String::with_capacity(12);
let start = *pos; let start = *pos;
let mut skip_whitespace_until = 0; let mut skip_whitespace_until = 0;
let mut interpolated = false; let mut interpolated = false;
if skip_first_new_line { state.is_within_text_terminated_by = Some(termination_char);
// Start from the next line if at the end of line
match stream.peek_next() {
// `\r - start from next line
Some('\r') => {
eat_next(stream, pos);
// `\r\n
if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) {
eat_next(stream, pos);
}
}
// `\n - start from next line
Some('\n') => {
eat_next(stream, pos);
}
_ => (),
}
}
loop { loop {
let next_char = stream.get_next().ok_or((LERR::UnterminatedString, start))?; let next_char = match stream.get_next() {
Some(ch) => {
pos.advance(); pos.advance();
ch
}
None => {
if !continuation || escape != "\\" {
result += &escape;
}
pos.advance();
break;
}
};
// String interpolation? // String interpolation?
if allow_interpolation if allow_interpolation
@ -917,6 +911,7 @@ pub fn parse_string_literal(
&& stream.peek_next().map(|ch| ch == '{').unwrap_or(false) && stream.peek_next().map(|ch| ch == '{').unwrap_or(false)
{ {
interpolated = true; interpolated = true;
state.is_within_text_terminated_by = None;
break; break;
} }
@ -968,31 +963,23 @@ pub fn parse_string_literal(
}; };
for _ in 0..len { for _ in 0..len {
let c = stream.get_next().ok_or_else(|| { let c = stream
( .get_next()
LERR::MalformedEscapeSequence(seq.iter().cloned().collect()), .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
*pos,
)
})?;
seq.push(c); seq.push(c);
pos.advance(); pos.advance();
out_val *= 16; out_val *= 16;
out_val += c.to_digit(16).ok_or_else(|| { out_val += c
( .to_digit(16)
LERR::MalformedEscapeSequence(seq.iter().cloned().collect()), .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
*pos,
)
})?;
} }
result.push(char::from_u32(out_val).ok_or_else(|| { result.push(
( char::from_u32(out_val)
LERR::MalformedEscapeSequence(seq.into_iter().collect()), .ok_or_else(|| (LERR::MalformedEscapeSequence(seq), *pos))?,
*pos, );
)
})?);
} }
// \{termination_char} - escaped // \{termination_char} - escaped
@ -1002,7 +989,10 @@ pub fn parse_string_literal(
} }
// Close wrapper // Close wrapper
_ if termination_char == next_char && escape.is_empty() => break, _ if termination_char == next_char && escape.is_empty() => {
state.is_within_text_terminated_by = None;
break;
}
// Line continuation // Line continuation
'\n' if continuation && !escape.is_empty() => { '\n' if continuation && !escape.is_empty() => {
@ -1015,7 +1005,7 @@ pub fn parse_string_literal(
// Cannot have new-lines inside non-multi-line string literals // Cannot have new-lines inside non-multi-line string literals
'\n' if !escape.is_empty() || !verbatim => { '\n' if !escape.is_empty() || !verbatim => {
pos.rewind(); pos.rewind();
return Err((LERR::UnterminatedString, start)); return Err((LERR::UnterminatedString, *pos));
} }
'\n' => { '\n' => {
@ -1027,10 +1017,7 @@ pub fn parse_string_literal(
_ if !escape.is_empty() => { _ if !escape.is_empty() => {
escape.push(next_char); escape.push(next_char);
return Err(( return Err((LERR::MalformedEscapeSequence(escape), *pos));
LERR::MalformedEscapeSequence(escape.into_iter().collect()),
*pos,
));
} }
// Whitespace to skip // Whitespace to skip
@ -1045,15 +1032,13 @@ pub fn parse_string_literal(
} }
} }
let s = result.iter().collect::<String>();
if let Some(max) = state.max_string_size { if let Some(max) = state.max_string_size {
if s.len() > max.get() { if result.len() > max.get() {
return Err((LexError::StringTooLong(max.get()), *pos)); return Err((LexError::StringTooLong(max.get()), *pos));
} }
} }
Ok((s, interpolated)) Ok((result, interpolated))
} }
/// Consume the next character. /// Consume the next character.
@ -1155,6 +1140,8 @@ fn is_numeric_digit(c: char) -> bool {
} }
/// Test if the comment block is a doc-comment. /// Test if the comment block is a doc-comment.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline(always)] #[inline(always)]
pub fn is_doc_comment(comment: &str) -> bool { pub fn is_doc_comment(comment: &str) -> bool {
(comment.starts_with("///") && !comment.starts_with("////")) (comment.starts_with("///") && !comment.starts_with("////"))
@ -1178,10 +1165,22 @@ fn get_next_token_inner(
state.comment_level = scan_block_comment(stream, state.comment_level, pos, &mut comment); state.comment_level = scan_block_comment(stream, state.comment_level, pos, &mut comment);
if state.include_comments let include_comments = state.include_comments;
|| (!state.disable_doc_comments && is_doc_comment(comment.as_ref().unwrap()))
{ #[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
let include_comments =
if !state.disable_doc_comments && is_doc_comment(comment.as_ref().unwrap()) {
true
} else {
include_comments
};
if include_comments {
return Some((Token::Comment(comment.unwrap()), start_pos)); return Some((Token::Comment(comment.unwrap()), start_pos));
} else if state.comment_level > 0 {
// Reached EOF without ending comment block
return None;
} }
} }
@ -1189,7 +1188,7 @@ fn get_next_token_inner(
if let Some(ch) = state.is_within_text_terminated_by.take() { if let Some(ch) = state.is_within_text_terminated_by.take() {
let start_pos = *pos; let start_pos = *pos;
return parse_string_literal(stream, state, pos, ch, false, true, true, true).map_or_else( return parse_string_literal(stream, state, pos, ch, false, true, true).map_or_else(
|(err, err_pos)| Some((Token::LexError(err), err_pos)), |(err, err_pos)| Some((Token::LexError(err), err_pos)),
|(result, interpolated)| { |(result, interpolated)| {
if interpolated { if interpolated {
@ -1365,7 +1364,7 @@ fn get_next_token_inner(
// " - string literal // " - string literal
('"', _) => { ('"', _) => {
return parse_string_literal(stream, state, pos, c, true, false, false, false) return parse_string_literal(stream, state, pos, c, true, false, false)
.map_or_else( .map_or_else(
|(err, err_pos)| Some((Token::LexError(err), err_pos)), |(err, err_pos)| Some((Token::LexError(err), err_pos)),
|(result, _)| Some((Token::StringConstant(result), start_pos)), |(result, _)| Some((Token::StringConstant(result), start_pos)),
@ -1373,8 +1372,26 @@ fn get_next_token_inner(
} }
// ` - string literal // ` - string literal
('`', _) => { ('`', _) => {
return parse_string_literal(stream, state, pos, c, false, true, true, true) // Start from the next line if at the end of line
.map_or_else( match stream.peek_next() {
// `\r - start from next line
Some('\r') => {
eat_next(stream, pos);
pos.new_line();
// `\r\n
if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) {
eat_next(stream, pos);
}
}
// `\n - start from next line
Some('\n') => {
eat_next(stream, pos);
pos.new_line();
}
_ => (),
}
return parse_string_literal(stream, state, pos, c, false, true, true).map_or_else(
|(err, err_pos)| Some((Token::LexError(err), err_pos)), |(err, err_pos)| Some((Token::LexError(err), err_pos)),
|(result, interpolated)| { |(result, interpolated)| {
if interpolated { if interpolated {
@ -1395,8 +1412,7 @@ fn get_next_token_inner(
} }
('\'', _) => { ('\'', _) => {
return Some( return Some(
parse_string_literal(stream, state, pos, c, false, false, false, false) parse_string_literal(stream, state, pos, c, false, false, false).map_or_else(
.map_or_else(
|(err, err_pos)| (Token::LexError(err), err_pos), |(err, err_pos)| (Token::LexError(err), err_pos),
|(result, _)| { |(result, _)| {
let mut chars = result.chars(); let mut chars = result.chars();
@ -1496,6 +1512,8 @@ fn get_next_token_inner(
eat_next(stream, pos); eat_next(stream, pos);
let mut comment = match stream.peek_next() { let mut comment = match stream.peek_next() {
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
Some('/') if !state.disable_doc_comments => { Some('/') if !state.disable_doc_comments => {
eat_next(stream, pos); eat_next(stream, pos);
@ -1529,6 +1547,8 @@ fn get_next_token_inner(
eat_next(stream, pos); eat_next(stream, pos);
let mut comment = match stream.peek_next() { let mut comment = match stream.peek_next() {
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
Some('*') if !state.disable_doc_comments => { Some('*') if !state.disable_doc_comments => {
eat_next(stream, pos); eat_next(stream, pos);
@ -1903,6 +1923,10 @@ impl<'a> Iterator for TokenIterator<'a> {
let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) { let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
// {EOF} // {EOF}
None => return None, None => return None,
// Unterminated string at EOF
Some((Token::StringConstant(_), _)) if self.state.is_within_text_terminated_by.is_some() => {
return Some((Token::LexError(LERR::UnterminatedString), self.pos));
}
// Reserved keyword/symbol // Reserved keyword/symbol
Some((Token::Reserved(s), pos)) => (match Some((Token::Reserved(s), pos)) => (match
(s.as_str(), self.engine.custom_keywords.contains_key(s.as_str())) (s.as_str(), self.engine.custom_keywords.contains_key(s.as_str()))
@ -2024,6 +2048,8 @@ impl Engine {
comment_level: 0, comment_level: 0,
end_with_none: false, end_with_none: false,
include_comments: false, include_comments: false,
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
disable_doc_comments: self.disable_doc_comments, disable_doc_comments: self.disable_doc_comments,
is_within_text_terminated_by: None, is_within_text_terminated_by: None,
}, },

View File

@ -27,6 +27,7 @@ fn test_comments() -> Result<(), Box<EvalAltResult>> {
} }
#[cfg(not(feature = "no_function"))] #[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[test] #[test]
fn test_comments_doc() -> Result<(), Box<EvalAltResult>> { fn test_comments_doc() -> Result<(), Box<EvalAltResult>> {
let mut engine = Engine::new(); let mut engine = Engine::new();