From 2b1555cff81711f2fd99d4cc53d9760485114df4 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Fri, 9 Apr 2021 22:48:47 +0800 Subject: [PATCH 1/4] Refine example scripts. --- scripts/array.rhai | 3 +-- scripts/assignment.rhai | 1 + scripts/comments.rhai | 2 +- scripts/function_decl1.rhai | 4 ++-- scripts/function_decl2.rhai | 8 +++----- scripts/if1.rhai | 2 +- scripts/loop.rhai | 2 ++ scripts/mat_mul.rhai | 4 ++-- scripts/module.rhai | 4 ++-- scripts/op2.rhai | 1 + scripts/op3.rhai | 1 + scripts/primes.rhai | 2 +- scripts/string.rhai | 9 +++++++++ 13 files changed, 27 insertions(+), 16 deletions(-) diff --git a/scripts/array.rhai b/scripts/array.rhai index cc6fc642..1c449fd0 100644 --- a/scripts/array.rhai +++ b/scripts/array.rhai @@ -5,5 +5,4 @@ print(x[1]); x[1] = 5; -print("x[1] should be 5:"); -print(x[1]); +print(`x[1] should be 5: ${x[1]}`); diff --git a/scripts/assignment.rhai b/scripts/assignment.rhai index 82ff7d92..93a03c45 100644 --- a/scripts/assignment.rhai +++ b/scripts/assignment.rhai @@ -1,4 +1,5 @@ print("x should be 78:"); let x = 78; + print(x); diff --git a/scripts/comments.rhai b/scripts/comments.rhai index f5d24c93..dd3612fa 100644 --- a/scripts/comments.rhai +++ b/scripts/comments.rhai @@ -8,4 +8,4 @@ let /* I am a spy in a variable declaration! 
*/ x = 5; /* look /* at /* that, /* multi-line */ comments */ can be */ nested */ -/* surrounded by */ x // comments +/* surrounded by */ this_is_not_a_comment = true // comments diff --git a/scripts/function_decl1.rhai b/scripts/function_decl1.rhai index 908bcdec..2e6a8958 100644 --- a/scripts/function_decl1.rhai +++ b/scripts/function_decl1.rhai @@ -4,6 +4,6 @@ fn bob() { return 3; } -print("bob() should be 3:"); +let result = bob(); -print(bob()); +print(`bob() should be 3: ${result}`); diff --git a/scripts/function_decl2.rhai b/scripts/function_decl2.rhai index 0d72743e..ae14c454 100644 --- a/scripts/function_decl2.rhai +++ b/scripts/function_decl2.rhai @@ -7,10 +7,8 @@ fn addme(a, b) { a + b; // notice that the last value is returned even if terminated by a semicolon } -print("addme(a, 4) should be 46:"); +let result = addme(a, 4); -print(addme(a, 4)); +print(`addme(a, 4) should be 46: ${result}`); -print("a should still be 3:"); - -print(a); // should print 3 - 'a' is never changed +print(`a should still be 3: ${a}`); // should print 3 - 'a' is never changed diff --git a/scripts/if1.rhai b/scripts/if1.rhai index cbfe2938..4e264d64 100644 --- a/scripts/if1.rhai +++ b/scripts/if1.rhai @@ -11,4 +11,4 @@ if a > b { print(x); // should print 0 } else { print("Oops! 
a == b"); -} \ No newline at end of file +} diff --git a/scripts/loop.rhai b/scripts/loop.rhai index 1b514baf..846450f0 100644 --- a/scripts/loop.rhai +++ b/scripts/loop.rhai @@ -10,3 +10,5 @@ loop { if x <= 0 { break; } } + +export x as foo; diff --git a/scripts/mat_mul.rhai b/scripts/mat_mul.rhai index 9fed0d77..65743a37 100644 --- a/scripts/mat_mul.rhai +++ b/scripts/mat_mul.rhai @@ -12,11 +12,11 @@ fn new_mat(x, y) { fn mat_gen(n) { let m = new_mat(n, n); - let tmp = 1.0 / n.to_float() / n.to_float(); + let tmp = 1.0 / n / n; for i in range(0, n) { for j in range(0, n) { - m[i][j] = tmp * (i.to_float() - j.to_float()) * (i.to_float() + j.to_float()); + m[i][j] = tmp * (i - j) * (i + j); } } diff --git a/scripts/module.rhai b/scripts/module.rhai index 77ad12ac..f92fb162 100644 --- a/scripts/module.rhai +++ b/scripts/module.rhai @@ -1,3 +1,3 @@ -import "loop"; +import "loop" as x; -print("Module test!"); +print(`Module test! foo = ${x::foo}`); diff --git a/scripts/op2.rhai b/scripts/op2.rhai index e00a1b99..471e8ee4 100644 --- a/scripts/op2.rhai +++ b/scripts/op2.rhai @@ -1,4 +1,5 @@ print("The result should be 182:"); let x = 12 + 34 * 5; + print(x); diff --git a/scripts/op3.rhai b/scripts/op3.rhai index aa7349a8..73cec23b 100644 --- a/scripts/op3.rhai +++ b/scripts/op3.rhai @@ -1,4 +1,5 @@ print("The result should be 230:"); let x = (12 + 34) * 5; + print(x); diff --git a/scripts/primes.rhai b/scripts/primes.rhai index f5b65763..380750d7 100644 --- a/scripts/primes.rhai +++ b/scripts/primes.rhai @@ -30,4 +30,4 @@ print(`Run time = ${now.elapsed} seconds.`); if total_primes_found != 78_498 { print("The answer is WRONG! 
Should be 78,498!"); -} \ No newline at end of file +} diff --git a/scripts/string.rhai b/scripts/string.rhai index a507648c..21ea022b 100644 --- a/scripts/string.rhai +++ b/scripts/string.rhai @@ -34,4 +34,13 @@ made using multi-line literal print(s); +// Interpolation +let s = `This is interpolation ${ + let x = `within ${let y = "yet another level \ + of interpolation!"; y} interpolation`; + x +} within literal string.`; + +print(s); + print(">>> END <<<"); From 29d186b361a35a884b71456c5a9303f8d5453866 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Fri, 9 Apr 2021 22:49:47 +0800 Subject: [PATCH 2/4] Put doc-comments under metadata flag. --- CHANGELOG.md | 1 + src/ast.rs | 9 +++++++-- src/optimize.rs | 2 ++ src/parser.rs | 34 ++++++++++++++++++++++++++-------- 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e53f2148..2ebebf9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ Breaking changes * `ModuleResolver` trait methods take an additional parameter `source_path` that contains the path of the current environment. This is to facilitate loading other script files always from the current directory. * `FileModuleResolver` now resolves relative paths under the source path if there is no base path set. * `FileModuleResolver::base_path` now returns `Option<&str>` which is `None` if there is no base path set. +* Doc-comments now require the `metadata` feature. New features ------------ diff --git a/src/ast.rs b/src/ast.rs index 9d7902d8..d7b50786 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -11,7 +11,6 @@ use crate::stdlib::{ iter::empty, num::{NonZeroU8, NonZeroUsize}, ops::{Add, AddAssign}, - string::String, vec, vec::Vec, }; @@ -65,7 +64,9 @@ pub struct ScriptFnDef { #[cfg(not(feature = "no_closure"))] pub externals: crate::stdlib::collections::BTreeSet, /// Function doc-comments (if any). 
- pub comments: StaticVec, + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] + pub comments: StaticVec, } impl fmt::Display for ScriptFnDef { @@ -103,6 +104,8 @@ pub struct ScriptFnMetadata<'a> { /// /// Leading white-spaces are stripped, and each string slice always starts with the corresponding /// doc-comment leader: `///` or `/**`. + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] pub comments: Vec<&'a str>, /// Function access mode. pub access: FnAccess, @@ -134,6 +137,8 @@ impl<'a> Into> for &'a ScriptFnDef { #[inline(always)] fn into(self) -> ScriptFnMetadata<'a> { ScriptFnMetadata { + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] comments: self.comments.iter().map(|s| s.as_str()).collect(), access: self.access, name: &self.name, diff --git a/src/optimize.rs b/src/optimize.rs index 03814584..7e4bcb88 100644 --- a/src/optimize.rs +++ b/src/optimize.rs @@ -1002,6 +1002,8 @@ pub fn optimize_into_ast( lib: None, #[cfg(not(feature = "no_module"))] mods: Default::default(), + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] comments: Default::default(), }) .for_each(|fn_def| { diff --git a/src/parser.rs b/src/parser.rs index ab0aa0bc..9bac4053 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -16,7 +16,7 @@ use crate::stdlib::{ hash::{Hash, Hasher}, iter::empty, num::{NonZeroU8, NonZeroUsize}, - string::{String, ToString}, + string::ToString, vec, vec::Vec, }; @@ -2483,10 +2483,10 @@ fn parse_stmt( ) -> Result { use AccessMode::{ReadOnly, ReadWrite}; - let mut _comments: StaticVec = Default::default(); - #[cfg(not(feature = "no_function"))] - { + #[cfg(feature = "metadata")] + let comments = { + let mut comments: StaticVec = Default::default(); let mut comments_pos = Position::NONE; // Handle doc-comments. 
@@ -2505,7 +2505,7 @@ fn parse_stmt( match input.next().unwrap().0 { Token::Comment(comment) => { - _comments.push(comment); + comments.push(comment); match input.peek().unwrap() { (Token::Fn, _) | (Token::Private, _) => break, @@ -2516,7 +2516,9 @@ fn parse_stmt( _ => unreachable!(), } } - } + + comments + }; let (token, token_pos) = match input.peek().unwrap() { (Token::EOF, pos) => return Ok(Stmt::Noop(*pos)), @@ -2572,7 +2574,17 @@ fn parse_stmt( pos: pos, }; - let func = parse_fn(input, &mut new_state, lib, access, settings, _comments)?; + let func = parse_fn( + input, + &mut new_state, + lib, + access, + settings, + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] + comments, + )?; + let hash = calc_fn_hash(empty(), &func.name, func.params.len()); if lib.contains_key(&hash) { @@ -2727,7 +2739,9 @@ fn parse_fn( lib: &mut FunctionsLib, access: FnAccess, mut settings: ParseSettings, - comments: StaticVec, + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] + comments: StaticVec, ) -> Result { #[cfg(not(feature = "unchecked"))] settings.ensure_level_within_max_limit(state.max_expr_depth)?; @@ -2814,6 +2828,8 @@ fn parse_fn( lib: None, #[cfg(not(feature = "no_module"))] mods: Default::default(), + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] comments, }) } @@ -2967,6 +2983,8 @@ fn parse_anon_fn( lib: None, #[cfg(not(feature = "no_module"))] mods: Default::default(), + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] comments: Default::default(), }; From 4aaf957d73b7a2b5369211af1873ec6fe893296c Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Fri, 9 Apr 2021 23:13:33 +0800 Subject: [PATCH 3/4] Gate doc-comment tokenizing. 
--- src/engine.rs | 6 ++++++ src/engine_settings.rs | 6 +++++- src/token.rs | 28 +++++++++++++++++++++++++--- tests/comments.rs | 1 + 4 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/engine.rs b/src/engine.rs index dfc1225c..ff549a1c 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -758,6 +758,8 @@ pub struct Engine { pub(crate) limits: Limits, /// Disable doc-comments? + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] pub(crate) disable_doc_comments: bool, } @@ -874,6 +876,8 @@ impl Engine { max_map_size: None, }, + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] disable_doc_comments: false, }; @@ -930,6 +934,8 @@ impl Engine { max_map_size: None, }, + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] disable_doc_comments: false, } } diff --git a/src/engine_settings.rs b/src/engine_settings.rs index 017d19b0..f2860b25 100644 --- a/src/engine_settings.rs +++ b/src/engine_settings.rs @@ -33,7 +33,11 @@ impl Engine { pub fn optimization_level(&self) -> crate::OptimizationLevel { self.optimization_level } - /// Enable/disable doc-comments. + /// _(METADATA)_ Enable/disable doc-comments for functions. + /// Exported under the `metadata` feature only. + /// Not available under `no_function`. + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] #[inline(always)] pub fn enable_doc_comments(&mut self, enable: bool) -> &mut Self { self.disable_doc_comments = !enable; diff --git a/src/token.rs b/src/token.rs index cf5b06f2..e0aaa36c 100644 --- a/src/token.rs +++ b/src/token.rs @@ -838,6 +838,8 @@ pub struct TokenizeState { /// Include comments? pub include_comments: bool, /// Disable doc-comments? + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] pub disable_doc_comments: bool, /// Is the current tokenizer position within the text stream of an interpolated string? 
pub is_within_text_terminated_by: Option, @@ -1155,6 +1157,8 @@ fn is_numeric_digit(c: char) -> bool { } /// Test if the comment block is a doc-comment. +#[cfg(not(feature = "no_function"))] +#[cfg(feature = "metadata")] #[inline(always)] pub fn is_doc_comment(comment: &str) -> bool { (comment.starts_with("///") && !comment.starts_with("////")) @@ -1178,10 +1182,22 @@ fn get_next_token_inner( state.comment_level = scan_block_comment(stream, state.comment_level, pos, &mut comment); - if state.include_comments - || (!state.disable_doc_comments && is_doc_comment(comment.as_ref().unwrap())) - { + let include_comments = state.include_comments; + + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] + let include_comments = + if !state.disable_doc_comments && is_doc_comment(comment.as_ref().unwrap()) { + true + } else { + include_comments + }; + + if include_comments { return Some((Token::Comment(comment.unwrap()), start_pos)); + } else if state.comment_level > 0 { + // Reached EOF without ending comment block + return None; } } @@ -1496,6 +1512,8 @@ fn get_next_token_inner( eat_next(stream, pos); let mut comment = match stream.peek_next() { + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] Some('/') if !state.disable_doc_comments => { eat_next(stream, pos); @@ -1529,6 +1547,8 @@ fn get_next_token_inner( eat_next(stream, pos); let mut comment = match stream.peek_next() { + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] Some('*') if !state.disable_doc_comments => { eat_next(stream, pos); @@ -2024,6 +2044,8 @@ impl Engine { comment_level: 0, end_with_none: false, include_comments: false, + #[cfg(not(feature = "no_function"))] + #[cfg(feature = "metadata")] disable_doc_comments: self.disable_doc_comments, is_within_text_terminated_by: None, }, diff --git a/tests/comments.rs b/tests/comments.rs index 72f64aed..4c3452f7 100644 --- a/tests/comments.rs +++ b/tests/comments.rs @@ -27,6 +27,7 @@ fn test_comments() -> 
Result<(), Box> { } #[cfg(not(feature = "no_function"))] +#[cfg(feature = "metadata")] #[test] fn test_comments_doc() -> Result<(), Box> { let mut engine = Engine::new(); From 42555ac732e880df9315e84604ad195ed5136dae Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Sat, 10 Apr 2021 10:20:17 +0800 Subject: [PATCH 4/4] Unify string continuation parsing. --- src/token.rs | 160 ++++++++++++++++++++++++++------------------------- 1 file changed, 82 insertions(+), 78 deletions(-) diff --git a/src/token.rs b/src/token.rs index e0aaa36c..7f454667 100644 --- a/src/token.rs +++ b/src/token.rs @@ -878,39 +878,31 @@ pub fn parse_string_literal( termination_char: char, continuation: bool, verbatim: bool, - skip_first_new_line: bool, allow_interpolation: bool, ) -> Result<(String, bool), (LexError, Position)> { - let mut result: smallvec::SmallVec<[char; 16]> = Default::default(); - let mut escape: smallvec::SmallVec<[char; 12]> = Default::default(); + let mut result = String::with_capacity(12); + let mut escape = String::with_capacity(12); let start = *pos; let mut skip_whitespace_until = 0; let mut interpolated = false; - if skip_first_new_line { - // Start from the next line if at the end of line - match stream.peek_next() { - // `\r - start from next line - Some('\r') => { - eat_next(stream, pos); - // `\r\n - if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) { - eat_next(stream, pos); - } - } - // `\n - start from next line - Some('\n') => { - eat_next(stream, pos); - } - _ => (), - } - } + state.is_within_text_terminated_by = Some(termination_char); loop { - let next_char = stream.get_next().ok_or((LERR::UnterminatedString, start))?; - - pos.advance(); + let next_char = match stream.get_next() { + Some(ch) => { + pos.advance(); + ch + } + None => { + if !continuation || escape != "\\" { + result += &escape; + } + pos.advance(); + break; + } + }; // String interpolation? 
if allow_interpolation @@ -919,6 +911,7 @@ pub fn parse_string_literal( && stream.peek_next().map(|ch| ch == '{').unwrap_or(false) { interpolated = true; + state.is_within_text_terminated_by = None; break; } @@ -970,31 +963,23 @@ pub fn parse_string_literal( }; for _ in 0..len { - let c = stream.get_next().ok_or_else(|| { - ( - LERR::MalformedEscapeSequence(seq.iter().cloned().collect()), - *pos, - ) - })?; + let c = stream + .get_next() + .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?; seq.push(c); pos.advance(); out_val *= 16; - out_val += c.to_digit(16).ok_or_else(|| { - ( - LERR::MalformedEscapeSequence(seq.iter().cloned().collect()), - *pos, - ) - })?; + out_val += c + .to_digit(16) + .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?; } - result.push(char::from_u32(out_val).ok_or_else(|| { - ( - LERR::MalformedEscapeSequence(seq.into_iter().collect()), - *pos, - ) - })?); + result.push( + char::from_u32(out_val) + .ok_or_else(|| (LERR::MalformedEscapeSequence(seq), *pos))?, + ); } // \{termination_char} - escaped @@ -1004,7 +989,10 @@ pub fn parse_string_literal( } // Close wrapper - _ if termination_char == next_char && escape.is_empty() => break, + _ if termination_char == next_char && escape.is_empty() => { + state.is_within_text_terminated_by = None; + break; + } // Line continuation '\n' if continuation && !escape.is_empty() => { @@ -1017,7 +1005,7 @@ pub fn parse_string_literal( // Cannot have new-lines inside non-multi-line string literals '\n' if !escape.is_empty() || !verbatim => { pos.rewind(); - return Err((LERR::UnterminatedString, start)); + return Err((LERR::UnterminatedString, *pos)); } '\n' => { @@ -1029,10 +1017,7 @@ pub fn parse_string_literal( _ if !escape.is_empty() => { escape.push(next_char); - return Err(( - LERR::MalformedEscapeSequence(escape.into_iter().collect()), - *pos, - )); + return Err((LERR::MalformedEscapeSequence(escape), *pos)); } // Whitespace to skip @@ -1047,15 +1032,13 @@ 
pub fn parse_string_literal( } } - let s = result.iter().collect::(); - if let Some(max) = state.max_string_size { - if s.len() > max.get() { + if result.len() > max.get() { return Err((LexError::StringTooLong(max.get()), *pos)); } } - Ok((s, interpolated)) + Ok((result, interpolated)) } /// Consume the next character. @@ -1205,7 +1188,7 @@ fn get_next_token_inner( if let Some(ch) = state.is_within_text_terminated_by.take() { let start_pos = *pos; - return parse_string_literal(stream, state, pos, ch, false, true, true, true).map_or_else( + return parse_string_literal(stream, state, pos, ch, false, true, true).map_or_else( |(err, err_pos)| Some((Token::LexError(err), err_pos)), |(result, interpolated)| { if interpolated { @@ -1381,7 +1364,7 @@ fn get_next_token_inner( // " - string literal ('"', _) => { - return parse_string_literal(stream, state, pos, c, true, false, false, false) + return parse_string_literal(stream, state, pos, c, true, false, false) .map_or_else( |(err, err_pos)| Some((Token::LexError(err), err_pos)), |(result, _)| Some((Token::StringConstant(result), start_pos)), @@ -1389,17 +1372,35 @@ fn get_next_token_inner( } // ` - string literal ('`', _) => { - return parse_string_literal(stream, state, pos, c, false, true, true, true) - .map_or_else( - |(err, err_pos)| Some((Token::LexError(err), err_pos)), - |(result, interpolated)| { - if interpolated { - Some((Token::InterpolatedString(result), start_pos)) - } else { - Some((Token::StringConstant(result), start_pos)) - } - }, - ); + // Start from the next line if at the end of line + match stream.peek_next() { + // `\r - start from next line + Some('\r') => { + eat_next(stream, pos); + pos.new_line(); + // `\r\n + if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) { + eat_next(stream, pos); + } + } + // `\n - start from next line + Some('\n') => { + eat_next(stream, pos); + pos.new_line(); + } + _ => (), + } + + return parse_string_literal(stream, state, pos, c, false, true, 
true).map_or_else( + |(err, err_pos)| Some((Token::LexError(err), err_pos)), + |(result, interpolated)| { + if interpolated { + Some((Token::InterpolatedString(result), start_pos)) + } else { + Some((Token::StringConstant(result), start_pos)) + } + }, + ); } // ' - character literal @@ -1411,20 +1412,19 @@ fn get_next_token_inner( } ('\'', _) => { return Some( - parse_string_literal(stream, state, pos, c, false, false, false, false) - .map_or_else( - |(err, err_pos)| (Token::LexError(err), err_pos), - |(result, _)| { - let mut chars = result.chars(); - let first = chars.next().unwrap(); + parse_string_literal(stream, state, pos, c, false, false, false).map_or_else( + |(err, err_pos)| (Token::LexError(err), err_pos), + |(result, _)| { + let mut chars = result.chars(); + let first = chars.next().unwrap(); - if chars.next().is_some() { - (Token::LexError(LERR::MalformedChar(result)), start_pos) - } else { - (Token::CharConstant(first), start_pos) - } - }, - ), + if chars.next().is_some() { + (Token::LexError(LERR::MalformedChar(result)), start_pos) + } else { + (Token::CharConstant(first), start_pos) + } + }, + ), ) } @@ -1923,6 +1923,10 @@ impl<'a> Iterator for TokenIterator<'a> { let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) { // {EOF} None => return None, + // Unterminated string at EOF + Some((Token::StringConstant(_), _)) if self.state.is_within_text_terminated_by.is_some() => { + return Some((Token::LexError(LERR::UnterminatedString), self.pos)); + } // Reserved keyword/symbol Some((Token::Reserved(s), pos)) => (match (s.as_str(), self.engine.custom_keywords.contains_key(s.as_str()))