Process block comments better.

This commit is contained in:
Stephen Chung 2022-12-02 14:06:31 +08:00
parent eb5b15e954
commit e10576abff
4 changed files with 56 additions and 22 deletions

View File

@ -44,6 +44,7 @@ Enhancements
* `Engine::set_XXX` API can now be chained.
* `EvalContext::scope_mut` now returns `&mut Scope` instead of `&mut &mut Scope`.
* Line-style doc-comments are now merged into a single string to avoid creating many strings. Block-style doc-comments continue to be independent strings.
* Block-style doc-comments are now "un-indented" for better formatting.
* Doc-comments on plugin modules are now captured in the module's `doc` field.

View File

@ -223,7 +223,7 @@ impl Engine {
let state = &mut ParseState::new(scope, interned_strings, tc);
let mut _ast = self.parse(stream.peekable(), state, optimization_level)?;
#[cfg(feature = "metadata")]
_ast.set_doc(state.tokenizer_control.borrow().global_comments.join("\n"));
_ast.set_doc(&state.tokenizer_control.borrow().global_comments);
Ok(_ast)
}
/// Compile a string containing an expression into an [`AST`],

View File

@ -492,6 +492,35 @@ fn match_token(input: &mut TokenStream, token: Token) -> (bool, Position) {
}
}
/// Process a block comment such that it indents properly relative to the start token.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline]
/// `comment` is the full comment text (including delimiters); `pos` is the
/// zero-based column of the comment's start token on its line (the call site
/// passes `pos.position().unwrap_or(1) - 1`).
fn unindent_block_comment(comment: String, pos: usize) -> String {
    // Nothing to do if the comment starts at the left margin or is single-line.
    if pos == 0 || !comment.contains('\n') {
        return comment;
    }

    // Smallest leading-whitespace width (in characters) over all continuation
    // lines, capped at the start token's column.  Counting characters instead
    // of bytes avoids panicking later on multi-byte whitespace (e.g. U+00A0),
    // where a byte offset may not fall on a `char` boundary.
    let offset = comment
        .split('\n')
        .skip(1)
        .map(|s| s.chars().take_while(|c| c.is_whitespace()).count())
        .min()
        .unwrap_or(pos)
        .min(pos);

    // Any continuation line flush with the left margin means nothing to strip.
    if offset == 0 {
        return comment;
    }

    // Strip `offset` characters from every line except the first.
    // Every continuation line has at least `offset` whitespace characters
    // (it contributed to the `min` above), so only whitespace is removed.
    comment
        .split('\n')
        .enumerate()
        .map(|(i, s)| {
            if i == 0 {
                s
            } else {
                // `nth(offset)` is `None` when the line is exactly `offset`
                // characters long — the stripped remainder is then empty.
                s.char_indices().nth(offset).map_or("", |(n, _)| &s[n..])
            }
        })
        .collect::<Vec<_>>()
        .join("\n")
}
/// Parse a variable name.
fn parse_var_name(input: &mut TokenStream) -> ParseResult<(SmartString, Position)> {
match input.next().expect(NEVER_ENDS) {
@ -3206,9 +3235,9 @@ impl Engine {
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
let comments = {
let mut comments = StaticVec::<SmartString>::new();
let mut comments = StaticVec::<String>::new();
let mut comments_pos = Position::NONE;
let mut buf = SmartString::new_const();
let mut buf = String::new();
// Handle doc-comments.
while let (Token::Comment(ref comment), pos) = input.peek().expect(NEVER_ENDS) {
@ -3224,15 +3253,17 @@ impl Engine {
return Err(PERR::WrongDocComment.into_err(comments_pos));
}
match input.next().expect(NEVER_ENDS).0 {
Token::Comment(comment) => {
match input.next().expect(NEVER_ENDS) {
(Token::Comment(comment), pos) => {
if comment.contains('\n') {
// Assume block comment
if !buf.is_empty() {
comments.push(buf.clone());
buf.clear();
}
comments.push(*comment);
let c =
unindent_block_comment(*comment, pos.position().unwrap_or(1) - 1);
comments.push(c);
} else {
if !buf.is_empty() {
buf.push('\n');
@ -3246,7 +3277,7 @@ impl Engine {
_ => return Err(PERR::WrongDocComment.into_err(comments_pos)),
}
}
token => unreachable!("Token::Comment expected but gets {:?}", token),
(token, ..) => unreachable!("Token::Comment expected but gets {:?}", token),
}
}
@ -3545,7 +3576,7 @@ impl Engine {
settings: ParseSettings,
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
comments: StaticVec<SmartString>,
comments: impl IntoIterator<Item = String>,
) -> ParseResult<ScriptFnDef> {
let settings = settings;
@ -3633,7 +3664,7 @@ impl Engine {
#[cfg(feature = "metadata")]
comments: comments
.into_iter()
.map(|s| s.to_string().into_boxed_str())
.map(|s| s.into_boxed_str())
.collect::<Vec<_>>()
.into_boxed_slice(),
})

View File

@ -23,9 +23,9 @@ pub struct TokenizerControlBlock {
/// Is the current tokenizer position within an interpolated text string?
/// This flag allows switching the tokenizer back to _text_ parsing after an interpolation stream.
pub is_within_text: bool,
/// Collection of global comments.
/// Global comments.
#[cfg(feature = "metadata")]
pub global_comments: Vec<SmartString>,
pub global_comments: String,
}
impl TokenizerControlBlock {
@ -36,7 +36,7 @@ impl TokenizerControlBlock {
Self {
is_within_text: false,
#[cfg(feature = "metadata")]
global_comments: Vec::new(),
global_comments: String::new(),
}
}
}
@ -262,7 +262,7 @@ pub enum Token {
/// A lexer error.
LexError(Box<LexError>),
/// A comment block.
Comment(Box<SmartString>),
Comment(Box<String>),
/// A reserved symbol.
Reserved(Box<SmartString>),
/// A custom keyword.
@ -1149,7 +1149,7 @@ fn scan_block_comment(
stream: &mut impl InputStream,
level: usize,
pos: &mut Position,
comment: Option<&mut SmartString>,
comment: Option<&mut String>,
) -> usize {
let mut level = level;
let mut comment = comment;
@ -1244,7 +1244,7 @@ fn get_next_token_inner(
if state.comment_level > 0 {
let start_pos = *pos;
let mut comment = if state.include_comments {
Some(SmartString::new_const())
Some(String::new())
} else {
None
};
@ -1637,7 +1637,7 @@ fn get_next_token_inner(
('/', '/') => {
eat_next(stream, pos);
let mut comment: Option<SmartString> = match stream.peek_next() {
let mut comment: Option<String> = match stream.peek_next() {
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
Some('/') => {
@ -1680,11 +1680,13 @@ fn get_next_token_inner(
if let Some(comment) = comment {
match comment {
#[cfg(feature = "metadata")]
_ if comment.starts_with("//!") => state
.tokenizer_control
.borrow_mut()
.global_comments
.push(comment),
_ if comment.starts_with("//!") => {
let g = &mut state.tokenizer_control.borrow_mut().global_comments;
if !g.is_empty() {
g.push('\n');
}
g.push_str(&comment);
}
_ => return Some((Token::Comment(comment.into()), start_pos)),
}
}
@ -1693,7 +1695,7 @@ fn get_next_token_inner(
state.comment_level = 1;
eat_next(stream, pos);
let mut comment: Option<SmartString> = match stream.peek_next() {
let mut comment: Option<String> = match stream.peek_next() {
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
Some('*') => {