Fix string handling at EOF.

This commit is contained in:
Stephen Chung 2021-04-11 21:49:03 +08:00
parent ccbbf3eb34
commit 44e2e6aeb4

View File

@@ -833,8 +833,6 @@ pub struct TokenizeState {
pub non_unary: bool, pub non_unary: bool,
/// Is the tokenizer currently inside a block comment? /// Is the tokenizer currently inside a block comment?
pub comment_level: usize, pub comment_level: usize,
/// Return [`None`] at the end of the stream instead of [`Some(Token::EOF)`][Token::EOF]?
pub end_with_none: bool,
/// Include comments? /// Include comments?
pub include_comments: bool, pub include_comments: bool,
/// Disable doc-comments? /// Disable doc-comments?
@@ -882,7 +880,8 @@ pub trait InputStream {
/// |`` `hello``_{EOF}_ |`StringConstant("hello")` |``Some('`')`` | /// |`` `hello``_{EOF}_ |`StringConstant("hello")` |``Some('`')`` |
/// |`` `hello``_{LF}{EOF}_ |`StringConstant("hello\n")` |``Some('`')`` | /// |`` `hello``_{LF}{EOF}_ |`StringConstant("hello\n")` |``Some('`')`` |
/// |`` `hello ${`` |`InterpolatedString("hello ")`<br/>next token is `{`|`None` | /// |`` `hello ${`` |`InterpolatedString("hello ")`<br/>next token is `{`|`None` |
/// |`` } hello` `` |`StringConstant(" hello")` |``Some('`')`` | /// |`` } hello` `` |`StringConstant(" hello")` |`None` |
/// |`} hello`_{EOF}_ |`StringConstant(" hello")` |``Some('`')`` |
pub fn parse_string_literal( pub fn parse_string_literal(
stream: &mut impl InputStream, stream: &mut impl InputStream,
state: &mut TokenizeState, state: &mut TokenizeState,
@@ -902,23 +901,32 @@ pub fn parse_string_literal(
state.is_within_text_terminated_by = Some(termination_char); state.is_within_text_terminated_by = Some(termination_char);
loop { loop {
assert!(
!verbatim || escape.is_empty(),
"verbatim strings should not have any escapes"
);
let next_char = match stream.get_next() { let next_char = match stream.get_next() {
Some(ch) => { Some(ch) => {
pos.advance(); pos.advance();
ch ch
} }
None if !continuation && !verbatim => { None if verbatim => {
assert_eq!(escape, "", "verbatim strings should not have any escapes");
pos.advance();
break;
}
None if continuation && !escape.is_empty() => {
assert_eq!(escape, "\\", "unexpected escape {} at end of line", escape);
pos.advance();
break;
}
None => {
result += &escape;
pos.advance(); pos.advance();
state.is_within_text_terminated_by = None; state.is_within_text_terminated_by = None;
return Err((LERR::UnterminatedString, start)); return Err((LERR::UnterminatedString, start));
} }
None => {
if verbatim || escape != "\\" {
result += &escape;
}
pos.advance();
break;
}
}; };
// String interpolation? // String interpolation?
@@ -942,7 +950,7 @@ pub fn parse_string_literal(
// \r - ignore if followed by \n // \r - ignore if followed by \n
'\r' if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) => {} '\r' if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) => {}
// \... // \...
'\\' if escape.is_empty() && !verbatim => { '\\' if !verbatim && escape.is_empty() => {
escape.push('\\'); escape.push('\\');
} }
// \\ // \\
@@ -1011,26 +1019,28 @@ pub fn parse_string_literal(
break; break;
} }
// Verbatim
'\n' if verbatim => {
assert_eq!(escape, "", "verbatim strings should not have any escapes");
pos.new_line();
result.push(next_char);
}
// Line continuation // Line continuation
'\n' if continuation && !escape.is_empty() => { '\n' if continuation && !escape.is_empty() => {
assert_eq!(escape, "\\", "unexpected escape {} at end of line", escape);
escape.clear(); escape.clear();
pos.new_line(); pos.new_line();
skip_whitespace_until = start.position().unwrap() + 1; skip_whitespace_until = start.position().unwrap() + 1;
} }
// New-line cannot be escaped // Unterminated string
// Cannot have new-lines inside non-verbatim strings '\n' => {
'\n' if !verbatim || !escape.is_empty() => {
pos.rewind(); pos.rewind();
state.is_within_text_terminated_by = None; state.is_within_text_terminated_by = None;
return Err((LERR::UnterminatedString, start)); return Err((LERR::UnterminatedString, start));
} }
'\n' => {
pos.new_line();
result.push(next_char);
}
// Unknown escape sequence // Unknown escape sequence
_ if !escape.is_empty() => { _ if !escape.is_empty() => {
escape.push(next_char); escape.push(next_char);
@@ -1746,12 +1756,8 @@ fn get_next_token_inner(
pos.advance(); pos.advance();
if state.end_with_none {
None
} else {
Some((Token::EOF, *pos)) Some((Token::EOF, *pos))
} }
}
/// Get the next identifier. /// Get the next identifier.
fn get_identifier( fn get_identifier(
@@ -1941,7 +1947,7 @@ impl<'a> Iterator for TokenIterator<'a> {
let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) { let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
// {EOF} // {EOF}
None => return None, None => return None,
// Unterminated string at EOF // {EOF} after unterminated string
Some((Token::StringConstant(_), pos)) if self.state.is_within_text_terminated_by.is_some() => { Some((Token::StringConstant(_), pos)) if self.state.is_within_text_terminated_by.is_some() => {
self.state.is_within_text_terminated_by = None; self.state.is_within_text_terminated_by = None;
return Some((Token::LexError(LERR::UnterminatedString), pos)); return Some((Token::LexError(LERR::UnterminatedString), pos));
@@ -2065,7 +2071,6 @@ impl Engine {
max_string_size: None, max_string_size: None,
non_unary: false, non_unary: false,
comment_level: 0, comment_level: 0,
end_with_none: false,
include_comments: false, include_comments: false,
#[cfg(not(feature = "no_function"))] #[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")] #[cfg(feature = "metadata")]