Unify string continuation parsing.

This commit is contained in:
Stephen Chung 2021-04-10 10:20:17 +08:00
parent 4aaf957d73
commit 42555ac732

View File

@ -878,39 +878,31 @@ pub fn parse_string_literal(
termination_char: char,
continuation: bool,
verbatim: bool,
skip_first_new_line: bool,
allow_interpolation: bool,
) -> Result<(String, bool), (LexError, Position)> {
let mut result: smallvec::SmallVec<[char; 16]> = Default::default();
let mut escape: smallvec::SmallVec<[char; 12]> = Default::default();
let mut result = String::with_capacity(12);
let mut escape = String::with_capacity(12);
let start = *pos;
let mut skip_whitespace_until = 0;
let mut interpolated = false;
if skip_first_new_line {
// Start from the next line if at the end of line
match stream.peek_next() {
// `\r - start from next line
Some('\r') => {
eat_next(stream, pos);
// `\r\n
if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) {
eat_next(stream, pos);
}
}
// `\n - start from next line
Some('\n') => {
eat_next(stream, pos);
}
_ => (),
}
}
state.is_within_text_terminated_by = Some(termination_char);
loop {
let next_char = stream.get_next().ok_or((LERR::UnterminatedString, start))?;
let next_char = match stream.get_next() {
Some(ch) => {
pos.advance();
ch
}
None => {
if !continuation || escape != "\\" {
result += &escape;
}
pos.advance();
break;
}
};
// String interpolation?
if allow_interpolation
@ -919,6 +911,7 @@ pub fn parse_string_literal(
&& stream.peek_next().map(|ch| ch == '{').unwrap_or(false)
{
interpolated = true;
state.is_within_text_terminated_by = None;
break;
}
@ -970,31 +963,23 @@ pub fn parse_string_literal(
};
for _ in 0..len {
let c = stream.get_next().ok_or_else(|| {
(
LERR::MalformedEscapeSequence(seq.iter().cloned().collect()),
*pos,
)
})?;
let c = stream
.get_next()
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
seq.push(c);
pos.advance();
out_val *= 16;
out_val += c.to_digit(16).ok_or_else(|| {
(
LERR::MalformedEscapeSequence(seq.iter().cloned().collect()),
*pos,
)
})?;
out_val += c
.to_digit(16)
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
}
result.push(char::from_u32(out_val).ok_or_else(|| {
(
LERR::MalformedEscapeSequence(seq.into_iter().collect()),
*pos,
)
})?);
result.push(
char::from_u32(out_val)
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq), *pos))?,
);
}
// \{termination_char} - escaped
@ -1004,7 +989,10 @@ pub fn parse_string_literal(
}
// Close wrapper
_ if termination_char == next_char && escape.is_empty() => break,
_ if termination_char == next_char && escape.is_empty() => {
state.is_within_text_terminated_by = None;
break;
}
// Line continuation
'\n' if continuation && !escape.is_empty() => {
@ -1017,7 +1005,7 @@ pub fn parse_string_literal(
// Cannot have new-lines inside non-multi-line string literals
'\n' if !escape.is_empty() || !verbatim => {
pos.rewind();
return Err((LERR::UnterminatedString, start));
return Err((LERR::UnterminatedString, *pos));
}
'\n' => {
@ -1029,10 +1017,7 @@ pub fn parse_string_literal(
_ if !escape.is_empty() => {
escape.push(next_char);
return Err((
LERR::MalformedEscapeSequence(escape.into_iter().collect()),
*pos,
));
return Err((LERR::MalformedEscapeSequence(escape), *pos));
}
// Whitespace to skip
@ -1047,15 +1032,13 @@ pub fn parse_string_literal(
}
}
let s = result.iter().collect::<String>();
if let Some(max) = state.max_string_size {
if s.len() > max.get() {
if result.len() > max.get() {
return Err((LexError::StringTooLong(max.get()), *pos));
}
}
Ok((s, interpolated))
Ok((result, interpolated))
}
/// Consume the next character.
@ -1205,7 +1188,7 @@ fn get_next_token_inner(
if let Some(ch) = state.is_within_text_terminated_by.take() {
let start_pos = *pos;
return parse_string_literal(stream, state, pos, ch, false, true, true, true).map_or_else(
return parse_string_literal(stream, state, pos, ch, false, true, true).map_or_else(
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|(result, interpolated)| {
if interpolated {
@ -1381,7 +1364,7 @@ fn get_next_token_inner(
// " - string literal
('"', _) => {
return parse_string_literal(stream, state, pos, c, true, false, false, false)
return parse_string_literal(stream, state, pos, c, true, false, false)
.map_or_else(
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|(result, _)| Some((Token::StringConstant(result), start_pos)),
@ -1389,8 +1372,26 @@ fn get_next_token_inner(
}
// ` - string literal
('`', _) => {
return parse_string_literal(stream, state, pos, c, false, true, true, true)
.map_or_else(
// Start from the next line if at the end of line
match stream.peek_next() {
// `\r - start from next line
Some('\r') => {
eat_next(stream, pos);
pos.new_line();
// `\r\n
if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) {
eat_next(stream, pos);
}
}
// `\n - start from next line
Some('\n') => {
eat_next(stream, pos);
pos.new_line();
}
_ => (),
}
return parse_string_literal(stream, state, pos, c, false, true, true).map_or_else(
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|(result, interpolated)| {
if interpolated {
@ -1411,8 +1412,7 @@ fn get_next_token_inner(
}
('\'', _) => {
return Some(
parse_string_literal(stream, state, pos, c, false, false, false, false)
.map_or_else(
parse_string_literal(stream, state, pos, c, false, false, false).map_or_else(
|(err, err_pos)| (Token::LexError(err), err_pos),
|(result, _)| {
let mut chars = result.chars();
@ -1923,6 +1923,10 @@ impl<'a> Iterator for TokenIterator<'a> {
let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
// {EOF}
None => return None,
// Unterminated string at EOF
Some((Token::StringConstant(_), _)) if self.state.is_within_text_terminated_by.is_some() => {
return Some((Token::LexError(LERR::UnterminatedString), self.pos));
}
// Reserved keyword/symbol
Some((Token::Reserved(s), pos)) => (match
(s.as_str(), self.engine.custom_keywords.contains_key(s.as_str()))