Unify string continuation parsing.

This commit is contained in:
Stephen Chung 2021-04-10 10:20:17 +08:00
parent 4aaf957d73
commit 42555ac732

View File

@ -878,39 +878,31 @@ pub fn parse_string_literal(
termination_char: char, termination_char: char,
continuation: bool, continuation: bool,
verbatim: bool, verbatim: bool,
skip_first_new_line: bool,
allow_interpolation: bool, allow_interpolation: bool,
) -> Result<(String, bool), (LexError, Position)> { ) -> Result<(String, bool), (LexError, Position)> {
let mut result: smallvec::SmallVec<[char; 16]> = Default::default(); let mut result = String::with_capacity(12);
let mut escape: smallvec::SmallVec<[char; 12]> = Default::default(); let mut escape = String::with_capacity(12);
let start = *pos; let start = *pos;
let mut skip_whitespace_until = 0; let mut skip_whitespace_until = 0;
let mut interpolated = false; let mut interpolated = false;
if skip_first_new_line { state.is_within_text_terminated_by = Some(termination_char);
// Start from the next line if at the end of line
match stream.peek_next() {
// `\r - start from next line
Some('\r') => {
eat_next(stream, pos);
// `\r\n
if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) {
eat_next(stream, pos);
}
}
// `\n - start from next line
Some('\n') => {
eat_next(stream, pos);
}
_ => (),
}
}
loop { loop {
let next_char = stream.get_next().ok_or((LERR::UnterminatedString, start))?; let next_char = match stream.get_next() {
Some(ch) => {
pos.advance(); pos.advance();
ch
}
None => {
if !continuation || escape != "\\" {
result += &escape;
}
pos.advance();
break;
}
};
// String interpolation? // String interpolation?
if allow_interpolation if allow_interpolation
@ -919,6 +911,7 @@ pub fn parse_string_literal(
&& stream.peek_next().map(|ch| ch == '{').unwrap_or(false) && stream.peek_next().map(|ch| ch == '{').unwrap_or(false)
{ {
interpolated = true; interpolated = true;
state.is_within_text_terminated_by = None;
break; break;
} }
@ -970,31 +963,23 @@ pub fn parse_string_literal(
}; };
for _ in 0..len { for _ in 0..len {
let c = stream.get_next().ok_or_else(|| { let c = stream
( .get_next()
LERR::MalformedEscapeSequence(seq.iter().cloned().collect()), .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
*pos,
)
})?;
seq.push(c); seq.push(c);
pos.advance(); pos.advance();
out_val *= 16; out_val *= 16;
out_val += c.to_digit(16).ok_or_else(|| { out_val += c
( .to_digit(16)
LERR::MalformedEscapeSequence(seq.iter().cloned().collect()), .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
*pos,
)
})?;
} }
result.push(char::from_u32(out_val).ok_or_else(|| { result.push(
( char::from_u32(out_val)
LERR::MalformedEscapeSequence(seq.into_iter().collect()), .ok_or_else(|| (LERR::MalformedEscapeSequence(seq), *pos))?,
*pos, );
)
})?);
} }
// \{termination_char} - escaped // \{termination_char} - escaped
@ -1004,7 +989,10 @@ pub fn parse_string_literal(
} }
// Close wrapper // Close wrapper
_ if termination_char == next_char && escape.is_empty() => break, _ if termination_char == next_char && escape.is_empty() => {
state.is_within_text_terminated_by = None;
break;
}
// Line continuation // Line continuation
'\n' if continuation && !escape.is_empty() => { '\n' if continuation && !escape.is_empty() => {
@ -1017,7 +1005,7 @@ pub fn parse_string_literal(
// Cannot have new-lines inside non-multi-line string literals // Cannot have new-lines inside non-multi-line string literals
'\n' if !escape.is_empty() || !verbatim => { '\n' if !escape.is_empty() || !verbatim => {
pos.rewind(); pos.rewind();
return Err((LERR::UnterminatedString, start)); return Err((LERR::UnterminatedString, *pos));
} }
'\n' => { '\n' => {
@ -1029,10 +1017,7 @@ pub fn parse_string_literal(
_ if !escape.is_empty() => { _ if !escape.is_empty() => {
escape.push(next_char); escape.push(next_char);
return Err(( return Err((LERR::MalformedEscapeSequence(escape), *pos));
LERR::MalformedEscapeSequence(escape.into_iter().collect()),
*pos,
));
} }
// Whitespace to skip // Whitespace to skip
@ -1047,15 +1032,13 @@ pub fn parse_string_literal(
} }
} }
let s = result.iter().collect::<String>();
if let Some(max) = state.max_string_size { if let Some(max) = state.max_string_size {
if s.len() > max.get() { if result.len() > max.get() {
return Err((LexError::StringTooLong(max.get()), *pos)); return Err((LexError::StringTooLong(max.get()), *pos));
} }
} }
Ok((s, interpolated)) Ok((result, interpolated))
} }
/// Consume the next character. /// Consume the next character.
@ -1205,7 +1188,7 @@ fn get_next_token_inner(
if let Some(ch) = state.is_within_text_terminated_by.take() { if let Some(ch) = state.is_within_text_terminated_by.take() {
let start_pos = *pos; let start_pos = *pos;
return parse_string_literal(stream, state, pos, ch, false, true, true, true).map_or_else( return parse_string_literal(stream, state, pos, ch, false, true, true).map_or_else(
|(err, err_pos)| Some((Token::LexError(err), err_pos)), |(err, err_pos)| Some((Token::LexError(err), err_pos)),
|(result, interpolated)| { |(result, interpolated)| {
if interpolated { if interpolated {
@ -1381,7 +1364,7 @@ fn get_next_token_inner(
// " - string literal // " - string literal
('"', _) => { ('"', _) => {
return parse_string_literal(stream, state, pos, c, true, false, false, false) return parse_string_literal(stream, state, pos, c, true, false, false)
.map_or_else( .map_or_else(
|(err, err_pos)| Some((Token::LexError(err), err_pos)), |(err, err_pos)| Some((Token::LexError(err), err_pos)),
|(result, _)| Some((Token::StringConstant(result), start_pos)), |(result, _)| Some((Token::StringConstant(result), start_pos)),
@ -1389,17 +1372,35 @@ fn get_next_token_inner(
} }
// ` - string literal // ` - string literal
('`', _) => { ('`', _) => {
return parse_string_literal(stream, state, pos, c, false, true, true, true) // Start from the next line if at the end of line
.map_or_else( match stream.peek_next() {
|(err, err_pos)| Some((Token::LexError(err), err_pos)), // `\r - start from next line
|(result, interpolated)| { Some('\r') => {
if interpolated { eat_next(stream, pos);
Some((Token::InterpolatedString(result), start_pos)) pos.new_line();
} else { // `\r\n
Some((Token::StringConstant(result), start_pos)) if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) {
} eat_next(stream, pos);
}, }
); }
// `\n - start from next line
Some('\n') => {
eat_next(stream, pos);
pos.new_line();
}
_ => (),
}
return parse_string_literal(stream, state, pos, c, false, true, true).map_or_else(
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|(result, interpolated)| {
if interpolated {
Some((Token::InterpolatedString(result), start_pos))
} else {
Some((Token::StringConstant(result), start_pos))
}
},
);
} }
// ' - character literal // ' - character literal
@ -1411,20 +1412,19 @@ fn get_next_token_inner(
} }
('\'', _) => { ('\'', _) => {
return Some( return Some(
parse_string_literal(stream, state, pos, c, false, false, false, false) parse_string_literal(stream, state, pos, c, false, false, false).map_or_else(
.map_or_else( |(err, err_pos)| (Token::LexError(err), err_pos),
|(err, err_pos)| (Token::LexError(err), err_pos), |(result, _)| {
|(result, _)| { let mut chars = result.chars();
let mut chars = result.chars(); let first = chars.next().unwrap();
let first = chars.next().unwrap();
if chars.next().is_some() { if chars.next().is_some() {
(Token::LexError(LERR::MalformedChar(result)), start_pos) (Token::LexError(LERR::MalformedChar(result)), start_pos)
} else { } else {
(Token::CharConstant(first), start_pos) (Token::CharConstant(first), start_pos)
} }
}, },
), ),
) )
} }
@ -1923,6 +1923,10 @@ impl<'a> Iterator for TokenIterator<'a> {
let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) { let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
// {EOF} // {EOF}
None => return None, None => return None,
// Unterminated string at EOF
Some((Token::StringConstant(_), _)) if self.state.is_within_text_terminated_by.is_some() => {
return Some((Token::LexError(LERR::UnterminatedString), self.pos));
}
// Reserved keyword/symbol // Reserved keyword/symbol
Some((Token::Reserved(s), pos)) => (match Some((Token::Reserved(s), pos)) => (match
(s.as_str(), self.engine.custom_keywords.contains_key(s.as_str())) (s.as_str(), self.engine.custom_keywords.contains_key(s.as_str()))