Unify string continuation parsing.
This commit is contained in:
parent
4aaf957d73
commit
42555ac732
160
src/token.rs
160
src/token.rs
@ -878,39 +878,31 @@ pub fn parse_string_literal(
|
|||||||
termination_char: char,
|
termination_char: char,
|
||||||
continuation: bool,
|
continuation: bool,
|
||||||
verbatim: bool,
|
verbatim: bool,
|
||||||
skip_first_new_line: bool,
|
|
||||||
allow_interpolation: bool,
|
allow_interpolation: bool,
|
||||||
) -> Result<(String, bool), (LexError, Position)> {
|
) -> Result<(String, bool), (LexError, Position)> {
|
||||||
let mut result: smallvec::SmallVec<[char; 16]> = Default::default();
|
let mut result = String::with_capacity(12);
|
||||||
let mut escape: smallvec::SmallVec<[char; 12]> = Default::default();
|
let mut escape = String::with_capacity(12);
|
||||||
|
|
||||||
let start = *pos;
|
let start = *pos;
|
||||||
let mut skip_whitespace_until = 0;
|
let mut skip_whitespace_until = 0;
|
||||||
let mut interpolated = false;
|
let mut interpolated = false;
|
||||||
|
|
||||||
if skip_first_new_line {
|
state.is_within_text_terminated_by = Some(termination_char);
|
||||||
// Start from the next line if at the end of line
|
|
||||||
match stream.peek_next() {
|
|
||||||
// `\r - start from next line
|
|
||||||
Some('\r') => {
|
|
||||||
eat_next(stream, pos);
|
|
||||||
// `\r\n
|
|
||||||
if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) {
|
|
||||||
eat_next(stream, pos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// `\n - start from next line
|
|
||||||
Some('\n') => {
|
|
||||||
eat_next(stream, pos);
|
|
||||||
}
|
|
||||||
_ => (),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let next_char = stream.get_next().ok_or((LERR::UnterminatedString, start))?;
|
let next_char = match stream.get_next() {
|
||||||
|
Some(ch) => {
|
||||||
pos.advance();
|
pos.advance();
|
||||||
|
ch
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
if !continuation || escape != "\\" {
|
||||||
|
result += &escape;
|
||||||
|
}
|
||||||
|
pos.advance();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// String interpolation?
|
// String interpolation?
|
||||||
if allow_interpolation
|
if allow_interpolation
|
||||||
@ -919,6 +911,7 @@ pub fn parse_string_literal(
|
|||||||
&& stream.peek_next().map(|ch| ch == '{').unwrap_or(false)
|
&& stream.peek_next().map(|ch| ch == '{').unwrap_or(false)
|
||||||
{
|
{
|
||||||
interpolated = true;
|
interpolated = true;
|
||||||
|
state.is_within_text_terminated_by = None;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -970,31 +963,23 @@ pub fn parse_string_literal(
|
|||||||
};
|
};
|
||||||
|
|
||||||
for _ in 0..len {
|
for _ in 0..len {
|
||||||
let c = stream.get_next().ok_or_else(|| {
|
let c = stream
|
||||||
(
|
.get_next()
|
||||||
LERR::MalformedEscapeSequence(seq.iter().cloned().collect()),
|
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
|
||||||
*pos,
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
|
|
||||||
seq.push(c);
|
seq.push(c);
|
||||||
pos.advance();
|
pos.advance();
|
||||||
|
|
||||||
out_val *= 16;
|
out_val *= 16;
|
||||||
out_val += c.to_digit(16).ok_or_else(|| {
|
out_val += c
|
||||||
(
|
.to_digit(16)
|
||||||
LERR::MalformedEscapeSequence(seq.iter().cloned().collect()),
|
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
|
||||||
*pos,
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
result.push(char::from_u32(out_val).ok_or_else(|| {
|
result.push(
|
||||||
(
|
char::from_u32(out_val)
|
||||||
LERR::MalformedEscapeSequence(seq.into_iter().collect()),
|
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq), *pos))?,
|
||||||
*pos,
|
);
|
||||||
)
|
|
||||||
})?);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// \{termination_char} - escaped
|
// \{termination_char} - escaped
|
||||||
@ -1004,7 +989,10 @@ pub fn parse_string_literal(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Close wrapper
|
// Close wrapper
|
||||||
_ if termination_char == next_char && escape.is_empty() => break,
|
_ if termination_char == next_char && escape.is_empty() => {
|
||||||
|
state.is_within_text_terminated_by = None;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// Line continuation
|
// Line continuation
|
||||||
'\n' if continuation && !escape.is_empty() => {
|
'\n' if continuation && !escape.is_empty() => {
|
||||||
@ -1017,7 +1005,7 @@ pub fn parse_string_literal(
|
|||||||
// Cannot have new-lines inside non-multi-line string literals
|
// Cannot have new-lines inside non-multi-line string literals
|
||||||
'\n' if !escape.is_empty() || !verbatim => {
|
'\n' if !escape.is_empty() || !verbatim => {
|
||||||
pos.rewind();
|
pos.rewind();
|
||||||
return Err((LERR::UnterminatedString, start));
|
return Err((LERR::UnterminatedString, *pos));
|
||||||
}
|
}
|
||||||
|
|
||||||
'\n' => {
|
'\n' => {
|
||||||
@ -1029,10 +1017,7 @@ pub fn parse_string_literal(
|
|||||||
_ if !escape.is_empty() => {
|
_ if !escape.is_empty() => {
|
||||||
escape.push(next_char);
|
escape.push(next_char);
|
||||||
|
|
||||||
return Err((
|
return Err((LERR::MalformedEscapeSequence(escape), *pos));
|
||||||
LERR::MalformedEscapeSequence(escape.into_iter().collect()),
|
|
||||||
*pos,
|
|
||||||
));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Whitespace to skip
|
// Whitespace to skip
|
||||||
@ -1047,15 +1032,13 @@ pub fn parse_string_literal(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let s = result.iter().collect::<String>();
|
|
||||||
|
|
||||||
if let Some(max) = state.max_string_size {
|
if let Some(max) = state.max_string_size {
|
||||||
if s.len() > max.get() {
|
if result.len() > max.get() {
|
||||||
return Err((LexError::StringTooLong(max.get()), *pos));
|
return Err((LexError::StringTooLong(max.get()), *pos));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((s, interpolated))
|
Ok((result, interpolated))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Consume the next character.
|
/// Consume the next character.
|
||||||
@ -1205,7 +1188,7 @@ fn get_next_token_inner(
|
|||||||
if let Some(ch) = state.is_within_text_terminated_by.take() {
|
if let Some(ch) = state.is_within_text_terminated_by.take() {
|
||||||
let start_pos = *pos;
|
let start_pos = *pos;
|
||||||
|
|
||||||
return parse_string_literal(stream, state, pos, ch, false, true, true, true).map_or_else(
|
return parse_string_literal(stream, state, pos, ch, false, true, true).map_or_else(
|
||||||
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|
||||||
|(result, interpolated)| {
|
|(result, interpolated)| {
|
||||||
if interpolated {
|
if interpolated {
|
||||||
@ -1381,7 +1364,7 @@ fn get_next_token_inner(
|
|||||||
|
|
||||||
// " - string literal
|
// " - string literal
|
||||||
('"', _) => {
|
('"', _) => {
|
||||||
return parse_string_literal(stream, state, pos, c, true, false, false, false)
|
return parse_string_literal(stream, state, pos, c, true, false, false)
|
||||||
.map_or_else(
|
.map_or_else(
|
||||||
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|
||||||
|(result, _)| Some((Token::StringConstant(result), start_pos)),
|
|(result, _)| Some((Token::StringConstant(result), start_pos)),
|
||||||
@ -1389,17 +1372,35 @@ fn get_next_token_inner(
|
|||||||
}
|
}
|
||||||
// ` - string literal
|
// ` - string literal
|
||||||
('`', _) => {
|
('`', _) => {
|
||||||
return parse_string_literal(stream, state, pos, c, false, true, true, true)
|
// Start from the next line if at the end of line
|
||||||
.map_or_else(
|
match stream.peek_next() {
|
||||||
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|
// `\r - start from next line
|
||||||
|(result, interpolated)| {
|
Some('\r') => {
|
||||||
if interpolated {
|
eat_next(stream, pos);
|
||||||
Some((Token::InterpolatedString(result), start_pos))
|
pos.new_line();
|
||||||
} else {
|
// `\r\n
|
||||||
Some((Token::StringConstant(result), start_pos))
|
if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) {
|
||||||
}
|
eat_next(stream, pos);
|
||||||
},
|
}
|
||||||
);
|
}
|
||||||
|
// `\n - start from next line
|
||||||
|
Some('\n') => {
|
||||||
|
eat_next(stream, pos);
|
||||||
|
pos.new_line();
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
|
||||||
|
return parse_string_literal(stream, state, pos, c, false, true, true).map_or_else(
|
||||||
|
|(err, err_pos)| Some((Token::LexError(err), err_pos)),
|
||||||
|
|(result, interpolated)| {
|
||||||
|
if interpolated {
|
||||||
|
Some((Token::InterpolatedString(result), start_pos))
|
||||||
|
} else {
|
||||||
|
Some((Token::StringConstant(result), start_pos))
|
||||||
|
}
|
||||||
|
},
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ' - character literal
|
// ' - character literal
|
||||||
@ -1411,20 +1412,19 @@ fn get_next_token_inner(
|
|||||||
}
|
}
|
||||||
('\'', _) => {
|
('\'', _) => {
|
||||||
return Some(
|
return Some(
|
||||||
parse_string_literal(stream, state, pos, c, false, false, false, false)
|
parse_string_literal(stream, state, pos, c, false, false, false).map_or_else(
|
||||||
.map_or_else(
|
|(err, err_pos)| (Token::LexError(err), err_pos),
|
||||||
|(err, err_pos)| (Token::LexError(err), err_pos),
|
|(result, _)| {
|
||||||
|(result, _)| {
|
let mut chars = result.chars();
|
||||||
let mut chars = result.chars();
|
let first = chars.next().unwrap();
|
||||||
let first = chars.next().unwrap();
|
|
||||||
|
|
||||||
if chars.next().is_some() {
|
if chars.next().is_some() {
|
||||||
(Token::LexError(LERR::MalformedChar(result)), start_pos)
|
(Token::LexError(LERR::MalformedChar(result)), start_pos)
|
||||||
} else {
|
} else {
|
||||||
(Token::CharConstant(first), start_pos)
|
(Token::CharConstant(first), start_pos)
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1923,6 +1923,10 @@ impl<'a> Iterator for TokenIterator<'a> {
|
|||||||
let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
|
let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
|
||||||
// {EOF}
|
// {EOF}
|
||||||
None => return None,
|
None => return None,
|
||||||
|
// Unterminated string at EOF
|
||||||
|
Some((Token::StringConstant(_), _)) if self.state.is_within_text_terminated_by.is_some() => {
|
||||||
|
return Some((Token::LexError(LERR::UnterminatedString), self.pos));
|
||||||
|
}
|
||||||
// Reserved keyword/symbol
|
// Reserved keyword/symbol
|
||||||
Some((Token::Reserved(s), pos)) => (match
|
Some((Token::Reserved(s), pos)) => (match
|
||||||
(s.as_str(), self.engine.custom_keywords.contains_key(s.as_str()))
|
(s.as_str(), self.engine.custom_keywords.contains_key(s.as_str()))
|
||||||
|
Loading…
Reference in New Issue
Block a user