diff --git a/src/token.rs b/src/token.rs index e0aaa36c..7f454667 100644 --- a/src/token.rs +++ b/src/token.rs @@ -878,39 +878,31 @@ pub fn parse_string_literal( termination_char: char, continuation: bool, verbatim: bool, - skip_first_new_line: bool, allow_interpolation: bool, ) -> Result<(String, bool), (LexError, Position)> { - let mut result: smallvec::SmallVec<[char; 16]> = Default::default(); - let mut escape: smallvec::SmallVec<[char; 12]> = Default::default(); + let mut result = String::with_capacity(12); + let mut escape = String::with_capacity(12); let start = *pos; let mut skip_whitespace_until = 0; let mut interpolated = false; - if skip_first_new_line { - // Start from the next line if at the end of line - match stream.peek_next() { - // `\r - start from next line - Some('\r') => { - eat_next(stream, pos); - // `\r\n - if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) { - eat_next(stream, pos); - } - } - // `\n - start from next line - Some('\n') => { - eat_next(stream, pos); - } - _ => (), - } - } + state.is_within_text_terminated_by = Some(termination_char); loop { - let next_char = stream.get_next().ok_or((LERR::UnterminatedString, start))?; - - pos.advance(); + let next_char = match stream.get_next() { + Some(ch) => { + pos.advance(); + ch + } + None => { + if !continuation || escape != "\\" { + result += &escape; + } + pos.advance(); + break; + } + }; // String interpolation? if allow_interpolation @@ -919,6 +911,7 @@ pub fn parse_string_literal( && stream.peek_next().map(|ch| ch == '{').unwrap_or(false) { interpolated = true; + state.is_within_text_terminated_by = None; break; } @@ -970,31 +963,23 @@ pub fn parse_string_literal( }; for _ in 0..len { - let c = stream.get_next().ok_or_else(|| { - ( - LERR::MalformedEscapeSequence(seq.iter().cloned().collect()), - *pos, - ) - })?; + let c = stream + .get_next() + .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?; seq.push(c); pos.advance(); out_val *= 16; - out_val += c.to_digit(16).ok_or_else(|| { - ( - LERR::MalformedEscapeSequence(seq.iter().cloned().collect()), - *pos, - ) - })?; + out_val += c + .to_digit(16) + .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?; } - result.push(char::from_u32(out_val).ok_or_else(|| { - ( - LERR::MalformedEscapeSequence(seq.into_iter().collect()), - *pos, - ) - })?); + result.push( + char::from_u32(out_val) + .ok_or_else(|| (LERR::MalformedEscapeSequence(seq), *pos))?, + ); } // \{termination_char} - escaped @@ -1004,7 +989,10 @@ pub fn parse_string_literal( } // Close wrapper - _ if termination_char == next_char && escape.is_empty() => break, + _ if termination_char == next_char && escape.is_empty() => { + state.is_within_text_terminated_by = None; + break; + } // Line continuation '\n' if continuation && !escape.is_empty() => { @@ -1017,7 +1005,7 @@ pub fn parse_string_literal( // Cannot have new-lines inside non-multi-line string literals '\n' if !escape.is_empty() || !verbatim => { pos.rewind(); - return Err((LERR::UnterminatedString, start)); + return Err((LERR::UnterminatedString, *pos)); } '\n' => { @@ -1029,10 +1017,7 @@ pub fn parse_string_literal( _ if !escape.is_empty() => { escape.push(next_char); - return Err(( - LERR::MalformedEscapeSequence(escape.into_iter().collect()), - *pos, - )); + return Err((LERR::MalformedEscapeSequence(escape), *pos)); } // Whitespace to skip @@ -1047,15 +1032,13 @@ pub fn parse_string_literal( } } - let s = result.iter().collect::(); - if let Some(max) = state.max_string_size { - if s.len() > max.get() { + if result.len() > max.get() { return Err((LexError::StringTooLong(max.get()), *pos)); } } - Ok((s, interpolated)) + Ok((result, interpolated)) } /// Consume the next character. @@ -1205,7 +1188,7 @@ fn get_next_token_inner( if let Some(ch) = state.is_within_text_terminated_by.take() { let start_pos = *pos; - return parse_string_literal(stream, state, pos, ch, false, true, true, true).map_or_else( + return parse_string_literal(stream, state, pos, ch, false, true, true).map_or_else( |(err, err_pos)| Some((Token::LexError(err), err_pos)), |(result, interpolated)| { if interpolated { @@ -1381,7 +1364,7 @@ fn get_next_token_inner( // " - string literal ('"', _) => { - return parse_string_literal(stream, state, pos, c, true, false, false, false) + return parse_string_literal(stream, state, pos, c, true, false, false) .map_or_else( |(err, err_pos)| Some((Token::LexError(err), err_pos)), |(result, _)| Some((Token::StringConstant(result), start_pos)), @@ -1389,17 +1372,35 @@ fn get_next_token_inner( } // ` - string literal ('`', _) => { - return parse_string_literal(stream, state, pos, c, false, true, true, true) - .map_or_else( - |(err, err_pos)| Some((Token::LexError(err), err_pos)), - |(result, interpolated)| { - if interpolated { - Some((Token::InterpolatedString(result), start_pos)) - } else { - Some((Token::StringConstant(result), start_pos)) - } - }, - ); + // Start from the next line if at the end of line + match stream.peek_next() { + // `\r - start from next line + Some('\r') => { + eat_next(stream, pos); + pos.new_line(); + // `\r\n + if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) { + eat_next(stream, pos); + } + } + // `\n - start from next line + Some('\n') => { + eat_next(stream, pos); + pos.new_line(); + } + _ => (), + } + + return parse_string_literal(stream, state, pos, c, false, true, true).map_or_else( + |(err, err_pos)| Some((Token::LexError(err), err_pos)), + |(result, interpolated)| { + if interpolated { + Some((Token::InterpolatedString(result), start_pos)) + } else { + Some((Token::StringConstant(result), start_pos)) + } + }, + ); } // ' - character literal @@ -1411,20 +1412,19 @@ fn get_next_token_inner( } ('\'', _) => { return Some( - parse_string_literal(stream, state, pos, c, false, false, false, false) - .map_or_else( - |(err, err_pos)| (Token::LexError(err), err_pos), - |(result, _)| { - let mut chars = result.chars(); - let first = chars.next().unwrap(); + parse_string_literal(stream, state, pos, c, false, false, false).map_or_else( + |(err, err_pos)| (Token::LexError(err), err_pos), + |(result, _)| { + let mut chars = result.chars(); + let first = chars.next().unwrap(); - if chars.next().is_some() { - (Token::LexError(LERR::MalformedChar(result)), start_pos) - } else { - (Token::CharConstant(first), start_pos) - } - }, - ), + if chars.next().is_some() { + (Token::LexError(LERR::MalformedChar(result)), start_pos) + } else { + (Token::CharConstant(first), start_pos) + } + }, + ), ) } @@ -1923,6 +1923,10 @@ impl<'a> Iterator for TokenIterator<'a> { let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) { // {EOF} None => return None, + // Unterminated string at EOF + Some((Token::StringConstant(_), _)) if self.state.is_within_text_terminated_by.is_some() => { + return Some((Token::LexError(LERR::UnterminatedString), self.pos)); + } // Reserved keyword/symbol Some((Token::Reserved(s), pos)) => (match (s.as_str(), self.engine.custom_keywords.contains_key(s.as_str()))