Refactor tokenizer.
This commit is contained in:
parent
14746f94ca
commit
31eaf321d0
12
src/lib.rs
12
src/lib.rs
@ -131,15 +131,11 @@ pub use optimize::OptimizationLevel;
|
||||
|
||||
#[cfg(feature = "internals")]
|
||||
#[deprecated(note = "this type is volatile and may change")]
|
||||
pub use token::Token;
|
||||
pub use token::{get_next_token, parse_string_literal, InputStream, Token, TokenizeState};
|
||||
|
||||
#[cfg(feature = "internals")]
|
||||
#[deprecated(note = "this type is volatile and may change")]
|
||||
pub use parser::Expr;
|
||||
|
||||
#[cfg(feature = "internals")]
|
||||
#[deprecated(note = "this type is volatile and may change")]
|
||||
pub use parser::Stmt;
|
||||
pub use parser::{Expr, ReturnType, Stmt};
|
||||
|
||||
#[cfg(feature = "internals")]
|
||||
#[deprecated(note = "this type is volatile and may change")]
|
||||
@ -148,7 +144,3 @@ pub use module::ModuleRef;
|
||||
#[cfg(feature = "internals")]
|
||||
#[deprecated(note = "this type is volatile and may change")]
|
||||
pub use utils::StaticVec;
|
||||
|
||||
#[cfg(feature = "internals")]
|
||||
#[deprecated(note = "this type is volatile and may change")]
|
||||
pub use parser::ReturnType;
|
||||
|
562
src/token.rs
562
src/token.rs
@ -209,6 +209,7 @@ pub enum Token {
|
||||
#[cfg(not(feature = "no_module"))]
|
||||
As,
|
||||
LexError(Box<LexError>),
|
||||
Comment(String),
|
||||
EOF,
|
||||
}
|
||||
|
||||
@ -429,89 +430,46 @@ impl From<Token> for String {
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator on a `Token` stream.
|
||||
pub struct TokenIterator<'a> {
|
||||
/// State of the tokenizer.
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Default)]
|
||||
pub struct TokenizeState {
|
||||
/// Maximum length of a string (0 = unlimited).
|
||||
max_string_size: usize,
|
||||
/// Can the next token be a unary operator?
|
||||
can_be_unary: bool,
|
||||
/// Current position.
|
||||
pos: Position,
|
||||
/// The input character streams.
|
||||
streams: StaticVec<Peekable<Chars<'a>>>,
|
||||
/// Nesting level of the block comment currently being scanned (0 = not inside a block comment).
|
||||
comment_level: usize,
|
||||
/// Return `None` at the end of the stream instead of `Some(Token::EOF)`?
|
||||
end_with_none: bool,
|
||||
/// Include comments?
|
||||
include_comments: bool,
|
||||
}
|
||||
|
||||
impl<'a> TokenIterator<'a> {
|
||||
/// Consume the next character.
|
||||
fn eat_next(&mut self) {
|
||||
self.get_next();
|
||||
self.advance();
|
||||
}
|
||||
/// Trait that encapsulates a peekable character input stream.
|
||||
pub trait InputStream {
|
||||
/// Get the next character
|
||||
fn get_next(&mut self) -> Option<char> {
|
||||
loop {
|
||||
if self.streams.is_empty() {
|
||||
// No more streams
|
||||
return None;
|
||||
} else if let Some(ch) = self.streams[0].next() {
|
||||
// Next character in current stream
|
||||
return Some(ch);
|
||||
} else {
|
||||
// Jump to the next stream
|
||||
let _ = self.streams.remove(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
fn get_next(&mut self) -> Option<char>;
|
||||
/// Peek the next character
|
||||
fn peek_next(&mut self) -> Option<char> {
|
||||
loop {
|
||||
if self.streams.is_empty() {
|
||||
// No more streams
|
||||
return None;
|
||||
} else if let Some(ch) = self.streams[0].peek() {
|
||||
// Next character in current stream
|
||||
return Some(*ch);
|
||||
} else {
|
||||
// Jump to the next stream
|
||||
let _ = self.streams.remove(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Move the current position one character ahead.
|
||||
fn advance(&mut self) {
|
||||
self.pos.advance();
|
||||
}
|
||||
/// Move the current position back one character.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if already at the beginning of a line - cannot rewind to the previous line.
|
||||
fn rewind(&mut self) {
|
||||
self.pos.rewind();
|
||||
}
|
||||
/// Move the current position to the next line.
|
||||
fn new_line(&mut self) {
|
||||
self.pos.new_line()
|
||||
fn peek_next(&mut self) -> Option<char>;
|
||||
}
|
||||
|
||||
/// Parse a string literal wrapped by `enclosing_char`.
|
||||
pub fn parse_string_literal(
|
||||
&mut self,
|
||||
stream: &mut impl InputStream,
|
||||
state: &mut TokenizeState,
|
||||
pos: &mut Position,
|
||||
enclosing_char: char,
|
||||
max_length: usize,
|
||||
) -> Result<String, (LexError, Position)> {
|
||||
let mut result = Vec::new();
|
||||
let mut escape = String::with_capacity(12);
|
||||
|
||||
loop {
|
||||
let next_char = self
|
||||
.get_next()
|
||||
.ok_or((LERR::UnterminatedString, self.pos))?;
|
||||
let next_char = stream.get_next().ok_or((LERR::UnterminatedString, *pos))?;
|
||||
|
||||
self.advance();
|
||||
pos.advance();
|
||||
|
||||
if max_length > 0 && result.len() > max_length {
|
||||
return Err((LexError::StringTooLong(max_length), self.pos));
|
||||
if state.max_string_size > 0 && result.len() > state.max_string_size {
|
||||
return Err((LexError::StringTooLong(state.max_string_size), *pos));
|
||||
}
|
||||
|
||||
match next_char {
|
||||
@ -554,22 +512,22 @@ impl<'a> TokenIterator<'a> {
|
||||
};
|
||||
|
||||
for _ in 0..len {
|
||||
let c = self.get_next().ok_or_else(|| {
|
||||
(LERR::MalformedEscapeSequence(seq.to_string()), self.pos)
|
||||
})?;
|
||||
let c = stream
|
||||
.get_next()
|
||||
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
|
||||
|
||||
seq.push(c);
|
||||
self.advance();
|
||||
pos.advance();
|
||||
|
||||
out_val *= 16;
|
||||
out_val += c.to_digit(16).ok_or_else(|| {
|
||||
(LERR::MalformedEscapeSequence(seq.to_string()), self.pos)
|
||||
})?;
|
||||
out_val += c
|
||||
.to_digit(16)
|
||||
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
|
||||
}
|
||||
|
||||
result.push(
|
||||
char::from_u32(out_val)
|
||||
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq), self.pos))?,
|
||||
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq), *pos))?,
|
||||
);
|
||||
}
|
||||
|
||||
@ -583,14 +541,12 @@ impl<'a> TokenIterator<'a> {
|
||||
ch if enclosing_char == ch && escape.is_empty() => break,
|
||||
|
||||
// Unknown escape sequence
|
||||
_ if !escape.is_empty() => {
|
||||
return Err((LERR::MalformedEscapeSequence(escape), self.pos))
|
||||
}
|
||||
_ if !escape.is_empty() => return Err((LERR::MalformedEscapeSequence(escape), *pos)),
|
||||
|
||||
// Cannot have new-lines inside string literals
|
||||
'\n' => {
|
||||
self.rewind();
|
||||
return Err((LERR::UnterminatedString, self.pos));
|
||||
pos.rewind();
|
||||
return Err((LERR::UnterminatedString, *pos));
|
||||
}
|
||||
|
||||
// All other characters
|
||||
@ -603,25 +559,94 @@ impl<'a> TokenIterator<'a> {
|
||||
|
||||
let s = result.iter().collect::<String>();
|
||||
|
||||
if max_length > 0 && s.len() > max_length {
|
||||
return Err((LexError::StringTooLong(max_length), self.pos));
|
||||
if state.max_string_size > 0 && s.len() > state.max_string_size {
|
||||
return Err((LexError::StringTooLong(state.max_string_size), *pos));
|
||||
}
|
||||
|
||||
Ok(s)
|
||||
}
|
||||
|
||||
/// Consume the next character.
|
||||
fn eat_next(stream: &mut impl InputStream, pos: &mut Position) {
|
||||
stream.get_next();
|
||||
pos.advance();
|
||||
}
|
||||
|
||||
/// Scan for a block comment until the end.
|
||||
fn scan_comment(
|
||||
stream: &mut impl InputStream,
|
||||
state: &mut TokenizeState,
|
||||
pos: &mut Position,
|
||||
comment: &mut String,
|
||||
) {
|
||||
while let Some(c) = stream.get_next() {
|
||||
pos.advance();
|
||||
|
||||
if state.include_comments {
|
||||
comment.push(c);
|
||||
}
|
||||
|
||||
match c {
|
||||
'/' => {
|
||||
if let Some(c2) = stream.get_next() {
|
||||
if state.include_comments {
|
||||
comment.push(c2);
|
||||
}
|
||||
if c2 == '*' {
|
||||
state.comment_level += 1;
|
||||
}
|
||||
}
|
||||
pos.advance();
|
||||
}
|
||||
'*' => {
|
||||
if let Some(c2) = stream.get_next() {
|
||||
if state.include_comments {
|
||||
comment.push(c2);
|
||||
}
|
||||
if c2 == '/' {
|
||||
state.comment_level -= 1;
|
||||
}
|
||||
}
|
||||
pos.advance();
|
||||
}
|
||||
'\n' => pos.new_line(),
|
||||
_ => (),
|
||||
}
|
||||
|
||||
if state.comment_level == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the next token.
|
||||
fn inner_next(&mut self) -> Option<(Token, Position)> {
|
||||
pub fn get_next_token(
|
||||
stream: &mut impl InputStream,
|
||||
state: &mut TokenizeState,
|
||||
pos: &mut Position,
|
||||
) -> Option<(Token, Position)> {
|
||||
// Still inside a comment?
|
||||
if state.comment_level > 0 {
|
||||
let start_pos = *pos;
|
||||
let mut comment = Default::default();
|
||||
scan_comment(stream, state, pos, &mut comment);
|
||||
|
||||
if state.include_comments {
|
||||
println!("Comment ({}): {}", start_pos, comment);
|
||||
return Some((Token::Comment(comment), start_pos));
|
||||
}
|
||||
}
|
||||
|
||||
let mut negated = false;
|
||||
|
||||
while let Some(c) = self.get_next() {
|
||||
self.advance();
|
||||
while let Some(c) = stream.get_next() {
|
||||
pos.advance();
|
||||
|
||||
let pos = self.pos;
|
||||
let start_pos = *pos;
|
||||
|
||||
match (c, self.peek_next().unwrap_or('\0')) {
|
||||
match (c, stream.peek_next().unwrap_or('\0')) {
|
||||
// \n
|
||||
('\n', _) => self.new_line(),
|
||||
('\n', _) => pos.new_line(),
|
||||
|
||||
// digit ...
|
||||
('0'..='9', _) => {
|
||||
@ -629,32 +654,30 @@ impl<'a> TokenIterator<'a> {
|
||||
let mut radix_base: Option<u32> = None;
|
||||
result.push(c);
|
||||
|
||||
while let Some(next_char) = self.peek_next() {
|
||||
while let Some(next_char) = stream.peek_next() {
|
||||
match next_char {
|
||||
'0'..='9' | '_' => {
|
||||
result.push(next_char);
|
||||
self.eat_next();
|
||||
eat_next(stream, pos);
|
||||
}
|
||||
#[cfg(not(feature = "no_float"))]
|
||||
'.' => {
|
||||
result.push(next_char);
|
||||
self.eat_next();
|
||||
while let Some(next_char_in_float) = self.peek_next() {
|
||||
eat_next(stream, pos);
|
||||
while let Some(next_char_in_float) = stream.peek_next() {
|
||||
match next_char_in_float {
|
||||
'0'..='9' | '_' => {
|
||||
result.push(next_char_in_float);
|
||||
self.eat_next();
|
||||
eat_next(stream, pos);
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
// 0x????, 0o????, 0b????
|
||||
ch @ 'x' | ch @ 'X' | ch @ 'o' | ch @ 'O' | ch @ 'b' | ch @ 'B'
|
||||
if c == '0' =>
|
||||
{
|
||||
ch @ 'x' | ch @ 'X' | ch @ 'o' | ch @ 'O' | ch @ 'b' | ch @ 'B' if c == '0' => {
|
||||
result.push(next_char);
|
||||
self.eat_next();
|
||||
eat_next(stream, pos);
|
||||
|
||||
let valid = match ch {
|
||||
'x' | 'X' => [
|
||||
@ -679,13 +702,13 @@ impl<'a> TokenIterator<'a> {
|
||||
_ => unreachable!(),
|
||||
});
|
||||
|
||||
while let Some(next_char_in_escape_seq) = self.peek_next() {
|
||||
while let Some(next_char_in_escape_seq) = stream.peek_next() {
|
||||
if !valid.contains(&next_char_in_escape_seq) {
|
||||
break;
|
||||
}
|
||||
|
||||
result.push(next_char_in_escape_seq);
|
||||
self.eat_next();
|
||||
eat_next(stream, pos);
|
||||
}
|
||||
}
|
||||
|
||||
@ -709,7 +732,7 @@ impl<'a> TokenIterator<'a> {
|
||||
result.into_iter().collect(),
|
||||
)))
|
||||
}),
|
||||
pos,
|
||||
start_pos,
|
||||
));
|
||||
} else {
|
||||
let out: String = result.iter().filter(|&&c| c != '_').collect();
|
||||
@ -725,7 +748,7 @@ impl<'a> TokenIterator<'a> {
|
||||
result.into_iter().collect(),
|
||||
)))
|
||||
}),
|
||||
pos,
|
||||
start_pos,
|
||||
));
|
||||
}
|
||||
}
|
||||
@ -735,11 +758,11 @@ impl<'a> TokenIterator<'a> {
|
||||
let mut result = Vec::new();
|
||||
result.push(c);
|
||||
|
||||
while let Some(next_char) = self.peek_next() {
|
||||
while let Some(next_char) = stream.peek_next() {
|
||||
match next_char {
|
||||
x if x.is_ascii_alphanumeric() || x == '_' => {
|
||||
result.push(x);
|
||||
self.eat_next();
|
||||
eat_next(stream, pos);
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
@ -756,7 +779,7 @@ impl<'a> TokenIterator<'a> {
|
||||
if !is_valid_identifier {
|
||||
return Some((
|
||||
Token::LexError(Box::new(LERR::MalformedIdentifier(identifier))),
|
||||
pos,
|
||||
start_pos,
|
||||
));
|
||||
}
|
||||
|
||||
@ -790,30 +813,24 @@ impl<'a> TokenIterator<'a> {
|
||||
|
||||
_ => Token::Identifier(identifier),
|
||||
},
|
||||
pos,
|
||||
start_pos,
|
||||
));
|
||||
}
|
||||
|
||||
// " - string literal
|
||||
('"', _) => {
|
||||
return self
|
||||
.parse_string_literal('"', self.max_string_size)
|
||||
('"', _) => return parse_string_literal(stream, state, pos, '"')
|
||||
.map_or_else(
|
||||
|err| Some((Token::LexError(Box::new(err.0)), err.1)),
|
||||
|out| Some((Token::StringConst(out), pos)),
|
||||
);
|
||||
}
|
||||
|out| Some((Token::StringConst(out), start_pos)),
|
||||
),
|
||||
|
||||
// ' - character literal
|
||||
('\'', '\'') => {
|
||||
return Some((
|
||||
('\'', '\'') => return Some((
|
||||
Token::LexError(Box::new(LERR::MalformedChar("".to_string()))),
|
||||
pos,
|
||||
));
|
||||
}
|
||||
('\'', _) => {
|
||||
return Some(
|
||||
self.parse_string_literal('\'', self.max_string_size)
|
||||
start_pos,
|
||||
)),
|
||||
('\'', _) => return Some(
|
||||
parse_string_literal(stream, state, pos, '\'')
|
||||
.map_or_else(
|
||||
|err| (Token::LexError(Box::new(err.0)), err.1),
|
||||
|result| {
|
||||
@ -823,283 +840,329 @@ impl<'a> TokenIterator<'a> {
|
||||
if chars.next().is_some() {
|
||||
(
|
||||
Token::LexError(Box::new(LERR::MalformedChar(result))),
|
||||
pos,
|
||||
start_pos,
|
||||
)
|
||||
} else {
|
||||
(Token::CharConstant(first.expect("should be Some")), pos)
|
||||
(Token::CharConstant(first.expect("should be Some")), start_pos)
|
||||
}
|
||||
},
|
||||
),
|
||||
);
|
||||
}
|
||||
),
|
||||
|
||||
// Braces
|
||||
('{', _) => return Some((Token::LeftBrace, pos)),
|
||||
('}', _) => return Some((Token::RightBrace, pos)),
|
||||
('{', _) => return Some((Token::LeftBrace, start_pos)),
|
||||
('}', _) => return Some((Token::RightBrace, start_pos)),
|
||||
|
||||
// Parentheses
|
||||
('(', _) => return Some((Token::LeftParen, pos)),
|
||||
(')', _) => return Some((Token::RightParen, pos)),
|
||||
('(', _) => return Some((Token::LeftParen, start_pos)),
|
||||
(')', _) => return Some((Token::RightParen, start_pos)),
|
||||
|
||||
// Indexing
|
||||
('[', _) => return Some((Token::LeftBracket, pos)),
|
||||
(']', _) => return Some((Token::RightBracket, pos)),
|
||||
('[', _) => return Some((Token::LeftBracket, start_pos)),
|
||||
(']', _) => return Some((Token::RightBracket, start_pos)),
|
||||
|
||||
// Map literal
|
||||
#[cfg(not(feature = "no_object"))]
|
||||
('#', '{') => {
|
||||
self.eat_next();
|
||||
return Some((Token::MapStart, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::MapStart, start_pos));
|
||||
}
|
||||
|
||||
// Operators
|
||||
('+', '=') => {
|
||||
self.eat_next();
|
||||
return Some((Token::PlusAssign, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::PlusAssign, start_pos));
|
||||
}
|
||||
('+', _) if self.can_be_unary => return Some((Token::UnaryPlus, pos)),
|
||||
('+', _) => return Some((Token::Plus, pos)),
|
||||
('+', _) if state.can_be_unary => return Some((Token::UnaryPlus, start_pos)),
|
||||
('+', _) => return Some((Token::Plus, start_pos)),
|
||||
|
||||
('-', '0'..='9') if self.can_be_unary => negated = true,
|
||||
('-', '0'..='9') => return Some((Token::Minus, pos)),
|
||||
('-', '0'..='9') if state.can_be_unary => negated = true,
|
||||
('-', '0'..='9') => return Some((Token::Minus, start_pos)),
|
||||
('-', '=') => {
|
||||
self.eat_next();
|
||||
return Some((Token::MinusAssign, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::MinusAssign, start_pos));
|
||||
}
|
||||
('-', '>') => {
|
||||
return Some((
|
||||
('-', '>') => return Some((
|
||||
Token::LexError(Box::new(LERR::ImproperSymbol(
|
||||
"'->' is not a valid symbol. This is not C or C++!".to_string(),
|
||||
))),
|
||||
pos,
|
||||
))
|
||||
}
|
||||
('-', _) if self.can_be_unary => return Some((Token::UnaryMinus, pos)),
|
||||
('-', _) => return Some((Token::Minus, pos)),
|
||||
start_pos,
|
||||
)),
|
||||
('-', _) if state.can_be_unary => return Some((Token::UnaryMinus, start_pos)),
|
||||
('-', _) => return Some((Token::Minus, start_pos)),
|
||||
|
||||
('*', '=') => {
|
||||
self.eat_next();
|
||||
return Some((Token::MultiplyAssign, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::MultiplyAssign, start_pos));
|
||||
}
|
||||
('*', _) => return Some((Token::Multiply, pos)),
|
||||
('*', _) => return Some((Token::Multiply, start_pos)),
|
||||
|
||||
// Comments
|
||||
('/', '/') => {
|
||||
self.eat_next();
|
||||
eat_next(stream, pos);
|
||||
|
||||
while let Some(c) = self.get_next() {
|
||||
let mut comment = if state.include_comments {
|
||||
"//".to_string()
|
||||
} else {
|
||||
Default::default()
|
||||
};
|
||||
|
||||
while let Some(c) = stream.get_next() {
|
||||
if c == '\n' {
|
||||
self.new_line();
|
||||
pos.new_line();
|
||||
break;
|
||||
}
|
||||
|
||||
self.advance();
|
||||
if state.include_comments {
|
||||
comment.push(c);
|
||||
}
|
||||
pos.advance();
|
||||
}
|
||||
|
||||
if state.include_comments {
|
||||
println!("Comment ({}): {}", start_pos, comment);
|
||||
return Some((Token::Comment(comment), start_pos));
|
||||
}
|
||||
}
|
||||
('/', '*') => {
|
||||
let mut level = 1;
|
||||
state.comment_level = 1;
|
||||
|
||||
self.eat_next();
|
||||
eat_next(stream, pos);
|
||||
|
||||
while let Some(c) = self.get_next() {
|
||||
self.advance();
|
||||
let mut comment = if state.include_comments {
|
||||
"/*".to_string()
|
||||
} else {
|
||||
Default::default()
|
||||
};
|
||||
scan_comment(stream, state, pos, &mut comment);
|
||||
|
||||
match c {
|
||||
'/' => {
|
||||
if self.get_next() == Some('*') {
|
||||
level += 1;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
'*' => {
|
||||
if self.get_next() == Some('/') {
|
||||
level -= 1;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
'\n' => self.new_line(),
|
||||
_ => (),
|
||||
}
|
||||
|
||||
if level == 0 {
|
||||
break;
|
||||
}
|
||||
if state.include_comments {
|
||||
println!("Comment ({}): {}", start_pos, comment);
|
||||
return Some((Token::Comment(comment), start_pos));
|
||||
}
|
||||
}
|
||||
|
||||
('/', '=') => {
|
||||
self.eat_next();
|
||||
return Some((Token::DivideAssign, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::DivideAssign, start_pos));
|
||||
}
|
||||
('/', _) => return Some((Token::Divide, pos)),
|
||||
('/', _) => return Some((Token::Divide, start_pos)),
|
||||
|
||||
(';', _) => return Some((Token::SemiColon, pos)),
|
||||
(',', _) => return Some((Token::Comma, pos)),
|
||||
('.', _) => return Some((Token::Period, pos)),
|
||||
(';', _) => return Some((Token::SemiColon, start_pos)),
|
||||
(',', _) => return Some((Token::Comma, start_pos)),
|
||||
('.', _) => return Some((Token::Period, start_pos)),
|
||||
|
||||
('=', '=') => {
|
||||
self.eat_next();
|
||||
eat_next(stream, pos);
|
||||
|
||||
// Warn against `===`
|
||||
if self.peek_next() == Some('=') {
|
||||
if stream.peek_next() == Some('=') {
|
||||
return Some((
|
||||
Token::LexError(Box::new(LERR::ImproperSymbol(
|
||||
"'===' is not a valid operator. This is not JavaScript! Should it be '=='?"
|
||||
.to_string(),
|
||||
))),
|
||||
pos,
|
||||
start_pos,
|
||||
));
|
||||
}
|
||||
|
||||
return Some((Token::EqualsTo, pos));
|
||||
return Some((Token::EqualsTo, start_pos));
|
||||
}
|
||||
('=', '>') => {
|
||||
return Some((
|
||||
('=', '>') => return Some((
|
||||
Token::LexError(Box::new(LERR::ImproperSymbol(
|
||||
"'=>' is not a valid symbol. This is not Rust! Should it be '>='?"
|
||||
.to_string(),
|
||||
))),
|
||||
pos,
|
||||
))
|
||||
}
|
||||
('=', _) => return Some((Token::Equals, pos)),
|
||||
start_pos,
|
||||
)),
|
||||
('=', _) => return Some((Token::Equals, start_pos)),
|
||||
|
||||
(':', ':') => {
|
||||
self.eat_next();
|
||||
return Some((Token::DoubleColon, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::DoubleColon, start_pos));
|
||||
}
|
||||
(':', '=') => {
|
||||
return Some((
|
||||
(':', '=') => return Some((
|
||||
Token::LexError(Box::new(LERR::ImproperSymbol(
|
||||
"':=' is not a valid assignment operator. This is not Pascal! Should it be simply '='?"
|
||||
.to_string(),
|
||||
))),
|
||||
pos,
|
||||
))
|
||||
}
|
||||
(':', _) => return Some((Token::Colon, pos)),
|
||||
start_pos,
|
||||
)),
|
||||
(':', _) => return Some((Token::Colon, start_pos)),
|
||||
|
||||
('<', '=') => {
|
||||
self.eat_next();
|
||||
return Some((Token::LessThanEqualsTo, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::LessThanEqualsTo, start_pos));
|
||||
}
|
||||
('<', '-') => {
|
||||
return Some((
|
||||
('<', '-') => return Some((
|
||||
Token::LexError(Box::new(LERR::ImproperSymbol(
|
||||
"'<-' is not a valid symbol. Should it be '<='?".to_string(),
|
||||
))),
|
||||
pos,
|
||||
))
|
||||
}
|
||||
start_pos,
|
||||
)),
|
||||
('<', '<') => {
|
||||
self.eat_next();
|
||||
eat_next(stream, pos);
|
||||
|
||||
return Some((
|
||||
if self.peek_next() == Some('=') {
|
||||
self.eat_next();
|
||||
if stream.peek_next() == Some('=') {
|
||||
eat_next(stream, pos);
|
||||
Token::LeftShiftAssign
|
||||
} else {
|
||||
Token::LeftShift
|
||||
},
|
||||
pos,
|
||||
start_pos,
|
||||
));
|
||||
}
|
||||
('<', _) => return Some((Token::LessThan, pos)),
|
||||
('<', _) => return Some((Token::LessThan, start_pos)),
|
||||
|
||||
('>', '=') => {
|
||||
self.eat_next();
|
||||
return Some((Token::GreaterThanEqualsTo, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::GreaterThanEqualsTo, start_pos));
|
||||
}
|
||||
('>', '>') => {
|
||||
self.eat_next();
|
||||
eat_next(stream, pos);
|
||||
|
||||
return Some((
|
||||
if self.peek_next() == Some('=') {
|
||||
self.eat_next();
|
||||
if stream.peek_next() == Some('=') {
|
||||
eat_next(stream, pos);
|
||||
Token::RightShiftAssign
|
||||
} else {
|
||||
Token::RightShift
|
||||
},
|
||||
pos,
|
||||
start_pos,
|
||||
));
|
||||
}
|
||||
('>', _) => return Some((Token::GreaterThan, pos)),
|
||||
('>', _) => return Some((Token::GreaterThan, start_pos)),
|
||||
|
||||
('!', '=') => {
|
||||
self.eat_next();
|
||||
eat_next(stream, pos);
|
||||
|
||||
// Warn against `!==`
|
||||
if self.peek_next() == Some('=') {
|
||||
if stream.peek_next() == Some('=') {
|
||||
return Some((
|
||||
Token::LexError(Box::new(LERR::ImproperSymbol(
|
||||
"'!==' is not a valid operator. This is not JavaScript! Should it be '!='?"
|
||||
.to_string(),
|
||||
))),
|
||||
pos,
|
||||
start_pos,
|
||||
));
|
||||
}
|
||||
|
||||
return Some((Token::NotEqualsTo, pos));
|
||||
return Some((Token::NotEqualsTo, start_pos));
|
||||
}
|
||||
('!', _) => return Some((Token::Bang, pos)),
|
||||
('!', _) => return Some((Token::Bang, start_pos)),
|
||||
|
||||
('|', '|') => {
|
||||
self.eat_next();
|
||||
return Some((Token::Or, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::Or, start_pos));
|
||||
}
|
||||
('|', '=') => {
|
||||
self.eat_next();
|
||||
return Some((Token::OrAssign, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::OrAssign, start_pos));
|
||||
}
|
||||
('|', _) => return Some((Token::Pipe, pos)),
|
||||
('|', _) => return Some((Token::Pipe, start_pos)),
|
||||
|
||||
('&', '&') => {
|
||||
self.eat_next();
|
||||
return Some((Token::And, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::And, start_pos));
|
||||
}
|
||||
('&', '=') => {
|
||||
self.eat_next();
|
||||
return Some((Token::AndAssign, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::AndAssign, start_pos));
|
||||
}
|
||||
('&', _) => return Some((Token::Ampersand, pos)),
|
||||
('&', _) => return Some((Token::Ampersand, start_pos)),
|
||||
|
||||
('^', '=') => {
|
||||
self.eat_next();
|
||||
return Some((Token::XOrAssign, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::XOrAssign, start_pos));
|
||||
}
|
||||
('^', _) => return Some((Token::XOr, pos)),
|
||||
('^', _) => return Some((Token::XOr, start_pos)),
|
||||
|
||||
('%', '=') => {
|
||||
self.eat_next();
|
||||
return Some((Token::ModuloAssign, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::ModuloAssign, start_pos));
|
||||
}
|
||||
('%', _) => return Some((Token::Modulo, pos)),
|
||||
('%', _) => return Some((Token::Modulo, start_pos)),
|
||||
|
||||
('~', '=') => {
|
||||
self.eat_next();
|
||||
return Some((Token::PowerOfAssign, pos));
|
||||
eat_next(stream, pos);
|
||||
return Some((Token::PowerOfAssign, start_pos));
|
||||
}
|
||||
('~', _) => return Some((Token::PowerOf, pos)),
|
||||
('~', _) => return Some((Token::PowerOf, start_pos)),
|
||||
|
||||
('\0', _) => unreachable!(),
|
||||
|
||||
(ch, _) if ch.is_whitespace() => (),
|
||||
(ch, _) => return Some((Token::LexError(Box::new(LERR::UnexpectedChar(ch))), pos)),
|
||||
(ch, _) => return Some((Token::LexError(Box::new(LERR::UnexpectedChar(ch))), start_pos)),
|
||||
}
|
||||
}
|
||||
|
||||
self.advance();
|
||||
Some((Token::EOF, self.pos))
|
||||
pos.advance();
|
||||
|
||||
if state.end_with_none {
|
||||
None
|
||||
} else {
|
||||
Some((Token::EOF, *pos))
|
||||
}
|
||||
}
|
||||
|
||||
/// A type that implements the `InputStream` trait.
|
||||
/// Multiple character streams are joined together to form one single stream.
|
||||
pub struct MultiInputsStream<'a> {
|
||||
/// The input character streams.
|
||||
streams: StaticVec<Peekable<Chars<'a>>>,
|
||||
}
|
||||
|
||||
impl InputStream for MultiInputsStream<'_> {
|
||||
/// Get the next character
|
||||
fn get_next(&mut self) -> Option<char> {
|
||||
loop {
|
||||
if self.streams.is_empty() {
|
||||
// No more streams
|
||||
return None;
|
||||
} else if let Some(ch) = self.streams[0].next() {
|
||||
// Next character in current stream
|
||||
return Some(ch);
|
||||
} else {
|
||||
// Jump to the next stream
|
||||
let _ = self.streams.remove(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Peek the next character
|
||||
fn peek_next(&mut self) -> Option<char> {
|
||||
loop {
|
||||
if self.streams.is_empty() {
|
||||
// No more streams
|
||||
return None;
|
||||
} else if let Some(ch) = self.streams[0].peek() {
|
||||
// Next character in current stream
|
||||
return Some(*ch);
|
||||
} else {
|
||||
// Jump to the next stream
|
||||
let _ = self.streams.remove(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator on a `Token` stream.
|
||||
pub struct TokenIterator<'a> {
|
||||
/// Current state.
|
||||
state: TokenizeState,
|
||||
/// Current position.
|
||||
pos: Position,
|
||||
/// Input character stream.
|
||||
stream: MultiInputsStream<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for TokenIterator<'a> {
|
||||
type Item = (Token, Position);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.inner_next().map(|x| {
|
||||
// Save the last token
|
||||
self.can_be_unary = x.0.is_next_unary();
|
||||
get_next_token(&mut self.stream, &mut self.state, &mut self.pos).map(|x| {
|
||||
// Save the last token's state
|
||||
self.state.can_be_unary = x.0.is_next_unary();
|
||||
x
|
||||
})
|
||||
}
|
||||
@ -1108,9 +1171,16 @@ impl<'a> Iterator for TokenIterator<'a> {
|
||||
/// Tokenize an input text stream.
|
||||
pub fn lex<'a>(input: &'a [&'a str], max_string_size: usize) -> TokenIterator<'a> {
|
||||
TokenIterator {
|
||||
state: TokenizeState {
|
||||
max_string_size,
|
||||
can_be_unary: true,
|
||||
comment_level: 0,
|
||||
end_with_none: false,
|
||||
include_comments: false,
|
||||
},
|
||||
pos: Position::new(1, 0),
|
||||
stream: MultiInputsStream {
|
||||
streams: input.iter().map(|s| s.chars().peekable()).collect(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user