From 31eaf321d0237322586487cb980140216afa22eb Mon Sep 17 00:00:00 2001
From: Stephen Chung <Stephen.Chung@intexact.com>
Date: Fri, 26 Jun 2020 19:44:50 +0800
Subject: [PATCH] Refactor tokenizer.

---
 src/lib.rs   |   12 +-
 src/token.rs | 1330 ++++++++++++++++++++++++++------------------------
 2 files changed, 702 insertions(+), 640 deletions(-)
diff --git a/src/lib.rs b/src/lib.rs
index 04c49e42..8ebea050 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -131,15 +131,11 @@ pub use optimize::OptimizationLevel;
 
 #[cfg(feature = "internals")]
 #[deprecated(note = "this type is volatile and may change")]
-pub use token::Token;
+pub use token::{get_next_token, parse_string_literal, InputStream, Token, TokenizeState};
 
 #[cfg(feature = "internals")]
 #[deprecated(note = "this type is volatile and may change")]
-pub use parser::Expr;
-
-#[cfg(feature = "internals")]
-#[deprecated(note = "this type is volatile and may change")]
-pub use parser::Stmt;
+pub use parser::{Expr, ReturnType, Stmt};
 
 #[cfg(feature = "internals")]
 #[deprecated(note = "this type is volatile and may change")]
@@ -148,7 +144,3 @@ pub use module::ModuleRef;
 #[cfg(feature = "internals")]
 #[deprecated(note = "this type is volatile and may change")]
 pub use utils::StaticVec;
-
-#[cfg(feature = "internals")]
-#[deprecated(note = "this type is volatile and may change")]
-pub use parser::ReturnType;
diff --git a/src/token.rs b/src/token.rs
index 76019d3c..1e53d8a3 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -209,6 +209,7 @@ pub enum Token {
     #[cfg(not(feature = "no_module"))]
     As,
     LexError(Box<LexError>),
+    Comment(String),
     EOF,
 }
 
@@ -429,24 +430,690 @@ impl From<Token> for String {
     }
 }
 
-/// An iterator on a `Token` stream.
-pub struct TokenIterator<'a> {
+/// State of the tokenizer.
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Default)]
+pub struct TokenizeState {
     /// Maximum length of a string (0 = unlimited).
     max_string_size: usize,
     /// Can the next token be a unary operator?
     can_be_unary: bool,
-    /// Current position.
-    pos: Position,
+    /// Is the tokenizer currently inside a block comment?
+    comment_level: usize,
+    /// Return `None` at the end of the stream instead of `Some(Token::EOF)`?
+    end_with_none: bool,
+    /// Include comments?
+    include_comments: bool,
+}
+
+/// Trait that encapsulates a peekable character input stream.
+pub trait InputStream {
+    /// Get the next character
+    fn get_next(&mut self) -> Option<char>;
+    /// Peek the next character
+    fn peek_next(&mut self) -> Option<char>;
+}
+
+/// Parse a string literal wrapped by `enclosing_char`.
+pub fn parse_string_literal(
+    stream: &mut impl InputStream,
+    state: &mut TokenizeState,
+    pos: &mut Position,
+    enclosing_char: char,
+) -> Result<String, (LexError, Position)> {
+    let mut result = Vec::new();
+    let mut escape = String::with_capacity(12);
+
+    loop {
+        let next_char = stream.get_next().ok_or((LERR::UnterminatedString, *pos))?;
+
+        pos.advance();
+
+        if state.max_string_size > 0 && result.len() > state.max_string_size {
+            return Err((LexError::StringTooLong(state.max_string_size), *pos));
+        }
+
+        match next_char {
+            // \...
+            '\\' if escape.is_empty() => {
+                escape.push('\\');
+            }
+            // \\
+            '\\' if !escape.is_empty() => {
+                escape.clear();
+                result.push('\\');
+            }
+            // \t
+            't' if !escape.is_empty() => {
+                escape.clear();
+                result.push('\t');
+            }
+            // \n
+            'n' if !escape.is_empty() => {
+                escape.clear();
+                result.push('\n');
+            }
+            // \r
+            'r' if !escape.is_empty() => {
+                escape.clear();
+                result.push('\r');
+            }
+            // \x??, \u????, \U????????
+            ch @ 'x' | ch @ 'u' | ch @ 'U' if !escape.is_empty() => {
+                let mut seq = escape.clone();
+                seq.push(ch);
+                escape.clear();
+
+                let mut out_val: u32 = 0;
+                let len = match ch {
+                    'x' => 2,
+                    'u' => 4,
+                    'U' => 8,
+                    _ => unreachable!(),
+                };
+
+                for _ in 0..len {
+                    let c = stream
+                        .get_next()
+                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
+
+                    seq.push(c);
+                    pos.advance();
+
+                    out_val *= 16;
+                    out_val += c
+                        .to_digit(16)
+                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
+                }
+
+                result.push(
+                    char::from_u32(out_val)
+                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq), *pos))?,
+                );
+            }
+
+            // \{enclosing_char} - escaped
+            ch if enclosing_char == ch && !escape.is_empty() => {
+                escape.clear();
+                result.push(ch)
+            }
+
+            // Close wrapper
+            ch if enclosing_char == ch && escape.is_empty() => break,
+
+            // Unknown escape sequence
+            _ if !escape.is_empty() => return Err((LERR::MalformedEscapeSequence(escape), *pos)),
+
+            // Cannot have new-lines inside string literals
+            '\n' => {
+                pos.rewind();
+                return Err((LERR::UnterminatedString, *pos));
+            }
+
+            // All other characters
+            ch => {
+                escape.clear();
+                result.push(ch);
+            }
+        }
+    }
+
+    let s = result.iter().collect::<String>();
+
+    if state.max_string_size > 0 && s.len() > state.max_string_size {
+        return Err((LexError::StringTooLong(state.max_string_size), *pos));
+    }
+
+    Ok(s)
+}
+
+/// Consume the next character.
+fn eat_next(stream: &mut impl InputStream, pos: &mut Position) {
+    stream.get_next();
+    pos.advance();
+}
+
+/// Scan for a block comment until the end.
+fn scan_comment(
+    stream: &mut impl InputStream,
+    state: &mut TokenizeState,
+    pos: &mut Position,
+    comment: &mut String,
+) {
+    while let Some(c) = stream.get_next() {
+        pos.advance();
+
+        if state.include_comments {
+            comment.push(c);
+        }
+
+        match c {
+            '/' => {
+                if let Some(c2) = stream.get_next() {
+                    if state.include_comments {
+                        comment.push(c2);
+                    }
+                    if c2 == '*' {
+                        state.comment_level += 1;
+                    }
+                }
+                pos.advance();
+            }
+            '*' => {
+                if let Some(c2) = stream.get_next() {
+                    if state.include_comments {
+                        comment.push(c2);
+                    }
+                    if c2 == '/' {
+                        state.comment_level -= 1;
+                    }
+                }
+                pos.advance();
+            }
+            '\n' => pos.new_line(),
+            _ => (),
+        }
+
+        if state.comment_level == 0 {
+            break;
+        }
+    }
+}
+
+/// Get the next token.
+pub fn get_next_token(
+    stream: &mut impl InputStream,
+    state: &mut TokenizeState,
+    pos: &mut Position,
+) -> Option<(Token, Position)> {
+    // Still inside a comment?
+    if state.comment_level > 0 {
+        let start_pos = *pos;
+        let mut comment = Default::default();
+        scan_comment(stream, state, pos, &mut comment);
+
+        if state.include_comments {
+            println!("Comment ({}): {}", start_pos, comment);
+            return Some((Token::Comment(comment), start_pos));
+        }
+    }
+
+    let mut negated = false;
+
+    while let Some(c) = stream.get_next() {
+        pos.advance();
+
+        let start_pos = *pos;
+
+        match (c, stream.peek_next().unwrap_or('\0')) {
+            // \n
+            ('\n', _) => pos.new_line(),
+
+            // digit ...
+            ('0'..='9', _) => {
+                let mut result = Vec::new();
+                let mut radix_base: Option<u32> = None;
+                result.push(c);
+
+                while let Some(next_char) = stream.peek_next() {
+                    match next_char {
+                        '0'..='9' | '_' => {
+                            result.push(next_char);
+                            eat_next(stream, pos);
+                        }
+                        #[cfg(not(feature = "no_float"))]
+                        '.' => {
+                            result.push(next_char);
+                            eat_next(stream, pos);
+                            while let Some(next_char_in_float) = stream.peek_next() {
+                                match next_char_in_float {
+                                    '0'..='9' | '_' => {
+                                        result.push(next_char_in_float);
+                                        eat_next(stream, pos);
+                                    }
+                                    _ => break,
+                                }
+                            }
+                        }
+                        // 0x????, 0o????, 0b????
+                        ch @ 'x' | ch @ 'X' | ch @ 'o' | ch @ 'O' | ch @ 'b' | ch @ 'B' if c == '0' => {
+                            result.push(next_char);
+                            eat_next(stream, pos);
+
+                            let valid = match ch {
+                                'x' | 'X' => [
+                                    'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F',
+                                    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_',
+                                ],
+                                'o' | 'O' => [
+                                    '0', '1', '2', '3', '4', '5', '6', '7', '_', '_', '_', '_',
+                                    '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_',
+                                ],
+                                'b' | 'B' => [
+                                    '0', '1', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_',
+                                    '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_',
+                                ],
+                                _ => unreachable!(),
+                            };
+
+                            radix_base = Some(match ch {
+                                'x' | 'X' => 16,
+                                'o' | 'O' => 8,
+                                'b' | 'B' => 2,
+                                _ => unreachable!(),
+                            });
+
+                            while let Some(next_char_in_escape_seq) = stream.peek_next() {
+                                if !valid.contains(&next_char_in_escape_seq) {
+                                    break;
+                                }
+
+                                result.push(next_char_in_escape_seq);
+                                eat_next(stream, pos);
+                            }
+                        }
+
+                        _ => break,
+                    }
+                }
+
+                if negated {
+                    result.insert(0, '-');
+                }
+
+                // Parse number
+                if let Some(radix) = radix_base {
+                    let out: String = result.iter().skip(2).filter(|&&c| c != '_').collect();
+
+                    return Some((
+                        INT::from_str_radix(&out, radix)
+                            .map(Token::IntegerConstant)
+                            .unwrap_or_else(|_| {
+                                Token::LexError(Box::new(LERR::MalformedNumber(
+                                    result.into_iter().collect(),
+                                )))
+                            }),
+                        start_pos,
+                    ));
+                } else {
+                    let out: String = result.iter().filter(|&&c| c != '_').collect();
+                    let num = INT::from_str(&out).map(Token::IntegerConstant);
+
+                    // If integer parsing is unnecessary, try float instead
+                    #[cfg(not(feature = "no_float"))]
+                    let num = num.or_else(|_| FLOAT::from_str(&out).map(Token::FloatConstant));
+
+                    return Some((
+                        num.unwrap_or_else(|_| {
+                            Token::LexError(Box::new(LERR::MalformedNumber(
+                                result.into_iter().collect(),
+                            )))
+                        }),
+                        start_pos,
+                    ));
+                }
+            }
+
+            // letter or underscore ...
+            ('A'..='Z', _) | ('a'..='z', _) | ('_', _) => {
+                let mut result = Vec::new();
+                result.push(c);
+
+                while let Some(next_char) = stream.peek_next() {
+                    match next_char {
+                        x if x.is_ascii_alphanumeric() || x == '_' => {
+                            result.push(x);
+                            eat_next(stream, pos);
+                        }
+                        _ => break,
+                    }
+                }
+
+                let is_valid_identifier = result
+                    .iter()
+                    .find(|&ch| char::is_ascii_alphanumeric(ch)) // first alpha-numeric character
+                    .map(char::is_ascii_alphabetic) // is a letter
+                    .unwrap_or(false); // if no alpha-numeric at all - syntax error
+
+                let identifier: String = result.iter().collect();
+
+                if !is_valid_identifier {
+                    return Some((
+                        Token::LexError(Box::new(LERR::MalformedIdentifier(identifier))),
+                        start_pos,
+                    ));
+                }
+
+                return Some((
+                    match identifier.as_str() {
+                        "true" => Token::True,
+                        "false" => Token::False,
+                        "let" => Token::Let,
+                        "const" => Token::Const,
+                        "if" => Token::If,
+                        "else" => Token::Else,
+                        "while" => Token::While,
+                        "loop" => Token::Loop,
+                        "continue" => Token::Continue,
+                        "break" => Token::Break,
+                        "return" => Token::Return,
+                        "throw" => Token::Throw,
+                        "for" => Token::For,
+                        "in" => Token::In,
+                        #[cfg(not(feature = "no_function"))]
+                        "private" => Token::Private,
+                        #[cfg(not(feature = "no_module"))]
+                        "import" => Token::Import,
+                        #[cfg(not(feature = "no_module"))]
+                        "export" => Token::Export,
+                        #[cfg(not(feature = "no_module"))]
+                        "as" => Token::As,
+
+                        #[cfg(not(feature = "no_function"))]
+                        "fn" => Token::Fn,
+
+                        _ => Token::Identifier(identifier),
+                    },
+                    start_pos,
+                ));
+            }
+
+            // " - string literal
+            ('"', _) => return parse_string_literal(stream, state, pos, '"')
+                                .map_or_else(
+                                    |err| Some((Token::LexError(Box::new(err.0)), err.1)),
+                                    |out| Some((Token::StringConst(out), start_pos)),
+                                ),
+
+            // ' - character literal
+            ('\'', '\'') => return Some((
+                Token::LexError(Box::new(LERR::MalformedChar("".to_string()))),
+                start_pos,
+            )),
+            ('\'', _) => return Some(
+                parse_string_literal(stream, state, pos, '\'')
+                    .map_or_else(
+                        |err| (Token::LexError(Box::new(err.0)), err.1),
+                        |result| {
+                            let mut chars = result.chars();
+                            let first = chars.next();
+
+                            if chars.next().is_some() {
+                                (
+                                    Token::LexError(Box::new(LERR::MalformedChar(result))),
+                                    start_pos,
+                                )
+                            } else {
+                                (Token::CharConstant(first.expect("should be Some")), start_pos)
+                            }
+                        },
+                    ),
+            ),
+
+            // Braces
+            ('{', _) => return Some((Token::LeftBrace, start_pos)),
+            ('}', _) => return Some((Token::RightBrace, start_pos)),
+
+            // Parentheses
+            ('(', _) => return Some((Token::LeftParen, start_pos)),
+            (')', _) => return Some((Token::RightParen, start_pos)),
+
+            // Indexing
+            ('[', _) => return Some((Token::LeftBracket, start_pos)),
+            (']', _) => return Some((Token::RightBracket, start_pos)),
+
+            // Map literal
+            #[cfg(not(feature = "no_object"))]
+            ('#', '{') => {
+                eat_next(stream, pos);
+                return Some((Token::MapStart, start_pos));
+            }
+
+            // Operators
+            ('+', '=') => {
+                eat_next(stream, pos);
+                return Some((Token::PlusAssign, start_pos));
+            }
+            ('+', _) if state.can_be_unary => return Some((Token::UnaryPlus, start_pos)),
+            ('+', _) => return Some((Token::Plus, start_pos)),
+
+            ('-', '0'..='9') if state.can_be_unary => negated = true,
+            ('-', '0'..='9') => return Some((Token::Minus, start_pos)),
+            ('-', '=') => {
+                eat_next(stream, pos);
+                return Some((Token::MinusAssign, start_pos));
+            }
+            ('-', '>') => return Some((
+                Token::LexError(Box::new(LERR::ImproperSymbol(
+                    "'->' is not a valid symbol. This is not C or C++!".to_string(),
+                ))),
+                start_pos,
+            )),
+            ('-', _) if state.can_be_unary => return Some((Token::UnaryMinus, start_pos)),
+            ('-', _) => return Some((Token::Minus, start_pos)),
+
+            ('*', '=') => {
+                eat_next(stream, pos);
+                return Some((Token::MultiplyAssign, start_pos));
+            }
+            ('*', _) => return Some((Token::Multiply, start_pos)),
+
+            // Comments
+            ('/', '/') => {
+                eat_next(stream, pos);
+
+                let mut comment = if state.include_comments {
+                    "//".to_string()
+                } else {
+                    Default::default()
+                };
+
+                while let Some(c) = stream.get_next() {
+                    if c == '\n' {
+                        pos.new_line();
+                        break;
+                    }
+
+                    if state.include_comments {
+                        comment.push(c);
+                    }
+                    pos.advance();
+                }
+
+                if state.include_comments {
+                    println!("Comment ({}): {}", start_pos, comment);
+                    return Some((Token::Comment(comment), start_pos));
+                }
+            }
+            ('/', '*') => {
+                state.comment_level = 1;
+
+                eat_next(stream, pos);
+
+                let mut comment = if state.include_comments {
+                    "/*".to_string()
+                } else {
+                    Default::default()
+                };
+                scan_comment(stream, state, pos, &mut comment);
+
+                if state.include_comments {
+                    println!("Comment ({}): {}", start_pos, comment);
+                    return Some((Token::Comment(comment), start_pos));
+                }
+            }
+
+            ('/', '=') => {
+                eat_next(stream, pos);
+                return Some((Token::DivideAssign, start_pos));
+            }
+            ('/', _) => return Some((Token::Divide, start_pos)),
+
+            (';', _) => return Some((Token::SemiColon, start_pos)),
+            (',', _) => return Some((Token::Comma, start_pos)),
+            ('.', _) => return Some((Token::Period, start_pos)),
+
+            ('=', '=') => {
+                eat_next(stream, pos);
+
+                // Warn against `===`
+                if stream.peek_next() == Some('=') {
+                    return Some((
+                            Token::LexError(Box::new(LERR::ImproperSymbol(
+                                "'===' is not a valid operator. This is not JavaScript! Should it be '=='?"
+                                    .to_string(),
+                            ))),
+                            start_pos,
+                        ));
+                }
+
+                return Some((Token::EqualsTo, start_pos));
+            }
+            ('=', '>') => return Some((
+                Token::LexError(Box::new(LERR::ImproperSymbol(
+                    "'=>' is not a valid symbol. This is not Rust! Should it be '>='?"
+                        .to_string(),
+                ))),
+                start_pos,
+            )),
+            ('=', _) => return Some((Token::Equals, start_pos)),
+
+            (':', ':') => {
+                eat_next(stream, pos);
+                return Some((Token::DoubleColon, start_pos));
+            }
+            (':', '=') => return Some((
+                Token::LexError(Box::new(LERR::ImproperSymbol(
+                    "':=' is not a valid assignment operator. This is not Pascal! Should it be simply '='?"
+                        .to_string(),
+                ))),
+                start_pos,
+            )),
+            (':', _) => return Some((Token::Colon, start_pos)),
+
+            ('<', '=') => {
+                eat_next(stream, pos);
+                return Some((Token::LessThanEqualsTo, start_pos));
+            }
+            ('<', '-') => return Some((
+                Token::LexError(Box::new(LERR::ImproperSymbol(
+                    "'<-' is not a valid symbol. Should it be '<='?".to_string(),
+                ))),
+                start_pos,
+            )),
+            ('<', '<') => {
+                eat_next(stream, pos);
+
+                return Some((
+                    if stream.peek_next() == Some('=') {
+                        eat_next(stream, pos);
+                        Token::LeftShiftAssign
+                    } else {
+                        Token::LeftShift
+                    },
+                    start_pos,
+                ));
+            }
+            ('<', _) => return Some((Token::LessThan, start_pos)),
+
+            ('>', '=') => {
+                eat_next(stream, pos);
+                return Some((Token::GreaterThanEqualsTo, start_pos));
+            }
+            ('>', '>') => {
+                eat_next(stream, pos);
+
+                return Some((
+                    if stream.peek_next() == Some('=') {
+                        eat_next(stream, pos);
+                        Token::RightShiftAssign
+                    } else {
+                        Token::RightShift
+                    },
+                    start_pos,
+                ));
+            }
+            ('>', _) => return Some((Token::GreaterThan, start_pos)),
+
+            ('!', '=') => {
+                eat_next(stream, pos);
+
+                // Warn against `!==`
+                if stream.peek_next() == Some('=') {
+                    return Some((
+                            Token::LexError(Box::new(LERR::ImproperSymbol(
+                                "'!==' is not a valid operator. This is not JavaScript! Should it be '!='?"
+                                    .to_string(),
+                            ))),
+                            start_pos,
+                        ));
+                }
+
+                return Some((Token::NotEqualsTo, start_pos));
+            }
+            ('!', _) => return Some((Token::Bang, start_pos)),
+
+            ('|', '|') => {
+                eat_next(stream, pos);
+                return Some((Token::Or, start_pos));
+            }
+            ('|', '=') => {
+                eat_next(stream, pos);
+                return Some((Token::OrAssign, start_pos));
+            }
+            ('|', _) => return Some((Token::Pipe, start_pos)),
+
+            ('&', '&') => {
+                eat_next(stream, pos);
+                return Some((Token::And, start_pos));
+            }
+            ('&', '=') => {
+                eat_next(stream, pos);
+                return Some((Token::AndAssign, start_pos));
+            }
+            ('&', _) => return Some((Token::Ampersand, start_pos)),
+
+            ('^', '=') => {
+                eat_next(stream, pos);
+                return Some((Token::XOrAssign, start_pos));
+            }
+            ('^', _) => return Some((Token::XOr, start_pos)),
+
+            ('%', '=') => {
+                eat_next(stream, pos);
+                return Some((Token::ModuloAssign, start_pos));
+            }
+            ('%', _) => return Some((Token::Modulo, start_pos)),
+
+            ('~', '=') => {
+                eat_next(stream, pos);
+                return Some((Token::PowerOfAssign, start_pos));
+            }
+            ('~', _) => return Some((Token::PowerOf, start_pos)),
+
+            ('\0', _) => unreachable!(),
+
+            (ch, _) if ch.is_whitespace() => (),
+            (ch, _) => return Some((Token::LexError(Box::new(LERR::UnexpectedChar(ch))), start_pos)),
+        }
+    }
+
+    pos.advance();
+
+    if state.end_with_none {
+        None
+    } else {
+        Some((Token::EOF, *pos))
+    }
+}
+
+/// An type that implements the `InputStream` trait.
+/// Multiple charaacter streams are jointed together to form one single stream.
+pub struct MultiInputsStream<'a> {
     /// The input character streams.
     streams: StaticVec<Peekable<Chars<'a>>>,
 }
 
-impl<'a> TokenIterator<'a> {
-    /// Consume the next character.
-    fn eat_next(&mut self) {
-        self.get_next();
-        self.advance();
-    }
+impl InputStream for MultiInputsStream<'_> {
     /// Get the next character
     fn get_next(&mut self) -> Option<char> {
         loop {
@@ -477,629 +1144,25 @@ impl<'a> TokenIterator<'a> {
             }
         }
     }
-    /// Move the current position one character ahead.
-    fn advance(&mut self) {
-        self.pos.advance();
-    }
-    /// Move the current position back one character.
-    ///
-    /// # Panics
-    ///
-    /// Panics if already at the beginning of a line - cannot rewind to the previous line.
-    fn rewind(&mut self) {
-        self.pos.rewind();
-    }
-    /// Move the current position to the next line.
-    fn new_line(&mut self) {
-        self.pos.new_line()
-    }
-
-    /// Parse a string literal wrapped by `enclosing_char`.
-    pub fn parse_string_literal(
-        &mut self,
-        enclosing_char: char,
-        max_length: usize,
-    ) -> Result<String, (LexError, Position)> {
-        let mut result = Vec::new();
-        let mut escape = String::with_capacity(12);
-
-        loop {
-            let next_char = self
-                .get_next()
-                .ok_or((LERR::UnterminatedString, self.pos))?;
-
-            self.advance();
-
-            if max_length > 0 && result.len() > max_length {
-                return Err((LexError::StringTooLong(max_length), self.pos));
-            }
-
-            match next_char {
-                // \...
-                '\\' if escape.is_empty() => {
-                    escape.push('\\');
-                }
-                // \\
-                '\\' if !escape.is_empty() => {
-                    escape.clear();
-                    result.push('\\');
-                }
-                // \t
-                't' if !escape.is_empty() => {
-                    escape.clear();
-                    result.push('\t');
-                }
-                // \n
-                'n' if !escape.is_empty() => {
-                    escape.clear();
-                    result.push('\n');
-                }
-                // \r
-                'r' if !escape.is_empty() => {
-                    escape.clear();
-                    result.push('\r');
-                }
-                // \x??, \u????, \U????????
-                ch @ 'x' | ch @ 'u' | ch @ 'U' if !escape.is_empty() => {
-                    let mut seq = escape.clone();
-                    seq.push(ch);
-                    escape.clear();
-
-                    let mut out_val: u32 = 0;
-                    let len = match ch {
-                        'x' => 2,
-                        'u' => 4,
-                        'U' => 8,
-                        _ => unreachable!(),
-                    };
-
-                    for _ in 0..len {
-                        let c = self.get_next().ok_or_else(|| {
-                            (LERR::MalformedEscapeSequence(seq.to_string()), self.pos)
-                        })?;
-
-                        seq.push(c);
-                        self.advance();
-
-                        out_val *= 16;
-                        out_val += c.to_digit(16).ok_or_else(|| {
-                            (LERR::MalformedEscapeSequence(seq.to_string()), self.pos)
-                        })?;
-                    }
-
-                    result.push(
-                        char::from_u32(out_val)
-                            .ok_or_else(|| (LERR::MalformedEscapeSequence(seq), self.pos))?,
-                    );
-                }
-
-                // \{enclosing_char} - escaped
-                ch if enclosing_char == ch && !escape.is_empty() => {
-                    escape.clear();
-                    result.push(ch)
-                }
-
-                // Close wrapper
-                ch if enclosing_char == ch && escape.is_empty() => break,
-
-                // Unknown escape sequence
-                _ if !escape.is_empty() => {
-                    return Err((LERR::MalformedEscapeSequence(escape), self.pos))
-                }
-
-                // Cannot have new-lines inside string literals
-                '\n' => {
-                    self.rewind();
-                    return Err((LERR::UnterminatedString, self.pos));
-                }
-
-                // All other characters
-                ch => {
-                    escape.clear();
-                    result.push(ch);
-                }
-            }
-        }
-
-        let s = result.iter().collect::<String>();
-
-        if max_length > 0 && s.len() > max_length {
-            return Err((LexError::StringTooLong(max_length), self.pos));
-        }
-
-        Ok(s)
-    }
-
-    /// Get the next token.
-    fn inner_next(&mut self) -> Option<(Token, Position)> {
-        let mut negated = false;
-
-        while let Some(c) = self.get_next() {
-            self.advance();
-
-            let pos = self.pos;
-
-            match (c, self.peek_next().unwrap_or('\0')) {
-                // \n
-                ('\n', _) => self.new_line(),
-
-                // digit ...
-                ('0'..='9', _) => {
-                    let mut result = Vec::new();
-                    let mut radix_base: Option<u32> = None;
-                    result.push(c);
-
-                    while let Some(next_char) = self.peek_next() {
-                        match next_char {
-                            '0'..='9' | '_' => {
-                                result.push(next_char);
-                                self.eat_next();
-                            }
-                            #[cfg(not(feature = "no_float"))]
-                            '.' => {
-                                result.push(next_char);
-                                self.eat_next();
-                                while let Some(next_char_in_float) = self.peek_next() {
-                                    match next_char_in_float {
-                                        '0'..='9' | '_' => {
-                                            result.push(next_char_in_float);
-                                            self.eat_next();
-                                        }
-                                        _ => break,
-                                    }
-                                }
-                            }
-                            // 0x????, 0o????, 0b????
-                            ch @ 'x' | ch @ 'X' | ch @ 'o' | ch @ 'O' | ch @ 'b' | ch @ 'B'
-                                if c == '0' =>
-                            {
-                                result.push(next_char);
-                                self.eat_next();
-
-                                let valid = match ch {
-                                    'x' | 'X' => [
-                                        'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F',
-                                        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_',
-                                    ],
-                                    'o' | 'O' => [
-                                        '0', '1', '2', '3', '4', '5', '6', '7', '_', '_', '_', '_',
-                                        '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_',
-                                    ],
-                                    'b' | 'B' => [
-                                        '0', '1', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_',
-                                        '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_',
-                                    ],
-                                    _ => unreachable!(),
-                                };
-
-                                radix_base = Some(match ch {
-                                    'x' | 'X' => 16,
-                                    'o' | 'O' => 8,
-                                    'b' | 'B' => 2,
-                                    _ => unreachable!(),
-                                });
-
-                                while let Some(next_char_in_escape_seq) = self.peek_next() {
-                                    if !valid.contains(&next_char_in_escape_seq) {
-                                        break;
-                                    }
-
-                                    result.push(next_char_in_escape_seq);
-                                    self.eat_next();
-                                }
-                            }
-
-                            _ => break,
-                        }
-                    }
-
-                    if negated {
-                        result.insert(0, '-');
-                    }
-
-                    // Parse number
-                    if let Some(radix) = radix_base {
-                        let out: String = result.iter().skip(2).filter(|&&c| c != '_').collect();
-
-                        return Some((
-                            INT::from_str_radix(&out, radix)
-                                .map(Token::IntegerConstant)
-                                .unwrap_or_else(|_| {
-                                    Token::LexError(Box::new(LERR::MalformedNumber(
-                                        result.into_iter().collect(),
-                                    )))
-                                }),
-                            pos,
-                        ));
-                    } else {
-                        let out: String = result.iter().filter(|&&c| c != '_').collect();
-                        let num = INT::from_str(&out).map(Token::IntegerConstant);
-
-                        // If integer parsing is unnecessary, try float instead
-                        #[cfg(not(feature = "no_float"))]
-                        let num = num.or_else(|_| FLOAT::from_str(&out).map(Token::FloatConstant));
-
-                        return Some((
-                            num.unwrap_or_else(|_| {
-                                Token::LexError(Box::new(LERR::MalformedNumber(
-                                    result.into_iter().collect(),
-                                )))
-                            }),
-                            pos,
-                        ));
-                    }
-                }
-
-                // letter or underscore ...
-                ('A'..='Z', _) | ('a'..='z', _) | ('_', _) => {
-                    let mut result = Vec::new();
-                    result.push(c);
-
-                    while let Some(next_char) = self.peek_next() {
-                        match next_char {
-                            x if x.is_ascii_alphanumeric() || x == '_' => {
-                                result.push(x);
-                                self.eat_next();
-                            }
-                            _ => break,
-                        }
-                    }
-
-                    let is_valid_identifier = result
-                        .iter()
-                        .find(|&ch| char::is_ascii_alphanumeric(ch)) // first alpha-numeric character
-                        .map(char::is_ascii_alphabetic) // is a letter
-                        .unwrap_or(false); // if no alpha-numeric at all - syntax error
-
-                    let identifier: String = result.iter().collect();
-
-                    if !is_valid_identifier {
-                        return Some((
-                            Token::LexError(Box::new(LERR::MalformedIdentifier(identifier))),
-                            pos,
-                        ));
-                    }
-
-                    return Some((
-                        match identifier.as_str() {
-                            "true" => Token::True,
-                            "false" => Token::False,
-                            "let" => Token::Let,
-                            "const" => Token::Const,
-                            "if" => Token::If,
-                            "else" => Token::Else,
-                            "while" => Token::While,
-                            "loop" => Token::Loop,
-                            "continue" => Token::Continue,
-                            "break" => Token::Break,
-                            "return" => Token::Return,
-                            "throw" => Token::Throw,
-                            "for" => Token::For,
-                            "in" => Token::In,
-                            #[cfg(not(feature = "no_function"))]
-                            "private" => Token::Private,
-                            #[cfg(not(feature = "no_module"))]
-                            "import" => Token::Import,
-                            #[cfg(not(feature = "no_module"))]
-                            "export" => Token::Export,
-                            #[cfg(not(feature = "no_module"))]
-                            "as" => Token::As,
-
-                            #[cfg(not(feature = "no_function"))]
-                            "fn" => Token::Fn,
-
-                            _ => Token::Identifier(identifier),
-                        },
-                        pos,
-                    ));
-                }
-
-                // " - string literal
-                ('"', _) => {
-                    return self
-                        .parse_string_literal('"', self.max_string_size)
-                        .map_or_else(
-                            |err| Some((Token::LexError(Box::new(err.0)), err.1)),
-                            |out| Some((Token::StringConst(out), pos)),
-                        );
-                }
-
-                // ' - character literal
-                ('\'', '\'') => {
-                    return Some((
-                        Token::LexError(Box::new(LERR::MalformedChar("".to_string()))),
-                        pos,
-                    ));
-                }
-                ('\'', _) => {
-                    return Some(
-                        self.parse_string_literal('\'', self.max_string_size)
-                            .map_or_else(
-                                |err| (Token::LexError(Box::new(err.0)), err.1),
-                                |result| {
-                                    let mut chars = result.chars();
-                                    let first = chars.next();
-
-                                    if chars.next().is_some() {
-                                        (
-                                            Token::LexError(Box::new(LERR::MalformedChar(result))),
-                                            pos,
-                                        )
-                                    } else {
-                                        (Token::CharConstant(first.expect("should be Some")), pos)
-                                    }
-                                },
-                            ),
-                    );
-                }
-
-                // Braces
-                ('{', _) => return Some((Token::LeftBrace, pos)),
-                ('}', _) => return Some((Token::RightBrace, pos)),
-
-                // Parentheses
-                ('(', _) => return Some((Token::LeftParen, pos)),
-                (')', _) => return Some((Token::RightParen, pos)),
-
-                // Indexing
-                ('[', _) => return Some((Token::LeftBracket, pos)),
-                (']', _) => return Some((Token::RightBracket, pos)),
-
-                // Map literal
-                #[cfg(not(feature = "no_object"))]
-                ('#', '{') => {
-                    self.eat_next();
-                    return Some((Token::MapStart, pos));
-                }
-
-                // Operators
-                ('+', '=') => {
-                    self.eat_next();
-                    return Some((Token::PlusAssign, pos));
-                }
-                ('+', _) if self.can_be_unary => return Some((Token::UnaryPlus, pos)),
-                ('+', _) => return Some((Token::Plus, pos)),
-
-                ('-', '0'..='9') if self.can_be_unary => negated = true,
-                ('-', '0'..='9') => return Some((Token::Minus, pos)),
-                ('-', '=') => {
-                    self.eat_next();
-                    return Some((Token::MinusAssign, pos));
-                }
-                ('-', '>') => {
-                    return Some((
-                        Token::LexError(Box::new(LERR::ImproperSymbol(
-                            "'->' is not a valid symbol. This is not C or C++!".to_string(),
-                        ))),
-                        pos,
-                    ))
-                }
-                ('-', _) if self.can_be_unary => return Some((Token::UnaryMinus, pos)),
-                ('-', _) => return Some((Token::Minus, pos)),
-
-                ('*', '=') => {
-                    self.eat_next();
-                    return Some((Token::MultiplyAssign, pos));
-                }
-                ('*', _) => return Some((Token::Multiply, pos)),
-
-                // Comments
-                ('/', '/') => {
-                    self.eat_next();
-
-                    while let Some(c) = self.get_next() {
-                        if c == '\n' {
-                            self.new_line();
-                            break;
-                        }
-
-                        self.advance();
-                    }
-                }
-                ('/', '*') => {
-                    let mut level = 1;
-
-                    self.eat_next();
-
-                    while let Some(c) = self.get_next() {
-                        self.advance();
-
-                        match c {
-                            '/' => {
-                                if self.get_next() == Some('*') {
-                                    level += 1;
-                                }
-                                self.advance();
-                            }
-                            '*' => {
-                                if self.get_next() == Some('/') {
-                                    level -= 1;
-                                }
-                                self.advance();
-                            }
-                            '\n' => self.new_line(),
-                            _ => (),
-                        }
-
-                        if level == 0 {
-                            break;
-                        }
-                    }
-                }
-
-                ('/', '=') => {
-                    self.eat_next();
-                    return Some((Token::DivideAssign, pos));
-                }
-                ('/', _) => return Some((Token::Divide, pos)),
-
-                (';', _) => return Some((Token::SemiColon, pos)),
-                (',', _) => return Some((Token::Comma, pos)),
-                ('.', _) => return Some((Token::Period, pos)),
-
-                ('=', '=') => {
-                    self.eat_next();
-
-                    // Warn against `===`
-                    if self.peek_next() == Some('=') {
-                        return Some((
-                                Token::LexError(Box::new(LERR::ImproperSymbol(
-                                    "'===' is not a valid operator. This is not JavaScript! Should it be '=='?"
-                                        .to_string(),
-                                ))),
-                                pos,
-                            ));
-                    }
-
-                    return Some((Token::EqualsTo, pos));
-                }
-                ('=', '>') => {
-                    return Some((
-                        Token::LexError(Box::new(LERR::ImproperSymbol(
-                            "'=>' is not a valid symbol. This is not Rust! Should it be '>='?"
-                                .to_string(),
-                        ))),
-                        pos,
-                    ))
-                }
-                ('=', _) => return Some((Token::Equals, pos)),
-
-                (':', ':') => {
-                    self.eat_next();
-                    return Some((Token::DoubleColon, pos));
-                }
-                (':', '=') => {
-                    return Some((
-                        Token::LexError(Box::new(LERR::ImproperSymbol(
-                            "':=' is not a valid assignment operator. This is not Pascal! Should it be simply '='?"
-                                .to_string(),
-                        ))),
-                        pos,
-                    ))
-                }
-                (':', _) => return Some((Token::Colon, pos)),
-
-                ('<', '=') => {
-                    self.eat_next();
-                    return Some((Token::LessThanEqualsTo, pos));
-                }
-                ('<', '-') => {
-                    return Some((
-                        Token::LexError(Box::new(LERR::ImproperSymbol(
-                            "'<-' is not a valid symbol. Should it be '<='?".to_string(),
-                        ))),
-                        pos,
-                    ))
-                }
-                ('<', '<') => {
-                    self.eat_next();
-
-                    return Some((
-                        if self.peek_next() == Some('=') {
-                            self.eat_next();
-                            Token::LeftShiftAssign
-                        } else {
-                            Token::LeftShift
-                        },
-                        pos,
-                    ));
-                }
-                ('<', _) => return Some((Token::LessThan, pos)),
-
-                ('>', '=') => {
-                    self.eat_next();
-                    return Some((Token::GreaterThanEqualsTo, pos));
-                }
-                ('>', '>') => {
-                    self.eat_next();
-
-                    return Some((
-                        if self.peek_next() == Some('=') {
-                            self.eat_next();
-                            Token::RightShiftAssign
-                        } else {
-                            Token::RightShift
-                        },
-                        pos,
-                    ));
-                }
-                ('>', _) => return Some((Token::GreaterThan, pos)),
-
-                ('!', '=') => {
-                    self.eat_next();
-
-                    // Warn against `!==`
-                    if self.peek_next() == Some('=') {
-                        return Some((
-                                Token::LexError(Box::new(LERR::ImproperSymbol(
-                                    "'!==' is not a valid operator. This is not JavaScript! Should it be '!='?"
-                                        .to_string(),
-                                ))),
-                                pos,
-                            ));
-                    }
-
-                    return Some((Token::NotEqualsTo, pos));
-                }
-                ('!', _) => return Some((Token::Bang, pos)),
-
-                ('|', '|') => {
-                    self.eat_next();
-                    return Some((Token::Or, pos));
-                }
-                ('|', '=') => {
-                    self.eat_next();
-                    return Some((Token::OrAssign, pos));
-                }
-                ('|', _) => return Some((Token::Pipe, pos)),
-
-                ('&', '&') => {
-                    self.eat_next();
-                    return Some((Token::And, pos));
-                }
-                ('&', '=') => {
-                    self.eat_next();
-                    return Some((Token::AndAssign, pos));
-                }
-                ('&', _) => return Some((Token::Ampersand, pos)),
-
-                ('^', '=') => {
-                    self.eat_next();
-                    return Some((Token::XOrAssign, pos));
-                }
-                ('^', _) => return Some((Token::XOr, pos)),
-
-                ('%', '=') => {
-                    self.eat_next();
-                    return Some((Token::ModuloAssign, pos));
-                }
-                ('%', _) => return Some((Token::Modulo, pos)),
-
-                ('~', '=') => {
-                    self.eat_next();
-                    return Some((Token::PowerOfAssign, pos));
-                }
-                ('~', _) => return Some((Token::PowerOf, pos)),
-
-                ('\0', _) => unreachable!(),
-
-                (ch, _) if ch.is_whitespace() => (),
-                (ch, _) => return Some((Token::LexError(Box::new(LERR::UnexpectedChar(ch))), pos)),
-            }
-        }
-
-        self.advance();
-        Some((Token::EOF, self.pos))
-    }
+}
+
+/// An iterator on a `Token` stream.
+pub struct TokenIterator<'a> {
+    /// Current state.
+    state: TokenizeState,
+    /// Current position.
+    pos: Position,
+    /// Input character stream.
+    stream: MultiInputsStream<'a>,
 }
 
 impl<'a> Iterator for TokenIterator<'a> {
     type Item = (Token, Position);
 
     fn next(&mut self) -> Option<Self::Item> {
-        self.inner_next().map(|x| {
-            // Save the last token
-            self.can_be_unary = x.0.is_next_unary();
+        get_next_token(&mut self.stream, &mut self.state, &mut self.pos).map(|x| {
+            // Save the last token's state
+            self.state.can_be_unary = x.0.is_next_unary();
             x
         })
     }
@@ -1108,9 +1171,16 @@ impl<'a> Iterator for TokenIterator<'a> {
 /// Tokenize an input text stream.
 pub fn lex<'a>(input: &'a [&'a str], max_string_size: usize) -> TokenIterator<'a> {
     TokenIterator {
-        max_string_size,
-        can_be_unary: true,
+        state: TokenizeState {
+            max_string_size,
+            can_be_unary: true,
+            comment_level: 0,
+            end_with_none: false,
+            include_comments: false,
+        },
         pos: Position::new(1, 0),
-        streams: input.iter().map(|s| s.chars().peekable()).collect(),
+        stream: MultiInputsStream {
+            streams: input.iter().map(|s| s.chars().peekable()).collect(),
+        },
     }
 }