2020-04-15 16:21:23 +02:00
|
|
|
//! Main module defining the lexer and parser.
|
|
|
|
|
|
|
|
use crate::error::LexError;
|
|
|
|
use crate::parser::INT;
|
|
|
|
|
|
|
|
#[cfg(not(feature = "no_float"))]
|
|
|
|
use crate::parser::FLOAT;
|
|
|
|
|
|
|
|
use crate::stdlib::{
|
|
|
|
borrow::Cow,
|
|
|
|
boxed::Box,
|
|
|
|
char, fmt,
|
|
|
|
iter::Peekable,
|
|
|
|
str::{Chars, FromStr},
|
|
|
|
string::{String, ToString},
|
|
|
|
vec::Vec,
|
|
|
|
};
|
|
|
|
|
|
|
|
// Short local alias for `LexError`, used by the lexer below when constructing error values
// (e.g. `LERR::UnterminatedString`).
type LERR = LexError;
|
|
|
|
|
|
|
|
/// A location (line number + character position) in the input script.
///
/// To keep the footprint small, the line number and the character position are each stored
/// with 16-bit resolution, so both top out at 65,535.
/// Advancing past the maximum line length or the maximum number of lines is not an error;
/// it simply has no effect.
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
pub struct Position {
    /// Line number - 0 = none
    line: u16,
    /// Character position - 0 = BOL
    pos: u16,
}

impl Position {
    /// Create a new `Position`.
    ///
    /// # Panics
    ///
    /// Panics if `line` is zero (line zero is reserved for "no position").
    pub fn new(line: u16, position: u16) -> Self {
        assert!(line > 0, "line cannot be zero");

        Self {
            line,
            pos: position,
        }
    }

    /// Get the line number (1-based), or `None` if no position.
    pub fn line(&self) -> Option<usize> {
        if self.is_none() {
            return None;
        }

        Some(usize::from(self.line))
    }

    /// Get the character position (1-based), or `None` if at beginning of a line.
    pub fn position(&self) -> Option<usize> {
        // "No position" always has a zero character position as well,
        // so a single check on `pos` covers both cases.
        match self.pos {
            0 => None,
            column => Some(usize::from(column)),
        }
    }

    /// Advance by one character position.
    ///
    /// The character position saturates at the 16-bit maximum.
    ///
    /// # Panics
    ///
    /// Panics if this is the "no position" value.
    pub(crate) fn advance(&mut self) {
        assert!(!self.is_none(), "cannot advance Position::none");

        // Advance up to maximum position
        self.pos = self.pos.saturating_add(1);
    }

    /// Go backwards by one character position.
    ///
    /// # Panics
    ///
    /// Panics if this is the "no position" value, or if already at beginning of a line -
    /// cannot rewind to a previous line.
    pub(crate) fn rewind(&mut self) {
        assert!(!self.is_none(), "cannot rewind Position::none");
        assert!(self.pos != 0, "cannot rewind at position 0");

        self.pos -= 1;
    }

    /// Advance to the next line.
    ///
    /// When the line number is already at the 16-bit maximum nothing changes at all
    /// (the character position is left untouched too).
    ///
    /// # Panics
    ///
    /// Panics if this is the "no position" value.
    pub(crate) fn new_line(&mut self) {
        assert!(!self.is_none(), "cannot advance Position::none");

        // Already on the last representable line - nothing to do
        if self.line == u16::MAX {
            return;
        }

        self.line += 1;
        self.pos = 0;
    }

    /// Create a `Position` representing no position.
    pub(crate) fn none() -> Self {
        Self { line: 0, pos: 0 }
    }

    /// Is there no `Position`?
    pub fn is_none(&self) -> bool {
        (self.line, self.pos) == (0, 0)
    }
}

impl Default for Position {
    /// The default position is the beginning of the first line.
    fn default() -> Self {
        Self { line: 1, pos: 0 }
    }
}

impl fmt::Display for Position {
    /// Human-readable form: `none`, or `line L, position P`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.is_none() {
            return write!(f, "none");
        }

        write!(f, "line {}, position {}", self.line, self.pos)
    }
}

impl fmt::Debug for Position {
    /// Compact form: `line:pos` using the raw stored values.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}:{}", self.line, self.pos)
    }
}
|
|
|
|
|
|
|
|
/// Tokens.
|
|
|
|
#[derive(Debug, PartialEq, Clone)]
|
|
|
|
pub enum Token {
|
|
|
|
IntegerConstant(INT),
|
2020-04-17 14:08:41 +02:00
|
|
|
#[cfg(not(feature = "no_float"))]
|
2020-04-15 16:21:23 +02:00
|
|
|
FloatConstant(FLOAT),
|
|
|
|
Identifier(String),
|
|
|
|
CharConstant(char),
|
|
|
|
StringConst(String),
|
|
|
|
LeftBrace,
|
|
|
|
RightBrace,
|
|
|
|
LeftParen,
|
|
|
|
RightParen,
|
|
|
|
LeftBracket,
|
|
|
|
RightBracket,
|
|
|
|
Plus,
|
|
|
|
UnaryPlus,
|
|
|
|
Minus,
|
|
|
|
UnaryMinus,
|
|
|
|
Multiply,
|
|
|
|
Divide,
|
|
|
|
Modulo,
|
|
|
|
PowerOf,
|
|
|
|
LeftShift,
|
|
|
|
RightShift,
|
|
|
|
SemiColon,
|
|
|
|
Colon,
|
2020-05-03 19:19:01 +02:00
|
|
|
DoubleColon,
|
2020-04-15 16:21:23 +02:00
|
|
|
Comma,
|
|
|
|
Period,
|
|
|
|
MapStart,
|
|
|
|
Equals,
|
|
|
|
True,
|
|
|
|
False,
|
|
|
|
Let,
|
|
|
|
Const,
|
|
|
|
If,
|
|
|
|
Else,
|
|
|
|
While,
|
|
|
|
Loop,
|
|
|
|
For,
|
|
|
|
In,
|
|
|
|
LessThan,
|
|
|
|
GreaterThan,
|
|
|
|
LessThanEqualsTo,
|
|
|
|
GreaterThanEqualsTo,
|
|
|
|
EqualsTo,
|
|
|
|
NotEqualsTo,
|
|
|
|
Bang,
|
|
|
|
Pipe,
|
|
|
|
Or,
|
|
|
|
XOr,
|
|
|
|
Ampersand,
|
|
|
|
And,
|
|
|
|
Fn,
|
|
|
|
Continue,
|
|
|
|
Break,
|
|
|
|
Return,
|
|
|
|
Throw,
|
|
|
|
PlusAssign,
|
|
|
|
MinusAssign,
|
|
|
|
MultiplyAssign,
|
|
|
|
DivideAssign,
|
|
|
|
LeftShiftAssign,
|
|
|
|
RightShiftAssign,
|
|
|
|
AndAssign,
|
|
|
|
OrAssign,
|
|
|
|
XOrAssign,
|
|
|
|
ModuloAssign,
|
|
|
|
PowerOfAssign,
|
2020-05-09 05:29:30 +02:00
|
|
|
Private,
|
2020-05-04 11:43:54 +02:00
|
|
|
Import,
|
|
|
|
Export,
|
|
|
|
As,
|
2020-04-15 16:21:23 +02:00
|
|
|
LexError(Box<LexError>),
|
2020-04-17 14:01:41 +02:00
|
|
|
EOF,
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Token {
|
|
|
|
/// Get the syntax of the token.
|
|
|
|
pub fn syntax(&self) -> Cow<str> {
|
|
|
|
use Token::*;
|
|
|
|
|
|
|
|
match self {
|
|
|
|
IntegerConstant(i) => i.to_string().into(),
|
2020-04-17 14:08:41 +02:00
|
|
|
#[cfg(not(feature = "no_float"))]
|
2020-04-15 16:21:23 +02:00
|
|
|
FloatConstant(f) => f.to_string().into(),
|
|
|
|
Identifier(s) => s.into(),
|
|
|
|
CharConstant(c) => c.to_string().into(),
|
|
|
|
LexError(err) => err.to_string().into(),
|
|
|
|
|
|
|
|
token => (match token {
|
|
|
|
StringConst(_) => "string",
|
|
|
|
LeftBrace => "{",
|
|
|
|
RightBrace => "}",
|
|
|
|
LeftParen => "(",
|
|
|
|
RightParen => ")",
|
|
|
|
LeftBracket => "[",
|
|
|
|
RightBracket => "]",
|
|
|
|
Plus => "+",
|
|
|
|
UnaryPlus => "+",
|
|
|
|
Minus => "-",
|
|
|
|
UnaryMinus => "-",
|
|
|
|
Multiply => "*",
|
|
|
|
Divide => "/",
|
|
|
|
SemiColon => ";",
|
|
|
|
Colon => ":",
|
2020-05-03 19:19:01 +02:00
|
|
|
DoubleColon => "::",
|
2020-04-15 16:21:23 +02:00
|
|
|
Comma => ",",
|
|
|
|
Period => ".",
|
|
|
|
MapStart => "#{",
|
|
|
|
Equals => "=",
|
|
|
|
True => "true",
|
|
|
|
False => "false",
|
|
|
|
Let => "let",
|
|
|
|
Const => "const",
|
|
|
|
If => "if",
|
|
|
|
Else => "else",
|
|
|
|
While => "while",
|
|
|
|
Loop => "loop",
|
2020-05-04 11:43:54 +02:00
|
|
|
For => "for",
|
|
|
|
In => "in",
|
2020-04-15 16:21:23 +02:00
|
|
|
LessThan => "<",
|
|
|
|
GreaterThan => ">",
|
|
|
|
Bang => "!",
|
|
|
|
LessThanEqualsTo => "<=",
|
|
|
|
GreaterThanEqualsTo => ">=",
|
|
|
|
EqualsTo => "==",
|
|
|
|
NotEqualsTo => "!=",
|
|
|
|
Pipe => "|",
|
|
|
|
Or => "||",
|
|
|
|
Ampersand => "&",
|
|
|
|
And => "&&",
|
|
|
|
Fn => "fn",
|
|
|
|
Continue => "continue",
|
|
|
|
Break => "break",
|
|
|
|
Return => "return",
|
|
|
|
Throw => "throw",
|
|
|
|
PlusAssign => "+=",
|
|
|
|
MinusAssign => "-=",
|
|
|
|
MultiplyAssign => "*=",
|
|
|
|
DivideAssign => "/=",
|
|
|
|
LeftShiftAssign => "<<=",
|
|
|
|
RightShiftAssign => ">>=",
|
|
|
|
AndAssign => "&=",
|
|
|
|
OrAssign => "|=",
|
|
|
|
XOrAssign => "^=",
|
|
|
|
LeftShift => "<<",
|
|
|
|
RightShift => ">>",
|
|
|
|
XOr => "^",
|
|
|
|
Modulo => "%",
|
|
|
|
ModuloAssign => "%=",
|
|
|
|
PowerOf => "~",
|
|
|
|
PowerOfAssign => "~=",
|
2020-05-09 05:29:30 +02:00
|
|
|
Private => "private",
|
2020-05-04 11:43:54 +02:00
|
|
|
Import => "import",
|
|
|
|
Export => "export",
|
|
|
|
As => "as",
|
2020-04-17 14:01:41 +02:00
|
|
|
EOF => "{EOF}",
|
2020-04-15 16:21:23 +02:00
|
|
|
_ => panic!("operator should be match in outer scope"),
|
|
|
|
})
|
|
|
|
.into(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-17 14:01:41 +02:00
|
|
|
// Is this token EOF?
|
|
|
|
pub fn is_eof(&self) -> bool {
|
|
|
|
use Token::*;
|
|
|
|
|
|
|
|
match self {
|
|
|
|
EOF => true,
|
|
|
|
_ => false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-15 16:21:23 +02:00
|
|
|
// If another operator is after these, it's probably an unary operator
|
|
|
|
// (not sure about fn name).
|
|
|
|
pub fn is_next_unary(&self) -> bool {
|
|
|
|
use Token::*;
|
|
|
|
|
|
|
|
match self {
|
|
|
|
LexError(_) |
|
|
|
|
LeftBrace | // (+expr) - is unary
|
|
|
|
// RightBrace | {expr} - expr not unary & is closing
|
|
|
|
LeftParen | // {-expr} - is unary
|
|
|
|
// RightParen | (expr) - expr not unary & is closing
|
|
|
|
LeftBracket | // [-expr] - is unary
|
|
|
|
// RightBracket | [expr] - expr not unary & is closing
|
|
|
|
Plus |
|
|
|
|
UnaryPlus |
|
|
|
|
Minus |
|
|
|
|
UnaryMinus |
|
|
|
|
Multiply |
|
|
|
|
Divide |
|
|
|
|
Colon |
|
|
|
|
Comma |
|
|
|
|
Period |
|
|
|
|
Equals |
|
|
|
|
LessThan |
|
|
|
|
GreaterThan |
|
|
|
|
Bang |
|
|
|
|
LessThanEqualsTo |
|
|
|
|
GreaterThanEqualsTo |
|
|
|
|
EqualsTo |
|
|
|
|
NotEqualsTo |
|
|
|
|
Pipe |
|
|
|
|
Or |
|
|
|
|
Ampersand |
|
|
|
|
And |
|
|
|
|
If |
|
|
|
|
While |
|
|
|
|
PlusAssign |
|
|
|
|
MinusAssign |
|
|
|
|
MultiplyAssign |
|
|
|
|
DivideAssign |
|
|
|
|
LeftShiftAssign |
|
|
|
|
RightShiftAssign |
|
|
|
|
AndAssign |
|
|
|
|
OrAssign |
|
|
|
|
XOrAssign |
|
|
|
|
LeftShift |
|
|
|
|
RightShift |
|
|
|
|
XOr |
|
|
|
|
Modulo |
|
|
|
|
ModuloAssign |
|
|
|
|
Return |
|
|
|
|
Throw |
|
|
|
|
PowerOf |
|
|
|
|
In |
|
|
|
|
PowerOfAssign => true,
|
|
|
|
|
|
|
|
_ => false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Get the precedence number of the token.
|
|
|
|
pub fn precedence(&self) -> u8 {
|
|
|
|
use Token::*;
|
|
|
|
|
|
|
|
match self {
|
2020-04-23 04:21:02 +02:00
|
|
|
// Assignments are not considered expressions - set to zero
|
|
|
|
Equals | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | LeftShiftAssign
|
|
|
|
| RightShiftAssign | AndAssign | OrAssign | XOrAssign | ModuloAssign
|
|
|
|
| PowerOfAssign => 0,
|
|
|
|
|
2020-04-15 16:21:23 +02:00
|
|
|
Or | XOr | Pipe => 40,
|
|
|
|
|
|
|
|
And | Ampersand => 50,
|
|
|
|
|
|
|
|
LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo | EqualsTo
|
|
|
|
| NotEqualsTo => 60,
|
|
|
|
|
|
|
|
In => 70,
|
|
|
|
|
|
|
|
Plus | Minus => 80,
|
|
|
|
|
|
|
|
Divide | Multiply | PowerOf => 90,
|
|
|
|
|
|
|
|
LeftShift | RightShift => 100,
|
|
|
|
|
|
|
|
Modulo => 110,
|
|
|
|
|
|
|
|
Period => 120,
|
|
|
|
|
|
|
|
_ => 0,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Does an expression bind to the right (instead of left)?
|
|
|
|
pub fn is_bind_right(&self) -> bool {
|
|
|
|
use Token::*;
|
|
|
|
|
|
|
|
match self {
|
|
|
|
// Assignments bind to the right
|
|
|
|
Equals | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | LeftShiftAssign
|
|
|
|
| RightShiftAssign | AndAssign | OrAssign | XOrAssign | ModuloAssign
|
|
|
|
| PowerOfAssign => true,
|
|
|
|
|
|
|
|
// Property access binds to the right
|
|
|
|
Period => true,
|
|
|
|
|
|
|
|
_ => false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-04 13:36:58 +02:00
|
|
|
impl From<Token> for String {
|
|
|
|
fn from(token: Token) -> Self {
|
|
|
|
token.syntax().into()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-15 16:21:23 +02:00
|
|
|
/// An iterator on a `Token` stream.
|
|
|
|
pub struct TokenIterator<'a> {
|
|
|
|
/// Can the next token be a unary operator?
|
|
|
|
can_be_unary: bool,
|
|
|
|
/// Current position.
|
|
|
|
pos: Position,
|
|
|
|
/// The input character streams.
|
|
|
|
streams: Vec<Peekable<Chars<'a>>>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> TokenIterator<'a> {
|
|
|
|
/// Consume the next character.
|
|
|
|
fn eat_next(&mut self) {
|
|
|
|
self.get_next();
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
/// Get the next character
|
|
|
|
fn get_next(&mut self) -> Option<char> {
|
|
|
|
loop {
|
|
|
|
if self.streams.is_empty() {
|
2020-04-17 14:01:41 +02:00
|
|
|
// No more streams
|
2020-04-15 16:21:23 +02:00
|
|
|
return None;
|
|
|
|
} else if let Some(ch) = self.streams[0].next() {
|
2020-04-17 14:01:41 +02:00
|
|
|
// Next character in current stream
|
2020-04-15 16:21:23 +02:00
|
|
|
return Some(ch);
|
|
|
|
} else {
|
2020-04-17 14:01:41 +02:00
|
|
|
// Jump to the next stream
|
2020-04-15 16:21:23 +02:00
|
|
|
let _ = self.streams.remove(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/// Peek the next character
|
|
|
|
fn peek_next(&mut self) -> Option<char> {
|
|
|
|
loop {
|
|
|
|
if self.streams.is_empty() {
|
2020-04-17 14:01:41 +02:00
|
|
|
// No more streams
|
2020-04-15 16:21:23 +02:00
|
|
|
return None;
|
|
|
|
} else if let Some(ch) = self.streams[0].peek() {
|
2020-04-17 14:01:41 +02:00
|
|
|
// Next character in current stream
|
2020-04-15 16:21:23 +02:00
|
|
|
return Some(*ch);
|
|
|
|
} else {
|
2020-04-17 14:01:41 +02:00
|
|
|
// Jump to the next stream
|
2020-04-15 16:21:23 +02:00
|
|
|
let _ = self.streams.remove(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/// Move the current position one character ahead.
|
|
|
|
fn advance(&mut self) {
|
|
|
|
self.pos.advance();
|
|
|
|
}
|
|
|
|
/// Move the current position back one character.
|
|
|
|
///
|
|
|
|
/// # Panics
|
|
|
|
///
|
|
|
|
/// Panics if already at the beginning of a line - cannot rewind to the previous line.
|
|
|
|
fn rewind(&mut self) {
|
|
|
|
self.pos.rewind();
|
|
|
|
}
|
|
|
|
/// Move the current position to the next line.
|
|
|
|
fn new_line(&mut self) {
|
|
|
|
self.pos.new_line()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Parse a string literal wrapped by `enclosing_char`.
|
|
|
|
pub fn parse_string_literal(
|
|
|
|
&mut self,
|
|
|
|
enclosing_char: char,
|
|
|
|
) -> Result<String, (LexError, Position)> {
|
|
|
|
let mut result = Vec::new();
|
|
|
|
let mut escape = String::with_capacity(12);
|
|
|
|
|
|
|
|
loop {
|
2020-04-17 14:01:41 +02:00
|
|
|
let next_char = self
|
|
|
|
.get_next()
|
|
|
|
.ok_or((LERR::UnterminatedString, self.pos))?;
|
|
|
|
|
2020-04-15 16:21:23 +02:00
|
|
|
self.advance();
|
|
|
|
|
2020-04-17 14:01:41 +02:00
|
|
|
match next_char {
|
2020-04-15 16:21:23 +02:00
|
|
|
// \...
|
|
|
|
'\\' if escape.is_empty() => {
|
|
|
|
escape.push('\\');
|
|
|
|
}
|
|
|
|
// \\
|
|
|
|
'\\' if !escape.is_empty() => {
|
|
|
|
escape.clear();
|
|
|
|
result.push('\\');
|
|
|
|
}
|
|
|
|
// \t
|
|
|
|
't' if !escape.is_empty() => {
|
|
|
|
escape.clear();
|
|
|
|
result.push('\t');
|
|
|
|
}
|
|
|
|
// \n
|
|
|
|
'n' if !escape.is_empty() => {
|
|
|
|
escape.clear();
|
|
|
|
result.push('\n');
|
|
|
|
}
|
|
|
|
// \r
|
|
|
|
'r' if !escape.is_empty() => {
|
|
|
|
escape.clear();
|
|
|
|
result.push('\r');
|
|
|
|
}
|
|
|
|
// \x??, \u????, \U????????
|
|
|
|
ch @ 'x' | ch @ 'u' | ch @ 'U' if !escape.is_empty() => {
|
|
|
|
let mut seq = escape.clone();
|
|
|
|
seq.push(ch);
|
|
|
|
escape.clear();
|
|
|
|
|
|
|
|
let mut out_val: u32 = 0;
|
|
|
|
let len = match ch {
|
|
|
|
'x' => 2,
|
|
|
|
'u' => 4,
|
|
|
|
'U' => 8,
|
|
|
|
_ => panic!("should be 'x', 'u' or 'U'"),
|
|
|
|
};
|
|
|
|
|
|
|
|
for _ in 0..len {
|
|
|
|
let c = self.get_next().ok_or_else(|| {
|
|
|
|
(LERR::MalformedEscapeSequence(seq.to_string()), self.pos)
|
|
|
|
})?;
|
|
|
|
|
|
|
|
seq.push(c);
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
out_val *= 16;
|
|
|
|
out_val += c.to_digit(16).ok_or_else(|| {
|
|
|
|
(LERR::MalformedEscapeSequence(seq.to_string()), self.pos)
|
|
|
|
})?;
|
|
|
|
}
|
|
|
|
|
|
|
|
result.push(
|
|
|
|
char::from_u32(out_val)
|
|
|
|
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq), self.pos))?,
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
// \{enclosing_char} - escaped
|
|
|
|
ch if enclosing_char == ch && !escape.is_empty() => {
|
|
|
|
escape.clear();
|
|
|
|
result.push(ch)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close wrapper
|
|
|
|
ch if enclosing_char == ch && escape.is_empty() => break,
|
|
|
|
|
|
|
|
// Unknown escape sequence
|
|
|
|
_ if !escape.is_empty() => {
|
|
|
|
return Err((LERR::MalformedEscapeSequence(escape), self.pos))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Cannot have new-lines inside string literals
|
|
|
|
'\n' => {
|
|
|
|
self.rewind();
|
|
|
|
return Err((LERR::UnterminatedString, self.pos));
|
|
|
|
}
|
|
|
|
|
|
|
|
// All other characters
|
|
|
|
ch => {
|
|
|
|
escape.clear();
|
|
|
|
result.push(ch);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(result.iter().collect())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Get the next token.
|
|
|
|
fn inner_next(&mut self) -> Option<(Token, Position)> {
|
|
|
|
let mut negated = false;
|
|
|
|
|
|
|
|
while let Some(c) = self.get_next() {
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
let pos = self.pos;
|
|
|
|
|
|
|
|
match (c, self.peek_next().unwrap_or('\0')) {
|
|
|
|
// \n
|
|
|
|
('\n', _) => self.new_line(),
|
|
|
|
|
|
|
|
// digit ...
|
|
|
|
('0'..='9', _) => {
|
|
|
|
let mut result = Vec::new();
|
|
|
|
let mut radix_base: Option<u32> = None;
|
|
|
|
result.push(c);
|
|
|
|
|
|
|
|
while let Some(next_char) = self.peek_next() {
|
|
|
|
match next_char {
|
|
|
|
'0'..='9' | '_' => {
|
|
|
|
result.push(next_char);
|
|
|
|
self.eat_next();
|
|
|
|
}
|
|
|
|
#[cfg(not(feature = "no_float"))]
|
|
|
|
'.' => {
|
|
|
|
result.push(next_char);
|
|
|
|
self.eat_next();
|
|
|
|
while let Some(next_char_in_float) = self.peek_next() {
|
|
|
|
match next_char_in_float {
|
|
|
|
'0'..='9' | '_' => {
|
|
|
|
result.push(next_char_in_float);
|
|
|
|
self.eat_next();
|
|
|
|
}
|
|
|
|
_ => break,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// 0x????, 0o????, 0b????
|
|
|
|
ch @ 'x' | ch @ 'X' | ch @ 'o' | ch @ 'O' | ch @ 'b' | ch @ 'B'
|
|
|
|
if c == '0' =>
|
|
|
|
{
|
|
|
|
result.push(next_char);
|
|
|
|
self.eat_next();
|
|
|
|
|
|
|
|
let valid = match ch {
|
|
|
|
'x' | 'X' => [
|
|
|
|
'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F',
|
|
|
|
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_',
|
|
|
|
],
|
|
|
|
'o' | 'O' => [
|
|
|
|
'0', '1', '2', '3', '4', '5', '6', '7', '_', '_', '_', '_',
|
|
|
|
'_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_',
|
|
|
|
],
|
|
|
|
'b' | 'B' => [
|
|
|
|
'0', '1', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_',
|
|
|
|
'_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_',
|
|
|
|
],
|
|
|
|
_ => panic!("unexpected character {}", ch),
|
|
|
|
};
|
|
|
|
|
|
|
|
radix_base = Some(match ch {
|
|
|
|
'x' | 'X' => 16,
|
|
|
|
'o' | 'O' => 8,
|
|
|
|
'b' | 'B' => 2,
|
|
|
|
_ => panic!("unexpected character {}", ch),
|
|
|
|
});
|
|
|
|
|
|
|
|
while let Some(next_char_in_hex) = self.peek_next() {
|
|
|
|
if !valid.contains(&next_char_in_hex) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
result.push(next_char_in_hex);
|
|
|
|
self.eat_next();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
_ => break,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if negated {
|
|
|
|
result.insert(0, '-');
|
|
|
|
}
|
|
|
|
|
|
|
|
// Parse number
|
|
|
|
if let Some(radix) = radix_base {
|
|
|
|
let out: String = result.iter().skip(2).filter(|&&c| c != '_').collect();
|
|
|
|
|
|
|
|
return Some((
|
|
|
|
INT::from_str_radix(&out, radix)
|
|
|
|
.map(Token::IntegerConstant)
|
|
|
|
.unwrap_or_else(|_| {
|
|
|
|
Token::LexError(Box::new(LERR::MalformedNumber(
|
2020-04-22 11:36:51 +02:00
|
|
|
result.into_iter().collect(),
|
2020-04-15 16:21:23 +02:00
|
|
|
)))
|
|
|
|
}),
|
|
|
|
pos,
|
|
|
|
));
|
|
|
|
} else {
|
|
|
|
let out: String = result.iter().filter(|&&c| c != '_').collect();
|
|
|
|
let num = INT::from_str(&out).map(Token::IntegerConstant);
|
|
|
|
|
|
|
|
// If integer parsing is unnecessary, try float instead
|
|
|
|
#[cfg(not(feature = "no_float"))]
|
|
|
|
let num = num.or_else(|_| FLOAT::from_str(&out).map(Token::FloatConstant));
|
|
|
|
|
|
|
|
return Some((
|
|
|
|
num.unwrap_or_else(|_| {
|
|
|
|
Token::LexError(Box::new(LERR::MalformedNumber(
|
2020-04-22 11:36:51 +02:00
|
|
|
result.into_iter().collect(),
|
2020-04-15 16:21:23 +02:00
|
|
|
)))
|
|
|
|
}),
|
|
|
|
pos,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// letter or underscore ...
|
|
|
|
('A'..='Z', _) | ('a'..='z', _) | ('_', _) => {
|
|
|
|
let mut result = Vec::new();
|
|
|
|
result.push(c);
|
|
|
|
|
|
|
|
while let Some(next_char) = self.peek_next() {
|
|
|
|
match next_char {
|
|
|
|
x if x.is_ascii_alphanumeric() || x == '_' => {
|
|
|
|
result.push(x);
|
|
|
|
self.eat_next();
|
|
|
|
}
|
|
|
|
_ => break,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let is_valid_identifier = result
|
|
|
|
.iter()
|
|
|
|
.find(|&ch| char::is_ascii_alphanumeric(ch)) // first alpha-numeric character
|
|
|
|
.map(char::is_ascii_alphabetic) // is a letter
|
|
|
|
.unwrap_or(false); // if no alpha-numeric at all - syntax error
|
|
|
|
|
|
|
|
let identifier: String = result.iter().collect();
|
|
|
|
|
|
|
|
if !is_valid_identifier {
|
|
|
|
return Some((
|
|
|
|
Token::LexError(Box::new(LERR::MalformedIdentifier(identifier))),
|
|
|
|
pos,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
|
|
|
|
return Some((
|
|
|
|
match identifier.as_str() {
|
|
|
|
"true" => Token::True,
|
|
|
|
"false" => Token::False,
|
|
|
|
"let" => Token::Let,
|
|
|
|
"const" => Token::Const,
|
|
|
|
"if" => Token::If,
|
|
|
|
"else" => Token::Else,
|
|
|
|
"while" => Token::While,
|
|
|
|
"loop" => Token::Loop,
|
|
|
|
"continue" => Token::Continue,
|
|
|
|
"break" => Token::Break,
|
|
|
|
"return" => Token::Return,
|
|
|
|
"throw" => Token::Throw,
|
|
|
|
"for" => Token::For,
|
|
|
|
"in" => Token::In,
|
2020-05-09 05:29:30 +02:00
|
|
|
"private" => Token::Private,
|
2020-04-15 16:21:23 +02:00
|
|
|
|
2020-05-05 04:39:12 +02:00
|
|
|
#[cfg(not(feature = "no_module"))]
|
2020-05-04 11:43:54 +02:00
|
|
|
"import" => Token::Import,
|
2020-05-05 04:39:12 +02:00
|
|
|
#[cfg(not(feature = "no_module"))]
|
2020-05-04 11:43:54 +02:00
|
|
|
"export" => Token::Export,
|
2020-05-05 04:39:12 +02:00
|
|
|
#[cfg(not(feature = "no_module"))]
|
2020-05-04 11:43:54 +02:00
|
|
|
"as" => Token::As,
|
|
|
|
|
2020-04-15 16:21:23 +02:00
|
|
|
#[cfg(not(feature = "no_function"))]
|
|
|
|
"fn" => Token::Fn,
|
|
|
|
|
|
|
|
_ => Token::Identifier(identifier),
|
|
|
|
},
|
|
|
|
pos,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
|
|
|
|
// " - string literal
|
|
|
|
('"', _) => {
|
|
|
|
return self.parse_string_literal('"').map_or_else(
|
|
|
|
|err| Some((Token::LexError(Box::new(err.0)), err.1)),
|
|
|
|
|out| Some((Token::StringConst(out), pos)),
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
// ' - character literal
|
|
|
|
('\'', '\'') => {
|
|
|
|
return Some((
|
|
|
|
Token::LexError(Box::new(LERR::MalformedChar("".to_string()))),
|
|
|
|
pos,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
('\'', _) => {
|
|
|
|
return Some(self.parse_string_literal('\'').map_or_else(
|
|
|
|
|err| (Token::LexError(Box::new(err.0)), err.1),
|
|
|
|
|result| {
|
|
|
|
let mut chars = result.chars();
|
|
|
|
let first = chars.next();
|
|
|
|
|
|
|
|
if chars.next().is_some() {
|
|
|
|
(Token::LexError(Box::new(LERR::MalformedChar(result))), pos)
|
|
|
|
} else {
|
|
|
|
(Token::CharConstant(first.expect("should be Some")), pos)
|
|
|
|
}
|
|
|
|
},
|
|
|
|
));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Braces
|
|
|
|
('{', _) => return Some((Token::LeftBrace, pos)),
|
|
|
|
('}', _) => return Some((Token::RightBrace, pos)),
|
|
|
|
|
|
|
|
// Parentheses
|
|
|
|
('(', _) => return Some((Token::LeftParen, pos)),
|
|
|
|
(')', _) => return Some((Token::RightParen, pos)),
|
|
|
|
|
|
|
|
// Indexing
|
|
|
|
('[', _) => return Some((Token::LeftBracket, pos)),
|
|
|
|
(']', _) => return Some((Token::RightBracket, pos)),
|
|
|
|
|
|
|
|
// Map literal
|
|
|
|
#[cfg(not(feature = "no_object"))]
|
|
|
|
('#', '{') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::MapStart, pos));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Operators
|
|
|
|
('+', '=') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::PlusAssign, pos));
|
|
|
|
}
|
|
|
|
('+', _) if self.can_be_unary => return Some((Token::UnaryPlus, pos)),
|
|
|
|
('+', _) => return Some((Token::Plus, pos)),
|
|
|
|
|
|
|
|
('-', '0'..='9') if self.can_be_unary => negated = true,
|
|
|
|
('-', '0'..='9') => return Some((Token::Minus, pos)),
|
|
|
|
('-', '=') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::MinusAssign, pos));
|
|
|
|
}
|
|
|
|
('-', _) if self.can_be_unary => return Some((Token::UnaryMinus, pos)),
|
|
|
|
('-', _) => return Some((Token::Minus, pos)),
|
|
|
|
|
|
|
|
('*', '=') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::MultiplyAssign, pos));
|
|
|
|
}
|
|
|
|
('*', _) => return Some((Token::Multiply, pos)),
|
|
|
|
|
|
|
|
// Comments
|
|
|
|
('/', '/') => {
|
|
|
|
self.eat_next();
|
|
|
|
|
|
|
|
while let Some(c) = self.get_next() {
|
|
|
|
if c == '\n' {
|
|
|
|
self.new_line();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
('/', '*') => {
|
|
|
|
let mut level = 1;
|
|
|
|
|
|
|
|
self.eat_next();
|
|
|
|
|
|
|
|
while let Some(c) = self.get_next() {
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
match c {
|
|
|
|
'/' => {
|
|
|
|
if self.get_next() == Some('*') {
|
|
|
|
level += 1;
|
|
|
|
}
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
'*' => {
|
|
|
|
if self.get_next() == Some('/') {
|
|
|
|
level -= 1;
|
|
|
|
}
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
'\n' => self.new_line(),
|
|
|
|
_ => (),
|
|
|
|
}
|
|
|
|
|
|
|
|
if level == 0 {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
('/', '=') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::DivideAssign, pos));
|
|
|
|
}
|
|
|
|
('/', _) => return Some((Token::Divide, pos)),
|
|
|
|
|
|
|
|
(';', _) => return Some((Token::SemiColon, pos)),
|
|
|
|
(',', _) => return Some((Token::Comma, pos)),
|
|
|
|
('.', _) => return Some((Token::Period, pos)),
|
|
|
|
|
|
|
|
('=', '=') => {
|
|
|
|
self.eat_next();
|
2020-04-22 11:36:51 +02:00
|
|
|
|
|
|
|
// Warn against `===`
|
|
|
|
if self.peek_next() == Some('=') {
|
|
|
|
return Some((
|
|
|
|
Token::LexError(Box::new(LERR::ImproperKeyword(
|
|
|
|
"'===' is not a valid operator. This is not JavaScript! Should it be '=='?"
|
|
|
|
.to_string(),
|
|
|
|
))),
|
|
|
|
pos,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
|
2020-04-15 16:21:23 +02:00
|
|
|
return Some((Token::EqualsTo, pos));
|
|
|
|
}
|
|
|
|
('=', _) => return Some((Token::Equals, pos)),
|
|
|
|
|
2020-05-05 04:39:12 +02:00
|
|
|
#[cfg(not(feature = "no_module"))]
|
2020-05-03 19:19:01 +02:00
|
|
|
(':', ':') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::DoubleColon, pos));
|
|
|
|
}
|
|
|
|
(':', _) => return Some((Token::Colon, pos)),
|
|
|
|
|
2020-04-15 16:21:23 +02:00
|
|
|
('<', '=') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::LessThanEqualsTo, pos));
|
|
|
|
}
|
|
|
|
('<', '<') => {
|
|
|
|
self.eat_next();
|
|
|
|
|
|
|
|
return Some((
|
|
|
|
if self.peek_next() == Some('=') {
|
|
|
|
self.eat_next();
|
|
|
|
Token::LeftShiftAssign
|
|
|
|
} else {
|
|
|
|
Token::LeftShift
|
|
|
|
},
|
|
|
|
pos,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
('<', _) => return Some((Token::LessThan, pos)),
|
|
|
|
|
|
|
|
('>', '=') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::GreaterThanEqualsTo, pos));
|
|
|
|
}
|
|
|
|
('>', '>') => {
|
|
|
|
self.eat_next();
|
|
|
|
|
|
|
|
return Some((
|
|
|
|
if self.peek_next() == Some('=') {
|
|
|
|
self.eat_next();
|
|
|
|
Token::RightShiftAssign
|
|
|
|
} else {
|
|
|
|
Token::RightShift
|
|
|
|
},
|
|
|
|
pos,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
('>', _) => return Some((Token::GreaterThan, pos)),
|
|
|
|
|
|
|
|
('!', '=') => {
|
|
|
|
self.eat_next();
|
2020-04-22 11:36:51 +02:00
|
|
|
|
|
|
|
// Warn against `!==`
|
|
|
|
if self.peek_next() == Some('=') {
|
|
|
|
return Some((
|
|
|
|
Token::LexError(Box::new(LERR::ImproperKeyword(
|
|
|
|
"'!==' is not a valid operator. This is not JavaScript! Should it be '!='?"
|
|
|
|
.to_string(),
|
|
|
|
))),
|
|
|
|
pos,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
|
2020-04-15 16:21:23 +02:00
|
|
|
return Some((Token::NotEqualsTo, pos));
|
|
|
|
}
|
|
|
|
('!', _) => return Some((Token::Bang, pos)),
|
|
|
|
|
|
|
|
('|', '|') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::Or, pos));
|
|
|
|
}
|
|
|
|
('|', '=') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::OrAssign, pos));
|
|
|
|
}
|
|
|
|
('|', _) => return Some((Token::Pipe, pos)),
|
|
|
|
|
|
|
|
('&', '&') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::And, pos));
|
|
|
|
}
|
|
|
|
('&', '=') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::AndAssign, pos));
|
|
|
|
}
|
|
|
|
('&', _) => return Some((Token::Ampersand, pos)),
|
|
|
|
|
|
|
|
('^', '=') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::XOrAssign, pos));
|
|
|
|
}
|
|
|
|
('^', _) => return Some((Token::XOr, pos)),
|
|
|
|
|
|
|
|
('%', '=') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::ModuloAssign, pos));
|
|
|
|
}
|
|
|
|
('%', _) => return Some((Token::Modulo, pos)),
|
|
|
|
|
|
|
|
('~', '=') => {
|
|
|
|
self.eat_next();
|
|
|
|
return Some((Token::PowerOfAssign, pos));
|
|
|
|
}
|
|
|
|
('~', _) => return Some((Token::PowerOf, pos)),
|
|
|
|
|
2020-04-23 07:24:24 +02:00
|
|
|
('\0', _) => panic!("should not be EOF"),
|
|
|
|
|
2020-04-15 16:21:23 +02:00
|
|
|
(ch, _) if ch.is_whitespace() => (),
|
|
|
|
(ch, _) => return Some((Token::LexError(Box::new(LERR::UnexpectedChar(ch))), pos)),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-17 14:01:41 +02:00
|
|
|
self.advance();
|
|
|
|
Some((Token::EOF, self.pos))
|
2020-04-15 16:21:23 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Iterator for TokenIterator<'a> {
|
|
|
|
type Item = (Token, Position);
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
self.inner_next().map(|x| {
|
|
|
|
// Save the last token
|
|
|
|
self.can_be_unary = x.0.is_next_unary();
|
|
|
|
x
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Tokenize an input text stream.
|
|
|
|
pub fn lex<'a>(input: &'a [&'a str]) -> TokenIterator<'a> {
|
|
|
|
TokenIterator {
|
|
|
|
can_be_unary: true,
|
|
|
|
pos: Position::new(1, 0),
|
|
|
|
streams: input.iter().map(|s| s.chars().peekable()).collect(),
|
|
|
|
}
|
|
|
|
}
|