Split tokenizer into separate file, plus fix no_std feature.

This commit is contained in:
Stephen Chung 2020-04-15 22:21:23 +08:00
parent 78cd53db09
commit a35518fe49
15 changed files with 1071 additions and 1041 deletions

View File

@ -10,9 +10,12 @@ use crate::stdlib::{
any::{type_name, Any, TypeId},
boxed::Box,
fmt,
time::Instant,
string::String,
};
#[cfg(not(feature = "no_std"))]
use crate::stdlib::time::Instant;
/// A trait to represent any type.
///
/// Currently, `Variant` is not `Send` nor `Sync`, so it can practically be any type.
@ -186,6 +189,7 @@ impl Dynamic {
Union::Array(_) => "array",
Union::Map(_) => "map",
#[cfg(not(feature = "no_std"))]
Union::Variant(value) if value.is::<Instant>() => "timestamp",
Union::Variant(value) => (**value).type_name(),
}

View File

@ -6,9 +6,10 @@ use crate::error::ParseError;
use crate::fn_call::FuncArgs;
use crate::fn_register::RegisterFn;
use crate::optimize::{optimize_into_ast, OptimizationLevel};
use crate::parser::{lex, parse, parse_global_expr, Position, AST};
use crate::parser::{parse, parse_global_expr, AST};
use crate::result::EvalAltResult;
use crate::scope::Scope;
use crate::token::{lex, Position};
use crate::stdlib::{
any::{type_name, TypeId},

View File

@ -4,8 +4,9 @@
use crate::any::{Dynamic, Variant};
use crate::engine::{Engine, FUNC_TO_STRING, KEYWORD_DEBUG, KEYWORD_PRINT};
use crate::fn_register::{RegisterDynamicFn, RegisterFn, RegisterResultFn};
use crate::parser::{Position, INT};
use crate::parser::INT;
use crate::result::EvalAltResult;
use crate::token::Position;
#[cfg(not(feature = "no_index"))]
use crate::engine::Array;
@ -27,11 +28,13 @@ use crate::stdlib::{
format,
ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Range, Rem, Shl, Shr, Sub},
string::{String, ToString},
time::Instant,
vec::Vec,
{i32, i64, u32},
};
#[cfg(not(feature = "no_std"))]
use crate::stdlib::time::Instant;
#[cfg(feature = "only_i32")]
const MAX_INT: INT = i32::MAX;
#[cfg(not(feature = "only_i32"))]
@ -1153,80 +1156,83 @@ impl Engine<'_> {
}
});
// Register date/time functions
self.register_fn("timestamp", || Instant::now());
#[cfg(not(feature = "no_std"))]
{
// Register date/time functions
self.register_fn("timestamp", || Instant::now());
self.register_result_fn("-", |ts1: Instant, ts2: Instant| {
if ts2 > ts1 {
#[cfg(not(feature = "no_float"))]
return Ok(-(ts2 - ts1).as_secs_f64());
self.register_result_fn("-", |ts1: Instant, ts2: Instant| {
if ts2 > ts1 {
#[cfg(not(feature = "no_float"))]
return Ok(-(ts2 - ts1).as_secs_f64());
#[cfg(feature = "no_float")]
{
let seconds = (ts2 - ts1).as_secs();
#[cfg(not(feature = "unchecked"))]
#[cfg(feature = "no_float")]
{
if seconds > (MAX_INT as u64) {
return Err(EvalAltResult::ErrorArithmetic(
format!(
"Integer overflow for timestamp duration: {}",
-(seconds as i64)
),
Position::none(),
));
let seconds = (ts2 - ts1).as_secs();
#[cfg(not(feature = "unchecked"))]
{
if seconds > (MAX_INT as u64) {
return Err(EvalAltResult::ErrorArithmetic(
format!(
"Integer overflow for timestamp duration: {}",
-(seconds as i64)
),
Position::none(),
));
}
}
return Ok(-(seconds as INT));
}
} else {
#[cfg(not(feature = "no_float"))]
return Ok((ts1 - ts2).as_secs_f64());
#[cfg(feature = "no_float")]
{
let seconds = (ts1 - ts2).as_secs();
#[cfg(not(feature = "unchecked"))]
{
if seconds > (MAX_INT as u64) {
return Err(EvalAltResult::ErrorArithmetic(
format!("Integer overflow for timestamp duration: {}", seconds),
Position::none(),
));
}
}
return Ok(seconds as INT);
}
return Ok(-(seconds as INT));
}
} else {
});
reg_cmp!(self, "<", lt, Instant);
reg_cmp!(self, "<=", lte, Instant);
reg_cmp!(self, ">", gt, Instant);
reg_cmp!(self, ">=", gte, Instant);
reg_cmp!(self, "==", eq, Instant);
reg_cmp!(self, "!=", ne, Instant);
self.register_result_fn("elapsed", |timestamp: Instant| {
#[cfg(not(feature = "no_float"))]
return Ok((ts1 - ts2).as_secs_f64());
return Ok(timestamp.elapsed().as_secs_f64());
#[cfg(feature = "no_float")]
{
let seconds = (ts1 - ts2).as_secs();
let seconds = timestamp.elapsed().as_secs();
#[cfg(not(feature = "unchecked"))]
{
if seconds > (MAX_INT as u64) {
return Err(EvalAltResult::ErrorArithmetic(
format!("Integer overflow for timestamp duration: {}", seconds),
format!("Integer overflow for timestamp.elapsed(): {}", seconds),
Position::none(),
));
}
}
return Ok(seconds as INT);
}
}
});
reg_cmp!(self, "<", lt, Instant);
reg_cmp!(self, "<=", lte, Instant);
reg_cmp!(self, ">", gt, Instant);
reg_cmp!(self, ">=", gte, Instant);
reg_cmp!(self, "==", eq, Instant);
reg_cmp!(self, "!=", ne, Instant);
self.register_result_fn("elapsed", |timestamp: Instant| {
#[cfg(not(feature = "no_float"))]
return Ok(timestamp.elapsed().as_secs_f64());
#[cfg(feature = "no_float")]
{
let seconds = timestamp.elapsed().as_secs();
#[cfg(not(feature = "unchecked"))]
{
if seconds > (MAX_INT as u64) {
return Err(EvalAltResult::ErrorArithmetic(
format!("Integer overflow for timestamp.elapsed(): {}", seconds),
Position::none(),
));
}
}
return Ok(seconds as INT);
}
});
});
}
}
}

View File

@ -3,9 +3,10 @@
use crate::any::{Dynamic, Union};
use crate::error::ParseErrorType;
use crate::optimize::OptimizationLevel;
use crate::parser::{Expr, FnDef, Position, ReturnType, Stmt, INT};
use crate::parser::{Expr, FnDef, ReturnType, Stmt, INT};
use crate::result::EvalAltResult;
use crate::scope::{EntryRef as ScopeSource, EntryType as ScopeEntryType, Scope};
use crate::token::Position;
use crate::stdlib::{
any::TypeId,

View File

@ -1,6 +1,6 @@
//! Module containing error definitions for the parsing process.
use crate::parser::Position;
use crate::token::Position;
use crate::stdlib::{char, error::Error, fmt, string::String};

View File

@ -3,7 +3,7 @@
#![allow(non_snake_case)]
use crate::any::{Dynamic, Variant};
use crate::stdlib::{string::String, vec, vec::Vec};
use crate::stdlib::vec::Vec;
/// Trait that represent arguments to a function call.
/// Any data type that can be converted into a `Vec` of `Dynamic` values can be used

View File

@ -4,8 +4,8 @@
use crate::any::{Dynamic, Variant};
use crate::engine::{Engine, FnCallArgs};
use crate::parser::Position;
use crate::result::EvalAltResult;
use crate::token::Position;
use crate::stdlib::{any::TypeId, boxed::Box, string::ToString, vec};

View File

@ -82,15 +82,17 @@ mod parser;
mod result;
mod scope;
mod stdlib;
mod token;
pub use any::Dynamic;
pub use engine::Engine;
pub use error::{ParseError, ParseErrorType};
pub use fn_call::FuncArgs;
pub use fn_register::{RegisterDynamicFn, RegisterFn, RegisterResultFn};
pub use parser::{Position, AST, INT};
pub use parser::{AST, INT};
pub use result::EvalAltResult;
pub use scope::Scope;
pub use token::Position;
#[cfg(not(feature = "no_function"))]
pub use fn_func::Func;

View File

@ -3,9 +3,10 @@ use crate::engine::{
Engine, FnAny, FnCallArgs, FnSpec, FunctionsLib, KEYWORD_DEBUG, KEYWORD_EVAL, KEYWORD_PRINT,
KEYWORD_TYPE_OF,
};
use crate::parser::{map_dynamic_to_expr, Expr, FnDef, Position, ReturnType, Stmt, AST};
use crate::parser::{map_dynamic_to_expr, Expr, FnDef, ReturnType, Stmt, AST};
use crate::result::EvalAltResult;
use crate::scope::{Entry as ScopeEntry, EntryType as ScopeEntryType, Scope};
use crate::token::Position;
use crate::stdlib::{
boxed::Box,

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,8 @@
use crate::any::Dynamic;
use crate::error::ParseError;
use crate::parser::{Position, INT};
use crate::parser::INT;
use crate::token::Position;
use crate::stdlib::{
error::Error,

View File

@ -1,14 +1,10 @@
//! Module that defines the `Scope` type representing a function call-stack scope.
use crate::any::{Dynamic, Variant};
use crate::parser::{map_dynamic_to_expr, Expr, Position};
use crate::parser::{map_dynamic_to_expr, Expr};
use crate::token::Position;
use crate::stdlib::{
borrow::Cow,
iter,
string::{String, ToString},
vec::Vec,
};
use crate::stdlib::{borrow::Cow, iter, vec::Vec};
/// Type of an entry in the Scope.
#[derive(Debug, Eq, PartialEq, Hash, Copy, Clone)]

View File

@ -8,7 +8,7 @@ mod inner {
panic, pin, prelude, ptr, result, slice, str, task, time, u128, u16, u32, u64, u8, usize,
};
pub use alloc::{borrow, boxed, format, string, sync, vec};
pub use alloc::{borrow, boxed, format, rc, string, sync, vec};
pub use core_error as error;

982
src/token.rs Normal file
View File

@ -0,0 +1,982 @@
//! Main module defining the lexer (tokenizer).
use crate::error::LexError;
use crate::parser::INT;
#[cfg(not(feature = "no_float"))]
use crate::parser::FLOAT;
use crate::stdlib::{
borrow::Cow,
boxed::Box,
char, fmt,
iter::Peekable,
str::{Chars, FromStr},
string::{String, ToString},
usize,
vec::Vec,
};
type LERR = LexError;
/// A location in the input script: a line number plus a character position on that line.
///
/// Two sentinel values exist: `(0, 0)` stands for "no position" and
/// `(usize::MAX, usize::MAX)` stands for end-of-file.
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
pub struct Position {
    /// Line number - 0 = none, MAX = EOF
    line: usize,
    /// Character position - 0 = BOL, MAX = EOF
    pos: usize,
}

impl Position {
    /// Build a `Position` from a line number and a character position.
    ///
    /// # Panics
    ///
    /// Panics if `line` is zero, or if both `line` and `position` are `usize::MAX`
    /// (that combination is reserved for EOF).
    pub fn new(line: usize, position: usize) -> Self {
        assert!(line != 0, "line cannot be zero");
        assert!(
            !(line == usize::MAX && position == usize::MAX),
            "invalid position"
        );

        Self {
            pos: position,
            line,
        }
    }

    /// The line number (1-based); `None` for the "no position" and EOF sentinels.
    pub fn line(&self) -> Option<usize> {
        if self.is_none() || self.is_eof() {
            return None;
        }
        Some(self.line)
    }

    /// The character position (1-based); `None` at the beginning of a line and
    /// for the "no position" and EOF sentinels.
    pub fn position(&self) -> Option<usize> {
        match self.pos {
            // Beginning of a line (this also covers the "no position" sentinel)
            0 => None,
            _ if self.is_eof() => None,
            column => Some(column),
        }
    }

    /// Step forward by one character position.
    pub(crate) fn advance(&mut self) {
        self.pos += 1;
    }

    /// Step backwards by one character position.
    ///
    /// # Panics
    ///
    /// Panics if already at the beginning of a line - cannot rewind to a previous line.
    ///
    pub(crate) fn rewind(&mut self) {
        assert!(self.pos > 0, "cannot rewind at position 0");
        self.pos -= 1;
    }

    /// Move to the beginning of the next line.
    pub(crate) fn new_line(&mut self) {
        self.pos = 0;
        self.line += 1;
    }

    /// The sentinel `Position` meaning "no position".
    pub(crate) fn none() -> Self {
        Self { pos: 0, line: 0 }
    }

    /// The sentinel `Position` meaning end-of-file.
    pub(crate) fn eof() -> Self {
        Self {
            pos: usize::MAX,
            line: usize::MAX,
        }
    }

    /// Is this the "no position" sentinel?
    pub fn is_none(&self) -> bool {
        (self.line, self.pos) == (0, 0)
    }

    /// Is this the end-of-file sentinel?
    pub fn is_eof(&self) -> bool {
        (self.line, self.pos) == (usize::MAX, usize::MAX)
    }
}

impl Default for Position {
    /// The default `Position` is the beginning of the first line.
    fn default() -> Self {
        Position::new(1, 0)
    }
}

impl fmt::Display for Position {
    /// Writes `EOF`, `none`, or `line L, position P`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            p if p.is_eof() => write!(f, "EOF"),
            p if p.is_none() => write!(f, "none"),
            p => write!(f, "line {}, position {}", p.line, p.pos),
        }
    }
}

impl fmt::Debug for Position {
    /// Writes `(EOF)` or the compact form `(L:P)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if !self.is_eof() {
            return write!(f, "({}:{})", self.line, self.pos);
        }
        write!(f, "(EOF)")
    }
}
/// Tokens produced by the lexer.
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
    // Literals and identifiers
    IntegerConstant(INT),
    // `FLOAT` is only imported when the `no_float` feature is off, so the variant
    // that carries it must be compiled out together with that import.
    #[cfg(not(feature = "no_float"))]
    FloatConstant(FLOAT),
    Identifier(String),
    CharConstant(char),
    StringConst(String),
    // Brackets
    LeftBrace,
    RightBrace,
    LeftParen,
    RightParen,
    LeftBracket,
    RightBracket,
    // Arithmetic and shift operators
    Plus,
    UnaryPlus,
    Minus,
    UnaryMinus,
    Multiply,
    Divide,
    Modulo,
    PowerOf,
    LeftShift,
    RightShift,
    // Punctuation
    SemiColon,
    Colon,
    Comma,
    Period,
    #[cfg(not(feature = "no_object"))]
    MapStart,
    Equals,
    // Keywords
    True,
    False,
    Let,
    Const,
    If,
    Else,
    While,
    Loop,
    For,
    In,
    // Comparison and logic operators
    LessThan,
    GreaterThan,
    LessThanEqualsTo,
    GreaterThanEqualsTo,
    EqualsTo,
    NotEqualsTo,
    Bang,
    Pipe,
    Or,
    XOr,
    Ampersand,
    And,
    #[cfg(not(feature = "no_function"))]
    Fn,
    Continue,
    Break,
    Return,
    Throw,
    // Compound assignments
    PlusAssign,
    MinusAssign,
    MultiplyAssign,
    DivideAssign,
    LeftShiftAssign,
    RightShiftAssign,
    AndAssign,
    OrAssign,
    XOrAssign,
    ModuloAssign,
    PowerOfAssign,
    /// A lexing failure, carried in-band so the iterator can keep a single item type.
    LexError(Box<LexError>),
}

impl Token {
    /// Get the syntax of the token.
    ///
    /// Literal, identifier and error tokens are rendered from their payload (owned text,
    /// or borrowed for identifiers); a string literal is rendered as the word `string`;
    /// every other token maps to its fixed source spelling.
    pub fn syntax(&self) -> Cow<str> {
        use Token::*;

        match self {
            IntegerConstant(i) => i.to_string().into(),
            // Must be gated exactly like the variant itself
            #[cfg(not(feature = "no_float"))]
            FloatConstant(f) => f.to_string().into(),
            Identifier(s) => s.into(),
            CharConstant(c) => c.to_string().into(),
            LexError(err) => err.to_string().into(),

            token => (match token {
                StringConst(_) => "string",
                LeftBrace => "{",
                RightBrace => "}",
                LeftParen => "(",
                RightParen => ")",
                LeftBracket => "[",
                RightBracket => "]",
                Plus => "+",
                UnaryPlus => "+",
                Minus => "-",
                UnaryMinus => "-",
                Multiply => "*",
                Divide => "/",
                SemiColon => ";",
                Colon => ":",
                Comma => ",",
                Period => ".",
                #[cfg(not(feature = "no_object"))]
                MapStart => "#{",
                Equals => "=",
                True => "true",
                False => "false",
                Let => "let",
                Const => "const",
                If => "if",
                Else => "else",
                While => "while",
                Loop => "loop",
                LessThan => "<",
                GreaterThan => ">",
                Bang => "!",
                LessThanEqualsTo => "<=",
                GreaterThanEqualsTo => ">=",
                EqualsTo => "==",
                NotEqualsTo => "!=",
                Pipe => "|",
                Or => "||",
                Ampersand => "&",
                And => "&&",
                #[cfg(not(feature = "no_function"))]
                Fn => "fn",
                Continue => "continue",
                Break => "break",
                Return => "return",
                Throw => "throw",
                PlusAssign => "+=",
                MinusAssign => "-=",
                MultiplyAssign => "*=",
                DivideAssign => "/=",
                LeftShiftAssign => "<<=",
                RightShiftAssign => ">>=",
                AndAssign => "&=",
                OrAssign => "|=",
                XOrAssign => "^=",
                LeftShift => "<<",
                RightShift => ">>",
                XOr => "^",
                Modulo => "%",
                ModuloAssign => "%=",
                PowerOf => "~",
                PowerOfAssign => "~=",
                For => "for",
                In => "in",
                // The payload-carrying variants were all handled by the outer match
                _ => panic!("operator should be match in outer scope"),
            })
            .into(),
        }
    }

    /// Can an operator that directly follows this token be a unary operator?
    ///
    /// The lexer stores this answer after every token and uses it to decide between the
    /// unary and binary readings of `+` and `-`.
    pub fn is_next_unary(&self) -> bool {
        use Token::*;

        match self {
            LexError(_)      |
            LeftBrace        | // {-expr} - is unary
            // RightBrace    | {expr} - expr not unary & is closing
            LeftParen        | // (-expr) - is unary
            // RightParen    | (expr) - expr not unary & is closing
            LeftBracket      | // [-expr] - is unary
            // RightBracket  | [expr] - expr not unary & is closing
            Plus             |
            UnaryPlus        |
            Minus            |
            UnaryMinus       |
            Multiply         |
            Divide           |
            Colon            |
            Comma            |
            Period           |
            Equals           |
            LessThan         |
            GreaterThan      |
            Bang             |
            LessThanEqualsTo |
            GreaterThanEqualsTo |
            EqualsTo         |
            NotEqualsTo      |
            Pipe             |
            Or               |
            Ampersand        |
            And              |
            If               |
            While            |
            PlusAssign       |
            MinusAssign      |
            MultiplyAssign   |
            DivideAssign     |
            LeftShiftAssign  |
            RightShiftAssign |
            AndAssign        |
            OrAssign         |
            XOrAssign        |
            LeftShift        |
            RightShift       |
            XOr              |
            Modulo           |
            ModuloAssign     |
            Return           |
            Throw            |
            PowerOf          |
            In               |
            PowerOfAssign => true,

            _ => false,
        }
    }

    /// Get the precedence number of the token.
    ///
    /// Tokens that are not listed here get precedence 0.
    pub fn precedence(&self) -> u8 {
        use Token::*;

        match self {
            Equals | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | LeftShiftAssign
            | RightShiftAssign | AndAssign | OrAssign | XOrAssign | ModuloAssign
            | PowerOfAssign => 10,

            Or | XOr | Pipe => 40,

            And | Ampersand => 50,

            LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo | EqualsTo
            | NotEqualsTo => 60,

            In => 70,

            Plus | Minus => 80,

            Divide | Multiply | PowerOf => 90,

            LeftShift | RightShift => 100,

            Modulo => 110,

            Period => 120,

            _ => 0,
        }
    }

    /// Does an expression bind to the right (instead of left)?
    pub fn is_bind_right(&self) -> bool {
        use Token::*;

        match self {
            // Assignments bind to the right
            Equals | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | LeftShiftAssign
            | RightShiftAssign | AndAssign | OrAssign | XOrAssign | ModuloAssign
            | PowerOfAssign => true,

            // Property access binds to the right
            Period => true,

            _ => false,
        }
    }
}
/// An iterator on a `Token` stream.
///
/// Yields `(Token, Position)` pairs read from one or more character streams.
/// Instances are built by `lex`.
pub struct TokenIterator<'a> {
/// Can the next token be a unary operator?
/// Starts out `true` and is refreshed from `Token::is_next_unary` after every token returned.
can_be_unary: bool,
/// Current position.
pos: Position,
/// The input character streams.
/// Characters are always read from the first stream; a stream is removed once it is exhausted.
streams: Vec<Peekable<Chars<'a>>>,
}
impl<'a> TokenIterator<'a> {
/// Consume the next character.
///
/// The character (if any) is discarded. The current position is advanced by one
/// regardless of whether a character was actually available.
fn eat_next(&mut self) {
self.get_next();
self.advance();
}
/// Take the next character off the input.
///
/// Reads from the first stream; when that stream has run dry it is dropped and
/// the following one is tried. Returns `None` once no streams are left.
/// The current position is not touched.
fn get_next(&mut self) -> Option<char> {
    while let Some(stream) = self.streams.first_mut() {
        if let Some(ch) = stream.next() {
            return Some(ch);
        }
        // Front stream is exhausted - discard it and try the one behind it
        self.streams.remove(0);
    }

    None
}
/// Look at the next character without consuming it.
///
/// Exhausted streams at the front are dropped along the way. Returns `None`
/// once no streams are left.
fn peek_next(&mut self) -> Option<char> {
    while let Some(stream) = self.streams.first_mut() {
        if let Some(&ch) = stream.peek() {
            return Some(ch);
        }
        // Front stream is exhausted - discard it and try the one behind it
        self.streams.remove(0);
    }

    None
}
/// Move the current position one character ahead.
///
/// Delegates to `Position::advance` on the iterator's current position.
fn advance(&mut self) {
self.pos.advance();
}
/// Move the current position back one character.
///
/// Delegates to `Position::rewind` on the iterator's current position.
///
/// # Panics
///
/// Panics if already at the beginning of a line - cannot rewind to the previous line.
fn rewind(&mut self) {
self.pos.rewind();
}
/// Move the current position to the next line.
///
/// Delegates to `Position::new_line`, which increments the line number and
/// resets the character position to zero.
fn new_line(&mut self) {
self.pos.new_line()
}
/// Parse a string literal wrapped by `enclosing_char`.
///
/// The opening delimiter must already have been consumed. Characters are read up to
/// and including the closing delimiter, and the decoded text is returned.
///
/// Recognised escapes: `\\`, `\t`, `\n`, `\r`, `\x??`, `\u????`, `\U????????` and the
/// escaped delimiter itself.
///
/// # Errors
///
/// * `UnterminatedString` - the input ended (reported at EOF) or a raw new-line was
///   found inside the literal.
/// * `MalformedEscapeSequence` - unknown escape, non-hex digit or missing digit in a
///   numeric escape, or a numeric escape that is not a valid character.
pub fn parse_string_literal(
    &mut self,
    enclosing_char: char,
) -> Result<String, (LexError, Position)> {
    let mut text = String::new();
    // True while the previous character was a backslash that has not been resolved yet
    let mut escaped = false;

    loop {
        let next_char = self.get_next();
        self.advance();

        let ch = match next_char {
            Some(ch) => ch,
            None => return Err((LERR::UnterminatedString, Position::eof())),
        };

        if escaped {
            escaped = false;

            match ch {
                // \\
                '\\' => text.push('\\'),
                // \t
                't' => text.push('\t'),
                // \n
                'n' => text.push('\n'),
                // \r
                'r' => text.push('\r'),
                // \x??, \u????, \U????????
                'x' | 'u' | 'U' => {
                    // Raw text of the escape so far, kept for error reporting
                    let mut seq = String::from("\\");
                    seq.push(ch);

                    let len = match ch {
                        'x' => 2,
                        'u' => 4,
                        'U' => 8,
                        _ => panic!("should be 'x', 'u' or 'U'"),
                    };

                    let mut out_val: u32 = 0;

                    for _ in 0..len {
                        let c = match self.get_next() {
                            Some(c) => c,
                            None => {
                                return Err((LERR::MalformedEscapeSequence(seq), self.pos))
                            }
                        };

                        seq.push(c);
                        self.advance();

                        out_val *= 16;
                        out_val += match c.to_digit(16) {
                            Some(digit) => digit,
                            None => {
                                return Err((LERR::MalformedEscapeSequence(seq), self.pos))
                            }
                        };
                    }

                    match char::from_u32(out_val) {
                        Some(decoded) => text.push(decoded),
                        None => return Err((LERR::MalformedEscapeSequence(seq), self.pos)),
                    }
                }
                // \{enclosing_char} - escaped
                _ if ch == enclosing_char => text.push(ch),
                // Unknown escape sequence
                _ => {
                    return Err((
                        LERR::MalformedEscapeSequence("\\".to_string()),
                        self.pos,
                    ))
                }
            }
        } else {
            match ch {
                // \... - start of an escape sequence
                '\\' => escaped = true,
                // Close wrapper
                _ if ch == enclosing_char => break,
                // Cannot have new-lines inside string literals
                '\n' => {
                    self.rewind();
                    return Err((LERR::UnterminatedString, self.pos));
                }
                // All other characters
                _ => text.push(ch),
            }
        }
    }

    Ok(text)
}
/// Get the next token.
///
/// Returns the token together with the position recorded right after its first
/// character was consumed, or `None` when the input is exhausted. White-space, new-lines
/// and comments are skipped. Lexing failures are returned in-band as `Token::LexError`.
///
/// A `-` that is directly followed by a digit while a unary operator is allowed is
/// folded into the numeric literal instead of being returned as a separate token.
fn inner_next(&mut self) -> Option<(Token, Position)> {
    let mut negated = false;

    while let Some(c) = self.get_next() {
        self.advance();

        let pos = self.pos;

        match (c, self.peek_next().unwrap_or('\0')) {
            // \n
            ('\n', _) => self.new_line(),

            // digit ...
            ('0'..='9', _) => {
                let mut result = Vec::new();
                let mut radix_base: Option<u32> = None;
                result.push(c);

                while let Some(next_char) = self.peek_next() {
                    match next_char {
                        '0'..='9' | '_' => {
                            result.push(next_char);
                            self.eat_next();
                        }
                        #[cfg(not(feature = "no_float"))]
                        '.' => {
                            result.push(next_char);
                            self.eat_next();
                            while let Some(next_char_in_float) = self.peek_next() {
                                match next_char_in_float {
                                    '0'..='9' | '_' => {
                                        result.push(next_char_in_float);
                                        self.eat_next();
                                    }
                                    _ => break,
                                }
                            }
                        }
                        // 0x????, 0o????, 0b????
                        ch @ 'x' | ch @ 'X' | ch @ 'o' | ch @ 'O' | ch @ 'b' | ch @ 'B'
                            if c == '0' =>
                        {
                            result.push(next_char);
                            self.eat_next();

                            let radix = match ch {
                                'x' | 'X' => 16,
                                'o' | 'O' => 8,
                                'b' | 'B' => 2,
                                _ => panic!("unexpected character {}", ch),
                            };
                            radix_base = Some(radix);

                            // Accept digits valid for the radix, plus `_` separators
                            while let Some(next_char_in_hex) = self.peek_next() {
                                if next_char_in_hex != '_' && !next_char_in_hex.is_digit(radix) {
                                    break;
                                }
                                result.push(next_char_in_hex);
                                self.eat_next();
                            }
                        }

                        _ => break,
                    }
                }

                // Text handed to the number parser: optional sign, then the digits without
                // `_` separators. For radix literals the two-character prefix (`0x`, `0o`,
                // `0b`) is dropped *before* the sign is added, so that `-0x1F` parses as -31
                // instead of having the sign counted as part of the prefix.
                let prefix_len = if radix_base.is_some() { 2 } else { 0 };
                let mut out = String::with_capacity(result.len() + 1);
                if negated {
                    out.push('-');
                }
                out.extend(result.iter().skip(prefix_len).filter(|&&ch| ch != '_'));

                // Raw text (sign included) for error reporting
                if negated {
                    result.insert(0, '-');
                }

                // Parse number
                if let Some(radix) = radix_base {
                    return Some((
                        INT::from_str_radix(&out, radix)
                            .map(Token::IntegerConstant)
                            .unwrap_or_else(|_| {
                                Token::LexError(Box::new(LERR::MalformedNumber(
                                    result.iter().collect(),
                                )))
                            }),
                        pos,
                    ));
                } else {
                    let num = INT::from_str(&out).map(Token::IntegerConstant);

                    // If integer parsing is unsuccessful, try float instead
                    #[cfg(not(feature = "no_float"))]
                    let num = num.or_else(|_| FLOAT::from_str(&out).map(Token::FloatConstant));

                    return Some((
                        num.unwrap_or_else(|_| {
                            Token::LexError(Box::new(LERR::MalformedNumber(
                                result.iter().collect(),
                            )))
                        }),
                        pos,
                    ));
                }
            }
            // letter or underscore ...
            ('A'..='Z', _) | ('a'..='z', _) | ('_', _) => {
                let mut result = Vec::new();
                result.push(c);

                while let Some(next_char) = self.peek_next() {
                    match next_char {
                        x if x.is_ascii_alphanumeric() || x == '_' => {
                            result.push(x);
                            self.eat_next();
                        }
                        _ => break,
                    }
                }

                let is_valid_identifier = result
                    .iter()
                    .find(|&ch| char::is_ascii_alphanumeric(ch)) // first alpha-numeric character
                    .map(char::is_ascii_alphabetic) // is a letter
                    .unwrap_or(false); // if no alpha-numeric at all - syntax error

                let identifier: String = result.iter().collect();

                if !is_valid_identifier {
                    return Some((
                        Token::LexError(Box::new(LERR::MalformedIdentifier(identifier))),
                        pos,
                    ));
                }

                return Some((
                    match identifier.as_str() {
                        "true" => Token::True,
                        "false" => Token::False,
                        "let" => Token::Let,
                        "const" => Token::Const,
                        "if" => Token::If,
                        "else" => Token::Else,
                        "while" => Token::While,
                        "loop" => Token::Loop,
                        "continue" => Token::Continue,
                        "break" => Token::Break,
                        "return" => Token::Return,
                        "throw" => Token::Throw,
                        "for" => Token::For,
                        "in" => Token::In,

                        #[cfg(not(feature = "no_function"))]
                        "fn" => Token::Fn,

                        _ => Token::Identifier(identifier),
                    },
                    pos,
                ));
            }
            // " - string literal
            ('"', _) => {
                return self.parse_string_literal('"').map_or_else(
                    |err| Some((Token::LexError(Box::new(err.0)), err.1)),
                    |out| Some((Token::StringConst(out), pos)),
                );
            }
            // ' - character literal
            ('\'', '\'') => {
                return Some((
                    Token::LexError(Box::new(LERR::MalformedChar("".to_string()))),
                    pos,
                ));
            }
            ('\'', _) => {
                return Some(self.parse_string_literal('\'').map_or_else(
                    |err| (Token::LexError(Box::new(err.0)), err.1),
                    |result| {
                        let mut chars = result.chars();
                        let first = chars.next();

                        if chars.next().is_some() {
                            (Token::LexError(Box::new(LERR::MalformedChar(result))), pos)
                        } else {
                            (Token::CharConstant(first.expect("should be Some")), pos)
                        }
                    },
                ));
            }

            // Braces
            ('{', _) => return Some((Token::LeftBrace, pos)),
            ('}', _) => return Some((Token::RightBrace, pos)),

            // Parentheses
            ('(', _) => return Some((Token::LeftParen, pos)),
            (')', _) => return Some((Token::RightParen, pos)),

            // Indexing
            ('[', _) => return Some((Token::LeftBracket, pos)),
            (']', _) => return Some((Token::RightBracket, pos)),

            // Map literal
            #[cfg(not(feature = "no_object"))]
            ('#', '{') => {
                self.eat_next();
                return Some((Token::MapStart, pos));
            }

            // Operators
            ('+', '=') => {
                self.eat_next();
                return Some((Token::PlusAssign, pos));
            }
            ('+', _) if self.can_be_unary => return Some((Token::UnaryPlus, pos)),
            ('+', _) => return Some((Token::Plus, pos)),

            // Sign of a numeric literal - remembered and applied when the digits are lexed
            ('-', '0'..='9') if self.can_be_unary => negated = true,
            ('-', '0'..='9') => return Some((Token::Minus, pos)),
            ('-', '=') => {
                self.eat_next();
                return Some((Token::MinusAssign, pos));
            }
            ('-', _) if self.can_be_unary => return Some((Token::UnaryMinus, pos)),
            ('-', _) => return Some((Token::Minus, pos)),

            ('*', '=') => {
                self.eat_next();
                return Some((Token::MultiplyAssign, pos));
            }
            ('*', _) => return Some((Token::Multiply, pos)),

            // Comments
            ('/', '/') => {
                self.eat_next();

                while let Some(c) = self.get_next() {
                    if c == '\n' {
                        self.new_line();
                        break;
                    }

                    self.advance();
                }
            }
            ('/', '*') => {
                let mut level = 1;

                self.eat_next();

                while let Some(c) = self.get_next() {
                    self.advance();

                    // Only consume the second character of `/*` or `*/` when it is really
                    // there. Swallowing it unconditionally would hide the `*` of `**/`
                    // (the comment would never close) and would lose new-lines.
                    match c {
                        '/' => {
                            if self.peek_next() == Some('*') {
                                self.eat_next();
                                level += 1;
                            }
                        }
                        '*' => {
                            if self.peek_next() == Some('/') {
                                self.eat_next();
                                level -= 1;
                            }
                        }
                        '\n' => self.new_line(),
                        _ => (),
                    }

                    if level == 0 {
                        break;
                    }
                }
            }

            ('/', '=') => {
                self.eat_next();
                return Some((Token::DivideAssign, pos));
            }
            ('/', _) => return Some((Token::Divide, pos)),

            (';', _) => return Some((Token::SemiColon, pos)),
            (':', _) => return Some((Token::Colon, pos)),
            (',', _) => return Some((Token::Comma, pos)),
            ('.', _) => return Some((Token::Period, pos)),

            ('=', '=') => {
                self.eat_next();
                return Some((Token::EqualsTo, pos));
            }
            ('=', _) => return Some((Token::Equals, pos)),

            ('<', '=') => {
                self.eat_next();
                return Some((Token::LessThanEqualsTo, pos));
            }
            ('<', '<') => {
                self.eat_next();

                return Some((
                    if self.peek_next() == Some('=') {
                        self.eat_next();
                        Token::LeftShiftAssign
                    } else {
                        Token::LeftShift
                    },
                    pos,
                ));
            }
            ('<', _) => return Some((Token::LessThan, pos)),

            ('>', '=') => {
                self.eat_next();
                return Some((Token::GreaterThanEqualsTo, pos));
            }
            ('>', '>') => {
                self.eat_next();

                return Some((
                    if self.peek_next() == Some('=') {
                        self.eat_next();
                        Token::RightShiftAssign
                    } else {
                        Token::RightShift
                    },
                    pos,
                ));
            }
            ('>', _) => return Some((Token::GreaterThan, pos)),

            ('!', '=') => {
                self.eat_next();
                return Some((Token::NotEqualsTo, pos));
            }
            ('!', _) => return Some((Token::Bang, pos)),

            ('|', '|') => {
                self.eat_next();
                return Some((Token::Or, pos));
            }
            ('|', '=') => {
                self.eat_next();
                return Some((Token::OrAssign, pos));
            }
            ('|', _) => return Some((Token::Pipe, pos)),

            ('&', '&') => {
                self.eat_next();
                return Some((Token::And, pos));
            }
            ('&', '=') => {
                self.eat_next();
                return Some((Token::AndAssign, pos));
            }
            ('&', _) => return Some((Token::Ampersand, pos)),

            ('^', '=') => {
                self.eat_next();
                return Some((Token::XOrAssign, pos));
            }
            ('^', _) => return Some((Token::XOr, pos)),

            ('%', '=') => {
                self.eat_next();
                return Some((Token::ModuloAssign, pos));
            }
            ('%', _) => return Some((Token::Modulo, pos)),

            ('~', '=') => {
                self.eat_next();
                return Some((Token::PowerOfAssign, pos));
            }
            ('~', _) => return Some((Token::PowerOf, pos)),

            (ch, _) if ch.is_whitespace() => (),
            (ch, _) => return Some((Token::LexError(Box::new(LERR::UnexpectedChar(ch))), pos)),
        }
    }

    None
}
}
impl<'a> Iterator for TokenIterator<'a> {
    type Item = (Token, Position);

    /// Produce the next `(Token, Position)` pair, or `None` when the input is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let item = self.inner_next()?;

        // Remember whether an operator right after this token would be a unary one
        self.can_be_unary = item.0.is_next_unary();

        Some(item)
    }
}
/// Tokenize an input text stream.
///
/// `input` is a list of script segments that are lexed one after another.
/// The returned iterator starts at line 1, position 0, with a unary operator allowed
/// as the first token.
pub fn lex<'a>(input: &'a [&'a str]) -> TokenIterator<'a> {
    let mut streams = Vec::with_capacity(input.len());

    for segment in input {
        streams.push(segment.chars().peekable());
    }

    TokenIterator {
        streams,
        pos: Position::new(1, 0),
        can_be_unary: true,
    }
}

View File

@ -1,4 +1,5 @@
#![cfg(not(feature = "no_stdlib"))]
#![cfg(not(feature = "no_std"))]
use rhai::{Engine, EvalAltResult, INT};