Add custom syntax.

This commit is contained in:
Stephen Chung
2020-07-09 19:54:28 +08:00
parent e33760a7d4
commit 99164ebceb
10 changed files with 483 additions and 132 deletions

View File

@@ -11,6 +11,7 @@ use crate::parser::{Expr, FnAccess, ImmutableString, ReturnType, ScriptFnDef, St
use crate::r#unsafe::unsafe_cast_var_name_to_lifetime;
use crate::result::EvalAltResult;
use crate::scope::{EntryType as ScopeEntryType, Scope};
use crate::syntax::CustomSyntax;
use crate::token::Position;
use crate::utils::StaticVec;
@@ -82,8 +83,12 @@ pub const KEYWORD_THIS: &str = "this";
pub const FN_TO_STRING: &str = "to_string";
pub const FN_GET: &str = "get$";
pub const FN_SET: &str = "set$";
pub const FN_IDX_GET: &str = "$index$get$";
pub const FN_IDX_SET: &str = "$index$set$";
pub const FN_IDX_GET: &str = "index$get$";
pub const FN_IDX_SET: &str = "index$set$";
/// Custom syntax segment marker: the parser parses an expression at this position.
pub const MARKER_EXPR: &str = "$expr$";
/// Custom syntax segment marker: the parser parses a single statement at this position.
pub const MARKER_STMT: &str = "$stmt$";
/// Custom syntax segment marker: the parser parses a statement block at this position.
pub const MARKER_BLOCK: &str = "$block$";
/// Custom syntax segment marker: the parser expects an identifier token at this position.
pub const MARKER_IDENT: &str = "$ident$";
/// A type specifying the method of chaining.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
@@ -273,6 +278,8 @@ pub struct Engine {
pub(crate) disabled_symbols: Option<HashSet<String>>,
/// A hashset containing custom keywords and precedence to recognize.
pub(crate) custom_keywords: Option<HashMap<String, u8>>,
/// Custom syntax.
pub(crate) custom_syntax: Option<HashMap<String, CustomSyntax>>,
/// Callback closure for implementing the `print` command.
pub(crate) print: Callback<str, ()>,
@@ -322,6 +329,7 @@ impl Default for Engine {
type_names: None,
disabled_symbols: None,
custom_keywords: None,
custom_syntax: None,
// default print/debug implementations
print: Box::new(default_print),
@@ -554,6 +562,7 @@ impl Engine {
type_names: None,
disabled_symbols: None,
custom_keywords: None,
custom_syntax: None,
print: Box::new(|_| {}),
debug: Box::new(|_| {}),
@@ -1595,6 +1604,26 @@ impl Engine {
}
}
/// Evaluate an expression inside an AST.
///
/// This is a public pass-through to the private `eval_expr` method: every argument
/// is forwarded unchanged and the result of `eval_expr` is returned as-is.
/// It is only compiled when the `internals` feature is enabled.
///
/// ## WARNING - Low Level API
///
/// This function is very low level. It evaluates an expression from an AST.
#[cfg(feature = "internals")]
#[deprecated(note = "this method is volatile and may change")]
pub fn eval_expr_from_ast(
&self,
scope: &mut Scope,
mods: &mut Imports,
state: &mut State,
lib: &Module,
this_ptr: &mut Option<&mut Dynamic>,
expr: &Expr,
level: usize,
) -> Result<Dynamic, Box<EvalAltResult>> {
// Straight delegation; no extra processing is done here.
self.eval_expr(scope, mods, state, lib, this_ptr, expr, level)
}
/// Evaluate an expression
fn eval_expr(
&self,
@@ -2026,6 +2055,12 @@ impl Engine {
Expr::False(_) => Ok(false.into()),
Expr::Unit(_) => Ok(().into()),
Expr::Custom(x) => {
let func = (x.0).1.as_ref();
let exprs = (x.0).0.as_ref();
func(self, scope, mods, state, lib, this_ptr, exprs, level)
}
_ => unreachable!(),
};

View File

@@ -91,6 +91,7 @@ mod scope;
mod serde;
mod settings;
mod stdlib;
mod syntax;
mod token;
mod r#unsafe;
mod utils;
@@ -153,13 +154,21 @@ pub use optimize::OptimizationLevel;
// Expose internal data structures.
#[cfg(feature = "internals")]
#[deprecated(note = "this type is volatile and may change")]
pub use error::LexError;
#[cfg(feature = "internals")]
#[deprecated(note = "this type is volatile and may change")]
pub use token::{get_next_token, parse_string_literal, InputStream, Token, TokenizeState};
#[cfg(feature = "internals")]
#[deprecated(note = "this type is volatile and may change")]
pub use parser::{Expr, ReturnType, ScriptFnDef, Stmt};
pub use parser::{CustomExpr, Expr, ReturnType, ScriptFnDef, Stmt};
#[cfg(feature = "internals")]
#[deprecated(note = "this type is volatile and may change")]
pub use engine::{Imports, State as EvalState};
#[cfg(feature = "internals")]
#[deprecated(note = "this type is volatile and may change")]

View File

@@ -2,7 +2,7 @@ use crate::any::Dynamic;
use crate::calc_fn_hash;
use crate::engine::{Engine, Imports, KEYWORD_DEBUG, KEYWORD_EVAL, KEYWORD_PRINT, KEYWORD_TYPE_OF};
use crate::module::Module;
use crate::parser::{map_dynamic_to_expr, Expr, ReturnType, ScriptFnDef, Stmt, AST};
use crate::parser::{map_dynamic_to_expr, CustomExpr, Expr, ReturnType, ScriptFnDef, Stmt, AST};
use crate::scope::{Entry as ScopeEntry, EntryType as ScopeEntryType, Scope};
use crate::utils::StaticVec;
@@ -598,6 +598,14 @@ fn optimize_expr(expr: Expr, state: &mut State) -> Expr {
state.find_constant(&name).expect("should find constant in scope!").clone().set_position(pos)
}
// Custom syntax
Expr::Custom(x) => Expr::Custom(Box::new((
CustomExpr(
(x.0).0.into_iter().map(|expr| optimize_expr(expr, state)).collect(),
(x.0).1),
x.1
))),
// All other expressions - skip
expr => expr,
}

View File

@@ -2,11 +2,16 @@
use crate::any::{Dynamic, Union};
use crate::calc_fn_hash;
use crate::engine::{make_getter, make_setter, Engine, KEYWORD_THIS};
use crate::engine::{
make_getter, make_setter, Engine, KEYWORD_THIS, MARKER_BLOCK, MARKER_EXPR, MARKER_IDENT,
MARKER_STMT,
};
use crate::error::{LexError, ParseError, ParseErrorType};
use crate::fn_native::Shared;
use crate::module::{Module, ModuleRef};
use crate::optimize::{optimize_into_ast, OptimizationLevel};
use crate::scope::{EntryType as ScopeEntryType, Scope};
use crate::syntax::FnCustomSyntaxEval;
use crate::token::{Position, Token, TokenStream};
use crate::utils::{StaticVec, StraightHasherBuilder};
@@ -568,6 +573,15 @@ impl Stmt {
}
}
/// A parsed custom syntax expression.
///
/// Field `0` holds the sub-expressions collected while parsing the custom syntax,
/// field `1` holds the shared evaluation function that is called with those
/// sub-expressions when the expression is evaluated.
#[derive(Clone)]
pub struct CustomExpr(pub StaticVec<Expr>, pub Shared<FnCustomSyntaxEval>);
impl fmt::Debug for CustomExpr {
/// Formats only the list of sub-expressions; the evaluation function is not printed.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&self.0, f)
}
}
/// An expression.
///
/// Each variant is at most one pointer in size (for speed),
@@ -632,6 +646,8 @@ pub enum Expr {
False(Position),
/// ()
Unit(Position),
/// Custom syntax
Custom(Box<(CustomExpr, Position)>),
}
impl Default for Expr {
@@ -726,6 +742,8 @@ impl Expr {
Self::True(pos) | Self::False(pos) | Self::Unit(pos) => *pos,
Self::Dot(x) | Self::Index(x) => x.0.position(),
Self::Custom(x) => x.1,
}
}
@@ -758,6 +776,7 @@ impl Expr {
Self::Assignment(x) => x.3 = new_pos,
Self::Dot(x) => x.2 = new_pos,
Self::Index(x) => x.2 = new_pos,
Self::Custom(x) => x.1 = new_pos,
}
self
@@ -861,6 +880,8 @@ impl Expr {
Token::LeftParen => true,
_ => false,
},
Self::Custom(_) => false,
}
}
@@ -2024,6 +2045,85 @@ fn parse_expr(
settings.pos = input.peek().unwrap().1;
settings.ensure_level_within_max_limit(state.max_expr_depth)?;
// Check if it is a custom syntax.
if let Some(ref custom) = state.engine.custom_syntax {
let (token, pos) = input.peek().unwrap();
let token_pos = *pos;
match token {
Token::Custom(key) if custom.contains_key(key) => {
let custom = custom.get_key_value(key).unwrap();
let (key, syntax) = custom;
input.next().unwrap();
let mut exprs: StaticVec<Expr> = Default::default();
// Adjust the variables stack
match syntax.scope_delta {
delta if delta > 0 => {
state.stack.push(("".to_string(), ScopeEntryType::Normal))
}
delta if delta < 0 && state.stack.len() <= delta.abs() as usize => {
state.stack.clear()
}
delta if delta < 0 => state
.stack
.truncate(state.stack.len() - delta.abs() as usize),
_ => (),
}
for segment in syntax.segments.iter() {
settings.pos = input.peek().unwrap().1;
let settings = settings.level_up();
match segment.as_str() {
MARKER_IDENT => match input.next().unwrap() {
(Token::Identifier(s), pos) => {
exprs.push(Expr::Variable(Box::new(((s, pos), None, 0, None))));
}
(_, pos) => return Err(PERR::VariableExpected.into_err(pos)),
},
MARKER_EXPR => exprs.push(parse_expr(input, state, lib, settings)?),
MARKER_STMT => {
let stmt = parse_stmt(input, state, lib, settings)?
.unwrap_or_else(|| Stmt::Noop(settings.pos));
let pos = stmt.position();
exprs.push(Expr::Stmt(Box::new((stmt, pos))))
}
MARKER_BLOCK => {
let stmt = parse_block(input, state, lib, settings)?;
let pos = stmt.position();
exprs.push(Expr::Stmt(Box::new((stmt, pos))))
}
s => match input.peek().unwrap() {
(Token::Custom(custom), _) if custom == s => {
input.next().unwrap();
}
(t, _) if t.syntax().as_ref() == s => {
input.next().unwrap();
}
(_, pos) => {
return Err(PERR::MissingToken(
s.to_string(),
format!("for '{}' expression", key),
)
.into_err(*pos))
}
},
}
}
return Ok(Expr::Custom(Box::new((
CustomExpr(exprs, syntax.func.clone()),
token_pos,
))));
}
_ => (),
}
}
// Parse expression normally.
let lhs = parse_unary(input, state, lib, settings.level_up())?;
parse_binary_op(input, state, lib, 1, lhs, settings.level_up())
}
@@ -2297,7 +2397,7 @@ fn parse_import(
fn parse_export(
input: &mut TokenStream,
state: &mut ParseState,
lib: &mut FunctionsLib,
_lib: &mut FunctionsLib,
mut settings: ParseSettings,
) -> Result<Stmt, ParseError> {
settings.pos = eat_token(input, Token::Export);

132
src/syntax.rs Normal file
View File

@@ -0,0 +1,132 @@
use crate::any::Dynamic;
use crate::engine::{Engine, Imports, State, MARKER_BLOCK, MARKER_EXPR, MARKER_IDENT, MARKER_STMT};
use crate::error::LexError;
use crate::fn_native::{SendSync, Shared};
use crate::module::Module;
use crate::parser::Expr;
use crate::result::EvalAltResult;
use crate::scope::Scope;
use crate::token::{is_valid_identifier, Token};
use crate::utils::StaticVec;
use crate::stdlib::{
fmt,
rc::Rc,
string::{String, ToString},
sync::Arc,
};
/// A general function trait object that evaluates a custom syntax expression.
///
/// The arguments are, in order: the engine, the scope, the imports, the evaluation
/// state, the module passed as the function library, the `this` pointer, the
/// sub-expressions captured by the custom syntax, and the current call level.
///
/// This is the version used when the `sync` feature is disabled.
#[cfg(not(feature = "sync"))]
pub type FnCustomSyntaxEval = dyn Fn(
&Engine,
&mut Scope,
&mut Imports,
&mut State,
&Module,
&mut Option<&mut Dynamic>,
&[Expr],
usize,
) -> Result<Dynamic, Box<EvalAltResult>>;
/// A general function trait object that evaluates a custom syntax expression.
///
/// The arguments are, in order: the engine, the scope, the imports, the evaluation
/// state, the module passed as the function library, the `this` pointer, the
/// sub-expressions captured by the custom syntax, and the current call level.
///
/// This is the version used when the `sync` feature is enabled; it additionally
/// requires `Send + Sync`.
#[cfg(feature = "sync")]
pub type FnCustomSyntaxEval = dyn Fn(
&Engine,
&mut Scope,
&mut Imports,
&mut State,
&Module,
&mut Option<&mut Dynamic>,
&[Expr],
usize,
) -> Result<Dynamic, Box<EvalAltResult>>
+ Send
+ Sync;
/// Definition of one custom syntax registered with the engine.
#[derive(Clone)]
pub struct CustomSyntax {
/// The segments that follow the leading keyword (the leading keyword itself is
/// used as the key in `Engine::custom_syntax` and is not stored here).
pub segments: StaticVec<String>,
/// The function called to evaluate an expression parsed with this syntax.
pub func: Shared<FnCustomSyntaxEval>,
/// Adjustment the parser applies to its variable stack when this syntax is parsed.
pub scope_delta: isize,
}
impl fmt::Debug for CustomSyntax {
/// Formats only the `segments` list; `func` and `scope_delta` are not printed.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&self.segments, f)
}
}
impl Engine {
    /// Register a custom syntax with the engine.
    ///
    /// # Arguments
    ///
    /// * `value` - the segments making up the syntax, in order. The first segment is the
    ///   leading keyword; it becomes the key under which the syntax is stored and is not
    ///   kept in the stored segment list. Each segment must be one of:
    ///   * a marker (`MARKER_EXPR`, `MARKER_STMT`, `MARKER_BLOCK`, `MARKER_IDENT`) -
    ///     not allowed in first position;
    ///   * a standard symbol recognized by `Token::lookup_from_syntax` - only taken as
    ///     such when not in first position;
    ///   * a valid identifier, which is treated as a custom keyword and added to the
    ///     engine's custom keywords (with a value of `0`) unless already present.
    /// * `scope_delta` - stored with the syntax; the parser uses it to adjust its
    ///   variable stack when the syntax is parsed.
    /// * `func` - the function called to evaluate an expression parsed with this syntax.
    ///
    /// # Errors
    ///
    /// Returns `LexError::ImproperSymbol` holding an empty string when `value` is empty,
    /// or holding the offending segment when a segment is none of the kinds listed above.
    /// When an error is returned the engine is left unmodified.
    pub fn add_custom_syntax<S: AsRef<str> + ToString>(
        &mut self,
        value: &[S],
        scope_delta: isize,
        func: impl Fn(
                &Engine,
                &mut Scope,
                &mut Imports,
                &mut State,
                &Module,
                &mut Option<&mut Dynamic>,
                &[Expr],
                usize,
            ) -> Result<Dynamic, Box<EvalAltResult>>
            + SendSync
            + 'static,
    ) -> Result<(), Box<LexError>> {
        if value.is_empty() {
            return Err(Box::new(LexError::ImproperSymbol("".to_string())));
        }

        let mut segments: StaticVec<String> = Default::default();
        // Custom keywords seen during validation. They are registered only after every
        // segment has been accepted, so a failed call does not leave stray keywords behind.
        let mut new_keywords: StaticVec<String> = Default::default();

        for s in value {
            let seg: String = match s.as_ref() {
                // Markers not in first position
                MARKER_EXPR | MARKER_STMT | MARKER_BLOCK | MARKER_IDENT if !segments.is_empty() => {
                    s.to_string()
                }
                // Standard symbols not in first position
                s if !segments.is_empty() && Token::lookup_from_syntax(s).is_some() => s.into(),
                // Custom keyword
                s if is_valid_identifier(s.chars()) => {
                    new_keywords.push(s.into());
                    s.into()
                }
                // Anything else is an error
                _ => return Err(Box::new(LexError::ImproperSymbol(s.to_string()))),
            };
            segments.push(seg);
        }

        // All segments are valid: register the custom keywords, keeping any existing entry.
        let keywords = self.custom_keywords.get_or_insert_with(Default::default);
        for keyword in new_keywords.iter() {
            keywords.entry(keyword.clone()).or_insert(0);
        }

        // `value` is non-empty and every segment was pushed, so index 0 exists.
        let key = segments.remove(0);

        let syntax = CustomSyntax {
            segments,
            #[cfg(not(feature = "sync"))]
            func: Rc::new(func),
            #[cfg(feature = "sync")]
            func: Arc::new(func),
            scope_delta,
        };

        self.custom_syntax
            .get_or_insert_with(Default::default)
            .insert(key, syntax);

        Ok(())
    }
}

View File

@@ -312,6 +312,87 @@ impl Token {
}
}
/// Reverse lookup a token from a piece of syntax.
pub fn lookup_from_syntax(syntax: &str) -> Option<Self> {
use Token::*;
Some(match syntax {
"{" => LeftBrace,
"}" => RightBrace,
"(" => LeftParen,
")" => RightParen,
"[" => LeftBracket,
"]" => RightBracket,
"+" => Plus,
"-" => Minus,
"*" => Multiply,
"/" => Divide,
";" => SemiColon,
":" => Colon,
"::" => DoubleColon,
"," => Comma,
"." => Period,
"#{" => MapStart,
"=" => Equals,
"true" => True,
"false" => False,
"let" => Let,
"const" => Const,
"if" => If,
"else" => Else,
"while" => While,
"loop" => Loop,
"for" => For,
"in" => In,
"<" => LessThan,
">" => GreaterThan,
"!" => Bang,
"<=" => LessThanEqualsTo,
">=" => GreaterThanEqualsTo,
"==" => EqualsTo,
"!=" => NotEqualsTo,
"|" => Pipe,
"||" => Or,
"&" => Ampersand,
"&&" => And,
#[cfg(not(feature = "no_function"))]
"fn" => Fn,
"continue" => Continue,
"break" => Break,
"return" => Return,
"throw" => Throw,
"+=" => PlusAssign,
"-=" => MinusAssign,
"*=" => MultiplyAssign,
"/=" => DivideAssign,
"<<=" => LeftShiftAssign,
">>=" => RightShiftAssign,
"&=" => AndAssign,
"|=" => OrAssign,
"^=" => XOrAssign,
"<<" => LeftShift,
">>" => RightShift,
"^" => XOr,
"%" => Modulo,
"%=" => ModuloAssign,
"~" => PowerOf,
"~=" => PowerOfAssign,
#[cfg(not(feature = "no_function"))]
"private" => Private,
#[cfg(not(feature = "no_module"))]
"import" => Import,
#[cfg(not(feature = "no_module"))]
"export" => Export,
#[cfg(not(feature = "no_module"))]
"as" => As,
"===" | "!==" | "->" | "<-" | "=>" | ":=" | "::<" | "(*" | "*)" | "#" => {
Reserved(syntax.into())
}
_ => return None,
})
}
// Is this token EOF?
pub fn is_eof(&self) -> bool {
use Token::*;
@@ -628,9 +709,9 @@ pub fn parse_string_literal(
}
/// Consume the next character.
fn eat_next(stream: &mut impl InputStream, pos: &mut Position) {
stream.get_next();
fn eat_next(stream: &mut impl InputStream, pos: &mut Position) -> Option<char> {
pos.advance();
stream.get_next()
}
/// Scan for a block comment until the end.
@@ -858,35 +939,8 @@ fn get_next_token_inner(
}
return Some((
match identifier.as_str() {
"true" => Token::True,
"false" => Token::False,
"let" => Token::Let,
"const" => Token::Const,
"if" => Token::If,
"else" => Token::Else,
"while" => Token::While,
"loop" => Token::Loop,
"continue" => Token::Continue,
"break" => Token::Break,
"return" => Token::Return,
"throw" => Token::Throw,
"for" => Token::For,
"in" => Token::In,
#[cfg(not(feature = "no_function"))]
"private" => Token::Private,
#[cfg(not(feature = "no_module"))]
"import" => Token::Import,
#[cfg(not(feature = "no_module"))]
"export" => Token::Export,
#[cfg(not(feature = "no_module"))]
"as" => Token::As,
#[cfg(not(feature = "no_function"))]
"fn" => Token::Fn,
_ => Token::Identifier(identifier),
},
Token::lookup_from_syntax(&identifier)
.unwrap_or_else(|| Token::Identifier(identifier)),
start_pos,
));
}
@@ -947,6 +1001,7 @@ fn get_next_token_inner(
eat_next(stream, pos);
return Some((Token::MapStart, start_pos));
}
('#', _) => return Some((Token::Reserved("#".into()), start_pos)),
// Operators
('+', '=') => {
@@ -1163,40 +1218,42 @@ fn get_next_token_inner(
}
/// A type that implements the `InputStream` trait.
/// Multiple charaacter streams are jointed together to form one single stream.
/// Multiple character streams are joined together to form one single stream.
pub struct MultiInputsStream<'a> {
/// The input character streams.
streams: StaticVec<Peekable<Chars<'a>>>,
/// The current stream index.
index: usize,
}
impl InputStream for MultiInputsStream<'_> {
/// Get the next character
fn get_next(&mut self) -> Option<char> {
loop {
if self.streams.is_empty() {
if self.index >= self.streams.len() {
// No more streams
return None;
} else if let Some(ch) = self.streams[0].next() {
} else if let Some(ch) = self.streams[self.index].next() {
// Next character in current stream
return Some(ch);
} else {
// Jump to the next stream
let _ = self.streams.remove(0);
self.index += 1;
}
}
}
/// Peek the next character
fn peek_next(&mut self) -> Option<char> {
loop {
if self.streams.is_empty() {
if self.index >= self.streams.len() {
// No more streams
return None;
} else if let Some(ch) = self.streams[0].peek() {
} else if let Some(&ch) = self.streams[self.index].peek() {
// Next character in current stream
return Some(*ch);
return Some(ch);
} else {
// Jump to the next stream
let _ = self.streams.remove(0);
self.index += 1;
}
}
}
@@ -1252,7 +1309,11 @@ impl<'a> Iterator for TokenIterator<'a, '_> {
.to_string(),
))),
"(*" | "*)" => Token::LexError(Box::new(LERR::ImproperSymbol(
"'(* .. *)' is not a valid comment style. This is not Pascal! Should it be '/* .. */'?"
"'(* .. *)' is not a valid comment format. This is not Pascal! Should it be '/* .. */'?"
.to_string(),
))),
"#" => Token::LexError(Box::new(LERR::ImproperSymbol(
"'#' is not a valid symbol. Should it be '#{'?"
.to_string(),
))),
token => Token::LexError(Box::new(LERR::ImproperSymbol(
@@ -1298,6 +1359,7 @@ pub fn lex<'a, 'e>(input: &'a [&'a str], engine: &'e Engine) -> TokenIterator<'a
pos: Position::new(1, 0),
stream: MultiInputsStream {
streams: input.iter().map(|s| s.chars().peekable()).collect(),
index: 0,
},
}
}