Add custom operators.

Stephen Chung
2020-07-05 17:41:45 +08:00
parent 936a3ff44a
commit e390dd73e6
13 changed files with 326 additions and 139 deletions
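In short: this commit lets host code register identifier-shaped custom operators with a chosen precedence on the `Engine`, renames `disable_tokens` to `disabled_symbols`, and threads a reference to the whole `Engine` through the lexer and parser so both can consult the new tables. As an end-to-end sketch, mirroring the doc-comment example added in this commit:

use rhai::{Engine, RegisterFn};

fn main() {
    let mut engine = Engine::new();

    // A custom operator is an identifier plus a precedence; 140 sits
    // between '+'/'-' (130) and '*'/'/' (160) on the new scale.
    engine.register_custom_operator("foo", 140).unwrap();

    // Its behavior is supplied by an ordinary binary function.
    engine.register_fn("foo", |x: i64, y: i64| (x * y) - (x + y));

    // Parses as 1 + ((2 * 3) foo 4) - (5 / 6) == 1 + 14 - 0 == 15.
    assert_eq!(
        engine.eval_expression::<i64>("1 + 2 * 3 foo 4 - 5 / 6").unwrap(),
        15
    );
}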

View File

@@ -553,7 +553,7 @@ impl Engine {
scripts: &[&str],
optimization_level: OptimizationLevel,
) -> Result<AST, ParseError> {
- let stream = lex(scripts, self.max_string_size, self.disable_tokens.as_ref());
+ let stream = lex(scripts, self);
self.parse(&mut stream.peekable(), scope, optimization_level)
}
@@ -678,7 +678,7 @@ impl Engine {
// Trims the JSON string and adds a '#' in front
let scripts = ["#", json.trim()];
- let stream = lex(&scripts, self.max_string_size, self.disable_tokens.as_ref());
+ let stream = lex(&scripts, self);
let ast =
self.parse_global_expr(&mut stream.peekable(), &scope, OptimizationLevel::None)?;
@@ -759,7 +759,7 @@ impl Engine {
script: &str,
) -> Result<AST, ParseError> {
let scripts = [script];
- let stream = lex(&scripts, self.max_string_size, self.disable_tokens.as_ref());
+ let stream = lex(&scripts, self);
{
let mut peekable = stream.peekable();
self.parse_global_expr(&mut peekable, scope, self.optimization_level)
@@ -914,7 +914,7 @@ impl Engine {
script: &str,
) -> Result<T, Box<EvalAltResult>> {
let scripts = [script];
- let stream = lex(&scripts, self.max_string_size, self.disable_tokens.as_ref());
+ let stream = lex(&scripts, self);
// No need to optimize a lone expression
let ast = self.parse_global_expr(&mut stream.peekable(), scope, OptimizationLevel::None)?;
@@ -1047,7 +1047,7 @@ impl Engine {
script: &str,
) -> Result<(), Box<EvalAltResult>> {
let scripts = [script];
- let stream = lex(&scripts, self.max_string_size, self.disable_tokens.as_ref());
+ let stream = lex(&scripts, self);
let ast = self.parse(&mut stream.peekable(), scope, self.optimization_level)?;
self.consume_ast_with_scope(scope, &ast)
}
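All five call sites change the same way: instead of passing individual limits and the disabled-token set, they hand `lex` a reference to the whole engine. A minimal sketch of the refactor pattern, with simplified stand-in types (not the actual rhai definitions):

use std::collections::{HashMap, HashSet};

// Stand-ins for the real types, just to show the signature change.
struct Engine {
    max_string_size: usize,
    disabled_symbols: Option<HashSet<String>>,
    custom_keywords: Option<HashMap<String, u8>>,
}

struct TokenIterator<'a, 'e> {
    engine: &'e Engine, // one reference replaces several loose parameters
    input: &'a [&'a str],
}

// Before: lex(input, max_string_size, disabled_tokens)
// After: the engine travels with the token stream, so adding a setting
// (like custom_keywords) no longer changes this signature.
fn lex<'a, 'e>(input: &'a [&'a str], engine: &'e Engine) -> TokenIterator<'a, 'e> {
    TokenIterator { engine, input }
}

fn main() {
    let engine = Engine {
        max_string_size: 0,
        disabled_symbols: None,
        custom_keywords: None,
    };
    let scripts = ["let x = 42;"];
    let stream = lex(&scripts, &engine);
    assert_eq!(stream.input.len(), 1);
    assert_eq!(stream.engine.max_string_size, 0);
}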

View File

@@ -268,8 +268,10 @@ pub struct Engine {
/// A hashmap mapping type names to pretty-print names.
pub(crate) type_names: Option<HashMap<String, String>>,
- /// A hash-set containing tokens to disable.
- pub(crate) disable_tokens: Option<HashSet<String>>,
+ /// A hash set containing symbols to disable.
+ pub(crate) disabled_symbols: Option<HashSet<String>>,
+ /// A hash map mapping custom keywords to their precedence.
+ pub(crate) custom_keywords: Option<HashMap<String, u8>>,
/// Callback closure for implementing the `print` command.
pub(crate) print: Callback<str, ()>,
@@ -317,7 +319,8 @@ impl Default for Engine {
module_resolver: None,
type_names: None,
- disable_tokens: None,
+ disabled_symbols: None,
+ custom_keywords: None,
// default print/debug implementations
print: Box::new(default_print),
@@ -497,7 +500,8 @@ impl Engine {
module_resolver: None,
type_names: None,
- disable_tokens: None,
+ disabled_symbols: None,
+ custom_keywords: None,
print: Box::new(|_| {}),
debug: Box::new(|_| {}),

View File

@@ -332,36 +332,26 @@ pub enum ReturnType {
Exception,
}
- #[derive(Debug, Clone, Eq, PartialEq, Hash, Default)]
- struct ParseState {
+ #[derive(Clone)]
+ struct ParseState<'e> {
+ /// Reference to the scripting `Engine`.
+ engine: &'e Engine,
/// Encapsulates a local stack with variable names to simulate an actual runtime scope.
- pub stack: Vec<(String, ScopeEntryType)>,
+ stack: Vec<(String, ScopeEntryType)>,
/// Encapsulates a local stack with imported module names.
- pub modules: Vec<String>,
+ modules: Vec<String>,
/// Maximum levels of expression nesting.
- pub max_expr_depth: usize,
- /// Maximum length of a string.
- pub max_string_size: usize,
- /// Maximum length of an array.
- pub max_array_size: usize,
- /// Maximum number of properties in a map.
- pub max_map_size: usize,
+ max_expr_depth: usize,
}
- impl ParseState {
+ impl<'e> ParseState<'e> {
/// Create a new `ParseState`.
- pub fn new(
- max_expr_depth: usize,
- max_string_size: usize,
- max_array_size: usize,
- max_map_size: usize,
- ) -> Self {
+ pub fn new(engine: &'e Engine, max_expr_depth: usize) -> Self {
Self {
+ engine,
max_expr_depth,
- max_string_size,
- max_array_size,
- max_map_size,
- ..Default::default()
+ stack: Default::default(),
+ modules: Default::default(),
}
}
/// Find a variable by name in the `ParseState`, searching in reverse.
@@ -1206,10 +1196,10 @@ fn parse_array_literal(
let mut arr = StaticVec::new();
while !input.peek().unwrap().0.is_eof() {
- if state.max_array_size > 0 && arr.len() >= state.max_array_size {
+ if state.engine.max_array_size > 0 && arr.len() >= state.engine.max_array_size {
return Err(PERR::LiteralTooLarge(
"Size of array literal".to_string(),
- state.max_array_size,
+ state.engine.max_array_size,
)
.into_err(input.peek().unwrap().1));
}
@@ -1306,10 +1296,10 @@ fn parse_map_literal(
}
};
- if state.max_map_size > 0 && map.len() >= state.max_map_size {
+ if state.engine.max_map_size > 0 && map.len() >= state.engine.max_map_size {
return Err(PERR::LiteralTooLarge(
"Number of properties in object map literal".to_string(),
- state.max_map_size,
+ state.engine.max_map_size,
)
.into_err(input.peek().unwrap().1));
}
@@ -1866,7 +1856,8 @@ fn parse_binary_op(
loop {
let (current_op, _) = input.peek().unwrap();
- let precedence = current_op.precedence();
+ let custom = state.engine.custom_keywords.as_ref();
+ let precedence = current_op.precedence(custom);
let bind_right = current_op.is_bind_right();
// Bind left to the parent lhs expression if precedence is higher
@@ -1879,7 +1870,7 @@ fn parse_binary_op(
let rhs = parse_unary(input, state, settings)?;
- let next_precedence = input.peek().unwrap().0.precedence();
+ let next_precedence = input.peek().unwrap().0.precedence(custom);
// Bind to right if the next operator has higher precedence
// If same precedence, then check if the operator binds right
@@ -1949,6 +1940,19 @@ fn parse_binary_op(
make_dot_expr(current_lhs, rhs, pos)?
}
+ Token::Custom(s)
+ if state
+ .engine
+ .custom_keywords
+ .as_ref()
+ .map(|c| c.contains_key(&s))
+ .unwrap_or(false) =>
+ {
+ // Accept non-native functions for custom operators
+ let op = (op.0, false, op.2);
+ Expr::FnCall(Box::new((op, None, hash, args, None)))
+ }
op_token => return Err(PERR::UnknownOperator(op_token.into()).into_err(pos)),
};
}
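Two things happen here: `precedence()` now takes the engine's custom-keyword map, and a `Token::Custom` operator compiles to a plain (non-native) function call, which is why `register_fn` supplies its behavior. A compact sketch of precedence climbing with a custom-operator table, simplified to an evaluator over pre-tokenized input (the real rhai parser builds an AST and also handles right-binding operators):

use std::collections::HashMap;

struct Parser<'a> {
    nums: &'a [i64],    // operands, pre-tokenized for brevity
    ops: &'a [&'a str], // operators between them
    n: usize,
    o: usize,
    custom: &'a HashMap<String, u8>,
}

impl<'a> Parser<'a> {
    fn prec(&self, op: &str) -> u8 {
        match op {
            "+" | "-" => 130,
            "*" | "/" => 160,
            // Unknown operators fall back to the custom table.
            _ => *self.custom.get(op).unwrap_or(&0),
        }
    }

    fn apply(&self, op: &str, x: i64, y: i64) -> i64 {
        match op {
            "+" => x + y,
            "-" => x - y,
            "*" => x * y,
            "/" => x / y,
            // A custom operator is just a registered binary function.
            "foo" => (x * y) - (x + y),
            _ => unreachable!(),
        }
    }

    // Classic precedence climbing: bind right while the next operator
    // has strictly higher precedence (left-associative throughout).
    fn expr(&mut self, min_prec: u8) -> i64 {
        let mut lhs = self.nums[self.n];
        self.n += 1;
        while self.o < self.ops.len() && self.prec(self.ops[self.o]) >= min_prec {
            let op = self.ops[self.o];
            self.o += 1;
            let rhs = self.expr(self.prec(op) + 1);
            lhs = self.apply(op, lhs, rhs);
        }
        lhs
    }
}

fn main() {
    let mut custom = HashMap::new();
    custom.insert("foo".to_string(), 140); // between +/- and */

    // "1 + 2 * 3 foo 4 - 5 / 6"
    let mut p = Parser {
        nums: &[1, 2, 3, 4, 5, 6],
        ops: &["+", "*", "foo", "-", "/"],
        n: 0,
        o: 0,
        custom: &custom,
    };
    // 1 + ((2 * 3) foo 4) - (5 / 6) == 1 + 14 - 0 == 15
    assert_eq!(p.expr(0), 15);
}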
@@ -2467,7 +2471,7 @@ fn parse_fn(
settings.ensure_level_within_max_limit(state.max_expr_depth)?;
let name = match input.next().unwrap() {
- (Token::Identifier(s), _) => s,
+ (Token::Identifier(s), _) | (Token::Custom(s), _) => s,
(_, pos) => return Err(PERR::FnMissingName.into_err(pos)),
};
@@ -2555,12 +2559,7 @@ impl Engine {
scope: &Scope,
optimization_level: OptimizationLevel,
) -> Result<AST, ParseError> {
- let mut state = ParseState::new(
- self.max_expr_depth,
- self.max_string_size,
- self.max_array_size,
- self.max_map_size,
- );
+ let mut state = ParseState::new(self, self.max_expr_depth);
let settings = ParseSettings {
allow_if_expr: false,
allow_stmt_expr: false,
@@ -2596,12 +2595,7 @@ impl Engine {
) -> Result<(Vec<Stmt>, Vec<ScriptFnDef>), ParseError> {
let mut statements = Vec::<Stmt>::new();
let mut functions = HashMap::<u64, ScriptFnDef, _>::with_hasher(StraightHasherBuilder);
- let mut state = ParseState::new(
- self.max_expr_depth,
- self.max_string_size,
- self.max_array_size,
- self.max_map_size,
- );
+ let mut state = ParseState::new(self, self.max_expr_depth);
while !input.peek().unwrap().0.is_eof() {
// Collect all the function definitions
@@ -2615,12 +2609,7 @@ impl Engine {
match input.peek().unwrap() {
(Token::Fn, pos) => {
- let mut state = ParseState::new(
- self.max_function_expr_depth,
- self.max_string_size,
- self.max_array_size,
- self.max_map_size,
- );
+ let mut state = ParseState::new(self, self.max_function_expr_depth);
let settings = ParseSettings {
allow_if_expr: true,
allow_stmt_expr: true,
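`ParseState` slims down to a borrow of the engine: the per-limit fields disappear and limits are read through `state.engine.*`. A side effect is that the struct can no longer derive `Default` (a `&'e Engine` field has no default value), so the `..Default::default()` shorthand gives way to explicit field initialization. A simplified stand-in showing why:

struct Engine {
    max_expr_depth: usize,
    max_array_size: usize,
}

struct ParseState<'e> {
    engine: &'e Engine, // borrowing the engine rules out #[derive(Default)]
    stack: Vec<(String, bool)>, // simplified entry type
    modules: Vec<String>,
    max_expr_depth: usize,
}

impl<'e> ParseState<'e> {
    fn new(engine: &'e Engine, max_expr_depth: usize) -> Self {
        Self {
            engine,
            max_expr_depth,
            // Explicit per-field defaults replace `..Default::default()`.
            stack: Default::default(),
            modules: Default::default(),
        }
    }
}

fn main() {
    let engine = Engine { max_expr_depth: 64, max_array_size: 0 };
    let state = ParseState::new(&engine, engine.max_expr_depth);
    // Limits are now read through the borrowed engine:
    assert_eq!(state.engine.max_array_size, 0);
    assert_eq!(state.max_expr_depth, 64);
}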

View File

@@ -2,6 +2,7 @@ use crate::engine::Engine;
use crate::module::ModuleResolver;
use crate::optimize::OptimizationLevel;
use crate::packages::PackageLibrary;
+ use crate::token::is_valid_identifier;
impl Engine {
/// Load a new package into the `Engine`.
@@ -194,10 +195,60 @@ impl Engine {
/// # }
/// ```
pub fn disable_symbol(&mut self, symbol: &str) {
- if self.disable_tokens.is_none() {
- self.disable_tokens = Some(Default::default());
+ if self.disabled_symbols.is_none() {
+ self.disabled_symbols = Some(Default::default());
}
- self.disable_tokens.as_mut().unwrap().insert(symbol.into());
+ self.disabled_symbols
+ .as_mut()
+ .unwrap()
+ .insert(symbol.into());
}
+ /// Register a custom operator into the language.
+ ///
+ /// The operator must be a valid identifier (i.e. it cannot be a symbol).
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// # fn main() -> Result<(), Box<rhai::EvalAltResult>> {
+ /// use rhai::{Engine, RegisterFn};
+ ///
+ /// let mut engine = Engine::new();
+ ///
+ /// // Register a custom operator called 'foo' and give it
+ /// // a precedence of 140 (i.e. between +|- and *|/).
+ /// engine.register_custom_operator("foo", 140).unwrap();
+ ///
+ /// // Register a binary function named 'foo'
+ /// engine.register_fn("foo", |x: i64, y: i64| (x * y) - (x + y));
+ ///
+ /// assert_eq!(
+ /// engine.eval_expression::<i64>("1 + 2 * 3 foo 4 - 5 / 6")?,
+ /// 15
+ /// );
+ /// # Ok(())
+ /// # }
+ /// ```
+ pub fn register_custom_operator(
+ &mut self,
+ keyword: &str,
+ precedence: u8,
+ ) -> Result<(), String> {
+ if !is_valid_identifier(keyword.chars()) {
+ return Err(format!("not a valid identifier: '{}'", keyword).into());
+ }
+ if self.custom_keywords.is_none() {
+ self.custom_keywords = Some(Default::default());
+ }
+ self.custom_keywords
+ .as_mut()
+ .unwrap()
+ .insert(keyword.into(), precedence);
+ Ok(())
+ }
}
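To see why the doc-test yields 15: with `foo` at precedence 140, between `+`/`-` (130) and `*`/`/` (160), the expression groups as `1 + ((2 * 3) foo 4) - (5 / 6)`; `foo(6, 4)` is `(6 * 4) - (6 + 4) = 14`, integer division gives `5 / 6 = 0`, so the whole thing is `1 + 14 - 0 = 15`. The identifier check also means symbol names are rejected up front, sketched here against the API added above:

use rhai::Engine;

fn main() {
    let mut engine = Engine::new();

    // Identifier-shaped names are accepted...
    assert!(engine.register_custom_operator("foo", 140).is_ok());

    // ...but symbols are not: the tokenizer already owns them.
    assert!(engine.register_custom_operator("+", 140).is_err());
}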

View File

@@ -1,5 +1,6 @@
//! Main module defining the lexer and parser.
+ use crate::engine::Engine;
use crate::error::LexError;
use crate::parser::INT;
use crate::utils::StaticVec;
@@ -11,7 +12,7 @@ use crate::stdlib::{
borrow::Cow,
boxed::Box,
char,
- collections::HashSet,
+ collections::{HashMap, HashSet},
fmt,
iter::Peekable,
str::{Chars, FromStr},
@@ -212,6 +213,7 @@ pub enum Token {
As,
LexError(Box<LexError>),
Comment(String),
+ Custom(String),
EOF,
}
@@ -224,12 +226,13 @@ impl Token {
IntegerConstant(i) => i.to_string().into(),
#[cfg(not(feature = "no_float"))]
FloatConstant(f) => f.to_string().into(),
- Identifier(s) => s.clone().into(),
+ StringConstant(_) => "string".into(),
CharConstant(c) => c.to_string().into(),
+ Identifier(s) => s.clone().into(),
+ Custom(s) => s.clone().into(),
LexError(err) => err.to_string().into(),
token => match token {
- StringConstant(_) => "string",
LeftBrace => "{",
RightBrace => "}",
LeftParen => "(",
@@ -324,9 +327,9 @@ impl Token {
match self {
LexError(_) |
- LeftBrace | // (+expr) - is unary
+ LeftBrace | // {+expr} - is unary
// RightBrace | {expr} - expr not unary & is closing
- LeftParen | // {-expr} - is unary
+ LeftParen | // (-expr) - is unary
// RightParen | (expr) - expr not unary & is closing
LeftBracket | // [-expr] - is unary
// RightBracket | [expr] - expr not unary & is closing
@@ -371,14 +374,14 @@ impl Token {
Throw |
PowerOf |
In |
- PowerOfAssign => true,
+ PowerOfAssign => true,
_ => false,
}
}
/// Get the precedence number of the token.
- pub fn precedence(&self) -> u8 {
+ pub fn precedence(&self, custom: Option<&HashMap<String, u8>>) -> u8 {
use Token::*;
match self {
@@ -387,24 +390,27 @@ impl Token {
| RightShiftAssign | AndAssign | OrAssign | XOrAssign | ModuloAssign
| PowerOfAssign => 0,
- Or | XOr | Pipe => 40,
+ Or | XOr | Pipe => 30,
- And | Ampersand => 50,
+ And | Ampersand => 60,
LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo | EqualsTo
- | NotEqualsTo => 60,
+ | NotEqualsTo => 90,
- In => 70,
+ In => 110,
- Plus | Minus => 80,
+ Plus | Minus => 130,
- Divide | Multiply | PowerOf => 90,
+ Divide | Multiply | PowerOf => 160,
- LeftShift | RightShift => 100,
+ LeftShift | RightShift => 190,
- Modulo => 110,
+ Modulo => 210,
- Period => 120,
+ Period => 240,
+ // Custom operators
+ Custom(s) => custom.map_or(0, |c| *c.get(s).unwrap()),
_ => 0,
}
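The built-in levels move to a sparser scale so a custom operator can slot between any two neighbors (the example precedence 140 lands between `Plus | Minus` at 130 and `Divide | Multiply | PowerOf` at 160). Old and new values side by side:

    Or | XOr | Pipe                  40 →  30
    And | Ampersand                  50 →  60
    comparisons (< <= > >= == !=)    60 →  90
    In                               70 → 110
    Plus | Minus                     80 → 130
    Divide | Multiply | PowerOf      90 → 160
    LeftShift | RightShift          100 → 190
    Modulo                          110 → 210
    Period                          120 → 240

Note that the `Custom(s)` arm's inner `unwrap()` is only reachable for keywords the tokenizer already matched against the same map (see the `TokenIterator` change below), which is what keeps it from panicking.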
@@ -1211,9 +1217,9 @@ impl InputStream for MultiInputsStream<'_> {
}
/// An iterator on a `Token` stream.
- pub struct TokenIterator<'a, 't> {
- /// Disable certain tokens.
- pub disable_tokens: Option<&'t HashSet<String>>,
+ pub struct TokenIterator<'a, 'e> {
+ /// Reference to the scripting `Engine`.
+ engine: &'e Engine,
/// Current state.
state: TokenizeState,
/// Current position.
@@ -1226,15 +1232,15 @@ impl<'a> Iterator for TokenIterator<'a, '_> {
type Item = (Token, Position);
fn next(&mut self) -> Option<Self::Item> {
- match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
- None => None,
- r @ Some(_) if self.disable_tokens.is_none() => r,
- Some((token, pos))
- if token.is_operator()
- && self
- .disable_tokens
- .unwrap()
- .contains(token.syntax().as_ref()) =>
+ match (
+ get_next_token(&mut self.stream, &mut self.state, &mut self.pos),
+ self.engine.disabled_symbols.as_ref(),
+ self.engine.custom_keywords.as_ref(),
+ ) {
+ (None, _, _) => None,
+ (r @ Some(_), None, None) => r,
+ (Some((token, pos)), Some(disabled), _)
+ if token.is_operator() && disabled.contains(token.syntax().as_ref()) =>
{
// Convert disallowed operators into lex errors
Some((
@@ -1242,31 +1248,27 @@ impl<'a> Iterator for TokenIterator<'a, '_> {
pos,
))
}
- Some((token, pos))
- if token.is_keyword()
- && self
- .disable_tokens
- .unwrap()
- .contains(token.syntax().as_ref()) =>
+ (Some((token, pos)), Some(disabled), _)
+ if token.is_keyword() && disabled.contains(token.syntax().as_ref()) =>
{
// Convert disallowed keywords into identifiers
Some((Token::Identifier(token.syntax().into()), pos))
}
- r => r,
+ (Some((Token::Identifier(s), pos)), _, Some(custom)) if custom.contains_key(&s) => {
+ // Convert custom keywords
+ Some((Token::Custom(s), pos))
+ }
+ (r, _, _) => r,
}
}
}
/// Tokenize an input text stream.
- pub fn lex<'a, 't>(
- input: &'a [&'a str],
- max_string_size: usize,
- disable_tokens: Option<&'t HashSet<String>>,
- ) -> TokenIterator<'a, 't> {
+ pub fn lex<'a, 'e>(input: &'a [&'a str], engine: &'e Engine) -> TokenIterator<'a, 'e> {
TokenIterator {
- disable_tokens,
+ engine,
state: TokenizeState {
- max_string_size,
+ max_string_size: engine.max_string_size,
non_unary: false,
comment_level: 0,
end_with_none: false,
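Taken together, token filtering now happens in one match over the token plus the engine's two optional tables: disabled operators turn into lex errors, disabled keywords degrade to plain identifiers, and identifiers found in `custom_keywords` are promoted to `Token::Custom`. A self-contained sketch of that dispatch, with a simplified token type (not the real rhai enum):

use std::collections::{HashMap, HashSet};

#[derive(Debug, PartialEq)]
enum Token {
    Identifier(String),
    Operator(String),
    Keyword(String),
    Custom(String),
    LexError(String),
}

// Sketch of the filtering TokenIterator::next now performs.
fn filter(
    token: Token,
    disabled: Option<&HashSet<String>>,
    custom: Option<&HashMap<String, u8>>,
) -> Token {
    match (token, disabled, custom) {
        // Disabled operators become lex errors: scripts cannot use them at all.
        (Token::Operator(s), Some(d), _) if d.contains(&s) => {
            Token::LexError(format!("'{}' is a disabled symbol", s))
        }
        // Disabled keywords degrade to ordinary identifiers.
        (Token::Keyword(s), Some(d), _) if d.contains(&s) => Token::Identifier(s),
        // Identifiers registered as custom keywords are promoted.
        (Token::Identifier(s), _, Some(c)) if c.contains_key(&s) => Token::Custom(s),
        (t, _, _) => t,
    }
}

fn main() {
    let mut custom = HashMap::new();
    custom.insert("foo".to_string(), 140);
    assert_eq!(
        filter(Token::Identifier("foo".into()), None, Some(&custom)),
        Token::Custom("foo".into())
    );

    let disabled: HashSet<String> = ["while".to_string()].into_iter().collect();
    assert_eq!(
        filter(Token::Keyword("while".into()), Some(&disabled), None),
        Token::Identifier("while".into())
    );
}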