Merge branch 'master' into plugins

This commit is contained in:
Stephen Chung 2020-07-29 14:17:40 +08:00
commit 4510d3a659
11 changed files with 192 additions and 108 deletions

View File

@ -29,6 +29,7 @@ jobs:
- "--features no_object"
- "--features no_function"
- "--features no_module"
- "--features unicode-xid-ident"
toolchain: [stable]
experimental: [false]
include:

View File

@ -14,7 +14,7 @@ include = [
"Cargo.toml"
]
keywords = [ "scripting" ]
categories = [ "no-std", "embedded", "parser-implementations" ]
categories = [ "no-std", "embedded", "wasm", "parser-implementations" ]
[dependencies]
num-traits = { version = "0.2.11", default-features = false }
@ -34,6 +34,7 @@ no_object = [] # no custom objects
no_function = [] # no script-defined functions
no_module = [] # no modules
internals = [] # expose internal data structures
unicode-xid-ident = ["unicode-xid"] # allow Unicode Standard Annex #31 for identifiers.
# compiling for no-std
no_std = [ "num-traits/libm", "hashbrown", "core-error", "libm", "ahash" ]
@ -73,6 +74,11 @@ default_features = false
features = ["derive", "alloc"]
optional = true
[dependencies.unicode-xid]
version = "0.2.1"
default_features = false
optional = true
[target.'cfg(target_arch = "wasm32")'.dependencies]
instant= { version = "0.1.4", features = ["wasm-bindgen"] } # WASM implementation of std::time::Instant

View File

@ -20,6 +20,7 @@ New features
* Custom syntax now works even without the `internals` feature.
* Currying of function pointers is supported via the new `curry` keyword.
* `Module::set_indexer_get_set_fn` is added as a shorthand of both `Module::set_indexer_get_fn` and `Module::set_indexer_set_fn`.
* New `unicode-xid-ident` feature to allow [Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/) for identifiers.
Breaking changes
----------------

View File

@ -21,6 +21,11 @@ Variable names are case _sensitive_.
Variable names also cannot be the same as a [keyword].
### Unicode Standard Annex #31 Identifiers
The [`unicode-xid-ident`] feature expands the allowed characters for variable names to the set defined by
[Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/).
Declare a Variable
------------------

View File

@ -12,6 +12,7 @@
[`no_std`]: {{rootUrl}}/start/features.md
[`no-std`]: {{rootUrl}}/start/features.md
[`internals`]: {{rootUrl}}/start/features.md
[`unicode-xid-ident`]: {{rootUrl}}/start/features.md
[minimal builds]: {{rootUrl}}/start/builds/minimal.md
[WASM]: {{rootUrl}}/start/builds/wasm.md

View File

@ -11,21 +11,22 @@ Notice that this deviates from Rust norm where features are _additive_.
Excluding unneeded functionalities can result in smaller, faster builds as well as
more control over what a script can (or cannot) do.
| Feature | Description |
| ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `unchecked` | Disable arithmetic checking (such as over-flows and division by zero), call stack depth limit, operations count limit and modules loading limit.<br/>Beware that a bad script may panic the entire system! |
| `sync` | Restrict all values types to those that are `Send + Sync`. Under this feature, all Rhai types, including [`Engine`], [`Scope`] and [`AST`], are all `Send + Sync`. |
| `no_optimize` | Disable [script optimization]. |
| `no_float` | Disable floating-point numbers and math. |
| `only_i32` | Set the system integer type to `i32` and disable all other integer types. `INT` is set to `i32`. |
| `only_i64` | Set the system integer type to `i64` and disable all other integer types. `INT` is set to `i64`. |
| `no_index` | Disable [arrays] and indexing features. |
| `no_object` | Disable support for [custom types] and [object maps]. |
| `no_function` | Disable script-defined [functions]. |
| `no_module` | Disable loading external [modules]. |
| `no_std` | Build for `no-std`. Notice that additional dependencies will be pulled in to replace `std` features. |
| `serde` | Enable serialization/deserialization via `serde`. Notice that the [`serde`](https://crates.io/crates/serde) crate will be pulled in together with its dependencies. |
| `internals` | Expose internal data structures (e.g. [`AST`] nodes). Beware that Rhai internals are volatile and may change from version to version. |
| Feature | Description |
| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `unchecked` | Disable arithmetic checking (such as over-flows and division by zero), call stack depth limit, operations count limit and modules loading limit.<br/>Beware that a bad script may panic the entire system! |
| `sync` | Restrict all values types to those that are `Send + Sync`. Under this feature, all Rhai types, including [`Engine`], [`Scope`] and [`AST`], are all `Send + Sync`. |
| `no_optimize` | Disable [script optimization]. |
| `no_float` | Disable floating-point numbers and math. |
| `only_i32` | Set the system integer type to `i32` and disable all other integer types. `INT` is set to `i32`. |
| `only_i64` | Set the system integer type to `i64` and disable all other integer types. `INT` is set to `i64`. |
| `no_index` | Disable [arrays] and indexing features. |
| `no_object` | Disable support for [custom types] and [object maps]. |
| `no_function` | Disable script-defined [functions]. |
| `no_module` | Disable loading external [modules]. |
| `no_std` | Build for `no-std`. Notice that additional dependencies will be pulled in to replace `std` features. |
| `serde` | Enable serialization/deserialization via `serde`. Notice that the [`serde`](https://crates.io/crates/serde) crate will be pulled in together with its dependencies. |
| `internals` | Expose internal data structures (e.g. [`AST`] nodes). Beware that Rhai internals are volatile and may change from version to version. |
| `unicode-xid-ident` | Allow identifiers that follow [Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/). |
Example

View File

@ -931,8 +931,8 @@ pub fn run_builtin_binary_op(
}
if args_type == TypeId::of::<INT>() {
let x = *x.downcast_ref::<INT>().unwrap();
let y = *y.downcast_ref::<INT>().unwrap();
let x = x.clone().cast::<INT>();
let y = y.clone().cast::<INT>();
#[cfg(not(feature = "unchecked"))]
match op {
@ -973,8 +973,8 @@ pub fn run_builtin_binary_op(
_ => (),
}
} else if args_type == TypeId::of::<bool>() {
let x = *x.downcast_ref::<bool>().unwrap();
let y = *y.downcast_ref::<bool>().unwrap();
let x = x.clone().cast::<bool>();
let y = y.clone().cast::<bool>();
match op {
"&" => return Ok(Some((x && y).into())),
@ -999,8 +999,8 @@ pub fn run_builtin_binary_op(
_ => (),
}
} else if args_type == TypeId::of::<char>() {
let x = *x.downcast_ref::<char>().unwrap();
let y = *y.downcast_ref::<char>().unwrap();
let x = x.clone().cast::<char>();
let y = y.clone().cast::<char>();
match op {
"==" => return Ok(Some((x == y).into())),
@ -1021,8 +1021,8 @@ pub fn run_builtin_binary_op(
#[cfg(not(feature = "no_float"))]
if args_type == TypeId::of::<FLOAT>() {
let x = *x.downcast_ref::<FLOAT>().unwrap();
let y = *y.downcast_ref::<FLOAT>().unwrap();
let x = x.clone().cast::<FLOAT>();
let y = y.clone().cast::<FLOAT>();
match op {
"+" => return Ok(Some((x + y).into())),
@ -1060,7 +1060,7 @@ pub fn run_builtin_op_assignment(
if args_type == TypeId::of::<INT>() {
let x = x.downcast_mut::<INT>().unwrap();
let y = *y.downcast_ref::<INT>().unwrap();
let y = y.clone().cast::<INT>();
#[cfg(not(feature = "unchecked"))]
match op {
@ -1096,7 +1096,7 @@ pub fn run_builtin_op_assignment(
}
} else if args_type == TypeId::of::<bool>() {
let x = x.downcast_mut::<bool>().unwrap();
let y = *y.downcast_ref::<bool>().unwrap();
let y = y.clone().cast::<bool>();
match op {
"&=" => return Ok(Some(*x = *x && y)),
@ -1116,7 +1116,7 @@ pub fn run_builtin_op_assignment(
#[cfg(not(feature = "no_float"))]
if args_type == TypeId::of::<FLOAT>() {
let x = x.downcast_mut::<FLOAT>().unwrap();
let y = *y.downcast_ref::<FLOAT>().unwrap();
let y = y.clone().cast::<FLOAT>();
match op {
"+=" => return Ok(Some(*x += y)),

View File

@ -300,11 +300,9 @@ impl CallableFunction {
/// Get the access mode.
///
/// Only script-defined functions carry their own access mode; every other
/// variant is reported as `FnAccess::Public`.
pub fn access(&self) -> FnAccess {
    match self {
        Self::Plugin(_) => FnAccess::Public,
        Self::Pure(_) | Self::Method(_) | Self::Iterator(_) => FnAccess::Public,
        Self::Script(f) => f.access,
    }
}
/// Get a reference to a native Rust function.

View File

@ -15,7 +15,7 @@ use crate::stdlib::{
any::TypeId,
boxed::Box,
fmt::Display,
format,
format, mem,
string::{String, ToString},
vec::Vec,
};
@ -242,7 +242,7 @@ def_package!(crate:MoreStringPackage:"Additional string utilities, including str
}
if len > 0 {
let ch = *args[2].downcast_ref::< char>().unwrap();
let ch = mem::take(args[2]).cast::<char>();
let s = args[0].downcast_mut::<ImmutableString>().unwrap();
let orig_len = s.chars().count();

View File

@ -30,8 +30,11 @@ pub type TokenStream<'a, 't> = Peekable<TokenIterator<'a, 't>>;
/// A location (line number + character position) in the input script.
///
/// In order to keep footprint small, both line number and character position have 16-bit unsigned resolution,
/// meaning they go up to a maximum of 65,535 lines and characters per line.
/// # Limitations
///
/// In order to keep footprint small, both line number and character position have 16-bit resolution,
/// meaning they go up to a maximum of 65,535 lines and 65,535 characters per line.
///
/// Advancing beyond the maximum line length or maximum number of lines is not an error but has no effect.
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
pub struct Position {
@ -43,6 +46,13 @@ pub struct Position {
impl Position {
/// Create a new `Position`.
///
/// `line` must not be zero.
/// If `position` is zero, then it is at the beginning of a line.
///
/// # Panics
///
/// Panics if `line` is zero.
pub fn new(line: u16, position: u16) -> Self {
assert!(line != 0, "line cannot be zero");
@ -52,7 +62,7 @@ impl Position {
}
}
/// Get the line number (1-based), or `None` if no position.
/// Get the line number (1-based), or `None` if there is no position.
pub fn line(&self) -> Option<usize> {
if self.is_none() {
None
@ -85,7 +95,6 @@ impl Position {
/// # Panics
///
/// Panics if already at beginning of a line - cannot rewind to a previous line.
///
pub(crate) fn rewind(&mut self) {
assert!(!self.is_none(), "cannot rewind Position::none");
assert!(self.pos > 0, "cannot rewind at position 0");
@ -104,7 +113,7 @@ impl Position {
}
/// Create a `Position` representing no position.
pub(crate) fn none() -> Self {
pub fn none() -> Self {
Self { line: 0, pos: 0 }
}
@ -146,9 +155,9 @@ impl fmt::Debug for Position {
pub enum Token {
/// An `INT` constant.
IntegerConstant(INT),
/// A `FLOAT` constaint.
/// A `FLOAT` constant.
///
/// Never appears under the `no_float` feature.
/// Reserved under the `no_float` feature.
#[cfg(not(feature = "no_float"))]
FloatConstant(FLOAT),
/// An identifier.
@ -249,7 +258,7 @@ pub enum Token {
And,
/// `fn`
///
/// Never appears under the `no_function` feature.
/// Reserved under the `no_function` feature.
#[cfg(not(feature = "no_function"))]
Fn,
/// `continue`
@ -284,22 +293,22 @@ pub enum Token {
PowerOfAssign,
/// `private`
///
/// Never appears under the `no_function` feature.
/// Reserved under the `no_function` feature.
#[cfg(not(feature = "no_function"))]
Private,
/// `import`
///
/// Never appears under the `no_module` feature.
/// Reserved under the `no_module` feature.
#[cfg(not(feature = "no_module"))]
Import,
/// `export`
///
/// Never appears under the `no_module` feature.
/// Reserved under the `no_module` feature.
#[cfg(not(feature = "no_module"))]
Export,
/// `as`
///
/// Never appears under the `no_module` feature.
/// Reserved under the `no_module` feature.
#[cfg(not(feature = "no_module"))]
As,
/// A lexer error.
@ -643,7 +652,7 @@ impl Token {
}
}
/// Is this token a standard keyword?
/// Is this token an active standard keyword?
pub fn is_keyword(&self) -> bool {
use Token::*;
@ -670,7 +679,7 @@ impl Token {
}
/// Convert a token into a function name, if possible.
pub fn into_function_name(self) -> Result<String, Self> {
pub(crate) fn into_function_name(self) -> Result<String, Self> {
match self {
Self::Reserved(s) if is_keyword_function(&s) => Ok(s),
Self::Custom(s) | Self::Identifier(s) if is_valid_identifier(s.chars()) => Ok(s),
@ -726,32 +735,6 @@ pub trait InputStream {
fn peek_next(&mut self) -> Option<char>;
}
/// Check whether `name` is one of the keywords that may be used as a function name.
pub fn is_keyword_function(name: &str) -> bool {
    [
        KEYWORD_PRINT,
        KEYWORD_DEBUG,
        KEYWORD_TYPE_OF,
        KEYWORD_EVAL,
        KEYWORD_FN_PTR,
        KEYWORD_FN_PTR_CALL,
        KEYWORD_FN_PTR_CURRY,
    ]
    .contains(&name)
}
/// Check whether a sequence of characters forms a valid identifier.
///
/// Underscores are allowed anywhere. Apart from underscores, the first character must be
/// an ASCII letter and every later character must be ASCII alphanumeric. A name without
/// any letter (including the empty name) is not valid.
pub fn is_valid_identifier(name: impl Iterator<Item = char>) -> bool {
    let mut seen_letter = false;

    for c in name {
        if c == '_' {
            continue;
        }
        if c.is_ascii_alphabetic() {
            seen_letter = true;
            continue;
        }
        // Anything else must be a digit, and only after a letter has been seen.
        if !seen_letter || !c.is_ascii_alphanumeric() {
            return false;
        }
    }

    seen_letter
}
/// [INTERNALS] Parse a string literal wrapped by `enclosing_char`.
/// Exported under the `internals` feature only.
///
@ -1098,35 +1081,7 @@ fn get_next_token_inner(
// letter or underscore ...
('A'..='Z', _) | ('a'..='z', _) | ('_', _) => {
let mut result = Vec::new();
result.push(c);
while let Some(next_char) = stream.peek_next() {
match next_char {
x if x.is_ascii_alphanumeric() || x == '_' => {
result.push(x);
eat_next(stream, pos);
}
_ => break,
}
}
let is_valid_identifier = is_valid_identifier(result.iter().cloned());
let identifier: String = result.into_iter().collect();
if !is_valid_identifier {
return Some((
Token::LexError(Box::new(LERR::MalformedIdentifier(identifier))),
start_pos,
));
}
return Some((
Token::lookup_from_syntax(&identifier)
.unwrap_or_else(|| Token::Identifier(identifier)),
start_pos,
));
return get_identifier(stream, pos, start_pos, c);
}
// " - string literal
@ -1149,7 +1104,7 @@ fn get_next_token_inner(
|err| (Token::LexError(Box::new(err.0)), err.1),
|result| {
let mut chars = result.chars();
let first = chars.next();
let first = chars.next().unwrap();
if chars.next().is_some() {
(
@ -1157,10 +1112,7 @@ fn get_next_token_inner(
start_pos,
)
} else {
(
Token::CharConstant(first.expect("should be Some")),
start_pos,
)
(Token::CharConstant(first), start_pos)
}
},
))
@ -1404,6 +1356,10 @@ fn get_next_token_inner(
('\0', _) => unreachable!(),
(ch, _) if ch.is_whitespace() => (),
#[cfg(feature = "unicode-xid-ident")]
(ch, _) if unicode_xid::UnicodeXID::is_xid_start(ch) => {
return get_identifier(stream, pos, start_pos, c);
}
(ch, _) => {
return Some((
Token::LexError(Box::new(LERR::UnexpectedInput(ch.to_string()))),
@ -1422,6 +1378,95 @@ fn get_next_token_inner(
}
}
/// Get the next identifier.
///
/// `first_char` is the identifier's first character, supplied by the caller. Further
/// characters are peeked from `stream` and consumed via `eat_next` for as long as
/// `is_id_continue` accepts them.
///
/// The collected text becomes a `LexError` token wrapping `MalformedIdentifier` when it fails
/// `is_valid_identifier`. Otherwise it becomes whatever `Token::lookup_from_syntax` yields,
/// falling back to `Token::Identifier`. The token is always paired with `start_pos`.
fn get_identifier(
    stream: &mut impl InputStream,
    pos: &mut Position,
    start_pos: Position,
    first_char: char,
) -> Option<(Token, Position)> {
    let mut identifier = String::new();
    identifier.push(first_char);

    while let Some(ch) = stream.peek_next() {
        if !is_id_continue(ch) {
            break;
        }
        identifier.push(ch);
        eat_next(stream, pos);
    }

    let token = if is_valid_identifier(identifier.chars()) {
        Token::lookup_from_syntax(&identifier).unwrap_or_else(|| Token::Identifier(identifier))
    } else {
        Token::LexError(Box::new(LERR::MalformedIdentifier(identifier)))
    };

    Some((token, start_pos))
}
/// Is this keyword allowed as a function?
///
/// Returns `true` when `name` equals one of the `KEYWORD_*` constants listed below.
#[inline(always)]
pub fn is_keyword_function(name: &str) -> bool {
    let callable_keywords = [
        KEYWORD_PRINT,
        KEYWORD_DEBUG,
        KEYWORD_TYPE_OF,
        KEYWORD_EVAL,
        KEYWORD_FN_PTR,
        KEYWORD_FN_PTR_CALL,
        KEYWORD_FN_PTR_CURRY,
    ];

    callable_keywords.iter().any(|&keyword| name == keyword)
}
pub fn is_valid_identifier(name: impl Iterator<Item = char>) -> bool {
let mut first_alphabetic = false;
for ch in name {
match ch {
'_' => (),
_ if is_id_first_alphabetic(ch) => first_alphabetic = true,
_ if !first_alphabetic => return false,
_ if char::is_ascii_alphanumeric(&ch) => (),
_ => return false,
}
}
first_alphabetic
}
/// Can `x` start an identifier? (`unicode-xid-ident` build.)
///
/// Delegates to `UnicodeXID::is_xid_start` from the `unicode_xid` crate.
#[cfg(feature = "unicode-xid-ident")]
#[inline(always)]
fn is_id_first_alphabetic(x: char) -> bool {
    use unicode_xid::UnicodeXID;

    UnicodeXID::is_xid_start(x)
}
/// Can `x` continue an identifier? (`unicode-xid-ident` build.)
///
/// Delegates to `UnicodeXID::is_xid_continue` from the `unicode_xid` crate.
#[cfg(feature = "unicode-xid-ident")]
#[inline(always)]
fn is_id_continue(x: char) -> bool {
    use unicode_xid::UnicodeXID;

    UnicodeXID::is_xid_continue(x)
}
/// Can `x` start an identifier? (ASCII-only build.)
///
/// Only the ASCII letters `a`-`z` and `A`-`Z` qualify.
#[cfg(not(feature = "unicode-xid-ident"))]
#[inline(always)]
fn is_id_first_alphabetic(x: char) -> bool {
    match x {
        'a'..='z' | 'A'..='Z' => true,
        _ => false,
    }
}
/// Can `x` continue an identifier? (ASCII-only build.)
///
/// ASCII letters, ASCII digits and the underscore qualify.
#[cfg(not(feature = "unicode-xid-ident"))]
#[inline(always)]
fn is_id_continue(x: char) -> bool {
    match x {
        'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => true,
        _ => false,
    }
}
/// A type that implements the `InputStream` trait.
/// Multiple character streams are joined together to form one single stream.
pub struct MultiInputsStream<'a> {

View File

@ -51,3 +51,29 @@ fn test_tokens_custom_operator() -> Result<(), Box<EvalAltResult>> {
Ok(())
}
/// A non-ASCII function name must evaluate under the `unicode-xid-ident` feature and be
/// rejected without it, while a name with no letter at all (`_1`) is rejected either way.
#[test]
fn test_tokens_unicode_xid_ident() -> Result<(), Box<EvalAltResult>> {
    let engine = Engine::new();

    // The function name must be a real non-ASCII identifier; with the name missing,
    // the script is a syntax error in every configuration.
    let result = engine.eval::<INT>(
        r"
            fn すべての答え() { 42 }
            すべての答え()
        ",
    );

    #[cfg(feature = "unicode-xid-ident")]
    assert_eq!(result?, 42);

    #[cfg(not(feature = "unicode-xid-ident"))]
    assert!(result.is_err());

    // An identifier made only of underscores and digits is never valid.
    let result = engine.eval::<INT>(
        r"
            fn _1() { 1 }
            _1()
        ",
    );
    assert!(result.is_err());

    Ok(())
}