Refine unicode-xid-ident feature writeup.

This commit is contained in:
Stephen Chung 2020-07-29 13:57:17 +08:00
parent eeb86f5efb
commit e2daba55d7
6 changed files with 37 additions and 31 deletions

View File

@ -14,7 +14,7 @@ include = [
"Cargo.toml" "Cargo.toml"
] ]
keywords = [ "scripting" ] keywords = [ "scripting" ]
categories = [ "no-std", "embedded", "parser-implementations" ] categories = [ "no-std", "embedded", "wasm", "parser-implementations" ]
[dependencies] [dependencies]
num-traits = { version = "0.2.11", default-features = false } num-traits = { version = "0.2.11", default-features = false }
@ -34,7 +34,7 @@ no_object = [] # no custom objects
no_function = [] # no script-defined functions no_function = [] # no script-defined functions
no_module = [] # no modules no_module = [] # no modules
internals = [] # expose internal data structures internals = [] # expose internal data structures
unicode-xid-ident = ["unicode-xid"] # allow unicode-xid for identifiers. unicode-xid-ident = ["unicode-xid"] # allow Unicode Standard Annex #31 for identifiers.
# compiling for no-std # compiling for no-std
no_std = [ "num-traits/libm", "hashbrown", "core-error", "libm", "ahash" ] no_std = [ "num-traits/libm", "hashbrown", "core-error", "libm", "ahash" ]

View File

@ -20,7 +20,7 @@ New features
* Custom syntax now works even without the `internals` feature. * Custom syntax now works even without the `internals` feature.
* Currying of function pointers is supported via the new `curry` keyword. * Currying of function pointers is supported via the new `curry` keyword.
* `Module::set_indexer_get_set_fn` is added as a shorthand of both `Module::set_indexer_get_fn` and `Module::set_indexer_set_fn`. * `Module::set_indexer_get_set_fn` is added as a shorthand of both `Module::set_indexer_get_fn` and `Module::set_indexer_set_fn`.
* New `unicode-xid-ident` feature to allow unicode-xid for identifiers. * New `unicode-xid-ident` feature to allow [Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/) for identifiers.
Breaking changes Breaking changes
---------------- ----------------

View File

@ -21,6 +21,11 @@ Variable names are case _sensitive_.
Variable names also cannot be the same as a [keyword]. Variable names also cannot be the same as a [keyword].
### Unicode Standard Annex #31 Identifiers
The [`unicode-xid-ident`] feature expands the allowed characters for variable names to the set defined by
[Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/).
Declare a Variable Declare a Variable
------------------ ------------------

View File

@ -12,7 +12,7 @@ Excluding unneeded functionalities can result in smaller, faster builds as well
more control over what a script can (or cannot) do. more control over what a script can (or cannot) do.
| Feature | Description | | Feature | Description |
| ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `unchecked` | Disable arithmetic checking (such as overflows and division by zero), call stack depth limit, operations count limit and modules loading limit.<br/>Beware that a bad script may panic the entire system! | | `unchecked` | Disable arithmetic checking (such as overflows and division by zero), call stack depth limit, operations count limit and modules loading limit.<br/>Beware that a bad script may panic the entire system! |
| `sync` | Restrict all value types to those that are `Send + Sync`. Under this feature, all Rhai types, including [`Engine`], [`Scope`] and [`AST`], are all `Send + Sync`. | | `sync` | Restrict all value types to those that are `Send + Sync`. Under this feature, all Rhai types, including [`Engine`], [`Scope`] and [`AST`], are all `Send + Sync`. |
| `no_optimize` | Disable [script optimization]. | | `no_optimize` | Disable [script optimization]. |
@ -26,7 +26,7 @@ more control over what a script can (or cannot) do.
| `no_std` | Build for `no-std`. Notice that additional dependencies will be pulled in to replace `std` features. | | `no_std` | Build for `no-std`. Notice that additional dependencies will be pulled in to replace `std` features. |
| `serde` | Enable serialization/deserialization via `serde`. Notice that the [`serde`](https://crates.io/crates/serde) crate will be pulled in together with its dependencies. | | `serde` | Enable serialization/deserialization via `serde`. Notice that the [`serde`](https://crates.io/crates/serde) crate will be pulled in together with its dependencies. |
| `internals` | Expose internal data structures (e.g. [`AST`] nodes). Beware that Rhai internals are volatile and may change from version to version. | | `internals` | Expose internal data structures (e.g. [`AST`] nodes). Beware that Rhai internals are volatile and may change from version to version. |
| `unicode-xid-ident` | Allow unicode-xid for identifiers. | | `unicode-xid-ident` | Allow [Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/) for identifiers. |
Example Example

View File

@ -1104,7 +1104,7 @@ fn get_next_token_inner(
|err| (Token::LexError(Box::new(err.0)), err.1), |err| (Token::LexError(Box::new(err.0)), err.1),
|result| { |result| {
let mut chars = result.chars(); let mut chars = result.chars();
let first = chars.next(); let first = chars.next().unwrap();
if chars.next().is_some() { if chars.next().is_some() {
( (
@ -1112,10 +1112,7 @@ fn get_next_token_inner(
start_pos, start_pos,
) )
} else { } else {
( (Token::CharConstant(first), start_pos)
Token::CharConstant(first.expect("should be Some")),
start_pos,
)
} }
}, },
)) ))
@ -1419,6 +1416,7 @@ fn get_identifier(
} }
/// Is this keyword allowed as a function? /// Is this keyword allowed as a function?
#[inline(always)]
pub fn is_keyword_function(name: &str) -> bool { pub fn is_keyword_function(name: &str) -> bool {
name == KEYWORD_PRINT name == KEYWORD_PRINT
|| name == KEYWORD_DEBUG || name == KEYWORD_DEBUG
@ -1446,22 +1444,25 @@ pub fn is_valid_identifier(name: impl Iterator<Item = char>) -> bool {
} }
#[cfg(feature = "unicode-xid-ident")] #[cfg(feature = "unicode-xid-ident")]
#[inline(always)]
fn is_id_first_alphabetic(x: char) -> bool { fn is_id_first_alphabetic(x: char) -> bool {
unicode_xid::UnicodeXID::is_xid_start(x) unicode_xid::UnicodeXID::is_xid_start(x)
} }
#[cfg(feature = "unicode-xid-ident")] #[cfg(feature = "unicode-xid-ident")]
#[inline(always)]
fn is_id_continue(x: char) -> bool { fn is_id_continue(x: char) -> bool {
unicode_xid::UnicodeXID::is_xid_continue(x) unicode_xid::UnicodeXID::is_xid_continue(x)
} }
#[cfg(not(feature = "unicode-xid-ident"))] #[cfg(not(feature = "unicode-xid-ident"))]
#[inline(always)]
fn is_id_first_alphabetic(x: char) -> bool { fn is_id_first_alphabetic(x: char) -> bool {
x.is_ascii_alphabetic() x.is_ascii_alphabetic()
} }
#[cfg(not(feature = "unicode-xid-ident"))] #[cfg(not(feature = "unicode-xid-ident"))]
#[inline(always)]
fn is_id_continue(x: char) -> bool { fn is_id_continue(x: char) -> bool {
x.is_ascii_alphanumeric() || x == '_' x.is_ascii_alphanumeric() || x == '_'
} }