Merge pull request #205 from ekicyou/feature/unicode-xid-ident

New `unicode-xid-ident` feature to allow unicode-xid for identifiers.
This commit is contained in:
Stephen Chung 2020-07-29 11:40:13 +08:00 committed by GitHub
commit 7e7c61c4c7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 115 additions and 45 deletions

View File

@ -29,6 +29,7 @@ jobs:
- "--features no_object"
- "--features no_function"
- "--features no_module"
- "--features unicode-xid-ident"
toolchain: [stable]
experimental: [false]
include:

View File

@ -34,6 +34,7 @@ no_object = [] # no custom objects
no_function = [] # no script-defined functions
no_module = [] # no modules
internals = [] # expose internal data structures
unicode-xid-ident = ["unicode-xid"] # allow unicode-xid for identifiers.
# compiling for no-std
no_std = [ "num-traits/libm", "hashbrown", "core-error", "libm", "ahash" ]
@ -73,6 +74,11 @@ default_features = false
features = ["derive", "alloc"]
optional = true
[dependencies.unicode-xid]
version = "0.2.1"
default_features = false
optional = true
[target.'cfg(target_arch = "wasm32")'.dependencies]
instant= { version = "0.1.4", features = ["wasm-bindgen"] } # WASM implementation of std::time::Instant

View File

@ -19,6 +19,7 @@ New features
* Custom syntax now works even without the `internals` feature.
* Currying of function pointers is supported via the `curry` keyword.
* `Module::set_indexer_get_set_fn` is added as a shorthand of both `Module::set_indexer_get_fn` and `Module::set_indexer_set_fn`.
* New `unicode-xid-ident` feature to allow unicode-xid for identifiers.
Breaking changes
----------------

View File

@ -12,6 +12,7 @@
[`no_std`]: {{rootUrl}}/start/features.md
[`no-std`]: {{rootUrl}}/start/features.md
[`internals`]: {{rootUrl}}/start/features.md
[`unicode-xid-ident`]: {{rootUrl}}/start/features.md
[minimal builds]: {{rootUrl}}/start/builds/minimal.md
[WASM]: {{rootUrl}}/start/builds/wasm.md

View File

@ -26,6 +26,7 @@ more control over what a script can (or cannot) do.
| `no_std` | Build for `no-std`. Notice that additional dependencies will be pulled in to replace `std` features. |
| `serde` | Enable serialization/deserialization via [`serde`]. Notice that the [`serde`](https://crates.io/crates/serde) crate will be pulled in together with its dependencies. |
| `internals` | Expose internal data structures (e.g. [`AST`] nodes). Beware that Rhai internals are volatile and may change from version to version. |
| `unicode-xid-ident` | Allow unicode-xid for identifiers. |
Example

View File

@ -714,22 +714,6 @@ pub trait InputStream {
fn peek_next(&mut self) -> Option<char>;
}
pub fn is_valid_identifier(name: impl Iterator<Item = char>) -> bool {
let mut first_alphabetic = false;
for ch in name {
match ch {
'_' => (),
_ if char::is_ascii_alphabetic(&ch) => first_alphabetic = true,
_ if !first_alphabetic => return false,
_ if char::is_ascii_alphanumeric(&ch) => (),
_ => return false,
}
}
first_alphabetic
}
/// [INTERNALS] Parse a string literal wrapped by `enclosing_char`.
/// Exported under the `internals` feature only.
///
@ -1076,35 +1060,7 @@ fn get_next_token_inner(
// letter or underscore ...
('A'..='Z', _) | ('a'..='z', _) | ('_', _) => {
let mut result = Vec::new();
result.push(c);
while let Some(next_char) = stream.peek_next() {
match next_char {
x if x.is_ascii_alphanumeric() || x == '_' => {
result.push(x);
eat_next(stream, pos);
}
_ => break,
}
}
let is_valid_identifier = is_valid_identifier(result.iter().cloned());
let identifier: String = result.into_iter().collect();
if !is_valid_identifier {
return Some((
Token::LexError(Box::new(LERR::MalformedIdentifier(identifier))),
start_pos,
));
}
return Some((
Token::lookup_from_syntax(&identifier)
.unwrap_or_else(|| Token::Identifier(identifier)),
start_pos,
));
return get_identifier(stream, pos, start_pos, c);
}
// " - string literal
@ -1382,6 +1338,10 @@ fn get_next_token_inner(
('\0', _) => unreachable!(),
(ch, _) if ch.is_whitespace() => (),
#[cfg(feature = "unicode-xid-ident")]
(ch, _) if unicode_xid::UnicodeXID::is_xid_start(ch) => {
return get_identifier(stream, pos, start_pos, c);
}
(ch, _) => {
return Some((
Token::LexError(Box::new(LERR::UnexpectedInput(ch.to_string()))),
@ -1400,6 +1360,80 @@ fn get_next_token_inner(
}
}
/// Get the next identifier.
fn get_identifier(
stream: &mut impl InputStream,
pos: &mut Position,
start_pos: Position,
first_char: char,
) -> Option<(Token, Position)> {
let mut result = Vec::new();
result.push(first_char);
while let Some(next_char) = stream.peek_next() {
match next_char {
x if is_id_continue(x) => {
result.push(x);
eat_next(stream, pos);
}
_ => break,
}
}
let is_valid_identifier = is_valid_identifier(result.iter().cloned());
let identifier: String = result.into_iter().collect();
if !is_valid_identifier {
return Some((
Token::LexError(Box::new(LERR::MalformedIdentifier(identifier))),
start_pos,
));
}
return Some((
Token::lookup_from_syntax(&identifier).unwrap_or_else(|| Token::Identifier(identifier)),
start_pos,
));
}
pub fn is_valid_identifier(name: impl Iterator<Item = char>) -> bool {
let mut first_alphabetic = false;
for ch in name {
match ch {
'_' => (),
_ if is_id_first_alphabetic(ch) => first_alphabetic = true,
_ if !first_alphabetic => return false,
_ if char::is_ascii_alphanumeric(&ch) => (),
_ => return false,
}
}
first_alphabetic
}
#[cfg(feature = "unicode-xid-ident")]
fn is_id_first_alphabetic(x: char) -> bool {
unicode_xid::UnicodeXID::is_xid_start(x)
}
#[cfg(feature = "unicode-xid-ident")]
fn is_id_continue(x: char) -> bool {
unicode_xid::UnicodeXID::is_xid_continue(x)
}
#[cfg(not(feature = "unicode-xid-ident"))]
fn is_id_first_alphabetic(x: char) -> bool {
x.is_ascii_alphabetic()
}
#[cfg(not(feature = "unicode-xid-ident"))]
fn is_id_continue(x: char) -> bool {
x.is_ascii_alphanumeric() || x == '_'
}
/// A type that implements the `InputStream` trait.
/// Multiple character streams are jointed together to form one single stream.
pub struct MultiInputsStream<'a> {

View File

@ -51,3 +51,29 @@ fn test_tokens_custom_operator() -> Result<(), Box<EvalAltResult>> {
Ok(())
}
#[test]
fn test_tokens_unicode_xid_ident() -> Result<(), Box<EvalAltResult>> {
let engine = Engine::new();
let result = engine.eval::<INT>(
r"
fn () { 42 }
()
",
);
#[cfg(feature = "unicode-xid-ident")]
assert_eq!(result?, 42);
#[cfg(not(feature = "unicode-xid-ident"))]
assert!(result.is_err());
let result = engine.eval::<INT>(
r"
fn _1() { 1 }
_1()
",
);
assert!(result.is_err());
Ok(())
}