From 4d226542faaa13c9daa54aad71fb2b05a83bf166 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Mon, 27 Dec 2021 21:56:50 +0800 Subject: [PATCH] Split out strings interner. --- src/api/register.rs | 108 ++++++++++++++++++++--------- src/engine.rs | 8 +-- src/lib.rs | 5 +- src/module/mod.rs | 30 ++++----- src/parser.rs | 153 +++++++++++++++++++----------------------- src/types/interner.rs | 71 ++++++++++++++++++++ src/types/mod.rs | 2 + 7 files changed, 241 insertions(+), 136 deletions(-) create mode 100644 src/types/interner.rs diff --git a/src/api/register.rs b/src/api/register.rs index 157f650e..22276593 100644 --- a/src/api/register.rs +++ b/src/api/register.rs @@ -193,8 +193,12 @@ impl Engine { /// } /// /// impl TestStruct { - /// fn new() -> Self { Self { field: 1 } } - /// fn update(&mut self, offset: i64) { self.field += offset; } + /// fn new() -> Self { + /// Self { field: 1 } + /// } + /// fn update(&mut self, offset: i64) { + /// self.field += offset; + /// } /// } /// /// # fn main() -> Result<(), Box> { @@ -233,7 +237,9 @@ impl Engine { /// } /// /// impl TestStruct { - /// fn new() -> Self { Self { field: 1 } } + /// fn new() -> Self { + /// Self { field: 1 } + /// } /// } /// /// # fn main() -> Result<(), Box> { @@ -308,9 +314,13 @@ impl Engine { /// } /// /// impl TestStruct { - /// fn new() -> Self { Self { field: 1 } } + /// fn new() -> Self { + /// Self { field: 1 } + /// } /// // Even a getter must start with `&mut self` and not `&self`. - /// fn get_field(&mut self) -> i64 { self.field } + /// fn get_field(&mut self) -> i64 { + /// self.field + /// } /// } /// /// # fn main() -> Result<(), Box> { @@ -336,7 +346,7 @@ impl Engine { name: impl AsRef, get_fn: impl Fn(&mut T) -> V + SendSync + 'static, ) -> &mut Self { - self.register_fn(&crate::engine::make_getter(name), get_fn) + self.register_fn(&crate::engine::make_getter(name.as_ref()), get_fn) } /// Register a getter function for a member of a registered type with the [`Engine`]. /// @@ -355,7 +365,9 @@ impl Engine { /// } /// /// impl TestStruct { - /// fn new() -> Self { Self { field: 1 } } + /// fn new() -> Self { + /// Self { field: 1 } + /// } /// // Even a getter must start with `&mut self` and not `&self`. /// fn get_field(&mut self) -> Result> { /// Ok(self.field) @@ -383,7 +395,7 @@ impl Engine { name: impl AsRef, get_fn: impl Fn(&mut T) -> RhaiResultOf + SendSync + 'static, ) -> &mut Self { - self.register_result_fn(&crate::engine::make_getter(name), get_fn) + self.register_result_fn(&crate::engine::make_getter(name.as_ref()), get_fn) } /// Register a setter function for a member of a registered type with the [`Engine`]. /// @@ -398,8 +410,12 @@ impl Engine { /// } /// /// impl TestStruct { - /// fn new() -> Self { Self { field: 1 } } - /// fn set_field(&mut self, new_val: i64) { self.field = new_val; } + /// fn new() -> Self { + /// Self { field: 1 } + /// } + /// fn set_field(&mut self, new_val: i64) { + /// self.field = new_val; + /// } /// } /// /// # fn main() -> Result<(), Box> { @@ -429,7 +445,7 @@ impl Engine { name: impl AsRef, set_fn: impl Fn(&mut T, V) + SendSync + 'static, ) -> &mut Self { - self.register_fn(&crate::engine::make_setter(name), set_fn) + self.register_fn(&crate::engine::make_setter(name.as_ref()), set_fn) } /// Register a setter function for a member of a registered type with the [`Engine`]. /// @@ -446,7 +462,9 @@ impl Engine { /// } /// /// impl TestStruct { - /// fn new() -> Self { Self { field: 1 } } + /// fn new() -> Self { + /// Self { field: 1 } + /// } /// fn set_field(&mut self, new_val: i64) -> Result<(), Box> { /// self.field = new_val; /// Ok(()) @@ -478,7 +496,7 @@ impl Engine { name: impl AsRef, set_fn: impl Fn(&mut T, V) -> RhaiResultOf<()> + SendSync + 'static, ) -> &mut Self { - self.register_result_fn(&crate::engine::make_setter(name), set_fn) + self.register_result_fn(&crate::engine::make_setter(name.as_ref()), set_fn) } /// Short-hand for registering both getter and setter functions /// of a registered type with the [`Engine`]. @@ -496,10 +514,16 @@ impl Engine { /// } /// /// impl TestStruct { - /// fn new() -> Self { Self { field: 1 } } + /// fn new() -> Self { + /// Self { field: 1 } + /// } /// // Even a getter must start with `&mut self` and not `&self`. - /// fn get_field(&mut self) -> i64 { self.field } - /// fn set_field(&mut self, new_val: i64) { self.field = new_val; } + /// fn get_field(&mut self) -> i64 { + /// self.field + /// } + /// fn set_field(&mut self, new_val: i64) { + /// self.field = new_val; + /// } /// } /// /// # fn main() -> Result<(), Box> { @@ -550,9 +574,13 @@ impl Engine { /// } /// /// impl TestStruct { - /// fn new() -> Self { Self { fields: vec![1, 2, 3, 4, 5] } } + /// fn new() -> Self { + /// Self { fields: vec![1, 2, 3, 4, 5] } + /// } /// // Even a getter must start with `&mut self` and not `&self`. - /// fn get_field(&mut self, index: i64) -> i64 { self.fields[index as usize] } + /// fn get_field(&mut self, index: i64) -> i64 { + /// self.fields[index as usize] + /// } /// } /// /// # fn main() -> Result<(), Box> { @@ -623,7 +651,9 @@ impl Engine { /// } /// /// impl TestStruct { - /// fn new() -> Self { Self { fields: vec![1, 2, 3, 4, 5] } } + /// fn new() -> Self { + /// Self { fields: vec![1, 2, 3, 4, 5] } + /// } /// // Even a getter must start with `&mut self` and not `&self`. /// fn get_field(&mut self, index: i64) -> Result> { /// Ok(self.fields[index as usize]) @@ -696,8 +726,12 @@ impl Engine { /// } /// /// impl TestStruct { - /// fn new() -> Self { Self { fields: vec![1, 2, 3, 4, 5] } } - /// fn set_field(&mut self, index: i64, value: i64) { self.fields[index as usize] = value; } + /// fn new() -> Self { + /// Self { fields: vec![1, 2, 3, 4, 5] } + /// } + /// fn set_field(&mut self, index: i64, value: i64) { + /// self.fields[index as usize] = value; + /// } /// } /// /// # fn main() -> Result<(), Box> { @@ -715,10 +749,10 @@ impl Engine { /// .register_indexer_set(TestStruct::set_field); /// /// # #[cfg(not(feature = "no_index"))] - /// assert_eq!( - /// engine.eval::("let a = new_ts(); a[2] = 42; a")?.fields[2], - /// 42 - /// ); + /// let result = engine.eval::("let a = new_ts(); a[2] = 42; a")?; + /// + /// # #[cfg(not(feature = "no_index"))] + /// assert_eq!(result.fields[2], 42); /// # Ok(()) /// # } /// ``` @@ -769,7 +803,9 @@ impl Engine { /// } /// /// impl TestStruct { - /// fn new() -> Self { Self { fields: vec![1, 2, 3, 4, 5] } } + /// fn new() -> Self { + /// Self { fields: vec![1, 2, 3, 4, 5] } + /// } /// fn set_field(&mut self, index: i64, value: i64) -> Result<(), Box> { /// self.fields[index as usize] = value; /// Ok(()) @@ -789,10 +825,10 @@ impl Engine { /// .register_indexer_set_result(TestStruct::set_field); /// /// # #[cfg(not(feature = "no_index"))] - /// assert_eq!( - /// engine.eval::("let a = new_ts(); a[2] = 42; a")?.fields[2], - /// 42 - /// ); + /// let result = engine.eval::("let a = new_ts(); a[2] = 42; a")?; + /// + /// # #[cfg(not(feature = "no_index"))] + /// assert_eq!(result.fields[2], 42); /// # Ok(()) /// # } /// ``` @@ -845,10 +881,16 @@ impl Engine { /// } /// /// impl TestStruct { - /// fn new() -> Self { Self { fields: vec![1, 2, 3, 4, 5] } } + /// fn new() -> Self { + /// Self { fields: vec![1, 2, 3, 4, 5] } + /// } /// // Even a getter must start with `&mut self` and not `&self`. - /// fn get_field(&mut self, index: i64) -> i64 { self.fields[index as usize] } - /// fn set_field(&mut self, index: i64, value: i64) { self.fields[index as usize] = value; } + /// fn get_field(&mut self, index: i64) -> i64 { + /// self.fields[index as usize] + /// } + /// fn set_field(&mut self, index: i64, value: i64) { + /// self.fields[index as usize] = value; + /// } /// } /// /// # fn main() -> Result<(), Box> { diff --git a/src/engine.rs b/src/engine.rs index 58dc68f2..e4306199 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -1013,16 +1013,16 @@ impl Default for Engine { #[cfg(not(feature = "no_object"))] #[inline] #[must_use] -pub fn make_getter(id: impl AsRef) -> String { - format!("{}{}", FN_GET, id.as_ref()) +pub fn make_getter(id: &str) -> String { + format!("{}{}", FN_GET, id) } /// Make setter function #[cfg(not(feature = "no_object"))] #[inline] #[must_use] -pub fn make_setter(id: impl AsRef) -> String { - format!("{}{}", FN_SET, id.as_ref()) +pub fn make_setter(id: &str) -> String { + format!("{}{}", FN_SET, id) } /// Is this function an anonymous function? diff --git a/src/lib.rs b/src/lib.rs index 05651856..67baf76d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -226,7 +226,10 @@ pub use tokenizer::{ }; #[cfg(feature = "internals")] -pub use parser::{IdentifierBuilder, ParseState}; +pub use types::StringsInterner; + +#[cfg(feature = "internals")] +pub use parser::ParseState; #[cfg(feature = "internals")] pub use ast::{ diff --git a/src/module/mod.rs b/src/module/mod.rs index 97481c74..98884c7c 100644 --- a/src/module/mod.rs +++ b/src/module/mod.rs @@ -5,9 +5,9 @@ use crate::func::{ shared_take_or_clone, CallableFunction, FnCallArgs, IteratorFn, RegisterNativeFunction, SendSync, }; -use crate::parser::IdentifierBuilder; use crate::tokenizer::Token; use crate::types::dynamic::Variant; +use crate::types::StringsInterner; use crate::{ calc_fn_params_hash, calc_qualified_fn_hash, combine_hashes, Dynamic, Identifier, ImmutableString, NativeCallContext, RhaiResultOf, Shared, StaticVec, @@ -155,8 +155,8 @@ pub struct Module { indexed: bool, /// Does the [`Module`] contain indexed functions that have been exposed to the global namespace? contains_indexed_global_functions: bool, - /// Interned strings - identifiers: IdentifierBuilder, + /// Interned strings. + interner: StringsInterner, } impl Default for Module { @@ -253,7 +253,7 @@ impl Module { all_type_iterators: BTreeMap::new(), indexed: true, contains_indexed_global_functions: false, - identifiers: IdentifierBuilder::new(), + interner: StringsInterner::new(), } } @@ -477,7 +477,7 @@ impl Module { let param_names_and_types = fn_def .params .iter() - .map(|v| self.identifiers.get(v.as_str())) + .map(|v| self.interner.get("", v.as_str()).into()) .collect(); self.functions.insert( hash_script, @@ -490,7 +490,7 @@ impl Module { #[cfg(feature = "metadata")] param_names_and_types, #[cfg(feature = "metadata")] - return_type_name: self.identifiers.get("Dynamic"), + return_type_name: self.interner.get("", "Dynamic").into(), #[cfg(feature = "metadata")] comments: None, func: Into::::into(fn_def).into(), @@ -639,7 +639,7 @@ impl Module { pub fn update_fn_metadata(&mut self, hash_fn: u64, arg_names: &[impl AsRef]) -> &mut Self { let mut param_names: StaticVec<_> = arg_names .iter() - .map(|name| self.identifiers.get(name.as_ref())) + .map(|name| self.interner.get("", name.as_ref()).into()) .collect(); if let Some(f) = self.functions.get_mut(&hash_fn) { @@ -771,7 +771,7 @@ impl Module { let mut names = _arg_names .iter() .flat_map(|&p| p.iter()) - .map(|&arg| self.identifiers.get(arg)) + .map(|&arg| self.interner.get("", arg).into()) .collect::>(); let return_type = if names.len() > arg_types.len() { names.pop().expect("exists") @@ -787,7 +787,7 @@ impl Module { self.functions.insert( hash_fn, FuncInfo { - name: self.identifiers.get(name), + name: self.interner.get("", name.as_ref()).into(), namespace, access, params: param_types.len(), @@ -1017,7 +1017,7 @@ impl Module { F: Fn(&mut A) -> RhaiResultOf + SendSync + 'static, { self.set_fn( - &crate::engine::make_getter(name), + &crate::engine::make_getter(name.as_ref()), FnNamespace::Global, FnAccess::Public, None, @@ -1059,7 +1059,7 @@ impl Module { F: Fn(&mut A, B) -> RhaiResultOf<()> + SendSync + 'static, { self.set_fn( - &crate::engine::make_setter(name), + &crate::engine::make_setter(name.as_ref()), FnNamespace::Global, FnAccess::Public, None, @@ -1285,7 +1285,7 @@ impl Module { self.all_type_iterators.clear(); self.indexed = false; self.contains_indexed_global_functions = false; - self.identifiers += other.identifiers; + self.interner += other.interner; self } @@ -1305,7 +1305,7 @@ impl Module { self.all_type_iterators.clear(); self.indexed = false; self.contains_indexed_global_functions = false; - self.identifiers += other.identifiers; + self.interner += other.interner; self } @@ -1334,7 +1334,7 @@ impl Module { self.all_type_iterators.clear(); self.indexed = false; self.contains_indexed_global_functions = false; - self.identifiers.merge(&other.identifiers); + self.interner.merge(&other.interner); self } @@ -1384,7 +1384,7 @@ impl Module { self.all_type_iterators.clear(); self.indexed = false; self.contains_indexed_global_functions = false; - self.identifiers.merge(&other.identifiers); + self.interner.merge(&other.interner); self } diff --git a/src/parser.rs b/src/parser.rs index edd1bca3..a9d1ab8d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -14,6 +14,7 @@ use crate::tokenizer::{ TokenizerControl, }; use crate::types::dynamic::AccessMode; +use crate::types::StringsInterner; use crate::{ calc_fn_hash, calc_qualified_fn_hash, calc_qualified_var_hash, Dynamic, Engine, ExclusiveRange, Identifier, ImmutableString, InclusiveRange, LexError, ParseError, Position, Scope, Shared, @@ -25,7 +26,6 @@ use std::{ collections::BTreeMap, hash::{Hash, Hasher}, num::{NonZeroU8, NonZeroUsize}, - ops::AddAssign, }; pub type ParseResult = Result; @@ -38,41 +38,6 @@ const SCOPE_SEARCH_BARRIER_MARKER: &str = "$BARRIER$"; /// The message: `TokenStream` never ends const NEVER_ENDS: &str = "`TokenStream` never ends"; -/// _(internals)_ A factory of identifiers from text strings. -/// Exported under the `internals` feature only. -/// -/// When [`SmartString`](https://crates.io/crates/smartstring) is used as [`Identifier`], -/// this just returns a copy because most identifiers in Rhai are short and ASCII-based. -/// -/// When [`ImmutableString`] is used as [`Identifier`], this type acts as an interner which keeps a -/// collection of strings and returns shared instances, only creating a new string when it is not -/// yet interned. -#[derive(Debug, Clone, Hash)] -pub struct IdentifierBuilder(); - -impl IdentifierBuilder { - /// Create a new [`IdentifierBuilder`]. - #[inline] - #[must_use] - pub const fn new() -> Self { - Self() - } - /// Get an identifier from a text string. - #[inline] - #[must_use] - pub fn get(&mut self, text: impl AsRef + Into) -> Identifier { - text.into() - } - /// Merge another [`IdentifierBuilder`] into this. - #[inline(always)] - pub fn merge(&mut self, _other: &Self) {} -} - -impl AddAssign for IdentifierBuilder { - #[inline(always)] - fn add_assign(&mut self, _rhs: Self) {} -} - /// _(internals)_ A type that encapsulates the current state of the parser. /// Exported under the `internals` feature only. #[derive(Debug)] @@ -82,7 +47,7 @@ pub struct ParseState<'e> { /// Input stream buffer containing the next character to read. pub tokenizer_control: TokenizerControl, /// Interned strings. - pub interned_strings: IdentifierBuilder, + pub interned_strings: StringsInterner, /// Encapsulates a local stack with variable names to simulate an actual runtime scope. pub stack: StaticVec<(Identifier, AccessMode)>, /// Size of the local variables stack upon entry of the current block scope. @@ -125,7 +90,7 @@ impl<'e> ParseState<'e> { external_vars: BTreeMap::new(), #[cfg(not(feature = "no_closure"))] allow_capture: true, - interned_strings: IdentifierBuilder::new(), + interned_strings: StringsInterner::new(), stack: StaticVec::new_const(), entry_stack_len: 0, #[cfg(not(feature = "no_module"))] @@ -204,11 +169,26 @@ impl<'e> ParseState<'e> { .and_then(|(i, _)| NonZeroUsize::new(i + 1)) } + /// Get an interned identifier, creating one if it is not yet interned. + #[inline(always)] + #[must_use] + pub fn get_identifier( + &mut self, + prefix: &'static str, + text: impl AsRef + Into + Into, + ) -> Identifier { + self.interned_strings.get(prefix, text).into() + } + /// Get an interned string, creating one if it is not yet interned. #[inline(always)] #[must_use] - pub fn get_identifier(&mut self, text: impl AsRef + Into) -> Identifier { - self.interned_strings.get(text) + pub fn get_interned_string( + &mut self, + prefix: &'static str, + text: impl AsRef + Into + Into, + ) -> ImmutableString { + self.interned_strings.get(prefix, text) } } @@ -278,15 +258,15 @@ impl Expr { match self { Self::Variable(_, pos, x) if x.1.is_none() => { let ident = x.2; - let getter = state.get_identifier(crate::engine::make_getter(&ident)); + let getter = state.get_identifier(crate::engine::FN_GET, ident.as_str()); let hash_get = calc_fn_hash(&getter, 1); - let setter = state.get_identifier(crate::engine::make_setter(&ident)); + let setter = state.get_identifier(crate::engine::FN_SET, ident.as_str()); let hash_set = calc_fn_hash(&setter, 2); Self::Property(Box::new(( (getter, hash_get), (setter, hash_set), - (state.get_identifier(ident).into(), pos), + (state.get_interned_string("", ident.as_str()), pos), ))) } _ => self, @@ -517,7 +497,7 @@ fn parse_fn_call( args.shrink_to_fit(); return Ok(FnCallExpr { - name: state.get_identifier(id), + name: state.get_identifier("", id), capture_parent_scope, namespace, hashes, @@ -576,7 +556,7 @@ fn parse_fn_call( args.shrink_to_fit(); return Ok(FnCallExpr { - name: state.get_identifier(id), + name: state.get_identifier("", id), capture_parent_scope, namespace, hashes, @@ -936,7 +916,7 @@ fn parse_map_literal( } let expr = parse_expr(input, state, lib, settings.level_up())?; - let name = state.get_identifier(name); + let name = state.get_identifier("", name); template.insert(name.clone(), crate::Dynamic::UNIT); map.push((Ident { name, pos }, expr)); @@ -1178,7 +1158,7 @@ fn parse_primary( Token::IntegerConstant(x) => Expr::IntegerConstant(x, settings.pos), Token::CharConstant(c) => Expr::CharConstant(c, settings.pos), Token::StringConstant(s) => { - Expr::StringConstant(state.get_identifier(s).into(), settings.pos) + Expr::StringConstant(state.get_identifier("", s).into(), settings.pos) } Token::True => Expr::BoolConstant(true, settings.pos), Token::False => Expr::BoolConstant(false, settings.pos), @@ -1357,7 +1337,7 @@ fn parse_primary( Expr::Variable( None, settings.pos, - (None, None, state.get_identifier(s)).into(), + (None, None, state.get_identifier("", s)).into(), ) } // Namespace qualification @@ -1371,7 +1351,7 @@ fn parse_primary( Expr::Variable( None, settings.pos, - (None, None, state.get_identifier(s)).into(), + (None, None, state.get_identifier("", s)).into(), ) } // Normal variable access @@ -1392,7 +1372,7 @@ fn parse_primary( Expr::Variable( short_index, settings.pos, - (index, None, state.get_identifier(s)).into(), + (index, None, state.get_identifier("", s)).into(), ) } } @@ -1410,14 +1390,14 @@ fn parse_primary( Token::LeftParen | Token::Bang if is_keyword_function(&s) => Expr::Variable( None, settings.pos, - (None, None, state.get_identifier(s)).into(), + (None, None, state.get_identifier("", s)).into(), ), // Access to `this` as a variable is OK within a function scope #[cfg(not(feature = "no_function"))] _ if &*s == KEYWORD_THIS && settings.is_function_scope => Expr::Variable( None, settings.pos, - (None, None, state.get_identifier(s)).into(), + (None, None, state.get_identifier("", s)).into(), ), // Cannot access to `this` as a variable not in a function scope _ if &*s == KEYWORD_THIS => { @@ -1519,7 +1499,7 @@ fn parse_postfix( Expr::Variable( None, pos2, - (None, namespace, state.get_identifier(id2)).into(), + (None, namespace, state.get_identifier("", id2)).into(), ) } // Indexing @@ -1636,7 +1616,7 @@ fn parse_unary( args.shrink_to_fit(); Ok(FnCallExpr { - name: state.get_identifier("-"), + name: state.get_identifier("", "-"), hashes: FnCallHashes::from_native(calc_fn_hash("-", 1)), args, ..Default::default() @@ -1662,7 +1642,7 @@ fn parse_unary( args.shrink_to_fit(); Ok(FnCallExpr { - name: state.get_identifier("+"), + name: state.get_identifier("", "+"), hashes: FnCallHashes::from_native(calc_fn_hash("+", 1)), args, ..Default::default() @@ -1679,7 +1659,7 @@ fn parse_unary( args.shrink_to_fit(); Ok(FnCallExpr { - name: state.get_identifier("!"), + name: state.get_identifier("", "!"), hashes: FnCallHashes::from_native(calc_fn_hash("!", 1)), args, ..Default::default() @@ -2016,7 +1996,7 @@ fn parse_binary_op( let hash = calc_fn_hash(&op, 2); let op_base = FnCallExpr { - name: state.get_identifier(op.as_ref()), + name: state.get_identifier("", op.as_ref()), hashes: FnCallHashes::from_native(hash), ..Default::default() }; @@ -2071,7 +2051,7 @@ fn parse_binary_op( FnCallExpr { hashes: calc_fn_hash(OP_CONTAINS, 2).into(), args, - name: state.get_identifier(OP_CONTAINS), + name: state.get_identifier("", OP_CONTAINS), ..op_base } .into_fn_call_expr(pos) @@ -2122,7 +2102,7 @@ fn parse_custom_syntax( if syntax.scope_may_be_changed { // Add a barrier variable to the stack so earlier variables will not be matched. // Variable searches stop at the first barrier. - let marker = state.get_identifier(SCOPE_SEARCH_BARRIER_MARKER); + let marker = state.get_identifier("", SCOPE_SEARCH_BARRIER_MARKER); state.stack.push((marker, AccessMode::ReadWrite)); } @@ -2142,7 +2122,10 @@ fn parse_custom_syntax( if seg.starts_with(CUSTOM_SYNTAX_MARKER_SYNTAX_VARIANT) && seg.len() > CUSTOM_SYNTAX_MARKER_SYNTAX_VARIANT.len() => { - inputs.push(Expr::StringConstant(state.get_identifier(seg).into(), pos)); + inputs.push(Expr::StringConstant( + state.get_identifier("", seg).into(), + pos, + )); break; } Ok(Some(seg)) => seg, @@ -2153,28 +2136,28 @@ fn parse_custom_syntax( match required_token.as_str() { CUSTOM_SYNTAX_MARKER_IDENT => { let (name, pos) = parse_var_name(input)?; - let name = state.get_identifier(name); + let name = state.get_identifier("", name); segments.push(name.clone().into()); - tokens.push(state.get_identifier(CUSTOM_SYNTAX_MARKER_IDENT)); + tokens.push(state.get_identifier("", CUSTOM_SYNTAX_MARKER_IDENT)); inputs.push(Expr::Variable(None, pos, (None, None, name).into())); } CUSTOM_SYNTAX_MARKER_SYMBOL => { let (symbol, pos) = parse_symbol(input)?; - let symbol: ImmutableString = state.get_identifier(symbol).into(); + let symbol: ImmutableString = state.get_identifier("", symbol).into(); segments.push(symbol.clone()); - tokens.push(state.get_identifier(CUSTOM_SYNTAX_MARKER_SYMBOL)); + tokens.push(state.get_identifier("", CUSTOM_SYNTAX_MARKER_SYMBOL)); inputs.push(Expr::StringConstant(symbol, pos)); } CUSTOM_SYNTAX_MARKER_EXPR => { inputs.push(parse_expr(input, state, lib, settings)?); - let keyword = state.get_identifier(CUSTOM_SYNTAX_MARKER_EXPR); + let keyword = state.get_identifier("", CUSTOM_SYNTAX_MARKER_EXPR); segments.push(keyword.clone().into()); tokens.push(keyword); } CUSTOM_SYNTAX_MARKER_BLOCK => match parse_block(input, state, lib, settings)? { block @ Stmt::Block(_, _) => { inputs.push(Expr::Stmt(Box::new(block.into()))); - let keyword = state.get_identifier(CUSTOM_SYNTAX_MARKER_BLOCK); + let keyword = state.get_identifier("", CUSTOM_SYNTAX_MARKER_BLOCK); segments.push(keyword.clone().into()); tokens.push(keyword); } @@ -2183,8 +2166,8 @@ fn parse_custom_syntax( CUSTOM_SYNTAX_MARKER_BOOL => match input.next().expect(NEVER_ENDS) { (b @ Token::True, pos) | (b @ Token::False, pos) => { inputs.push(Expr::BoolConstant(b == Token::True, pos)); - segments.push(state.get_identifier(b.literal_syntax()).into()); - tokens.push(state.get_identifier(CUSTOM_SYNTAX_MARKER_BOOL)); + segments.push(state.get_identifier("", b.literal_syntax()).into()); + tokens.push(state.get_identifier("", CUSTOM_SYNTAX_MARKER_BOOL)); } (_, pos) => { return Err( @@ -2197,7 +2180,7 @@ fn parse_custom_syntax( (Token::IntegerConstant(i), pos) => { inputs.push(Expr::IntegerConstant(i, pos)); segments.push(i.to_string().into()); - tokens.push(state.get_identifier(CUSTOM_SYNTAX_MARKER_INT)); + tokens.push(state.get_identifier("", CUSTOM_SYNTAX_MARKER_INT)); } (_, pos) => { return Err( @@ -2213,6 +2196,7 @@ fn parse_custom_syntax( inputs.push(Expr::FloatConstant(f, pos)); segments.push(f.to_string().into()); tokens.push(state.get_identifier( + "", crate::custom_syntax::markers::CUSTOM_SYNTAX_MARKER_FLOAT, )); } @@ -2226,10 +2210,10 @@ fn parse_custom_syntax( } CUSTOM_SYNTAX_MARKER_STRING => match input.next().expect(NEVER_ENDS) { (Token::StringConstant(s), pos) => { - let s: ImmutableString = state.get_identifier(s).into(); + let s: ImmutableString = state.get_identifier("", s).into(); inputs.push(Expr::StringConstant(s.clone(), pos)); segments.push(s); - tokens.push(state.get_identifier(CUSTOM_SYNTAX_MARKER_STRING)); + tokens.push(state.get_identifier("", CUSTOM_SYNTAX_MARKER_STRING)); } (_, pos) => { return Err(PERR::MissingSymbol("Expecting a string".to_string()).into_err(pos)) @@ -2477,13 +2461,13 @@ fn parse_for( let prev_stack_len = state.stack.len(); let counter_var = counter_name.map(|name| { - let name = state.get_identifier(name); + let name = state.get_identifier("", name); let pos = counter_pos.expect("`Some`"); state.stack.push((name.clone(), AccessMode::ReadWrite)); Ident { name, pos } }); - let loop_var = state.get_identifier(name); + let loop_var = state.get_identifier("", name); state.stack.push((loop_var.clone(), AccessMode::ReadWrite)); let loop_var = Ident { name: loop_var, @@ -2521,7 +2505,7 @@ fn parse_let( // let name ... let (name, pos) = parse_var_name(input)?; - let name = state.get_identifier(name); + let name = state.get_identifier("", name); let var_def = Ident { name: name.clone(), pos, @@ -2581,7 +2565,7 @@ fn parse_import( // import expr as name ... let (name, pos) = parse_var_name(input)?; - let name = state.get_identifier(name); + let name = state.get_identifier("", name); state.modules.push(name.clone()); Ok(Stmt::Import( @@ -2638,11 +2622,11 @@ fn parse_export( exports.push(( Ident { - name: state.get_identifier(id), + name: state.get_identifier("", id), pos: id_pos, }, Ident { - name: state.get_identifier(rename.as_ref().map_or("", |s| s.as_ref())), + name: state.get_identifier("", rename.as_ref().map_or("", |s| s.as_ref())), pos: rename_pos, }, )); @@ -3039,7 +3023,7 @@ fn parse_try_catch( .into_err(err_pos)); } - let name = state.get_identifier(name); + let name = state.get_identifier("", name); state.stack.push((name.clone(), AccessMode::ReadWrite)); Some(Ident { name, pos }) } else { @@ -3104,7 +3088,7 @@ fn parse_fn( PERR::FnDuplicatedParam(name.to_string(), s.to_string()).into_err(pos) ); } - let s = state.get_identifier(s); + let s = state.get_identifier("", s); state.stack.push((s.clone(), AccessMode::ReadWrite)); params.push((s, pos)) } @@ -3143,7 +3127,7 @@ fn parse_fn( params.shrink_to_fit(); Ok(ScriptFnDef { - name: state.get_identifier(name), + name: state.get_identifier("", name), access, params, body, @@ -3187,7 +3171,7 @@ fn make_curry_from_externals( ); let expr = FnCallExpr { - name: state.get_identifier(crate::engine::KEYWORD_FN_PTR_CURRY), + name: state.get_identifier("", crate::engine::KEYWORD_FN_PTR_CURRY), hashes: FnCallHashes::from_native(calc_fn_hash( crate::engine::KEYWORD_FN_PTR_CURRY, num_externals + 1, @@ -3229,7 +3213,7 @@ fn parse_anon_fn( PERR::FnDuplicatedParam("".to_string(), s.to_string()).into_err(pos) ); } - let s = state.get_identifier(s); + let s = state.get_identifier("", s); state.stack.push((s.clone(), AccessMode::ReadWrite)); params_list.push(s) } @@ -3287,7 +3271,10 @@ fn parse_anon_fn( body.hash(hasher); let hash = hasher.finish(); - let fn_name = state.get_identifier(&(format!("{}{:016x}", crate::engine::FN_ANONYMOUS, hash))); + let fn_name = state.get_identifier( + "", + &(format!("{}{:016x}", crate::engine::FN_ANONYMOUS, hash)), + ); // Define the function let script = ScriptFnDef { diff --git a/src/types/interner.rs b/src/types/interner.rs new file mode 100644 index 00000000..df3c4af7 --- /dev/null +++ b/src/types/interner.rs @@ -0,0 +1,71 @@ +use crate::engine::{make_getter, make_setter, FN_GET, FN_SET}; +use crate::{Identifier, ImmutableString}; +#[cfg(feature = "no_std")] +use std::prelude::v1::*; +use std::{collections::BTreeMap, ops::AddAssign}; + +/// _(internals)_ A factory of identifiers from text strings. +/// Exported under the `internals` feature only. +/// +/// Since [`SmartString`](https://crates.io/crates/smartstring) is used as [`Identifier`], +/// this just returns a copy because most identifiers in Rhai are short and ASCII-based. +/// +/// Property getters and setters are interned separately. +#[derive(Debug, Clone, Default, Hash)] +pub struct StringsInterner { + /// Property getters. + getters: BTreeMap, + /// Property setters. + setters: BTreeMap, +} + +impl StringsInterner { + /// Create a new [`IdentifierBuilder`]. + #[inline] + #[must_use] + pub fn new() -> Self { + Self { + getters: BTreeMap::new(), + setters: BTreeMap::new(), + } + } + /// Get an identifier from a text string and prefix, adding it to the interner if necessary. + /// + /// # Panics + /// + /// Panics if the prefix is not recognized. + #[inline] + #[must_use] + pub fn get( + &mut self, + prefix: &'static str, + text: impl AsRef + Into + Into, + ) -> ImmutableString { + let (dict, mapper) = match prefix { + "" => return text.into(), + FN_GET => (&mut self.getters, make_getter as fn(&str) -> String), + FN_SET => (&mut self.setters, make_setter as fn(&str) -> String), + _ => unreachable!("unsupported prefix {}", prefix), + }; + + if dict.contains_key(text.as_ref()) { + self.getters.get(text.as_ref()).expect("exists").clone() + } else { + let value: ImmutableString = mapper(text.as_ref()).into(); + let text = text.into(); + dict.insert(text, value.clone()); + value + } + } + /// Merge another [`IdentifierBuilder`] into this. + #[inline(always)] + pub fn merge(&mut self, _other: &Self) {} +} + +impl AddAssign for StringsInterner { + #[inline(always)] + fn add_assign(&mut self, rhs: Self) { + self.getters.extend(rhs.getters.into_iter()); + self.setters.extend(rhs.setters.into_iter()); + } +} diff --git a/src/types/mod.rs b/src/types/mod.rs index d20c2242..1012f8b1 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -4,6 +4,7 @@ pub mod dynamic; pub mod error; pub mod fn_ptr; pub mod immutable_string; +pub mod interner; pub mod parse_error; pub mod scope; @@ -11,5 +12,6 @@ pub use dynamic::Dynamic; pub use error::EvalAltResult; pub use fn_ptr::FnPtr; pub use immutable_string::ImmutableString; +pub use interner::StringsInterner; pub use parse_error::{LexError, ParseError, ParseErrorType}; pub use scope::Scope;