Revise strings interning.

This commit is contained in:
Stephen Chung 2022-08-12 16:34:57 +08:00
parent 5ba9b3bd1c
commit cba394d73c
15 changed files with 272 additions and 123 deletions

View File

@ -221,7 +221,7 @@ impl Engine {
scripts.as_ref(), scripts.as_ref(),
self.token_mapper.as_ref().map(<_>::as_ref), self.token_mapper.as_ref().map(<_>::as_ref),
); );
let mut state = ParseState::new(self, scope, tokenizer_control); let mut state = ParseState::new(self, scope, Default::default(), tokenizer_control);
let mut _ast = self.parse(&mut stream.peekable(), &mut state, optimization_level)?; let mut _ast = self.parse(&mut stream.peekable(), &mut state, optimization_level)?;
#[cfg(feature = "metadata")] #[cfg(feature = "metadata")]
_ast.set_doc(state.tokenizer_control.borrow().global_comments.join("\n")); _ast.set_doc(state.tokenizer_control.borrow().global_comments.join("\n"));
@ -294,7 +294,7 @@ impl Engine {
self.lex_raw(&scripts, self.token_mapper.as_ref().map(<_>::as_ref)); self.lex_raw(&scripts, self.token_mapper.as_ref().map(<_>::as_ref));
let mut peekable = stream.peekable(); let mut peekable = stream.peekable();
let mut state = ParseState::new(self, scope, tokenizer_control); let mut state = ParseState::new(self, scope, Default::default(), tokenizer_control);
self.parse_global_expr(&mut peekable, &mut state, self.optimization_level) self.parse_global_expr(&mut peekable, &mut state, self.optimization_level)
} }
} }

View File

@ -116,7 +116,7 @@ impl Engine {
let scripts = [script]; let scripts = [script];
let (stream, tokenizer_control) = let (stream, tokenizer_control) =
self.lex_raw(&scripts, self.token_mapper.as_ref().map(<_>::as_ref)); self.lex_raw(&scripts, self.token_mapper.as_ref().map(<_>::as_ref));
let mut state = ParseState::new(self, scope, tokenizer_control); let mut state = ParseState::new(self, scope, Default::default(), tokenizer_control);
// No need to optimize a lone expression // No need to optimize a lone expression
let ast = self.parse_global_expr( let ast = self.parse_global_expr(

View File

@ -120,7 +120,7 @@ impl Engine {
); );
let scope = Scope::new(); let scope = Scope::new();
let mut state = ParseState::new(self, &scope, tokenizer_control); let mut state = ParseState::new(self, &scope, Default::default(), tokenizer_control);
let ast = self.parse_global_expr( let ast = self.parse_global_expr(
&mut stream.peekable(), &mut stream.peekable(),

View File

@ -58,7 +58,7 @@ impl Engine {
let scripts = [script]; let scripts = [script];
let (stream, tokenizer_control) = let (stream, tokenizer_control) =
self.lex_raw(&scripts, self.token_mapper.as_ref().map(<_>::as_ref)); self.lex_raw(&scripts, self.token_mapper.as_ref().map(<_>::as_ref));
let mut state = ParseState::new(self, scope, tokenizer_control); let mut state = ParseState::new(self, scope, Default::default(), tokenizer_control);
let ast = self.parse(&mut stream.peekable(), &mut state, self.optimization_level)?; let ast = self.parse(&mut stream.peekable(), &mut state, self.optimization_level)?;
self.run_ast_with_scope(scope, &ast) self.run_ast_with_scope(scope, &ast)
} }

View File

@ -2,14 +2,15 @@
use crate::api::options::LangOptions; use crate::api::options::LangOptions;
use crate::func::native::{ use crate::func::native::{
OnDebugCallback, OnDefVarCallback, OnParseTokenCallback, OnPrintCallback, OnVarCallback, locked_write, OnDebugCallback, OnDefVarCallback, OnParseTokenCallback, OnPrintCallback,
OnVarCallback,
}; };
use crate::packages::{Package, StandardPackage}; use crate::packages::{Package, StandardPackage};
use crate::tokenizer::Token; use crate::tokenizer::Token;
use crate::types::dynamic::Union; use crate::types::StringsInterner;
use crate::{ use crate::{
Dynamic, Identifier, ImmutableString, Module, OptimizationLevel, Position, RhaiResult, Shared, Dynamic, Identifier, ImmutableString, Locked, Module, OptimizationLevel, Position, RhaiResult,
StaticVec, Shared, StaticVec,
}; };
#[cfg(feature = "no_std")] #[cfg(feature = "no_std")]
use std::prelude::v1::*; use std::prelude::v1::*;
@ -105,7 +106,7 @@ pub struct Engine {
pub(crate) module_resolver: Box<dyn crate::ModuleResolver>, pub(crate) module_resolver: Box<dyn crate::ModuleResolver>,
/// An empty [`ImmutableString`] for cloning purposes. /// An empty [`ImmutableString`] for cloning purposes.
pub(crate) empty_string: ImmutableString, pub(crate) interned_strings: Locked<StringsInterner<'static>>,
/// A set of symbols to disable. /// A set of symbols to disable.
pub(crate) disabled_symbols: BTreeSet<Identifier>, pub(crate) disabled_symbols: BTreeSet<Identifier>,
@ -269,7 +270,7 @@ impl Engine {
#[cfg(not(feature = "no_module"))] #[cfg(not(feature = "no_module"))]
module_resolver: Box::new(crate::module::resolvers::DummyModuleResolver::new()), module_resolver: Box::new(crate::module::resolvers::DummyModuleResolver::new()),
empty_string: ImmutableString::new(), interned_strings: StringsInterner::new().into(),
disabled_symbols: BTreeSet::new(), disabled_symbols: BTreeSet::new(),
#[cfg(not(feature = "no_custom_syntax"))] #[cfg(not(feature = "no_custom_syntax"))]
custom_keywords: BTreeMap::new(), custom_keywords: BTreeMap::new(),
@ -310,30 +311,21 @@ impl Engine {
engine engine
} }
/// Get an empty [`ImmutableString`]. /// Get an interned string.
///
/// [`Engine`] keeps a single instance of an empty [`ImmutableString`] and uses this to create
/// shared instances for subsequent uses. This minimizes unnecessary allocations for empty strings.
#[inline(always)]
#[must_use] #[must_use]
pub fn const_empty_string(&self) -> ImmutableString { #[inline(always)]
self.empty_string.clone() pub(crate) fn get_interned_string(
&self,
string: impl AsRef<str> + Into<ImmutableString>,
) -> ImmutableString {
locked_write(&self.interned_strings).get(string).into()
} }
/// Check a result to ensure that it is valid. /// Check a result to ensure that it is valid.
pub(crate) fn check_return_value(&self, mut result: RhaiResult, _pos: Position) -> RhaiResult { #[inline]
if let Ok(ref mut r) = result { pub(crate) fn check_return_value(&self, result: RhaiResult, _pos: Position) -> RhaiResult {
// Concentrate all empty strings into one instance to save memory
if let Dynamic(Union::Str(s, ..)) = r {
if s.is_empty() {
if !s.ptr_eq(&self.empty_string) {
*s = self.const_empty_string();
}
return result;
}
}
#[cfg(not(feature = "unchecked"))] #[cfg(not(feature = "unchecked"))]
if let Ok(ref r) = result {
self.check_data_size(r, _pos)?; self.check_data_size(r, _pos)?;
} }

View File

@ -328,7 +328,7 @@ impl Engine {
// `... ${...} ...` // `... ${...} ...`
Expr::InterpolatedString(x, _) => { Expr::InterpolatedString(x, _) => {
let mut concat = self.const_empty_string().into(); let mut concat = self.get_interned_string("").into();
let target = &mut concat; let target = &mut concat;
let mut result = Ok(Dynamic::UNIT); let mut result = Ok(Dynamic::UNIT);
@ -355,7 +355,10 @@ impl Engine {
} }
} }
result.map(|_| concat.take_or_clone()) self.check_return_value(
result.map(|_| concat.take_or_clone()),
expr.start_position(),
)
} }
#[cfg(not(feature = "no_index"))] #[cfg(not(feature = "no_index"))]

View File

@ -7,7 +7,9 @@ use crate::ast::{
}; };
use crate::func::get_hasher; use crate::func::get_hasher;
use crate::types::dynamic::{AccessMode, Union}; use crate::types::dynamic::{AccessMode, Union};
use crate::{Dynamic, Engine, Module, Position, RhaiResult, RhaiResultOf, Scope, ERR, INT}; use crate::{
Dynamic, Engine, ImmutableString, Module, Position, RhaiResult, RhaiResultOf, Scope, ERR, INT,
};
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
#[cfg(feature = "no_std")] #[cfg(feature = "no_std")]
use std::prelude::v1::*; use std::prelude::v1::*;
@ -137,23 +139,10 @@ impl Engine {
pos: op_pos, pos: op_pos,
} = op_info; } = op_info;
let mut lock_guard; let mut lock_guard = target.write_lock::<Dynamic>().unwrap();
let lhs_ptr_inner;
#[cfg(not(feature = "no_closure"))]
let target_is_shared = target.is_shared();
#[cfg(feature = "no_closure")]
let target_is_shared = false;
if target_is_shared {
lock_guard = target.write_lock::<Dynamic>().unwrap();
lhs_ptr_inner = &mut *lock_guard;
} else {
lhs_ptr_inner = &mut *target;
}
let hash = hash_op_assign; let hash = hash_op_assign;
let args = &mut [lhs_ptr_inner, &mut new_val]; let args = &mut [&mut *lock_guard, &mut new_val];
let level = level + 1; let level = level + 1;
match self.call_native_fn( match self.call_native_fn(
@ -181,21 +170,17 @@ impl Engine {
} }
} else { } else {
// Normal assignment // Normal assignment
#[cfg(not(feature = "no_closure"))]
if target.is_shared() {
// Handle case where target is a `Dynamic` shared value
// (returned by a variable resolver, for example)
*target.write_lock::<Dynamic>().unwrap() = new_val; *target.write_lock::<Dynamic>().unwrap() = new_val;
} else {
*target.as_mut() = new_val;
} }
#[cfg(feature = "no_closure")] /*
{ if let Some(mut guard) = target.write_lock::<Dynamic>() {
*target.as_mut() = new_val; if guard.is::<ImmutableString>() {
let s = std::mem::take(&mut *guard).cast::<ImmutableString>();
*guard = self.get_interned_string(s).into();
} }
} }
*/
target.propagate_changed_value(op_info.pos) target.propagate_changed_value(op_info.pos)
} }
@ -301,6 +286,13 @@ impl Engine {
.map(Dynamic::flatten); .map(Dynamic::flatten);
if let Ok(rhs_val) = rhs_result { if let Ok(rhs_val) = rhs_result {
let rhs_val = if rhs_val.is::<ImmutableString>() {
self.get_interned_string(rhs_val.cast::<ImmutableString>())
.into()
} else {
rhs_val
};
let _new_val = Some((rhs_val, *op_info)); let _new_val = Some((rhs_val, *op_info));
// Must be either `var[index] op= val` or `var.prop op= val` // Must be either `var[index] op= val` or `var.prop op= val`

View File

@ -820,14 +820,14 @@ pub fn get_builtin_op_assignment_fn(op: &str, x: &Dynamic, y: &Dynamic) -> Optio
return match op { return match op {
"+=" => Some(|_, args| { "+=" => Some(|_, args| {
let (first, second) = args.split_first_mut().expect(BUILTIN); let (first, second) = args.split_first_mut().expect(BUILTIN);
let mut x = first.write_lock::<ImmutableString>().expect(BUILTIN); let x = &mut *first.write_lock::<ImmutableString>().expect(BUILTIN);
let y = &*second[0].read_lock::<ImmutableString>().expect(BUILTIN); let y = std::mem::take(second[0]).cast::<ImmutableString>();
Ok((*x += y).into()) Ok((*x += y).into())
}), }),
"-=" => Some(|_, args| { "-=" => Some(|_, args| {
let (first, second) = args.split_first_mut().expect(BUILTIN); let (first, second) = args.split_first_mut().expect(BUILTIN);
let mut x = first.write_lock::<ImmutableString>().expect(BUILTIN); let x = &mut *first.write_lock::<ImmutableString>().expect(BUILTIN);
let y = &*second[0].read_lock::<ImmutableString>().expect(BUILTIN); let y = std::mem::take(second[0]).cast::<ImmutableString>();
Ok((*x -= y).into()) Ok((*x -= y).into())
}), }),
_ => None, _ => None,

View File

@ -1050,7 +1050,7 @@ fn optimize_expr(expr: &mut Expr, state: &mut OptimizerState, _chaining: bool) {
// `` // ``
Expr::InterpolatedString(x, pos) if x.is_empty() => { Expr::InterpolatedString(x, pos) if x.is_empty() => {
state.set_dirty(); state.set_dirty();
*expr = Expr::StringConstant(state.engine.const_empty_string(), *pos); *expr = Expr::StringConstant(state.engine.get_interned_string(""), *pos);
} }
// `... ${const} ...` // `... ${const} ...`
Expr::InterpolatedString(..) if expr.is_constant() => { Expr::InterpolatedString(..) if expr.is_constant() => {

View File

@ -170,7 +170,7 @@ fn collect_fn_metadata(
"comments".into(), "comments".into(),
func.comments func.comments
.iter() .iter()
.map(|s| dict.get(s).into()) .map(|s| dict.get(s.as_ref()).into())
.collect::<Array>() .collect::<Array>()
.into(), .into(),
); );

View File

@ -75,7 +75,7 @@ mod print_debug_functions {
/// Return the empty string. /// Return the empty string.
#[rhai_fn(name = "print", name = "debug")] #[rhai_fn(name = "print", name = "debug")]
pub fn print_empty_string(ctx: NativeCallContext) -> ImmutableString { pub fn print_empty_string(ctx: NativeCallContext) -> ImmutableString {
ctx.engine().const_empty_string() ctx.engine().get_interned_string("")
} }
/// Return the `string`. /// Return the `string`.
@ -121,7 +121,7 @@ mod print_debug_functions {
#[rhai_fn(name = "print", name = "to_string")] #[rhai_fn(name = "print", name = "to_string")]
pub fn print_unit(ctx: NativeCallContext, unit: ()) -> ImmutableString { pub fn print_unit(ctx: NativeCallContext, unit: ()) -> ImmutableString {
let _ = unit; let _ = unit;
ctx.engine().const_empty_string() ctx.engine().get_interned_string("")
} }
/// Convert the unit into a string in debug format. /// Convert the unit into a string in debug format.
#[rhai_fn(name = "debug", name = "to_debug")] #[rhai_fn(name = "debug", name = "to_debug")]

View File

@ -59,7 +59,10 @@ mod string_functions {
// The following are needed in order to override the generic versions with `Dynamic` parameters. // The following are needed in order to override the generic versions with `Dynamic` parameters.
#[rhai_fn(name = "+", pure)] #[rhai_fn(name = "+", pure)]
pub fn add_append_str(string1: &mut ImmutableString, string2: &str) -> ImmutableString { pub fn add_append_str(
string1: &mut ImmutableString,
string2: ImmutableString,
) -> ImmutableString {
&*string1 + string2 &*string1 + string2
} }
#[rhai_fn(name = "+", pure)] #[rhai_fn(name = "+", pure)]
@ -81,6 +84,20 @@ mod string_functions {
string string
} }
/// Append `string2` to `string1` in place.
///
/// Registered as the `+=` operator; the work is delegated to the `+=` implementation of
/// `ImmutableString`, with `string2` passed by value.
#[rhai_fn(name = "+=")]
pub fn add_assign_append_str(string1: &mut ImmutableString, string2: ImmutableString) {
*string1 += string2
}
/// Append `character` to `string` in place.
///
/// Registered as the `+=` operator.
// Deliberately NOT marked `pure`: the first argument is mutated, and `pure` is reserved for
// functions that leave their `&mut` argument untouched. This also keeps the registration
// consistent with the other `+=` overloads for strings.
#[rhai_fn(name = "+=")]
pub fn add_assign_append_char(string: &mut ImmutableString, character: char) {
    *string += character;
}
/// Appending `()` to a string is a no-op: `string` is left exactly as it was.
///
/// Registered as the `+=` operator.
#[rhai_fn(name = "+=")]
pub fn add_assign_append_unit(string: &mut ImmutableString, item: ()) {
// Both arguments are intentionally unused; the bindings below only silence the warnings.
let _ = string;
let _ = item;
}
#[cfg(not(feature = "no_index"))] #[cfg(not(feature = "no_index"))]
pub mod blob_functions { pub mod blob_functions {
use crate::Blob; use crate::Blob;
@ -320,7 +337,7 @@ mod string_functions {
len: INT, len: INT,
) -> ImmutableString { ) -> ImmutableString {
if string.is_empty() || len <= 0 { if string.is_empty() || len <= 0 {
return ctx.engine().const_empty_string(); return ctx.engine().get_interned_string("");
} }
let mut chars = StaticVec::<char>::with_capacity(len as usize); let mut chars = StaticVec::<char>::with_capacity(len as usize);
@ -803,13 +820,13 @@ mod string_functions {
len: INT, len: INT,
) -> ImmutableString { ) -> ImmutableString {
if string.is_empty() { if string.is_empty() {
return ctx.engine().const_empty_string(); return ctx.engine().get_interned_string("");
} }
let mut chars = StaticVec::with_capacity(string.len()); let mut chars = StaticVec::with_capacity(string.len());
let offset = if string.is_empty() || len <= 0 { let offset = if string.is_empty() || len <= 0 {
return ctx.engine().const_empty_string(); return ctx.engine().get_interned_string("");
} else if start < 0 { } else if start < 0 {
let abs_start = start.unsigned_abs() as usize; let abs_start = start.unsigned_abs() as usize;
chars.extend(string.chars()); chars.extend(string.chars());
@ -819,7 +836,7 @@ mod string_functions {
chars.len() - abs_start chars.len() - abs_start
} }
} else if start as usize >= string.chars().count() { } else if start as usize >= string.chars().count() {
return ctx.engine().const_empty_string(); return ctx.engine().get_interned_string("");
} else { } else {
start as usize start as usize
}; };
@ -865,7 +882,7 @@ mod string_functions {
start: INT, start: INT,
) -> ImmutableString { ) -> ImmutableString {
if string.is_empty() { if string.is_empty() {
ctx.engine().const_empty_string() ctx.engine().get_interned_string("")
} else { } else {
let len = string.len() as INT; let len = string.len() as INT;
sub_string(ctx, string, start, len) sub_string(ctx, string, start, len)
@ -1245,7 +1262,7 @@ mod string_functions {
let num_chars = string.chars().count(); let num_chars = string.chars().count();
if abs_index > num_chars { if abs_index > num_chars {
vec![ vec![
ctx.engine().const_empty_string().into(), ctx.engine().get_interned_string("").into(),
string.as_str().into(), string.as_str().into(),
] ]
} else { } else {

View File

@ -106,7 +106,12 @@ impl<'e> ParseState<'e> {
/// Create a new [`ParseState`]. /// Create a new [`ParseState`].
#[inline(always)] #[inline(always)]
#[must_use] #[must_use]
pub fn new(engine: &Engine, scope: &'e Scope, tokenizer_control: TokenizerControl) -> Self { pub fn new(
engine: &Engine,
scope: &'e Scope,
interned_strings: StringsInterner<'e>,
tokenizer_control: TokenizerControl,
) -> Self {
Self { Self {
tokenizer_control, tokenizer_control,
expr_filter: |_| true, expr_filter: |_| true,
@ -114,7 +119,7 @@ impl<'e> ParseState<'e> {
external_vars: Vec::new(), external_vars: Vec::new(),
#[cfg(not(feature = "no_closure"))] #[cfg(not(feature = "no_closure"))]
allow_capture: true, allow_capture: true,
interned_strings: StringsInterner::new(), interned_strings,
scope, scope,
global: GlobalRuntimeState::new(engine), global: GlobalRuntimeState::new(engine),
stack: Scope::new(), stack: Scope::new(),
@ -237,7 +242,7 @@ impl<'e> ParseState<'e> {
/// Get an interned identifier, creating one if it is not yet interned. /// Get an interned identifier, creating one if it is not yet interned.
#[inline(always)] #[inline(always)]
#[must_use] #[must_use]
pub fn get_identifier(&mut self, text: impl AsRef<str>) -> Identifier { pub fn get_identifier(&mut self, text: impl AsRef<str> + Into<ImmutableString>) -> Identifier {
self.get_identifier_with_prefix("", text).into() self.get_identifier_with_prefix("", text).into()
} }
@ -247,7 +252,7 @@ impl<'e> ParseState<'e> {
pub fn get_identifier_with_prefix( pub fn get_identifier_with_prefix(
&mut self, &mut self,
prefix: impl AsRef<str>, prefix: impl AsRef<str>,
text: impl AsRef<str>, text: impl AsRef<str> + Into<ImmutableString>,
) -> Identifier { ) -> Identifier {
self.interned_strings.get_with_prefix(prefix, text).into() self.interned_strings.get_with_prefix(prefix, text).into()
} }
@ -256,7 +261,10 @@ impl<'e> ParseState<'e> {
#[inline(always)] #[inline(always)]
#[allow(dead_code)] #[allow(dead_code)]
#[must_use] #[must_use]
pub fn get_interned_string(&mut self, text: impl AsRef<str>) -> ImmutableString { pub fn get_interned_string(
&mut self,
text: impl AsRef<str> + Into<ImmutableString>,
) -> ImmutableString {
self.get_interned_string_with_prefix("", text) self.get_interned_string_with_prefix("", text)
} }
@ -267,7 +275,7 @@ impl<'e> ParseState<'e> {
pub fn get_interned_string_with_prefix( pub fn get_interned_string_with_prefix(
&mut self, &mut self,
prefix: impl AsRef<str>, prefix: impl AsRef<str>,
text: impl AsRef<str>, text: impl AsRef<str> + Into<ImmutableString>,
) -> ImmutableString { ) -> ImmutableString {
self.interned_strings.get_with_prefix(prefix, text) self.interned_strings.get_with_prefix(prefix, text)
} }
@ -1372,8 +1380,14 @@ impl Engine {
// | ... // | ...
#[cfg(not(feature = "no_function"))] #[cfg(not(feature = "no_function"))]
Token::Pipe | Token::Or if settings.options.contains(LangOptions::ANON_FN) => { Token::Pipe | Token::Or if settings.options.contains(LangOptions::ANON_FN) => {
let mut new_state = let interned_strings = std::mem::take(&mut state.interned_strings);
ParseState::new(self, state.scope, state.tokenizer_control.clone());
let mut new_state = ParseState::new(
self,
state.scope,
interned_strings,
state.tokenizer_control.clone(),
);
#[cfg(not(feature = "no_module"))] #[cfg(not(feature = "no_module"))]
{ {
@ -1415,7 +1429,11 @@ impl Engine {
..settings ..settings
}; };
let (expr, func) = self.parse_anon_fn(input, &mut new_state, lib, new_settings)?; let result = self.parse_anon_fn(input, &mut new_state, lib, new_settings);
state.interned_strings = new_state.interned_strings;
let (expr, func) = result?;
#[cfg(not(feature = "no_closure"))] #[cfg(not(feature = "no_closure"))]
new_state.external_vars.iter().try_for_each( new_state.external_vars.iter().try_for_each(
@ -2311,7 +2329,7 @@ impl Engine {
let hash = calc_fn_hash(&op, 2); let hash = calc_fn_hash(&op, 2);
let op_base = FnCallExpr { let op_base = FnCallExpr {
name: state.get_identifier(op), name: state.get_identifier(op.as_ref()),
hashes: FnCallHashes::from_native(hash), hashes: FnCallHashes::from_native(hash),
pos, pos,
..Default::default() ..Default::default()
@ -3233,8 +3251,14 @@ impl Engine {
match input.next().expect(NEVER_ENDS) { match input.next().expect(NEVER_ENDS) {
(Token::Fn, pos) => { (Token::Fn, pos) => {
let mut new_state = let interned_strings = std::mem::take(&mut state.interned_strings);
ParseState::new(self, state.scope, state.tokenizer_control.clone());
let mut new_state = ParseState::new(
self,
state.scope,
interned_strings,
state.tokenizer_control.clone(),
);
#[cfg(not(feature = "no_module"))] #[cfg(not(feature = "no_module"))]
{ {
@ -3280,7 +3304,11 @@ impl Engine {
#[cfg(not(feature = "no_function"))] #[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")] #[cfg(feature = "metadata")]
comments, comments,
)?; );
state.interned_strings = new_state.interned_strings;
let func = func?;
let hash = calc_fn_hash(&func.name, func.params.len()); let hash = calc_fn_hash(&func.name, func.params.len());

View File

@ -82,7 +82,7 @@ impl Borrow<SmartString> for ImmutableString {
impl Borrow<str> for ImmutableString { impl Borrow<str> for ImmutableString {
#[inline(always)] #[inline(always)]
fn borrow(&self) -> &str { fn borrow(&self) -> &str {
self.0.as_str() self.as_str()
} }
} }
@ -187,14 +187,14 @@ impl FromIterator<SmartString> for ImmutableString {
impl fmt::Display for ImmutableString { impl fmt::Display for ImmutableString {
#[inline(always)] #[inline(always)]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self.0.as_str(), f) fmt::Display::fmt(self.as_str(), f)
} }
} }
impl fmt::Debug for ImmutableString { impl fmt::Debug for ImmutableString {
#[inline(always)] #[inline(always)]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(self.0.as_str(), f) fmt::Debug::fmt(self.as_str(), f)
} }
} }
@ -208,7 +208,7 @@ impl Add for ImmutableString {
} else if self.is_empty() { } else if self.is_empty() {
rhs rhs
} else { } else {
self.make_mut().push_str(rhs.0.as_str()); self.make_mut().push_str(rhs.as_str());
self self
} }
} }
@ -225,7 +225,40 @@ impl Add for &ImmutableString {
rhs.clone() rhs.clone()
} else { } else {
let mut s = self.clone(); let mut s = self.clone();
s.make_mut().push_str(rhs.0.as_str()); s.make_mut().push_str(rhs.as_str());
s
}
}
}
/// Concatenation of an owned string with a borrowed one: `owned + &borrowed`.
impl Add<&Self> for ImmutableString {
    type Output = Self;

    /// Returns the left operand extended with the text of `rhs`.
    ///
    /// When either side is empty no text is copied: the non-empty side (or the left operand,
    /// if both are empty) is returned as is, cloning `rhs` when it is the one kept.
    #[inline]
    fn add(mut self, rhs: &Self) -> Self::Output {
        match (self.is_empty(), rhs.is_empty()) {
            // Nothing to append: hand the left operand back untouched.
            (_, true) => self,
            // Nothing on the left: the result is simply the right operand.
            (true, false) => rhs.clone(),
            // Both sides have text: append in place to the left operand.
            (false, false) => {
                self.make_mut().push_str(rhs.as_str());
                self
            }
        }
    }
}
impl Add<ImmutableString> for &ImmutableString {
type Output = ImmutableString;
#[inline]
fn add(self, rhs: ImmutableString) -> Self::Output {
if rhs.is_empty() {
self.clone()
} else if self.is_empty() {
rhs
} else {
let mut s = self.clone();
s.make_mut().push_str(rhs.as_str());
s s
} }
} }
@ -238,7 +271,7 @@ impl AddAssign<&ImmutableString> for ImmutableString {
if self.is_empty() { if self.is_empty() {
self.0 = rhs.0.clone(); self.0 = rhs.0.clone();
} else { } else {
self.make_mut().push_str(rhs.0.as_str()); self.make_mut().push_str(rhs.as_str());
} }
} }
} }
@ -251,7 +284,7 @@ impl AddAssign<ImmutableString> for ImmutableString {
if self.is_empty() { if self.is_empty() {
self.0 = rhs.0; self.0 = rhs.0;
} else { } else {
self.make_mut().push_str(rhs.0.as_str()); self.make_mut().push_str(rhs.as_str());
} }
} }
} }
@ -580,6 +613,10 @@ impl ImmutableString {
pub fn new() -> Self { pub fn new() -> Self {
Self(SmartString::new_const().into()) Self(SmartString::new_const().into())
} }
/// Strong count of references to the underlying string.
///
/// A result greater than one means the underlying string is also held by another owner.
#[inline(always)]
#[must_use]
pub(crate) fn strong_count(&self) -> usize {
    Shared::strong_count(&self.0)
}
/// Consume the [`ImmutableString`] and convert it into a [`String`]. /// Consume the [`ImmutableString`] and convert it into a [`String`].
/// ///
/// If there are other references to the same string, a cloned copy is returned. /// If there are other references to the same string, a cloned copy is returned.

View File

@ -1,5 +1,5 @@
use crate::func::hashing::get_hasher; use crate::func::hashing::get_hasher;
use crate::{Identifier, ImmutableString}; use crate::ImmutableString;
#[cfg(feature = "no_std")] #[cfg(feature = "no_std")]
use std::prelude::v1::*; use std::prelude::v1::*;
@ -10,12 +10,20 @@ use std::{
ops::AddAssign, ops::AddAssign,
}; };
/// Maximum number of strings interned.
///
/// This is the capacity given to an interner created via `StringsInterner::new`.
pub const MAX_INTERNED_STRINGS: usize = 256;
/// Maximum length of strings interned.
///
/// Compared against `str::len`, i.e. measured in bytes. Longer strings are still converted and
/// returned by the interner, but are never stored in its cache.
pub const MAX_STRING_LEN: usize = 24;
/// _(internals)_ A factory of identifiers from text strings. /// _(internals)_ A factory of identifiers from text strings.
/// Exported under the `internals` feature only. /// Exported under the `internals` feature only.
/// ///
/// Normal identifiers, property getters and setters are interned separately. /// Normal identifiers, property getters and setters are interned separately.
#[derive(Debug, Clone, Default, Hash)] #[derive(Debug, Clone, Hash)]
pub struct StringsInterner<'a> { pub struct StringsInterner<'a> {
/// Maximum capacity.
max: usize,
/// Normal strings. /// Normal strings.
strings: BTreeMap<u64, ImmutableString>, strings: BTreeMap<u64, ImmutableString>,
/// Property getters. /// Property getters.
@ -28,12 +36,27 @@ pub struct StringsInterner<'a> {
dummy: PhantomData<&'a ()>, dummy: PhantomData<&'a ()>,
} }
/// The default interner is the one built by `StringsInterner::new`.
///
/// Implemented by hand rather than derived: a derived `Default` would set the `max` capacity
/// field to zero instead of the capacity chosen by `new`.
impl Default for StringsInterner<'_> {
#[inline(always)]
fn default() -> Self {
Self::new()
}
}
impl StringsInterner<'_> { impl StringsInterner<'_> {
/// Create a new [`StringsInterner`]. /// Create a new [`StringsInterner`].
#[inline] #[inline(always)]
#[must_use] #[must_use]
pub fn new() -> Self { pub fn new() -> Self {
Self::new_with_capacity(MAX_INTERNED_STRINGS)
}
/// Create a new [`StringsInterner`] with a maximum capacity.
#[inline]
#[must_use]
pub fn new_with_capacity(capacity: usize) -> Self {
Self { Self {
max: capacity,
strings: BTreeMap::new(), strings: BTreeMap::new(),
#[cfg(not(feature = "no_object"))] #[cfg(not(feature = "no_object"))]
getters: BTreeMap::new(), getters: BTreeMap::new(),
@ -46,7 +69,7 @@ impl StringsInterner<'_> {
/// Get an identifier from a text string and prefix, adding it to the interner if necessary. /// Get an identifier from a text string and prefix, adding it to the interner if necessary.
#[inline(always)] #[inline(always)]
#[must_use] #[must_use]
pub fn get(&mut self, text: impl AsRef<str>) -> ImmutableString { pub fn get(&mut self, text: impl AsRef<str> + Into<ImmutableString>) -> ImmutableString {
self.get_with_prefix("", text) self.get_with_prefix("", text)
} }
@ -65,40 +88,81 @@ impl StringsInterner<'_> {
/// Panics if the prefix is not recognized. /// Panics if the prefix is not recognized.
#[inline] #[inline]
#[must_use] #[must_use]
pub fn get_with_prefix( pub fn get_with_prefix<T: AsRef<str> + Into<ImmutableString>>(
&mut self, &mut self,
prefix: impl AsRef<str>, prefix: impl AsRef<str>,
text: impl AsRef<str>, text: T,
) -> ImmutableString { ) -> ImmutableString {
let prefix = prefix.as_ref(); let prefix = prefix.as_ref();
let text = text.as_ref(); let key = text.as_ref();
let (dict, mapper): (_, fn(&str) -> Identifier) = match prefix { // Do not intern numbers
if prefix == "" && key.bytes().all(|c| c == b'.' || (c >= b'0' && c <= b'9')) {
return text.into();
}
let (dict, mapper): (_, fn(T) -> ImmutableString) = match prefix {
"" => (&mut self.strings, |s| s.into()), "" => (&mut self.strings, |s| s.into()),
#[cfg(not(feature = "no_object"))] #[cfg(not(feature = "no_object"))]
crate::engine::FN_GET => (&mut self.getters, crate::engine::make_getter), crate::engine::FN_GET => (&mut self.getters, |s| {
crate::engine::make_getter(s.as_ref()).into()
}),
#[cfg(not(feature = "no_object"))] #[cfg(not(feature = "no_object"))]
crate::engine::FN_SET => (&mut self.setters, crate::engine::make_setter), crate::engine::FN_SET => (&mut self.setters, |s| {
crate::engine::make_setter(s.as_ref()).into()
}),
_ => unreachable!("unsupported prefix {}", prefix), _ => unreachable!("unsupported prefix {}", prefix),
}; };
if key.len() > MAX_STRING_LEN {
return mapper(text);
}
let hasher = &mut get_hasher(); let hasher = &mut get_hasher();
text.hash(hasher); key.hash(hasher);
let key = hasher.finish(); let key = hasher.finish();
if !dict.is_empty() && dict.contains_key(&key) { if !dict.is_empty() && dict.contains_key(&key) {
dict.get(&key).unwrap().clone() return dict.get(&key).unwrap().clone();
} else { }
let value: ImmutableString = mapper(text).into();
let value = mapper(text);
if value.strong_count() > 1 {
return value;
}
dict.insert(key, value.clone()); dict.insert(key, value.clone());
value
println!("Interning '{value}'");
// If the interner is over capacity, remove the longest entry
if self.strings.len() > self.max {
// Leave some buffer to grow when shrinking the cache.
// We leave at least two entries, one for the empty string, and one for the string
// that has just been inserted.
let max = if self.max < 5 { 2 } else { self.max - 3 };
while self.strings.len() > max {
let (_, n) = self.strings.iter().fold((0, 0), |(x, n), (&k, v)| {
if k != key && v.len() > x {
(v.len(), k)
} else {
(x, n)
}
});
self.strings.remove(&n);
} }
} }
value
}
/// Number of strings interned. /// Number of strings interned.
#[inline(always)] #[inline]
#[must_use] #[must_use]
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
#[cfg(not(feature = "no_object"))] #[cfg(not(feature = "no_object"))]
@ -109,7 +173,7 @@ impl StringsInterner<'_> {
} }
/// Number of strings interned. /// Number of strings interned.
#[inline(always)] #[inline]
#[must_use] #[must_use]
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
#[cfg(not(feature = "no_object"))] #[cfg(not(feature = "no_object"))]
@ -118,29 +182,45 @@ impl StringsInterner<'_> {
#[cfg(feature = "no_object")] #[cfg(feature = "no_object")]
return self.strings.is_empty(); return self.strings.is_empty();
} }
/// Clear all interned strings.
///
/// Empties the cache of normal strings and, unless the `no_object` feature is enabled, the
/// caches of property getters and setters as well. The maximum capacity is left unchanged.
#[inline]
pub fn clear(&mut self) {
self.strings.clear();
#[cfg(not(feature = "no_object"))]
{
self.getters.clear();
self.setters.clear();
}
}
} }
impl AddAssign<Self> for StringsInterner<'_> { impl AddAssign<Self> for StringsInterner<'_> {
#[inline(always)] #[inline(always)]
fn add_assign(&mut self, rhs: Self) { fn add_assign(&mut self, rhs: Self) {
self.strings.extend(rhs.strings.into_iter()); self.strings.extend(rhs.strings.into_iter());
#[cfg(not(feature = "no_object"))] #[cfg(not(feature = "no_object"))]
{
self.getters.extend(rhs.getters.into_iter()); self.getters.extend(rhs.getters.into_iter());
#[cfg(not(feature = "no_object"))]
self.setters.extend(rhs.setters.into_iter()); self.setters.extend(rhs.setters.into_iter());
} }
} }
}
impl AddAssign<&Self> for StringsInterner<'_> { impl AddAssign<&Self> for StringsInterner<'_> {
#[inline(always)] #[inline(always)]
fn add_assign(&mut self, rhs: &Self) { fn add_assign(&mut self, rhs: &Self) {
self.strings self.strings
.extend(rhs.strings.iter().map(|(k, v)| (k.clone(), v.clone()))); .extend(rhs.strings.iter().map(|(&k, v)| (k, v.clone())));
#[cfg(not(feature = "no_object"))] #[cfg(not(feature = "no_object"))]
{
self.getters self.getters
.extend(rhs.getters.iter().map(|(k, v)| (k.clone(), v.clone()))); .extend(rhs.getters.iter().map(|(&k, v)| (k, v.clone())));
#[cfg(not(feature = "no_object"))]
self.setters self.setters
.extend(rhs.setters.iter().map(|(k, v)| (k.clone(), v.clone()))); .extend(rhs.setters.iter().map(|(&k, v)| (k, v.clone())));
}
} }
} }