Simplify strings interner.
This commit is contained in:
parent
25476d1cea
commit
a518ab62bb
@ -47,9 +47,6 @@ const NEVER_ENDS: &str = "`Token`";
|
||||
/// Unroll `switch` ranges no larger than this.
|
||||
const SMALL_SWITCH_RANGE: INT = 16;
|
||||
|
||||
/// Number of string interners used: one for plain strings, plus two more (property getters and setters) unless the `no_object` feature is enabled.
|
||||
const NUM_INTERNERS: usize = if cfg!(feature = "no_object") { 1 } else { 3 };
|
||||
|
||||
/// _(internals)_ A type that encapsulates the current state of the parser.
|
||||
/// Exported under the `internals` feature only.
|
||||
pub struct ParseState<'e> {
|
||||
@ -58,7 +55,7 @@ pub struct ParseState<'e> {
|
||||
/// Controls whether parsing of an expression should stop given the next token.
|
||||
pub expr_filter: fn(&Token) -> bool,
|
||||
/// String interners.
|
||||
interned_strings: [StringsInterner<'e>; NUM_INTERNERS],
|
||||
interned_strings: StringsInterner<'e>,
|
||||
/// External [scope][Scope] with constants.
|
||||
pub scope: &'e Scope<'e>,
|
||||
/// Global runtime state.
|
||||
@ -88,6 +85,8 @@ pub struct ParseState<'e> {
|
||||
}
|
||||
|
||||
impl fmt::Debug for ParseState<'_> {
|
||||
#[cold]
|
||||
#[inline(never)]
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let mut f = f.debug_struct("ParseState");
|
||||
|
||||
@ -116,7 +115,7 @@ impl<'e> ParseState<'e> {
|
||||
pub fn new(
|
||||
engine: &Engine,
|
||||
scope: &'e Scope,
|
||||
interned_strings: [StringsInterner<'e>; NUM_INTERNERS],
|
||||
interned_strings: StringsInterner<'e>,
|
||||
tokenizer_control: TokenizerControl,
|
||||
) -> Self {
|
||||
Self {
|
||||
@ -254,7 +253,7 @@ impl<'e> ParseState<'e> {
|
||||
&mut self,
|
||||
text: impl AsRef<str> + Into<ImmutableString>,
|
||||
) -> ImmutableString {
|
||||
self.interned_strings[0].get(text)
|
||||
self.interned_strings.get(text)
|
||||
}
|
||||
|
||||
/// Get an interned property getter, creating one if it is not yet interned.
|
||||
@ -265,8 +264,11 @@ impl<'e> ParseState<'e> {
|
||||
&mut self,
|
||||
text: impl AsRef<str> + Into<ImmutableString>,
|
||||
) -> ImmutableString {
|
||||
self.interned_strings[1]
|
||||
.get_with_mapper(|s| crate::engine::make_getter(s.as_ref()).into(), text)
|
||||
self.interned_strings.get_with_mapper(
|
||||
crate::engine::FN_GET,
|
||||
|s| crate::engine::make_getter(s.as_ref()).into(),
|
||||
text,
|
||||
)
|
||||
}
|
||||
|
||||
/// Get an interned property setter, creating one if it is not yet interned.
|
||||
@ -277,8 +279,11 @@ impl<'e> ParseState<'e> {
|
||||
&mut self,
|
||||
text: impl AsRef<str> + Into<ImmutableString>,
|
||||
) -> ImmutableString {
|
||||
self.interned_strings[2]
|
||||
.get_with_mapper(|s| crate::engine::make_setter(s.as_ref()).into(), text)
|
||||
self.interned_strings.get_with_mapper(
|
||||
crate::engine::FN_SET,
|
||||
|s| crate::engine::make_setter(s.as_ref()).into(),
|
||||
text,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
use super::BloomFilterU64;
|
||||
use crate::func::{hashing::get_hasher, StraightHashMap};
|
||||
use crate::ImmutableString;
|
||||
#[cfg(feature = "no_std")]
|
||||
@ -14,7 +15,7 @@ use std::{
|
||||
};
|
||||
|
||||
/// Maximum number of strings interned.
|
||||
pub const MAX_INTERNED_STRINGS: usize = 256;
|
||||
pub const MAX_INTERNED_STRINGS: usize = 1024;
|
||||
|
||||
/// Maximum length of strings interned.
|
||||
pub const MAX_STRING_LEN: usize = 24;
|
||||
@ -28,8 +29,10 @@ pub struct StringsInterner<'a> {
|
||||
pub capacity: usize,
|
||||
/// Maximum string length.
|
||||
pub max_string_len: usize,
|
||||
/// Normal strings.
|
||||
strings: StraightHashMap<ImmutableString>,
|
||||
/// Cached strings.
|
||||
cache: StraightHashMap<ImmutableString>,
|
||||
/// Bloom filter to avoid caching "one-hit wonders".
|
||||
filter: BloomFilterU64,
|
||||
/// Zero-sized marker that uses the otherwise-unused lifetime parameter `'a`.
|
||||
dummy: PhantomData<&'a ()>,
|
||||
}
|
||||
@ -42,9 +45,10 @@ impl Default for StringsInterner<'_> {
|
||||
}
|
||||
|
||||
impl fmt::Debug for StringsInterner<'_> {
|
||||
#[inline]
|
||||
#[cold]
|
||||
#[inline(never)]
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_list().entries(self.strings.values()).finish()
|
||||
f.debug_list().entries(self.cache.values()).finish()
|
||||
}
|
||||
}
|
||||
|
||||
@ -56,7 +60,8 @@ impl StringsInterner<'_> {
|
||||
Self {
|
||||
capacity: MAX_INTERNED_STRINGS,
|
||||
max_string_len: MAX_STRING_LEN,
|
||||
strings: StraightHashMap::default(),
|
||||
cache: StraightHashMap::default(),
|
||||
filter: BloomFilterU64::new(),
|
||||
dummy: PhantomData,
|
||||
}
|
||||
}
|
||||
@ -65,7 +70,7 @@ impl StringsInterner<'_> {
|
||||
#[inline(always)]
|
||||
#[must_use]
|
||||
pub fn get<S: AsRef<str> + Into<ImmutableString>>(&mut self, text: S) -> ImmutableString {
|
||||
self.get_with_mapper(Into::into, text)
|
||||
self.get_with_mapper("", Into::into, text)
|
||||
}
|
||||
|
||||
/// Get an identifier from a text string, adding it to the interner if necessary.
|
||||
@ -73,20 +78,23 @@ impl StringsInterner<'_> {
|
||||
#[must_use]
|
||||
pub fn get_with_mapper<S: AsRef<str>>(
|
||||
&mut self,
|
||||
id: &str,
|
||||
mapper: impl Fn(S) -> ImmutableString,
|
||||
text: S,
|
||||
) -> ImmutableString {
|
||||
let key = text.as_ref();
|
||||
|
||||
if key.len() > MAX_STRING_LEN {
|
||||
let hasher = &mut get_hasher();
|
||||
id.hash(hasher);
|
||||
key.hash(hasher);
|
||||
let hash = hasher.finish();
|
||||
|
||||
// Cache long strings only on the second try to avoid caching "one-hit wonders".
|
||||
if key.len() > MAX_STRING_LEN && self.filter.is_absent_and_set(hash) {
|
||||
return mapper(text);
|
||||
}
|
||||
|
||||
let hasher = &mut get_hasher();
|
||||
key.hash(hasher);
|
||||
let key = hasher.finish();
|
||||
|
||||
let result = match self.strings.entry(key) {
|
||||
let result = match self.cache.entry(hash) {
|
||||
Entry::Occupied(e) => return e.get().clone(),
|
||||
Entry::Vacant(e) => {
|
||||
let value = mapper(text);
|
||||
@ -100,7 +108,7 @@ impl StringsInterner<'_> {
|
||||
};
|
||||
|
||||
// If the interner is over capacity, repeatedly evict the entry with the lowest reference count (the longest string among ties)
|
||||
if self.strings.len() > self.capacity {
|
||||
if self.cache.len() > self.capacity {
|
||||
// Leave some buffer to grow when shrinking the cache.
|
||||
// We leave at least two entries, one for the empty string, and one for the string
|
||||
// that has just been inserted.
|
||||
@ -110,21 +118,21 @@ impl StringsInterner<'_> {
|
||||
self.capacity - 3
|
||||
};
|
||||
|
||||
while self.strings.len() > max {
|
||||
let (_, _, n) =
|
||||
self.strings
|
||||
.iter()
|
||||
.fold((0, usize::MAX, 0), |(x, c, n), (&k, v)| {
|
||||
if k != key
|
||||
&& (v.strong_count() < c || (v.strong_count() == c && v.len() > x))
|
||||
{
|
||||
(v.len(), v.strong_count(), k)
|
||||
} else {
|
||||
(x, c, n)
|
||||
}
|
||||
});
|
||||
while self.cache.len() > max {
|
||||
let (_, _, n) = self
|
||||
.cache
|
||||
.iter()
|
||||
.fold((0, usize::MAX, 0), |(x, c, n), (&k, v)| {
|
||||
if k != hash
|
||||
&& (v.strong_count() < c || (v.strong_count() == c && v.len() > x))
|
||||
{
|
||||
(v.len(), v.strong_count(), k)
|
||||
} else {
|
||||
(x, c, n)
|
||||
}
|
||||
});
|
||||
|
||||
self.strings.remove(&n);
|
||||
self.cache.remove(&n);
|
||||
}
|
||||
}
|
||||
|
||||
@ -136,7 +144,7 @@ impl StringsInterner<'_> {
|
||||
#[must_use]
|
||||
#[allow(dead_code)]
|
||||
pub fn len(&self) -> usize {
|
||||
self.strings.len()
|
||||
self.cache.len()
|
||||
}
|
||||
|
||||
/// Returns `true` if there are no interned strings.
|
||||
@ -144,28 +152,28 @@ impl StringsInterner<'_> {
|
||||
#[must_use]
|
||||
#[allow(dead_code)]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.strings.is_empty()
|
||||
self.cache.is_empty()
|
||||
}
|
||||
|
||||
/// Clear all interned strings.
|
||||
#[inline(always)]
|
||||
#[allow(dead_code)]
|
||||
pub fn clear(&mut self) {
|
||||
self.strings.clear();
|
||||
self.cache.clear();
|
||||
}
|
||||
}
|
||||
|
||||
impl AddAssign<Self> for StringsInterner<'_> {
|
||||
#[inline(always)]
|
||||
fn add_assign(&mut self, rhs: Self) {
|
||||
self.strings.extend(rhs.strings.into_iter());
|
||||
self.cache.extend(rhs.cache.into_iter());
|
||||
}
|
||||
}
|
||||
|
||||
impl AddAssign<&Self> for StringsInterner<'_> {
|
||||
#[inline(always)]
|
||||
fn add_assign(&mut self, rhs: &Self) {
|
||||
self.strings
|
||||
.extend(rhs.strings.iter().map(|(&k, v)| (k, v.clone())));
|
||||
self.cache
|
||||
.extend(rhs.cache.iter().map(|(&k, v)| (k, v.clone())));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user