Simplify strings interner.

This commit is contained in:
Stephen Chung 2022-09-27 08:52:51 +08:00
parent 25476d1cea
commit a518ab62bb
2 changed files with 57 additions and 44 deletions

View File

@ -47,9 +47,6 @@ const NEVER_ENDS: &str = "`Token`";
/// Unroll `switch` ranges no larger than this.
const SMALL_SWITCH_RANGE: INT = 16;
/// Number of string interners used: two additional for property getters/setters if not `no_object`
const NUM_INTERNERS: usize = if cfg!(feature = "no_object") { 1 } else { 3 };
/// _(internals)_ A type that encapsulates the current state of the parser.
/// Exported under the `internals` feature only.
pub struct ParseState<'e> {
@ -58,7 +55,7 @@ pub struct ParseState<'e> {
/// Controls whether parsing of an expression should stop given the next token.
pub expr_filter: fn(&Token) -> bool,
/// String interners.
interned_strings: [StringsInterner<'e>; NUM_INTERNERS],
interned_strings: StringsInterner<'e>,
/// External [scope][Scope] with constants.
pub scope: &'e Scope<'e>,
/// Global runtime state.
@ -88,6 +85,8 @@ pub struct ParseState<'e> {
}
impl fmt::Debug for ParseState<'_> {
#[cold]
#[inline(never)]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut f = f.debug_struct("ParseState");
@ -116,7 +115,7 @@ impl<'e> ParseState<'e> {
pub fn new(
engine: &Engine,
scope: &'e Scope,
interned_strings: [StringsInterner<'e>; NUM_INTERNERS],
interned_strings: StringsInterner<'e>,
tokenizer_control: TokenizerControl,
) -> Self {
Self {
@ -254,7 +253,7 @@ impl<'e> ParseState<'e> {
&mut self,
text: impl AsRef<str> + Into<ImmutableString>,
) -> ImmutableString {
self.interned_strings[0].get(text)
self.interned_strings.get(text)
}
/// Get an interned property getter, creating one if it is not yet interned.
@ -265,8 +264,11 @@ impl<'e> ParseState<'e> {
&mut self,
text: impl AsRef<str> + Into<ImmutableString>,
) -> ImmutableString {
self.interned_strings[1]
.get_with_mapper(|s| crate::engine::make_getter(s.as_ref()).into(), text)
self.interned_strings.get_with_mapper(
crate::engine::FN_GET,
|s| crate::engine::make_getter(s.as_ref()).into(),
text,
)
}
/// Get an interned property setter, creating one if it is not yet interned.
@ -277,8 +279,11 @@ impl<'e> ParseState<'e> {
&mut self,
text: impl AsRef<str> + Into<ImmutableString>,
) -> ImmutableString {
self.interned_strings[2]
.get_with_mapper(|s| crate::engine::make_setter(s.as_ref()).into(), text)
self.interned_strings.get_with_mapper(
crate::engine::FN_SET,
|s| crate::engine::make_setter(s.as_ref()).into(),
text,
)
}
}

View File

@ -1,3 +1,4 @@
use super::BloomFilterU64;
use crate::func::{hashing::get_hasher, StraightHashMap};
use crate::ImmutableString;
#[cfg(feature = "no_std")]
@ -14,7 +15,7 @@ use std::{
};
/// Maximum number of strings interned.
pub const MAX_INTERNED_STRINGS: usize = 256;
pub const MAX_INTERNED_STRINGS: usize = 1024;
/// Maximum length of strings interned.
pub const MAX_STRING_LEN: usize = 24;
@ -28,8 +29,10 @@ pub struct StringsInterner<'a> {
pub capacity: usize,
/// Maximum string length.
pub max_string_len: usize,
/// Normal strings.
strings: StraightHashMap<ImmutableString>,
/// Cached strings.
cache: StraightHashMap<ImmutableString>,
/// Bloom filter to avoid caching "one-hit wonders".
filter: BloomFilterU64,
/// Take care of the lifetime parameter.
dummy: PhantomData<&'a ()>,
}
@ -42,9 +45,10 @@ impl Default for StringsInterner<'_> {
}
/// Debug formatting for the interner: prints the interned strings as a list.
impl fmt::Debug for StringsInterner<'_> {
// NOTE(review): this rendered diff shows old and new lines together without markers.
// `#[inline]` is presumably the attribute being replaced by `#[cold]` + `#[inline(never)]`
// — confirm against the repository.
#[inline]
#[cold]
#[inline(never)]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Only the map's values (the strings themselves) are listed; the keys are not printed.
// NOTE(review): `strings` and `cache` are presumably the old and new names of the same
// field, so only one of the two lines below exists in either version of the file.
f.debug_list().entries(self.strings.values()).finish()
f.debug_list().entries(self.cache.values()).finish()
}
}
@ -56,7 +60,8 @@ impl StringsInterner<'_> {
Self {
capacity: MAX_INTERNED_STRINGS,
max_string_len: MAX_STRING_LEN,
strings: StraightHashMap::default(),
cache: StraightHashMap::default(),
filter: BloomFilterU64::new(),
dummy: PhantomData,
}
}
@ -65,7 +70,7 @@ impl StringsInterner<'_> {
#[inline(always)]
#[must_use]
pub fn get<S: AsRef<str> + Into<ImmutableString>>(&mut self, text: S) -> ImmutableString {
// Thin wrapper over `get_with_mapper`: `Into::into` is the mapper, so the text itself is
// what gets converted into the returned `ImmutableString`.
// NOTE(review): the two calls below look like the pre- and post-change versions of the same
// line in this rendered diff. The second one passes an empty `id`, which `get_with_mapper`
// feeds into the hasher before the text.
self.get_with_mapper(Into::into, text)
self.get_with_mapper("", Into::into, text)
}
/// Get an identifier from a text string, adding it to the interner if necessary.
@ -73,20 +78,23 @@ impl StringsInterner<'_> {
#[must_use]
pub fn get_with_mapper<S: AsRef<str>>(
&mut self,
id: &str,
mapper: impl Fn(S) -> ImmutableString,
text: S,
) -> ImmutableString {
let key = text.as_ref();
if key.len() > MAX_STRING_LEN {
let hasher = &mut get_hasher();
id.hash(hasher);
key.hash(hasher);
let hash = hasher.finish();
// Cache long strings only on the second try to avoid caching "one-hit wonders".
if key.len() > MAX_STRING_LEN && self.filter.is_absent_and_set(hash) {
return mapper(text);
}
let hasher = &mut get_hasher();
key.hash(hasher);
let key = hasher.finish();
let result = match self.strings.entry(key) {
let result = match self.cache.entry(hash) {
Entry::Occupied(e) => return e.get().clone(),
Entry::Vacant(e) => {
let value = mapper(text);
@ -100,7 +108,7 @@ impl StringsInterner<'_> {
};
// If the interner is over capacity, remove the longest entry that has the lowest count
if self.strings.len() > self.capacity {
if self.cache.len() > self.capacity {
// Leave some buffer to grow when shrinking the cache.
// We leave at least two entries, one for the empty string, and one for the string
// that has just been inserted.
@ -110,21 +118,21 @@ impl StringsInterner<'_> {
self.capacity - 3
};
while self.strings.len() > max {
let (_, _, n) =
self.strings
.iter()
.fold((0, usize::MAX, 0), |(x, c, n), (&k, v)| {
if k != key
&& (v.strong_count() < c || (v.strong_count() == c && v.len() > x))
{
(v.len(), v.strong_count(), k)
} else {
(x, c, n)
}
});
while self.cache.len() > max {
let (_, _, n) = self
.cache
.iter()
.fold((0, usize::MAX, 0), |(x, c, n), (&k, v)| {
if k != hash
&& (v.strong_count() < c || (v.strong_count() == c && v.len() > x))
{
(v.len(), v.strong_count(), k)
} else {
(x, c, n)
}
});
self.strings.remove(&n);
self.cache.remove(&n);
}
}
@ -136,7 +144,7 @@ impl StringsInterner<'_> {
#[must_use]
#[allow(dead_code)]
pub fn len(&self) -> usize {
// Reports the number of entries currently held in the backing map.
// NOTE(review): `strings` / `cache` are presumably the old and new names of the same field
// shown side by side by the diff view; only one line exists in either version.
self.strings.len()
self.cache.len()
}
/// Returns `true` if there are no interned strings.
@ -144,28 +152,28 @@ impl StringsInterner<'_> {
#[must_use]
#[allow(dead_code)]
pub fn is_empty(&self) -> bool {
// `true` when the backing map holds no entries.
// NOTE(review): `strings` / `cache` are presumably the old and new names of the same field
// shown side by side by the diff view; only one line exists in either version.
self.strings.is_empty()
self.cache.is_empty()
}
/// Clear all interned strings.
#[inline(always)]
#[allow(dead_code)]
pub fn clear(&mut self) {
// Empties the backing map. Nothing else is touched in this body: `capacity` and
// `max_string_len` keep their values.
// NOTE(review): `strings` / `cache` are presumably the old and new names of the same field
// shown side by side by the diff view. The `filter` field is not reset here — confirm that
// is intended.
self.strings.clear();
self.cache.clear();
}
}
/// `+=` with an owned interner: moves every entry of `rhs` into this interner's map.
impl AddAssign<Self> for StringsInterner<'_> {
#[inline(always)]
fn add_assign(&mut self, rhs: Self) {
// `rhs` is consumed, so its entries are moved rather than cloned.
// No capacity check is performed in this body, so the map may end up larger than
// `capacity` after the merge.
// Assumes `StraightHashMap` extends like a std `HashMap` (an existing key is overwritten
// by the incoming entry) — TODO confirm.
// NOTE(review): `strings` / `cache` are presumably the old and new names of the same field
// shown side by side by the diff view; only one line exists in either version.
self.strings.extend(rhs.strings.into_iter());
self.cache.extend(rhs.cache.into_iter());
}
}
/// `+=` with a borrowed interner: copies every entry of `rhs` into this interner's map.
impl AddAssign<&Self> for StringsInterner<'_> {
#[inline(always)]
fn add_assign(&mut self, rhs: &Self) {
// `rhs` is only borrowed, so each key is copied out of its reference (`&k` pattern) and
// each value is cloned.
// No capacity check is performed in this body, so the map may end up larger than
// `capacity` after the merge.
// NOTE(review): `strings` / `cache` are presumably the old and new names of the same field
// shown side by side by the diff view; only one statement exists in either version.
self.strings
.extend(rhs.strings.iter().map(|(&k, v)| (k, v.clone())));
self.cache
.extend(rhs.cache.iter().map(|(&k, v)| (k, v.clone())));
}
}