Simplify strings interner.
This commit is contained in:
parent
25476d1cea
commit
a518ab62bb
@ -47,9 +47,6 @@ const NEVER_ENDS: &str = "`Token`";
|
|||||||
/// Unroll `switch` ranges no larger than this.
|
/// Unroll `switch` ranges no larger than this.
|
||||||
const SMALL_SWITCH_RANGE: INT = 16;
|
const SMALL_SWITCH_RANGE: INT = 16;
|
||||||
|
|
||||||
/// Number of string interners used: two additional for property getters/setters if not `no_object`
|
|
||||||
const NUM_INTERNERS: usize = if cfg!(feature = "no_object") { 1 } else { 3 };
|
|
||||||
|
|
||||||
/// _(internals)_ A type that encapsulates the current state of the parser.
|
/// _(internals)_ A type that encapsulates the current state of the parser.
|
||||||
/// Exported under the `internals` feature only.
|
/// Exported under the `internals` feature only.
|
||||||
pub struct ParseState<'e> {
|
pub struct ParseState<'e> {
|
||||||
@ -58,7 +55,7 @@ pub struct ParseState<'e> {
|
|||||||
/// Controls whether parsing of an expression should stop given the next token.
|
/// Controls whether parsing of an expression should stop given the next token.
|
||||||
pub expr_filter: fn(&Token) -> bool,
|
pub expr_filter: fn(&Token) -> bool,
|
||||||
/// String interners.
|
/// String interners.
|
||||||
interned_strings: [StringsInterner<'e>; NUM_INTERNERS],
|
interned_strings: StringsInterner<'e>,
|
||||||
/// External [scope][Scope] with constants.
|
/// External [scope][Scope] with constants.
|
||||||
pub scope: &'e Scope<'e>,
|
pub scope: &'e Scope<'e>,
|
||||||
/// Global runtime state.
|
/// Global runtime state.
|
||||||
@ -88,6 +85,8 @@ pub struct ParseState<'e> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for ParseState<'_> {
|
impl fmt::Debug for ParseState<'_> {
|
||||||
|
#[cold]
|
||||||
|
#[inline(never)]
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
let mut f = f.debug_struct("ParseState");
|
let mut f = f.debug_struct("ParseState");
|
||||||
|
|
||||||
@ -116,7 +115,7 @@ impl<'e> ParseState<'e> {
|
|||||||
pub fn new(
|
pub fn new(
|
||||||
engine: &Engine,
|
engine: &Engine,
|
||||||
scope: &'e Scope,
|
scope: &'e Scope,
|
||||||
interned_strings: [StringsInterner<'e>; NUM_INTERNERS],
|
interned_strings: StringsInterner<'e>,
|
||||||
tokenizer_control: TokenizerControl,
|
tokenizer_control: TokenizerControl,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
@ -254,7 +253,7 @@ impl<'e> ParseState<'e> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
text: impl AsRef<str> + Into<ImmutableString>,
|
text: impl AsRef<str> + Into<ImmutableString>,
|
||||||
) -> ImmutableString {
|
) -> ImmutableString {
|
||||||
self.interned_strings[0].get(text)
|
self.interned_strings.get(text)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get an interned property getter, creating one if it is not yet interned.
|
/// Get an interned property getter, creating one if it is not yet interned.
|
||||||
@ -265,8 +264,11 @@ impl<'e> ParseState<'e> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
text: impl AsRef<str> + Into<ImmutableString>,
|
text: impl AsRef<str> + Into<ImmutableString>,
|
||||||
) -> ImmutableString {
|
) -> ImmutableString {
|
||||||
self.interned_strings[1]
|
self.interned_strings.get_with_mapper(
|
||||||
.get_with_mapper(|s| crate::engine::make_getter(s.as_ref()).into(), text)
|
crate::engine::FN_GET,
|
||||||
|
|s| crate::engine::make_getter(s.as_ref()).into(),
|
||||||
|
text,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get an interned property setter, creating one if it is not yet interned.
|
/// Get an interned property setter, creating one if it is not yet interned.
|
||||||
@ -277,8 +279,11 @@ impl<'e> ParseState<'e> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
text: impl AsRef<str> + Into<ImmutableString>,
|
text: impl AsRef<str> + Into<ImmutableString>,
|
||||||
) -> ImmutableString {
|
) -> ImmutableString {
|
||||||
self.interned_strings[2]
|
self.interned_strings.get_with_mapper(
|
||||||
.get_with_mapper(|s| crate::engine::make_setter(s.as_ref()).into(), text)
|
crate::engine::FN_SET,
|
||||||
|
|s| crate::engine::make_setter(s.as_ref()).into(),
|
||||||
|
text,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use super::BloomFilterU64;
|
||||||
use crate::func::{hashing::get_hasher, StraightHashMap};
|
use crate::func::{hashing::get_hasher, StraightHashMap};
|
||||||
use crate::ImmutableString;
|
use crate::ImmutableString;
|
||||||
#[cfg(feature = "no_std")]
|
#[cfg(feature = "no_std")]
|
||||||
@ -14,7 +15,7 @@ use std::{
|
|||||||
};
|
};
|
||||||
|
|
||||||
/// Maximum number of strings interned.
|
/// Maximum number of strings interned.
|
||||||
pub const MAX_INTERNED_STRINGS: usize = 256;
|
pub const MAX_INTERNED_STRINGS: usize = 1024;
|
||||||
|
|
||||||
/// Maximum length of strings interned.
|
/// Maximum length of strings interned.
|
||||||
pub const MAX_STRING_LEN: usize = 24;
|
pub const MAX_STRING_LEN: usize = 24;
|
||||||
@ -28,8 +29,10 @@ pub struct StringsInterner<'a> {
|
|||||||
pub capacity: usize,
|
pub capacity: usize,
|
||||||
/// Maximum string length.
|
/// Maximum string length.
|
||||||
pub max_string_len: usize,
|
pub max_string_len: usize,
|
||||||
/// Normal strings.
|
/// Cached strings.
|
||||||
strings: StraightHashMap<ImmutableString>,
|
cache: StraightHashMap<ImmutableString>,
|
||||||
|
/// Bloom filter to avoid caching "one-hit wonders".
|
||||||
|
filter: BloomFilterU64,
|
||||||
/// Take care of the lifetime parameter.
|
/// Take care of the lifetime parameter.
|
||||||
dummy: PhantomData<&'a ()>,
|
dummy: PhantomData<&'a ()>,
|
||||||
}
|
}
|
||||||
@ -42,9 +45,10 @@ impl Default for StringsInterner<'_> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for StringsInterner<'_> {
|
impl fmt::Debug for StringsInterner<'_> {
|
||||||
#[inline]
|
#[cold]
|
||||||
|
#[inline(never)]
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
f.debug_list().entries(self.strings.values()).finish()
|
f.debug_list().entries(self.cache.values()).finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -56,7 +60,8 @@ impl StringsInterner<'_> {
|
|||||||
Self {
|
Self {
|
||||||
capacity: MAX_INTERNED_STRINGS,
|
capacity: MAX_INTERNED_STRINGS,
|
||||||
max_string_len: MAX_STRING_LEN,
|
max_string_len: MAX_STRING_LEN,
|
||||||
strings: StraightHashMap::default(),
|
cache: StraightHashMap::default(),
|
||||||
|
filter: BloomFilterU64::new(),
|
||||||
dummy: PhantomData,
|
dummy: PhantomData,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -65,7 +70,7 @@ impl StringsInterner<'_> {
|
|||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn get<S: AsRef<str> + Into<ImmutableString>>(&mut self, text: S) -> ImmutableString {
|
pub fn get<S: AsRef<str> + Into<ImmutableString>>(&mut self, text: S) -> ImmutableString {
|
||||||
self.get_with_mapper(Into::into, text)
|
self.get_with_mapper("", Into::into, text)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get an identifier from a text string, adding it to the interner if necessary.
|
/// Get an identifier from a text string, adding it to the interner if necessary.
|
||||||
@ -73,20 +78,23 @@ impl StringsInterner<'_> {
|
|||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn get_with_mapper<S: AsRef<str>>(
|
pub fn get_with_mapper<S: AsRef<str>>(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
id: &str,
|
||||||
mapper: impl Fn(S) -> ImmutableString,
|
mapper: impl Fn(S) -> ImmutableString,
|
||||||
text: S,
|
text: S,
|
||||||
) -> ImmutableString {
|
) -> ImmutableString {
|
||||||
let key = text.as_ref();
|
let key = text.as_ref();
|
||||||
|
|
||||||
if key.len() > MAX_STRING_LEN {
|
let hasher = &mut get_hasher();
|
||||||
|
id.hash(hasher);
|
||||||
|
key.hash(hasher);
|
||||||
|
let hash = hasher.finish();
|
||||||
|
|
||||||
|
// Cache long strings only on the second try to avoid caching "one-hit wonders".
|
||||||
|
if key.len() > MAX_STRING_LEN && self.filter.is_absent_and_set(hash) {
|
||||||
return mapper(text);
|
return mapper(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
let hasher = &mut get_hasher();
|
let result = match self.cache.entry(hash) {
|
||||||
key.hash(hasher);
|
|
||||||
let key = hasher.finish();
|
|
||||||
|
|
||||||
let result = match self.strings.entry(key) {
|
|
||||||
Entry::Occupied(e) => return e.get().clone(),
|
Entry::Occupied(e) => return e.get().clone(),
|
||||||
Entry::Vacant(e) => {
|
Entry::Vacant(e) => {
|
||||||
let value = mapper(text);
|
let value = mapper(text);
|
||||||
@ -100,7 +108,7 @@ impl StringsInterner<'_> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// If the interner is over capacity, remove the longest entry that has the lowest count
|
// If the interner is over capacity, remove the longest entry that has the lowest count
|
||||||
if self.strings.len() > self.capacity {
|
if self.cache.len() > self.capacity {
|
||||||
// Leave some buffer to grow when shrinking the cache.
|
// Leave some buffer to grow when shrinking the cache.
|
||||||
// We leave at least two entries, one for the empty string, and one for the string
|
// We leave at least two entries, one for the empty string, and one for the string
|
||||||
// that has just been inserted.
|
// that has just been inserted.
|
||||||
@ -110,21 +118,21 @@ impl StringsInterner<'_> {
|
|||||||
self.capacity - 3
|
self.capacity - 3
|
||||||
};
|
};
|
||||||
|
|
||||||
while self.strings.len() > max {
|
while self.cache.len() > max {
|
||||||
let (_, _, n) =
|
let (_, _, n) = self
|
||||||
self.strings
|
.cache
|
||||||
.iter()
|
.iter()
|
||||||
.fold((0, usize::MAX, 0), |(x, c, n), (&k, v)| {
|
.fold((0, usize::MAX, 0), |(x, c, n), (&k, v)| {
|
||||||
if k != key
|
if k != hash
|
||||||
&& (v.strong_count() < c || (v.strong_count() == c && v.len() > x))
|
&& (v.strong_count() < c || (v.strong_count() == c && v.len() > x))
|
||||||
{
|
{
|
||||||
(v.len(), v.strong_count(), k)
|
(v.len(), v.strong_count(), k)
|
||||||
} else {
|
} else {
|
||||||
(x, c, n)
|
(x, c, n)
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
self.strings.remove(&n);
|
self.cache.remove(&n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -136,7 +144,7 @@ impl StringsInterner<'_> {
|
|||||||
#[must_use]
|
#[must_use]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub fn len(&self) -> usize {
|
pub fn len(&self) -> usize {
|
||||||
self.strings.len()
|
self.cache.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns `true` if there are no interned strings.
|
/// Returns `true` if there are no interned strings.
|
||||||
@ -144,28 +152,28 @@ impl StringsInterner<'_> {
|
|||||||
#[must_use]
|
#[must_use]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(&self) -> bool {
|
||||||
self.strings.is_empty()
|
self.cache.is_empty()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clear all interned strings.
|
/// Clear all interned strings.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub fn clear(&mut self) {
|
pub fn clear(&mut self) {
|
||||||
self.strings.clear();
|
self.cache.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AddAssign<Self> for StringsInterner<'_> {
|
impl AddAssign<Self> for StringsInterner<'_> {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn add_assign(&mut self, rhs: Self) {
|
fn add_assign(&mut self, rhs: Self) {
|
||||||
self.strings.extend(rhs.strings.into_iter());
|
self.cache.extend(rhs.cache.into_iter());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AddAssign<&Self> for StringsInterner<'_> {
|
impl AddAssign<&Self> for StringsInterner<'_> {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn add_assign(&mut self, rhs: &Self) {
|
fn add_assign(&mut self, rhs: &Self) {
|
||||||
self.strings
|
self.cache
|
||||||
.extend(rhs.strings.iter().map(|(&k, v)| (k, v.clone())));
|
.extend(rhs.cache.iter().map(|(&k, v)| (k, v.clone())));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user