Do not cache "one-hit wonders"

This commit is contained in:
Stephen Chung 2022-09-12 19:47:29 +08:00
parent c1ae9e0405
commit 44219c732c
5 changed files with 84 additions and 29 deletions

View File

@ -1,6 +1,7 @@
//! System caches.
use crate::func::{CallableFunction, StraightHashMap};
use crate::types::BloomFilterU64;
use crate::{Identifier, StaticVec};
use std::marker::PhantomData;
#[cfg(feature = "no_std")]
@ -16,12 +17,27 @@ pub struct FnResolutionCacheEntry {
pub source: Option<Box<Identifier>>,
}
/// _(internals)_ A function resolution cache.
/// _(internals)_ A function resolution cache with a bloom filter.
/// Exported under the `internals` feature only.
///
/// Entries are kept in a hash map keyed by the `u64` function hash. A `None` value records that
/// resolution for that hash found no function, so the failed lookup need not be repeated.
pub type FnResolutionCache = StraightHashMap<u64, Option<FnResolutionCacheEntry>>;
#[derive(Debug, Clone, Default)]
pub struct FnResolutionCache {
/// Hash map containing cached functions.
///
/// Keys are `u64` function hashes. A value of `None` is a cached negative result, i.e. the
/// lookup for that hash found nothing.
pub map: StraightHashMap<u64, Option<FnResolutionCacheEntry>>,
/// Bloom filter to avoid caching "one-hit wonders".
///
/// The resolution code marks a hash here the first time it misses in `map`, and only inserts
/// into `map` once the filter no longer reports that hash as absent.
pub filter: BloomFilterU64,
}
impl FnResolutionCache {
    /// Reset this [`FnResolutionCache`] by clearing both of its parts: the map of cached
    /// functions and the bloom filter.
    #[inline(always)]
    pub fn clear(&mut self) {
        // The two fields are independent, so borrow them separately and clear each one.
        let Self { map, filter } = self;
        filter.clear();
        map.clear();
    }
}
/// _(internals)_ A type containing system-wide caches.
/// Exported under the `internals` feature only.
@ -31,7 +47,7 @@ pub type FnResolutionCache = StraightHashMap<u64, Option<FnResolutionCacheEntry>
#[derive(Debug, Clone)]
pub struct Caches<'a> {
/// Stack of [function resolution caches][FnResolutionCache].
fn_resolution: StaticVec<FnResolutionCache>,
fn_resolution_caches: StaticVec<FnResolutionCache>,
/// Take care of the lifetime parameter.
dummy: PhantomData<&'a ()>,
}
@ -42,7 +58,7 @@ impl Caches<'_> {
#[must_use]
pub const fn new() -> Self {
Self {
fn_resolution: StaticVec::new_const(),
fn_resolution_caches: StaticVec::new_const(),
dummy: PhantomData,
}
}
@ -50,27 +66,27 @@ impl Caches<'_> {
#[inline(always)]
#[must_use]
pub fn fn_resolution_caches_len(&self) -> usize {
self.fn_resolution.len()
self.fn_resolution_caches.len()
}
/// Get a mutable reference to the current function resolution cache.
///
/// The current cache is the last one on the stack. If the stack is empty, a new cache is pushed
/// first, so there is always a cache to return.
#[inline]
#[must_use]
pub fn fn_resolution_cache_mut(&mut self) -> &mut FnResolutionCache {
if self.fn_resolution.is_empty() {
if self.fn_resolution_caches.is_empty() {
// Push a new function resolution cache if the stack is empty
self.push_fn_resolution_cache();
}
// The stack holds at least one cache at this point, so `unwrap` is not expected to fail.
self.fn_resolution.last_mut().unwrap()
self.fn_resolution_caches.last_mut().unwrap()
}
/// Push an empty function resolution cache onto the stack and make it current.
///
/// The pushed cache is default-constructed.
#[allow(dead_code)]
#[inline(always)]
pub fn push_fn_resolution_cache(&mut self) {
self.fn_resolution.push(StraightHashMap::default());
self.fn_resolution_caches.push(Default::default());
}
/// Rewind the function resolution caches stack to a particular size.
///
/// Calls `truncate(len)` on the stack of caches.
// NOTE(review): presumably `len` is a value saved earlier from `fn_resolution_caches_len` —
// confirm against callers.
#[inline(always)]
pub fn rewind_fn_resolution_caches(&mut self, len: usize) {
self.fn_resolution.truncate(len);
self.fn_resolution_caches.truncate(len);
}
}

View File

@ -257,8 +257,9 @@ impl Engine {
let hash = combine_hashes(hashes.native, hash);
let cache = caches.fn_resolution_cache_mut();
let local_entry: CallableFunction;
let func = match cache.entry(hash) {
let func = match cache.map.entry(hash) {
Entry::Vacant(entry) => {
let func = if args.len() == 2 {
get_builtin_binary_op_fn(name, operands[0], operands[1])
@ -267,14 +268,22 @@ impl Engine {
};
if let Some(f) = func {
&entry
.insert(Some(FnResolutionCacheEntry {
func: CallableFunction::from_fn_builtin(f),
source: None,
}))
.as_ref()
.unwrap()
.func
if cache.filter.is_absent(hash) {
// Do not cache "one-hit wonders"
cache.filter.mark(hash);
local_entry = CallableFunction::from_fn_builtin(f);
&local_entry
} else {
// Cache repeated calls
&entry
.insert(Some(FnResolutionCacheEntry {
func: CallableFunction::from_fn_builtin(f),
source: None,
}))
.as_ref()
.unwrap()
.func
}
} else {
let result = self.exec_fn_call(
None, global, caches, lib, name, *hashes, operands, false, false, pos,

View File

@ -185,6 +185,7 @@ impl Engine {
&self,
_global: &GlobalRuntimeState,
caches: &'s mut Caches,
local_entry: &'s mut Option<FnResolutionCacheEntry>,
lib: &[&Module],
fn_name: &str,
hash_base: u64,
@ -203,7 +204,9 @@ impl Engine {
)
});
match caches.fn_resolution_cache_mut().entry(hash) {
let cache = caches.fn_resolution_cache_mut();
match cache.map.entry(hash) {
Entry::Occupied(entry) => entry.into_mut().as_ref(),
Entry::Vacant(entry) => {
let num_args = args.as_ref().map_or(0, |a| a.len());
@ -229,12 +232,20 @@ impl Engine {
});
if let Some((f, s)) = func {
// Specific version found - insert into cache and return it
let new_entry = FnResolutionCacheEntry {
// Specific version found
let new_entry = Some(FnResolutionCacheEntry {
func: f.clone(),
source: s.map(|s| Box::new(s.into())),
});
return if cache.filter.is_absent(hash) {
// Do not cache "one-hit wonders"
cache.filter.mark(hash);
*local_entry = new_entry;
local_entry.as_ref()
} else {
// Cache entry
entry.insert(new_entry).as_ref()
};
return entry.insert(Some(new_entry)).as_ref();
}
// Check `Dynamic` parameters for functions with parameters
@ -288,7 +299,15 @@ impl Engine {
}
});
return entry.insert(builtin).as_ref();
return if cache.filter.is_absent(hash) {
// Do not cache "one-hit wonders"
cache.filter.mark(hash);
*local_entry = builtin;
local_entry.as_ref()
} else {
// Cache entry
entry.insert(builtin).as_ref()
};
}
// Try all permutations with `Dynamic` wildcards
@ -345,9 +364,12 @@ impl Engine {
let parent_source = global.source.clone();
// Check if function access already in the cache
let mut local_entry = None;
let func = self.resolve_fn(
global,
caches,
&mut local_entry,
lib,
name,
hash,
@ -619,11 +641,15 @@ impl Engine {
let level = level + 1;
// Script-defined function call?
#[cfg(not(feature = "no_function"))]
let mut local_entry = None;
#[cfg(not(feature = "no_function"))]
if let Some(FnResolutionCacheEntry { func, ref source }) = self
.resolve_fn(
global,
caches,
&mut local_entry,
lib,
fn_name,
hashes.script,

View File

@ -234,7 +234,7 @@ impl Engine {
) -> bool {
let cache = caches.fn_resolution_cache_mut();
if let Some(result) = cache.get(&hash_script).map(Option::is_some) {
if let Some(result) = cache.map.get(&hash_script).map(Option::is_some) {
return result;
}
@ -251,7 +251,11 @@ impl Engine {
|| self.global_sub_modules.values().any(|m| m.contains_qualified_fn(hash_script));
if !result {
cache.insert(hash_script, None);
if cache.filter.is_absent(hash_script) {
cache.filter.mark(hash_script);
} else {
cache.map.insert(hash_script, None);
}
}
result

View File

@ -10,7 +10,7 @@ use std::{
/// Number of `usize` values required for 256 bits.
const SIZE: usize = (256 / 8) / mem::size_of::<usize>();
/// A simple bloom filter implementation for `u64` hash values only - i.e., all 64 bits are assumed
/// A simple bloom filter implementation for `u64` hash values only - i.e. all 64 bits are assumed
/// to be relatively random.
///
/// For this reason, the implementation is simplistic - it just looks at the least significant byte
@ -23,7 +23,7 @@ pub struct BloomFilterU64([usize; SIZE]);
impl BloomFilterU64 {
/// Get the bit position of a `u64` hash value.
///
/// Only the least significant byte of `value` is used, giving a bit index in `0..256`. The
/// result is `(offset, mask)`: the index of the `usize` word that holds the bit, and a mask
/// with exactly that bit set.
///
/// The word width is taken from `usize` itself instead of being assumed to be 64, so that on
/// targets with a narrower `usize` the shift amount stays in range and every word of the
/// filter is reachable.
#[inline(always)]
const fn calc_hash(value: u64) -> (usize, usize) {
    // Number of bits in one `usize` word of the filter.
    const WORD_BITS: usize = mem::size_of::<usize>() * 8;

    let hash = (value & 0x00ff) as usize;
    (hash / WORD_BITS, 0x01 << (hash % WORD_BITS))
}
@ -48,14 +48,14 @@ impl BloomFilterU64 {
/// Mark a `u64` hash into this [`BloomFilterU64`].
///
/// Sets the bit selected for `hash` and returns `self`, so calls can be chained.
#[inline(always)]
pub fn mark(&mut self, hash: u64) -> &mut Self {
let (offset, mask) = Self::hash(hash);
let (offset, mask) = Self::calc_hash(hash);
// OR the mask in: a bit that is already set stays set.
self.0[offset] |= mask;
self
}
/// Is a `u64` hash definitely absent from this [`BloomFilterU64`]?
///
/// Returns `true` only when the bit selected for `hash` is clear. A `false` result does not
/// prove that this exact hash was marked: the bit is chosen from the lowest byte of the hash,
/// so different hashes sharing that byte select the same bit.
#[inline]
pub const fn is_absent(&self, hash: u64) -> bool {
let (offset, mask) = Self::hash(hash);
let (offset, mask) = Self::calc_hash(hash);
(self.0[offset] & mask) == 0
}
}