From 669cbd8a10dcf67cfb1b6d88f6e55fa5828f13b7 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sun, 22 Dec 2024 17:24:46 +0100 Subject: [PATCH 1/3] Deduplicate ingredient indexing --- src/accumulator.rs | 2 +- src/function/accumulated.rs | 8 +++----- src/key.rs | 11 +---------- 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/src/accumulator.rs b/src/accumulator.rs index b9355419..1e6d7e6f 100644 --- a/src/accumulator.rs +++ b/src/accumulator.rs @@ -61,7 +61,7 @@ pub struct IngredientImpl { } impl IngredientImpl { - /// Find the accumulator ingrediate for `A` in the database, if any. + /// Find the accumulator ingredient for `A` in the database, if any. pub fn from_db(db: &Db) -> Option<&Self> where Db: ?Sized + Database, diff --git a/src/function/accumulated.rs b/src/function/accumulated.rs index d8b0fcfc..588e42d1 100644 --- a/src/function/accumulated.rs +++ b/src/function/accumulated.rs @@ -53,8 +53,9 @@ where continue; } + let ingredient = zalsa.lookup_ingredient(k.ingredient_index); // Extend `output` with any values accumulated by `k`. - if let Some(accumulated_map) = k.accumulated(db) { + if let Some(accumulated_map) = ingredient.accumulated(db, k.key_index) { accumulated_map.extend_with_accumulated(accumulator.index(), &mut output); // Skip over the inputs because we know that the entire sub-graph has no accumulated values @@ -69,10 +70,7 @@ where // output vector, we want to push in execution order, so reverse order to // ensure the first child that was executed will be the first child popped // from the stack. - let Some(origin) = zalsa - .lookup_ingredient(k.ingredient_index) - .origin(db, k.key_index) - else { + let Some(origin) = ingredient.origin(db, k.key_index) else { continue; }; diff --git a/src/key.rs b/src/key.rs index 92e63541..67ce3e3e 100644 --- a/src/key.rs +++ b/src/key.rs @@ -1,7 +1,4 @@ -use crate::{ - accumulator::accumulated_map::AccumulatedMap, cycle::CycleRecoveryStrategy, - zalsa::IngredientIndex, Database, Id, -}; +use crate::{cycle::CycleRecoveryStrategy, zalsa::IngredientIndex, Database, Id}; /// An integer that uniquely identifies a particular query instance within the /// database. Used to track dependencies between queries. Fully ordered and @@ -99,12 +96,6 @@ impl DatabaseKeyIndex { pub(crate) fn cycle_recovery_strategy(self, db: &dyn Database) -> CycleRecoveryStrategy { self.ingredient_index.cycle_recovery_strategy(db) } - - pub(crate) fn accumulated(self, db: &dyn Database) -> Option<&AccumulatedMap> { - db.zalsa() - .lookup_ingredient(self.ingredient_index) - .accumulated(db, self.key_index) - } } impl std::fmt::Debug for DatabaseKeyIndex { From 86ab415592463ad98067bd75129fd65a5a102b13 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sun, 22 Dec 2024 17:35:57 +0100 Subject: [PATCH 2/3] Reduce memo lookups needed for eviction --- src/function/memo.rs | 44 ++++++++++++++++++++------------------------ src/table/memo.rs | 31 +++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/src/function/memo.rs b/src/function/memo.rs index f982dc33..440fc2a7 100644 --- a/src/function/memo.rs +++ b/src/function/memo.rs @@ -61,30 +61,26 @@ impl IngredientImpl { /// with an equivalent memo that has no value. If the memo is untracked, BaseInput, /// or has values assigned as output of another query, this has no effect. pub(super) fn evict_value_from_memo_for<'db>(&'db self, zalsa: &'db Zalsa, id: Id) { - let Some(memo) = self.get_memo_from_table_for(zalsa, id) else { - return; - }; - - match memo.revisions.origin { - QueryOrigin::Assigned(_) - | QueryOrigin::DerivedUntracked(_) - | QueryOrigin::BaseInput => { - // Careful: Cannot evict memos whose values were - // assigned as output of another query - // or those with untracked inputs - // as their values cannot be reconstructed. - } - - QueryOrigin::Derived(_) => { - let memo_evicted = Arc::new(Memo::new( - None::>, - memo.verified_at.load(), - memo.revisions.clone(), - )); - - self.insert_memo_into_table_for(zalsa, id, memo_evicted); - } - } + zalsa + .memo_table_for(id) + .map_memo::>(self.memo_ingredient_index, |memo| { + match memo.revisions.origin { + QueryOrigin::Assigned(_) + | QueryOrigin::DerivedUntracked(_) + | QueryOrigin::BaseInput => { + // Careful: Cannot evict memos whose values were + // assigned as output of another query + // or those with untracked inputs + // as their values cannot be reconstructed. + memo + } + QueryOrigin::Derived(_) => Arc::new(Memo::new( + None::>, + memo.verified_at.load(), + memo.revisions.clone(), + )), + } + }); } } diff --git a/src/table/memo.rs b/src/table/memo.rs index 73bd52bd..f05f7308 100644 --- a/src/table/memo.rs +++ b/src/table/memo.rs @@ -163,6 +163,37 @@ impl MemoTable { unsafe { Some(Self::from_dummy(arc_swap.load_full())) } } + /// Calls `f` on the memo at `memo_ingredient_index` and replaces the memo with the result of `f`. + /// If the memo is not present, `f` is not called. + pub(crate) fn map_memo( + &self, + memo_ingredient_index: MemoIngredientIndex, + f: impl FnOnce(Arc) -> Arc, + ) { + // If the memo slot is already occupied, it must already have the + // right type info etc, and we only need the read-lock. + let memos = self.memos.read(); + let Some(MemoEntry { + data: + Some(MemoEntryData { + type_id, + to_dyn_fn: _, + arc_swap, + }), + }) = memos.get(memo_ingredient_index.as_usize()) + else { + return; + }; + assert_eq!( + *type_id, + TypeId::of::(), + "inconsistent type-id for `{memo_ingredient_index:?}`" + ); + // SAFETY: type_id check asserted above + let memo = f(unsafe { Self::from_dummy(arc_swap.load_full()) }); + unsafe { Self::from_dummy::(arc_swap.swap(Self::to_dummy(memo))) }; + } + pub(crate) fn into_memos( mut self, ) -> impl Iterator)> { From 38207e4f8eae88059817c8a9dca4330aa2ce6c99 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sun, 22 Dec 2024 17:47:09 +0100 Subject: [PATCH 3/3] Store `QueryEdges` edges in a `Box` as it is not cloned anyways Likewise, an empty `Box`ed slice does not allocate so we can remove the `lazy_static` that was needed before --- Cargo.toml | 1 - src/active_query.rs | 3 +-- src/function/memo.rs | 36 +++++++++++++++++++++++++++--------- src/table/memo.rs | 8 +++----- src/zalsa_local.rs | 13 +++++-------- 5 files changed, 36 insertions(+), 25 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 69393bd8..1e788cfb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,6 @@ rustc-hash = "2" salsa-macro-rules = { version = "0.1.0", path = "components/salsa-macro-rules" } salsa-macros = { path = "components/salsa-macros" } smallvec = "1" -lazy_static = "1" rayon = "1.10.0" [dev-dependencies] diff --git a/src/active_query.rs b/src/active_query.rs index 71781a38..a586cde0 100644 --- a/src/active_query.rs +++ b/src/active_query.rs @@ -8,7 +8,6 @@ use crate::{ hash::FxIndexSet, key::{DatabaseKeyIndex, DependencyIndex}, tracked_struct::{Disambiguator, Identity}, - zalsa_local::EMPTY_DEPENDENCIES, Cycle, Id, Revision, }; @@ -108,7 +107,7 @@ impl ActiveQuery { pub(crate) fn into_revisions(self) -> QueryRevisions { let input_outputs = if self.input_outputs.is_empty() { - EMPTY_DEPENDENCIES.clone() + Box::default() } else { self.input_outputs.into_iter().collect() }; diff --git a/src/function/memo.rs b/src/function/memo.rs index 440fc2a7..304c6c31 100644 --- a/src/function/memo.rs +++ b/src/function/memo.rs @@ -61,9 +61,9 @@ impl IngredientImpl { /// with an equivalent memo that has no value. If the memo is untracked, BaseInput, /// or has values assigned as output of another query, this has no effect. pub(super) fn evict_value_from_memo_for<'db>(&'db self, zalsa: &'db Zalsa, id: Id) { - zalsa - .memo_table_for(id) - .map_memo::>(self.memo_ingredient_index, |memo| { + zalsa.memo_table_for(id).map_memo::>>( + self.memo_ingredient_index, + |memo| { match memo.revisions.origin { QueryOrigin::Assigned(_) | QueryOrigin::DerivedUntracked(_) @@ -74,13 +74,31 @@ impl IngredientImpl { // as their values cannot be reconstructed. memo } - QueryOrigin::Derived(_) => Arc::new(Memo::new( - None::>, - memo.verified_at.load(), - memo.revisions.clone(), - )), + QueryOrigin::Derived(_) => { + // QueryRevisions: !Clone to discourage cloning, we need it here though + let QueryRevisions { + changed_at, + durability, + origin, + tracked_struct_ids, + accumulated, + } = &memo.revisions; + // Re-assemble the memo but with the value set to `None` + Arc::new(Memo::new( + None, + memo.verified_at.load(), + QueryRevisions { + changed_at: *changed_at, + durability: *durability, + origin: origin.clone(), + tracked_struct_ids: tracked_struct_ids.clone(), + accumulated: accumulated.clone(), + }, + )) + } } - }); + }, + ); } } diff --git a/src/table/memo.rs b/src/table/memo.rs index f05f7308..fe2fc958 100644 --- a/src/table/memo.rs +++ b/src/table/memo.rs @@ -194,11 +194,9 @@ impl MemoTable { unsafe { Self::from_dummy::(arc_swap.swap(Self::to_dummy(memo))) }; } - pub(crate) fn into_memos( - mut self, - ) -> impl Iterator)> { - let memos = std::mem::take(self.memos.get_mut()); - memos + pub(crate) fn into_memos(self) -> impl Iterator)> { + self.memos + .into_inner() .into_iter() .zip(0..) .filter_map(|(mut memo, index)| memo.data.take().map(|d| (d, index))) diff --git a/src/zalsa_local.rs b/src/zalsa_local.rs index 6988d653..3fb28e7e 100644 --- a/src/zalsa_local.rs +++ b/src/zalsa_local.rs @@ -21,7 +21,6 @@ use crate::EventKind; use crate::Id; use crate::Revision; use std::cell::RefCell; -use std::sync::Arc; /// State that is specific to a single execution thread. /// @@ -325,7 +324,8 @@ impl std::panic::RefUnwindSafe for ZalsaLocal {} /// Summarizes "all the inputs that a query used" /// and "all the outputs its wrote to" -#[derive(Debug, Clone)] +#[derive(Debug)] +// #[derive(Clone)] cloning this is expensive, so we don't derive pub(crate) struct QueryRevisions { /// The most revision in which some input changed. pub(crate) changed_at: Revision, @@ -435,10 +435,6 @@ pub enum EdgeKind { Output, } -lazy_static::lazy_static! { - pub(crate) static ref EMPTY_DEPENDENCIES: Arc<[(EdgeKind, DependencyIndex)]> = Arc::new([]); -} - /// The edges between a memoized value and other queries in the dependency graph. /// These edges include both dependency edges /// e.g., when creating the memoized value for Q0 executed another function Q1) @@ -458,7 +454,8 @@ pub struct QueryEdges { /// Important: /// /// * The inputs must be in **execution order** for the red-green algorithm to work. - pub input_outputs: Arc<[(EdgeKind, DependencyIndex)]>, + // pub input_outputs: ThinBox<[(EdgeKind, DependencyIndex)]>, once that is a thing + pub input_outputs: Box<[(EdgeKind, DependencyIndex)]>, } impl QueryEdges { @@ -483,7 +480,7 @@ impl QueryEdges { } /// Creates a new `QueryEdges`; the values given for each field must meet struct invariants. - pub(crate) fn new(input_outputs: Arc<[(EdgeKind, DependencyIndex)]>) -> Self { + pub(crate) fn new(input_outputs: Box<[(EdgeKind, DependencyIndex)]>) -> Self { Self { input_outputs } } }