use std::{fmt, hash::Hash, marker::PhantomData, ops::DerefMut}; use crossbeam::{atomic::AtomicCell, queue::SegQueue}; use tracked_field::FieldIngredientImpl; use crate::{ cycle::CycleRecoveryStrategy, ingredient::{fmt_index, Ingredient, Jar, JarAux}, key::{DatabaseKeyIndex, DependencyIndex}, plumbing::ZalsaLocal, runtime::StampedValue, salsa_struct::SalsaStructInDb, table::{memo::MemoTable, sync::SyncTable, Slot, Table}, zalsa::{IngredientIndex, Zalsa}, zalsa_local::QueryOrigin, Database, Durability, Event, EventKind, Id, Revision, }; pub mod tracked_field; // ANCHOR: Configuration /// Trait that defines the key properties of a tracked struct. /// Implemented by the `#[salsa::tracked]` macro when applied /// to a struct. pub trait Configuration: Sized + 'static { const DEBUG_NAME: &'static str; const FIELD_DEBUG_NAMES: &'static [&'static str]; /// A (possibly empty) tuple of the fields for this struct. type Fields<'db>: Send + Sync; /// A array of [`Revision`][] values, one per each of the value fields. /// When a struct is re-recreated in a new revision, the corresponding /// entries for each field are updated to the new revision if their /// values have changed (or if the field is marked as `#[no_eq]`). type Revisions: Send + Sync + DerefMut; type Struct<'db>: Copy; /// Create an end-user struct from the underlying raw pointer. /// /// This call is an "end-step" to the tracked struct lookup/creation /// process in a given revision: it occurs only when the struct is newly /// created or, if a struct is being reused, after we have updated its /// fields (or confirmed it is green and no updates are required). fn struct_from_id<'db>(id: Id) -> Self::Struct<'db>; /// Deref the struct to yield the underlying id. fn deref_struct(s: Self::Struct<'_>) -> Id; fn id_fields(fields: &Self::Fields<'_>) -> impl Hash; /// Create a new value revision array where each element is set to `current_revision`. fn new_revisions(current_revision: Revision) -> Self::Revisions; /// Update the field data and, if the value has changed, /// the appropriate entry in the `revisions` array. /// /// # Safety /// /// Requires the same conditions as the `maybe_update` /// method on [the `Update` trait](`crate::update::Update`). /// /// In short, requires that `old_fields` be a pointer into /// storage from a previous revision. /// It must meet its validity invariant. /// Owned content must meet safety invariant. /// `*mut` here is not strictly needed; /// it is used to signal that the content /// is not guaranteed to recursively meet /// its safety invariant and /// hence this must be dereferenced with caution. /// /// Ensures that `old_fields` is fully updated and valid /// after it returns and that `revisions` has been updated /// for any field that changed. unsafe fn update_fields<'db>( current_revision: Revision, revisions: &mut Self::Revisions, old_fields: *mut Self::Fields<'db>, new_fields: Self::Fields<'db>, ); } // ANCHOR_END: Configuration pub struct JarImpl where C: Configuration, { phantom: PhantomData, } impl Default for JarImpl { fn default() -> Self { Self { phantom: Default::default(), } } } impl Jar for JarImpl { fn create_ingredients( &self, _aux: &dyn JarAux, struct_index: crate::zalsa::IngredientIndex, ) -> Vec> { let struct_ingredient = >::new(struct_index); std::iter::once(Box::new(struct_ingredient) as _) .chain((0..C::FIELD_DEBUG_NAMES.len()).map(|field_index| { Box::new(>::new(struct_index, field_index)) as _ })) .collect() } } pub trait TrackedStructInDb: SalsaStructInDb { /// Converts the identifier for this tracked struct into a `DatabaseKeyIndex`. fn database_key_index(db: &dyn Database, id: Id) -> DatabaseKeyIndex; } /// Created for each tracked struct. /// /// This ingredient only stores the "id" fields. It is a kind of "dressed up" interner; /// the active query + values of id fields are hashed to create the tracked /// struct id. The value fields are stored in [`crate::function::FunctionIngredient`] /// instances keyed by the tracked struct id. Unlike normal interners, tracked /// struct indices can be deleted and reused aggressively: when a tracked /// function re-executes, any tracked structs that it created before but did /// not create this time can be deleted. pub struct IngredientImpl where C: Configuration, { /// Our index in the database. ingredient_index: IngredientIndex, /// Phantom data: we fetch `Value` out from `Table` phantom: PhantomData Value>, /// Store freed ids free_list: SegQueue, } /// Defines the identity of a tracked struct. /// This is the key to a hashmap that is (initially) /// stored in the [`ActiveQuery`](`crate::active_query::ActiveQuery`) /// struct and later moved to the [`Memo`](`crate::function::memo::Memo`). #[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Copy, Clone)] pub(crate) struct KeyStruct { /// The hash of the `#[id]` fields of this struct. /// Note that multiple structs may share the same hash. data_hash: u64, /// The unique disambiguator assigned within the active query /// to distinguish distinct tracked structs with the same hash. disambiguator: Disambiguator, } // ANCHOR: ValueStruct #[derive(Debug)] pub struct Value where C: Configuration, { /// The durability minimum durability of all inputs consumed /// by the creator query prior to creating this tracked struct. /// If any of those inputs changes, then the creator query may /// create this struct with different values. durability: Durability, /// The revision when this tracked struct was last updated. /// This field also acts as a kind of "lock". Once it is equal /// to `Some(current_revision)`, the fields are locked and /// cannot change further. This makes it safe to give out `&`-references /// so long as they do not live longer than the current revision /// (which is assured by tying their lifetime to the lifetime of an `&`-ref /// to the database). /// /// The struct is updated from an older revision `R0` to the current revision `R1` /// when the struct is first accessed in `R1`, whether that be because the original /// query re-created the struct (i.e., by user calling `Struct::new`) or because /// the struct was read from. (Structs may not be recreated in the new revision if /// the inputs to the query have not changed.) /// /// When re-creating the struct, the field is temporarily set to `None`. /// This is signal that there is an active `&mut` modifying the other fields: /// even reading from those fields in that situation would create UB. /// This `None` value should never be observable by users unless they have /// leaked a reference across threads somehow. updated_at: AtomicCell>, /// Fields of this tracked struct. They can change across revisions, /// but they do not change within a particular revision. fields: C::Fields<'static>, /// The revision information for each field: when did this field last change. /// When tracked structs are re-created, this revision may be updated to the /// current revision if the value is different. revisions: C::Revisions, /// Memo table storing the results of query functions etc. memos: MemoTable, /// Sync table storing the results of query functions etc. syncs: SyncTable, } // ANCHOR_END: ValueStruct #[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Copy, Clone)] pub struct Disambiguator(pub u32); impl IngredientImpl where C: Configuration, { /// Convert the fields from a `'db` lifetime to `'static`: used when storing /// the data into this ingredient, should never be released outside this type. unsafe fn to_static<'db>(&'db self, fields: C::Fields<'db>) -> C::Fields<'static> { unsafe { std::mem::transmute(fields) } } unsafe fn to_self_ref<'db>(&'db self, fields: &'db C::Fields<'static>) -> &'db C::Fields<'db> { unsafe { std::mem::transmute(fields) } } /// Convert from static back to the db lifetime; used when returning data /// out from this ingredient. unsafe fn to_self_ptr<'db>(&'db self, fields: *mut C::Fields<'static>) -> *mut C::Fields<'db> { unsafe { std::mem::transmute(fields) } } /// Create a tracked struct ingredient. Generated by the `#[tracked]` macro, /// not meant to be called directly by end-users. fn new(index: IngredientIndex) -> Self { Self { ingredient_index: index, phantom: PhantomData, free_list: Default::default(), } } /// Returns the database key index for a tracked struct with the given id. pub fn database_key_index(&self, id: Id) -> DatabaseKeyIndex { DatabaseKeyIndex { ingredient_index: self.ingredient_index, key_index: id, } } pub fn new_struct<'db>( &'db self, db: &'db dyn Database, fields: C::Fields<'db>, ) -> C::Struct<'db> { let (zalsa, zalsa_local) = db.zalsas(); let data_hash = crate::hash::hash(&C::id_fields(&fields)); let (current_deps, disambiguator) = zalsa_local.disambiguate(self.ingredient_index, Revision::start(), data_hash); let key_struct = KeyStruct { disambiguator, data_hash, }; let current_revision = zalsa.current_revision(); match zalsa_local.tracked_struct_id(&key_struct) { Some(id) => { // The struct already exists in the intern map. zalsa_local.add_output(self.database_key_index(id).into()); self.update(zalsa, current_revision, id, ¤t_deps, fields); C::struct_from_id(id) } None => { // This is a new tracked struct, so create an entry in the struct map. let id = self.allocate(zalsa, zalsa_local, current_revision, ¤t_deps, fields); let key = self.database_key_index(id); zalsa_local.add_output(key.into()); zalsa_local.store_tracked_struct_id(key_struct, key); C::struct_from_id(id) } } } fn allocate<'db>( &'db self, zalsa: &'db Zalsa, zalsa_local: &'db ZalsaLocal, current_revision: Revision, current_deps: &StampedValue<()>, fields: C::Fields<'db>, ) -> Id { let value = || Value { updated_at: AtomicCell::new(Some(current_revision)), durability: current_deps.durability, fields: unsafe { self.to_static(fields) }, revisions: C::new_revisions(current_deps.changed_at), memos: Default::default(), syncs: Default::default(), }; if let Some(id) = self.free_list.pop() { let data_raw = Self::data_raw(zalsa.table(), id); assert!( unsafe { (*data_raw).updated_at.load().is_none() }, "free list entry for `{id:?}` does not have `None` for `updated_at`" ); // Overwrite the free-list entry. Use `*foo = ` because the entry // has been previously initialized and we want to free the old contents. unsafe { *data_raw = value(); } id } else { zalsa_local.allocate::>(zalsa.table(), self.ingredient_index, value()) } } /// Get mutable access to the data for `id` -- this holds a write lock for the duration /// of the returned value. /// /// # Panics /// /// * If the value is not present in the map. /// * If the value is already updated in this revision. fn update<'db>( &'db self, zalsa: &'db Zalsa, current_revision: Revision, id: Id, current_deps: &StampedValue<()>, fields: C::Fields<'db>, ) { let data_raw = Self::data_raw(zalsa.table(), id); // The protocol is: // // * When we begin updating, we store `None` in the `created_at` field // * When completed, we store `Some(current_revision)` in `created_at` // // No matter what mischief users get up to, it should be impossible for us to // observe `None` in `created_at`. The `id` should only be associated with one // query and that query can only be running in one thread at a time. // // We *can* observe `Some(current_revision)` however, which means that this // tracked struct is already updated for this revision in two ways. // In that case we should not modify or touch it because there may be // `&`-references to its contents floating around. // // Observing `Some(current_revision)` can happen in two scenarios: leaks (tsk tsk) // but also the scenario embodied by the test test `test_run_5_then_20` in `specify_tracked_fn_in_rev_1_but_not_2.rs`: // // * Revision 1: // * Tracked function F creates tracked struct S // * F reads input I // * Revision 2: I is changed, F is re-executed // // When F is re-executed in rev 2, we first try to validate F's inputs/outputs, // which is the list [output: S, input: I]. As no inputs have changed by the time // we reach S, we mark it as verified. But then input I is seen to hvae changed, // and so we re-execute F. Note that we *know* that S will have the same value // (barring program bugs). // // Further complicating things: it is possible that F calls F2 // and gives it (e.g.) S as one of its arguments. Validating F2 may cause F2 to // re-execute which means that it may indeed have read from S's fields // during the current revision and thus obtained an `&` reference to those fields // that is still live. // UNSAFE: Marking as mut requires exclusive access for the duration of // the `mut`. We have now *claimed* this data by swapping in `None`, // any attempt to read concurrently will panic. let last_updated_at = unsafe { (*data_raw).updated_at.load() }; assert!( last_updated_at.is_some(), "two concurrent writers to {id:?}, should not be possible" ); if last_updated_at == Some(current_revision) { // already read-locked return; } // Acquire the write-lock. This can only fail if there is a parallel thread // reading from this same `id`, which can only happen if the user has leaked it. // Tsk tsk. let swapped_out = unsafe { (*data_raw).updated_at.swap(None) }; if swapped_out != last_updated_at { panic!( "failed to acquire write lock, id `{id:?}` must have been leaked across threads" ); } // UNSAFE: Marking as mut requires exclusive access for the duration of // the `mut`. We have now *claimed* this data by swapping in `None`, // any attempt to read concurrently will panic. let data = unsafe { &mut *data_raw }; // SAFETY: We assert that the pointer to `data.revisions` // is a pointer into the database referencing a value // from a previous revision. As such, it continues to meet // its validity invariant and any owned content also continues // to meet its safety invariant. unsafe { C::update_fields( current_revision, &mut data.revisions, self.to_self_ptr(std::ptr::addr_of_mut!(data.fields)), fields, ); } if current_deps.durability < data.durability { data.revisions = C::new_revisions(current_revision); } data.durability = current_deps.durability; let swapped_out = data.updated_at.swap(Some(current_revision)); assert!(swapped_out.is_none()); } /// Fetch the data for a given id created by this ingredient from the table, /// -giving it the appropriate type. fn data(table: &Table, id: Id) -> &Value { table.get(id) } fn data_raw(table: &Table, id: Id) -> *mut Value { table.get_raw(id) } /// Deletes the given entities. This is used after a query `Q` executes and we can compare /// the entities `E_now` that it produced in this revision vs the entities /// `E_prev` it produced in the last revision. Any missing entities `E_prev - E_new` can be /// deleted. /// /// # Warning /// /// Using this method on an entity id that MAY be used in the current revision will lead to /// unspecified results (but not UB). See [`InternedIngredient::delete_index`] for more /// discussion and important considerations. pub(crate) fn delete_entity(&self, db: &dyn crate::Database, id: Id) { db.salsa_event(&|| Event { thread_id: std::thread::current().id(), kind: crate::EventKind::DidDiscard { key: self.database_key_index(id), }, }); let zalsa = db.zalsa(); let current_revision = zalsa.current_revision(); let data = Self::data_raw(zalsa.table(), id); // We want to set `updated_at` to `None`, signalling that other field values // cannot be read. The current vaue should be `Some(R0)` for some older revision. let data_ref = unsafe { &*data }; match data_ref.updated_at.load() { None => { panic!("cannot delete write-locked id `{id:?}`; value leaked across threads"); } Some(r) => { if r == current_revision { panic!( "cannot delete read-locked id `{id:?}`; \ value leaked across threads or user functions not deterministic" ) } if data_ref.updated_at.compare_exchange(Some(r), None).is_err() { panic!("race occurred when deleting value `{id:?}`") } } } // Take the memo table. This is safe because we have modified `data_ref.updated_at` to `None` // and the code that references the memo-table has a read-lock. let memo_table = unsafe { (*data).take_memo_table() }; for (memo_ingredient_index, memo) in memo_table.into_memos() { let ingredient_index = zalsa.ingredient_index_for_memo(memo_ingredient_index); let executor = DatabaseKeyIndex { ingredient_index, key_index: id, }; db.salsa_event(&|| Event { thread_id: std::thread::current().id(), kind: EventKind::DidDiscard { key: executor }, }); for stale_output in memo.origin().outputs() { zalsa .lookup_ingredient(stale_output.ingredient_index) .remove_stale_output(db, executor, stale_output.key_index); } } // now that all cleanup has occurred, make available for re-use self.free_list.push(id); } /// Return reference to the field data ignoring dependency tracking. /// Used for debugging. pub fn leak_fields<'db>( &'db self, db: &'db dyn Database, s: C::Struct<'db>, ) -> &'db C::Fields<'db> { let id = C::deref_struct(s); let value = Self::data(db.zalsa().table(), id); unsafe { self.to_self_ref(&value.fields) } } /// Access to this value field. /// Note that this function returns the entire tuple of value fields. /// The caller is responible for selecting the appropriate element. pub fn field<'db>( &'db self, db: &'db dyn crate::Database, s: C::Struct<'db>, field_index: usize, ) -> &'db C::Fields<'db> { let (zalsa, zalsa_local) = db.zalsas(); let id = C::deref_struct(s); let field_ingredient_index = self.ingredient_index.successor(field_index); let data = Self::data(zalsa.table(), id); data.read_lock(zalsa.current_revision()); let field_changed_at = data.revisions[field_index]; zalsa_local.report_tracked_read( DependencyIndex { ingredient_index: field_ingredient_index, key_index: Some(id), }, data.durability, field_changed_at, ); unsafe { self.to_self_ref(&data.fields) } } } impl Ingredient for IngredientImpl where C: Configuration, { fn ingredient_index(&self) -> IngredientIndex { self.ingredient_index } fn maybe_changed_after( &self, _db: &dyn Database, _input: Option, _revision: Revision, ) -> bool { false } fn cycle_recovery_strategy(&self) -> CycleRecoveryStrategy { crate::cycle::CycleRecoveryStrategy::Panic } fn origin(&self, _db: &dyn Database, _key_index: crate::Id) -> Option { None } fn mark_validated_output<'db>( &'db self, _db: &'db dyn Database, _executor: DatabaseKeyIndex, _output_key: Option, ) { // we used to update `update_at` field but now we do it lazilly when data is accessed // // FIXME: delete this method } fn remove_stale_output( &self, db: &dyn Database, _executor: DatabaseKeyIndex, stale_output_key: Option, ) { // This method is called when, in prior revisions, // `executor` creates a tracked struct `salsa_output_key`, // but it did not in the current revision. // In that case, we can delete `stale_output_key` and any data associated with it. self.delete_entity(db.as_dyn_database(), stale_output_key.unwrap()); } fn fmt_index(&self, index: Option, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt_index(C::DEBUG_NAME, index, fmt) } fn debug_name(&self) -> &'static str { C::DEBUG_NAME } fn requires_reset_for_new_revision(&self) -> bool { false } fn reset_for_new_revision(&mut self) {} } impl std::fmt::Debug for IngredientImpl where C: Configuration, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct(std::any::type_name::()) .field("ingredient_index", &self.ingredient_index) .finish() } } impl Value where C: Configuration, { fn take_memo_table(&mut self) -> MemoTable { // This fn is only called after `updated_at` has been set to `None`; // this ensures that there is no concurrent access // (and that the `&mut self` is accurate...). assert!(self.updated_at.load().is_none()); std::mem::take(&mut self.memos) } fn read_lock(&self, current_revision: Revision) { loop { match self.updated_at.load() { None => { panic!("access to field whilst the value is being initialized"); } Some(r) => { if r == current_revision { return; } if self .updated_at .compare_exchange(Some(r), Some(current_revision)) .is_ok() { break; } } } } } } impl Slot for Value where C: Configuration, { unsafe fn memos(&self, current_revision: Revision) -> &crate::table::memo::MemoTable { // Acquiring the read lock here with the current revision // ensures that there is no danger of a race // when deleting a tracked struct. self.read_lock(current_revision); &self.memos } unsafe fn syncs(&self, current_revision: Revision) -> &crate::table::sync::SyncTable { // Acquiring the read lock here with the current revision // ensures that there is no danger of a race // when deleting a tracked struct. self.read_lock(current_revision); &self.syncs } }