diff --git a/Cargo.toml b/Cargo.toml index d3815b1..8a8e906 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ description = "A generic framework for on-demand, incrementalized computation (e [dependencies] arc-swap = "1" crossbeam = "0.8" -dashmap = "6" +dashmap = { version = "6", features = ["raw-api"] } hashlink = "0.9" indexmap = "2" append-only-vec = "0.1.5" diff --git a/components/salsa-macro-rules/src/setup_interned_struct.rs b/components/salsa-macro-rules/src/setup_interned_struct.rs index 2b7f8d8..bf9d98f 100644 --- a/components/salsa-macro-rules/src/setup_interned_struct.rs +++ b/components/salsa-macro-rules/src/setup_interned_struct.rs @@ -32,6 +32,9 @@ macro_rules! setup_interned_struct { // Indices for each field from 0..N -- must be unsuffixed (e.g., `0`, `1`). field_indices: [$($field_index:tt),*], + // Indexed types for each field (T0, T1, ...) + field_indexed_tys: [$($indexed_ty:ident),*], + // Number of fields num_fields: $N:literal, @@ -62,10 +65,36 @@ macro_rules! setup_interned_struct { type $Configuration = $Struct<'static>; + type StructData<$db_lt> = ($($field_ty,)*); + + /// Key to use during hash lookups. Each field is some type that implements `Lookup<T>` + /// for the owned field type `T`. This permits interning with an `&str` when a `String` is required and so forth. 
+ struct StructKey<$db_lt, $($indexed_ty: $zalsa::interned::Lookup<$field_ty>),*>( + $($indexed_ty,)* + std::marker::PhantomData<&$db_lt ()>, + ); + + impl<$db_lt, $($indexed_ty: $zalsa::interned::Lookup<$field_ty>),*> $zalsa::interned::Lookup> + for StructKey<$db_lt, $($indexed_ty),*> { + + fn hash(&self, h: &mut H) { + $($zalsa::interned::Lookup::hash(&self.$field_index, &mut *h);)* + } + + fn eq(&self, data: &StructData<$db_lt>) -> bool { + ($($zalsa::interned::Lookup::eq(&self.$field_index, &data.$field_index) && )* true) + } + + #[allow(unused_unit)] + fn into_owned(self) -> StructData<$db_lt> { + ($($zalsa::interned::Lookup::into_owned(self.$field_index),)*) + } + } + impl $zalsa_struct::Configuration for $Configuration { const DEBUG_NAME: &'static str = stringify!($Struct); - type Data<$db_lt> = ($($field_ty,)*); - type Struct<$db_lt> = $Struct<$db_lt>; + type Data<'a> = StructData<'a>; + type Struct<'a> = $Struct<'a>; fn struct_from_id<'db>(id: salsa::Id) -> Self::Struct<'db> { $Struct(id, std::marker::PhantomData) } @@ -126,13 +155,14 @@ macro_rules! 
setup_interned_struct { } impl<$db_lt> $Struct<$db_lt> { - pub fn $new_fn<$Db>(db: &$db_lt $Db, $($field_id: $field_ty),*) -> Self + pub fn $new_fn<$Db>(db: &$db_lt $Db, $($field_id: impl $zalsa::interned::Lookup<$field_ty>),*) -> Self where // FIXME(rust-lang/rust#65991): The `db` argument *should* have the type `dyn Database` $Db: ?Sized + salsa::Database, { let current_revision = $zalsa::current_revision(db); - $Configuration::ingredient(db).intern(db.as_dyn_database(), ($($field_id,)*)) + $Configuration::ingredient(db).intern(db.as_dyn_database(), + StructKey::<$db_lt>($($field_id,)* std::marker::PhantomData::default())) } $( diff --git a/components/salsa-macros/src/interned.rs b/components/salsa-macros/src/interned.rs index b0fdb32..aea9a4e 100644 --- a/components/salsa-macros/src/interned.rs +++ b/components/salsa-macros/src/interned.rs @@ -90,6 +90,7 @@ impl Macro { let field_getter_ids = salsa_struct.field_getter_ids(); let field_options = salsa_struct.field_options(); let field_tys = salsa_struct.field_tys(); + let field_indexed_tys = salsa_struct.field_indexed_tys(); let generate_debug_impl = salsa_struct.generate_debug_impl(); let zalsa = self.hygiene.ident("zalsa"); @@ -112,6 +113,7 @@ impl Macro { field_getters: [#(#field_vis #field_getter_ids),*], field_tys: [#(#field_tys),*], field_indices: [#(#field_indices),*], + field_indexed_tys: [#(#field_indexed_tys),*], num_fields: #num_fields, generate_debug_impl: #generate_debug_impl, unused_names: [ diff --git a/components/salsa-macros/src/salsa_struct.rs b/components/salsa-macros/src/salsa_struct.rs index 5480824..b93470a 100644 --- a/components/salsa-macros/src/salsa_struct.rs +++ b/components/salsa-macros/src/salsa_struct.rs @@ -242,6 +242,14 @@ where self.fields.iter().map(|f| &f.field.ty).collect() } + pub(crate) fn field_indexed_tys(&self) -> Vec { + self.fields + .iter() + .enumerate() + .map(|(i, _)| quote::format_ident!("T{i}")) + .collect() + } + pub(crate) fn field_options(&self) -> Vec { 
self.fields .iter() diff --git a/src/interned.rs b/src/interned.rs index 2003520..94a6d89 100644 --- a/src/interned.rs +++ b/src/interned.rs @@ -1,7 +1,3 @@ -use std::fmt; -use std::hash::Hash; -use std::marker::PhantomData; - use crate::durability::Durability; use crate::id::AsId; use crate::ingredient::fmt_index; @@ -13,6 +9,10 @@ use crate::table::Slot; use crate::zalsa::IngredientIndex; use crate::zalsa_local::QueryOrigin; use crate::{Database, DatabaseKeyIndex, Id}; +use std::fmt; +use std::hash::{BuildHasher, Hash, Hasher}; +use std::marker::PhantomData; +use std::path::{Path, PathBuf}; use super::hash::FxDashMap; use super::ingredient::Ingredient; @@ -117,7 +117,7 @@ where pub fn intern_id<'db>( &'db self, db: &'db dyn crate::Database, - data: C::Data<'db>, + data: impl Lookup>, ) -> crate::Id { C::deref_struct(self.intern(db, data)).as_id() } @@ -126,7 +126,7 @@ where pub fn intern<'db>( &'db self, db: &'db dyn crate::Database, - data: C::Data<'db>, + data: impl Lookup>, ) -> C::Struct<'db> { let zalsa_local = db.zalsa_local(); zalsa_local.report_tracked_read( @@ -137,12 +137,29 @@ where // Optimisation to only get read lock on the map if the data has already // been interned. + // We need to use the raw API for this lookup. See the [`Lookup`][] trait definition for an explanation of why. 
+ let data_hash = { + let mut hasher = self.key_map.hasher().build_hasher(); + data.hash(&mut hasher); + hasher.finish() + }; + let shard = self.key_map.determine_shard(data_hash as _); + { + let lock = self.key_map.shards()[shard].read(); + if let Some(bucket) = lock.find(data_hash, |(a, _)| { + // SAFETY: it's safe to go from Data<'static> to Data<'db> + // shrink lifetime here to use a single lifetime in Lookup::eq(&StructKey<'db>, &C::Data<'db>) + let a: &C::Data<'db> = unsafe { std::mem::transmute(a) }; + Lookup::eq(&data, a) + }) { + // SAFETY: Read lock on map is held during this block + return C::struct_from_id(unsafe { *bucket.as_ref().1.get() }); + } + }; + + let data = data.into_owned(); + let internal_data = unsafe { self.to_internal_data(data) }; - if let Some(guard) = self.key_map.get(&internal_data) { - let id = *guard; - drop(guard); - return C::struct_from_id(id); - } match self.key_map.entry(internal_data.clone()) { // Data has been interned by a racing call, use that ID instead @@ -288,3 +305,119 @@ where &self.syncs } } + +/// The `Lookup` trait is a more flexible variant on [`std::borrow::Borrow`] +/// and [`std::borrow::ToOwned`]. +/// +/// It is implemented by "some type that can be used as the lookup key for `O`". +/// This means that `self` can be hashed and compared for equality with values +/// of type `O` without actually creating an owned value. If `self` needs to be interned, +/// it can be converted into an equivalent value of type `O`. +/// +/// The canonical example is `&str: Lookup<String>`. However, this example +/// alone can be handled by [`std::borrow::Borrow`][]. In our case, we may have +/// multiple keys accumulated into a struct, like `ViewStruct: Lookup<(K1, ...)>`, +/// where `struct ViewStruct<L1: Lookup<K1>...>(L1...)`. The `Borrow` trait +/// requires that `&(K1...)` be convertible to `&ViewStruct` which just isn't +/// possible. `Lookup` instead offers direct `hash` and `eq` methods. 
+pub trait Lookup { + fn hash(&self, h: &mut H); + fn eq(&self, data: &O) -> bool; + fn into_owned(self) -> O; +} + +impl Lookup for T +where + T: Hash + Eq, +{ + fn hash(&self, h: &mut H) { + Hash::hash(self, &mut *h); + } + + fn eq(&self, data: &T) -> bool { + self == data + } + + fn into_owned(self) -> T { + self + } +} + +impl Lookup for &T +where + T: Clone + Eq + Hash, +{ + fn hash(&self, h: &mut H) { + Hash::hash(self, &mut *h); + } + + fn eq(&self, data: &T) -> bool { + *self == data + } + + fn into_owned(self) -> T { + Clone::clone(self) + } +} + +impl Lookup for &str { + fn hash(&self, h: &mut H) { + Hash::hash(self, &mut *h) + } + + fn eq(&self, data: &String) -> bool { + self == data + } + + fn into_owned(self) -> String { + self.to_owned() + } +} + +impl + Clone + Lookup, T> Lookup> for &[A] { + fn hash(&self, h: &mut H) { + for a in *self { + Hash::hash(a, h); + } + } + + fn eq(&self, data: &Vec) -> bool { + self.len() == data.len() && data.iter().enumerate().all(|(i, a)| &self[i] == a) + } + + fn into_owned(self) -> Vec { + self.iter().map(|a| Lookup::into_owned(a.clone())).collect() + } +} + +impl + Clone + Lookup, T> Lookup> for [A; N] { + fn hash(&self, h: &mut H) { + for a in self { + Hash::hash(a, h); + } + } + + fn eq(&self, data: &Vec) -> bool { + self.len() == data.len() && data.iter().enumerate().all(|(i, a)| &self[i] == a) + } + + fn into_owned(self) -> Vec { + self.into_iter() + .map(|a| Lookup::into_owned(a.clone())) + .collect() + } +} + +impl Lookup for &Path { + fn hash(&self, h: &mut H) { + Hash::hash(self, h); + } + + fn eq(&self, data: &PathBuf) -> bool { + self == data + } + + fn into_owned(self) -> PathBuf { + self.to_owned() + } +} diff --git a/src/lib.rs b/src/lib.rs index c23d9c2..0ee7d3e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -132,6 +132,7 @@ pub mod plumbing { pub use crate::interned::Configuration; pub use crate::interned::IngredientImpl; pub use crate::interned::JarImpl; + pub use crate::interned::Lookup; pub use 
crate::interned::Value; } diff --git a/tests/interned-struct-with-lifetime.rs b/tests/interned-struct-with-lifetime.rs deleted file mode 100644 index a74d2c4..0000000 --- a/tests/interned-struct-with-lifetime.rs +++ /dev/null @@ -1,31 +0,0 @@ -//! Test that a `tracked` fn on a `salsa::input` -//! compiles and executes successfully. - -use expect_test::expect; -use test_log::test; - -#[salsa::interned] -struct InternedString<'db> { - data: String, -} - -#[salsa::interned] -struct InternedPair<'db> { - data: (InternedString<'db>, InternedString<'db>), -} - -#[salsa::tracked] -fn intern_stuff(db: &dyn salsa::Database) -> String { - let s1 = InternedString::new(db, "Hello, ".to_string()); - let s2 = InternedString::new(db, "World, ".to_string()); - let s3 = InternedPair::new(db, (s1, s2)); - format!("{s3:?}") -} - -#[test] -fn execute() { - let db = salsa::DatabaseImpl::new(); - expect![[r#" - "InternedPair { data: (InternedString { data: \"Hello, \" }, InternedString { data: \"World, \" }) }" - "#]].assert_debug_eq(&intern_stuff(&db)); -} diff --git a/tests/interned-structs.rs b/tests/interned-structs.rs new file mode 100644 index 0000000..70dbc26 --- /dev/null +++ b/tests/interned-structs.rs @@ -0,0 +1,105 @@ +//! Test that interned structs can be created from owned or borrowed field values +//! and that equal data is interned to the same value. 
+ +use expect_test::expect; +use std::path::{Path, PathBuf}; +use test_log::test; + +#[salsa::interned] +struct InternedString<'db> { + data: String, +} + +#[salsa::interned] +struct InternedPair<'db> { + data: (InternedString<'db>, InternedString<'db>), +} + +#[salsa::interned] +struct InternedTwoFields<'db> { + data1: String, + data2: String, +} + +#[salsa::interned] +struct InternedVec<'db> { + data1: Vec, +} + +#[salsa::interned] +struct InternedPathBuf<'db> { + data1: PathBuf, +} + +#[salsa::tracked] +fn intern_stuff(db: &dyn salsa::Database) -> String { + let s1 = InternedString::new(db, "Hello, ".to_string()); + let s2 = InternedString::new(db, "World, "); + let s3 = InternedPair::new(db, (s1, s2)); + + format!("{s3:?}") +} + +#[test] +fn execute() { + let db = salsa::DatabaseImpl::new(); + expect![[r#" + "InternedPair { data: (InternedString { data: \"Hello, \" }, InternedString { data: \"World, \" }) }" + "#]].assert_debug_eq(&intern_stuff(&db)); +} + +#[test] +fn interning_returns_equal_keys_for_equal_data() { + let db = salsa::DatabaseImpl::new(); + let s1 = InternedString::new(&db, "Hello, ".to_string()); + let s2 = InternedString::new(&db, "World, ".to_string()); + let s1_2 = InternedString::new(&db, "Hello, "); + let s2_2 = InternedString::new(&db, "World, "); + assert_eq!(s1, s1_2); + assert_eq!(s2, s2_2); +} +#[test] +fn interning_returns_equal_keys_for_equal_data_multi_field() { + let db = salsa::DatabaseImpl::new(); + let s1 = InternedTwoFields::new(&db, "Hello, ".to_string(), "World"); + let s2 = InternedTwoFields::new(&db, "World, ", "Hello".to_string()); + let s1_2 = InternedTwoFields::new(&db, "Hello, ", "World"); + let s2_2 = InternedTwoFields::new(&db, "World, ", "Hello"); + let new = InternedTwoFields::new(&db, "Hello, World", ""); + + assert_eq!(s1, s1_2); + assert_eq!(s2, s2_2); + assert_ne!(s1, s2_2); + assert_ne!(s1, new); +} + +#[test] +fn interning_vec() { + let db = salsa::DatabaseImpl::new(); + let s1 = InternedVec::new(&db, 
["Hello, ".to_string(), "World".to_string()].as_slice()); + let s2 = InternedVec::new(&db, ["Hello, ", "World"].as_slice()); + let s3 = InternedVec::new(&db, vec!["Hello, ".to_string(), "World".to_string()]); + let s4 = InternedVec::new(&db, ["Hello, ", "World"].as_slice()); + let s5 = InternedVec::new(&db, ["Hello, ", "World", "Test"].as_slice()); + let s6 = InternedVec::new(&db, ["Hello, ", "World", ""].as_slice()); + let s7 = InternedVec::new(&db, ["Hello, "].as_slice()); + assert_eq!(s1, s2); + assert_eq!(s1, s3); + assert_eq!(s1, s4); + assert_ne!(s1, s5); + assert_ne!(s1, s6); + assert_ne!(s5, s6); + assert_ne!(s6, s7); +} + +#[test] +fn interning_path_buf() { + let db = salsa::DatabaseImpl::new(); + let s1 = InternedPathBuf::new(&db, PathBuf::from("test_path".to_string())); + let s2 = InternedPathBuf::new(&db, Path::new("test_path")); + let s3 = InternedPathBuf::new(&db, Path::new("test_path/")); + let s4 = InternedPathBuf::new(&db, Path::new("test_path/a")); + assert_eq!(s1, s2); + assert_eq!(s1, s3); + assert_ne!(s1, s4); +}