412: Fix race condition in interned data r=nikomatsakis a=Skepfyr

In some scenarios the `InternedIngredient` could return two different ids for the same data if called in parallel.

Co-authored-by: Jack Rickard <jack.rickard@outlook.com>
This commit is contained in:
bors[bot] 2022-09-27 22:48:54 +00:00 committed by GitHub
commit 30b5e9760a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -32,12 +32,12 @@ pub struct InternedIngredient<Id: InternedId, Data: InternedData> {
/// Maps from data to the existing interned id for that data. /// Maps from data to the existing interned id for that data.
/// ///
/// Deadlock requirement: We access `key_map` while holding lock on `value_map`, but not vice versa. /// Deadlock requirement: We access `value_map` while holding lock on `key_map`, but not vice versa.
key_map: FxDashMap<Data, Id>, key_map: FxDashMap<Data, Id>,
/// Maps from an interned id to its data. /// Maps from an interned id to its data.
/// ///
/// Deadlock requirement: We access `key_map` while holding lock on `value_map`, but not vice versa. /// Deadlock requirement: We access `value_map` while holding lock on `key_map`, but not vice versa.
value_map: FxDashMap<Id, Box<Data>>, value_map: FxDashMap<Id, Box<Data>>,
/// counter for the next id. /// counter for the next id.
@ -83,23 +83,26 @@ where
self.reset_at, self.reset_at,
); );
// Optimisation to only get read lock on the map if the data has already
// been interned.
if let Some(id) = self.key_map.get(&data) { if let Some(id) = self.key_map.get(&data) {
return *id; return *id;
} }
loop { match self.key_map.entry(data.clone()) {
let next_id = self.counter.fetch_add(1); // Data has been interned by a racing call, use that ID instead
let next_id = Id::from_id(crate::id::Id::from_u32(next_id)); dashmap::mapref::entry::Entry::Occupied(entry) => *entry.get(),
match self.value_map.entry(next_id) { // We won any races so should intern the data
// If we already have an entry with this id... dashmap::mapref::entry::Entry::Vacant(entry) => {
dashmap::mapref::entry::Entry::Occupied(_) => continue, let next_id = self.counter.fetch_add(1);
let next_id = Id::from_id(crate::id::Id::from_u32(next_id));
// Otherwise... let old_value = self.value_map.insert(next_id, Box::new(data));
dashmap::mapref::entry::Entry::Vacant(entry) => { assert!(
self.key_map.insert(data.clone(), next_id); old_value.is_none(),
entry.insert(Box::new(data)); "next_id is guaranteed to be unique, bar overflow"
return next_id; );
} entry.insert(next_id);
next_id
} }
} }
} }
@ -145,33 +148,32 @@ where
/// ///
/// # Warning /// # Warning
/// ///
/// This should only be used when you are certain that the given `id` has not (and will not) /// This should only be used when you are certain that:
/// be used in the current revision. More specifically, this is used when a query `Q` executes /// 1. The given `id` has not (and will not) be used in the current revision.
/// and we can compare the entities `E_now` that it produced in this revision vs the entities /// 2. The interned data corresponding to `id` will not be interned in this revision.
/// `E_prev` it produced in the last revision. Any missing entities `E_prev - E_new` can be ///
/// deleted. /// More specifically, this is used when a query `Q` executes and we can compare the
/// entities `E_now` that it produced in this revision vs the entities `E_prev` it
/// produced in the last revision. Any missing entities `E_prev - E_new` can be deleted.
/// ///
/// If you are wrong about this, it should not be unsafe, but unpredictable results may occur. /// If you are wrong about this, it should not be unsafe, but unpredictable results may occur.
pub(crate) fn delete_index(&self, id: Id) { pub(crate) fn delete_index(&self, id: Id) {
match self.value_map.entry(id) { let (_, key) = self
dashmap::mapref::entry::Entry::Vacant(_) => { .value_map
panic!("No entry for id `{:?}`", id); .remove(&id)
} .unwrap_or_else(|| panic!("No entry for id `{:?}`", id));
dashmap::mapref::entry::Entry::Occupied(entry) => {
self.key_map.remove(entry.get());
// Careful: even though `id` ought not to have been used in this revision, self.key_map.remove(&key);
// we don't know that for sure since users could have leaked things. If they did, // Careful: even though `id` ought not to have been used in this revision,
// they may have stray references into `data`. So push the box onto the // we don't know that for sure since users could have leaked things. If they did,
// "to be deleted" queue. // they may have stray references into `data`. So push the box onto the
// // "to be deleted" queue.
// To avoid this, we could include some kind of atomic counter in the `Box` that //
// gets set whenever `data` executes, so we can track if the data was accessed since // To avoid this, we could include some kind of atomic counter in the `Box` that
// the last time an `&mut self` method was called. But that'd take extra storage // gets set whenever `data` executes, so we can track if the data was accessed since
// and doesn't obviously seem worth it. // the last time an `&mut self` method was called. But that'd take extra storage
self.deleted_entries.push(entry.remove()); // and doesn't obviously seem worth it.
} self.deleted_entries.push(key);
}
} }
pub(crate) fn clear_deleted_indices(&mut self) { pub(crate) fn clear_deleted_indices(&mut self) {