From d4e1156957d91502c9e626f3447296aba8b80ec1 Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Wed, 22 Mar 2023 14:08:39 -0700 Subject: [PATCH] repo: move IdIndex to revset engine --- lib/src/default_revset_engine.rs | 188 ++++++++++++++++++++++++++++++- lib/src/repo.rs | 187 ------------------------------ 2 files changed, 185 insertions(+), 190 deletions(-) diff --git a/lib/src/default_revset_engine.rs b/lib/src/default_revset_engine.rs index ccf592b3c..0f117f77d 100644 --- a/lib/src/default_revset_engine.rs +++ b/lib/src/default_revset_engine.rs @@ -18,17 +18,17 @@ use std::iter::Peekable; use itertools::Itertools; -use crate::backend::{ChangeId, CommitId}; +use crate::backend::{ChangeId, CommitId, ObjectId}; use crate::default_index_store::{CompositeIndex, IndexEntry, IndexPosition}; use crate::default_revset_graph_iterator::RevsetGraphIterator; use crate::index::{HexPrefix, PrefixResolution}; use crate::matchers::{EverythingMatcher, Matcher, PrefixMatcher}; -use crate::repo::{IdIndex, Repo}; +use crate::repo::Repo; use crate::revset::{ ChangeIdIndex, Revset, RevsetError, RevsetExpression, RevsetFilterPredicate, RevsetGraphEdge, RevsetIteratorExt, GENERATION_RANGE_FULL, }; -use crate::rewrite; +use crate::{backend, rewrite}; trait ToPredicateFn<'index> { /// Creates function that tests if the given entry is included in the set. @@ -111,6 +111,82 @@ impl ChangeIdIndex for ChangeIdIndexImpl<'_> { } } +#[derive(Debug, Clone)] +struct IdIndex(Vec<(K, V)>); + +impl IdIndex +where + K: ObjectId + Ord, +{ + /// Creates new index from the given entries. Multiple values can be + /// associated with a single key. + pub fn from_vec(mut vec: Vec<(K, V)>) -> Self { + vec.sort_unstable_by(|(k0, _), (k1, _)| k0.cmp(k1)); + IdIndex(vec) + } + + /// Looks up entries with the given prefix, and collects values if matched + /// entries have unambiguous keys. + pub fn resolve_prefix_with( + &self, + prefix: &HexPrefix, + mut value_mapper: impl FnMut(&V) -> U, + ) -> PrefixResolution> { + let mut range = self.resolve_prefix_range(prefix).peekable(); + if let Some((first_key, _)) = range.peek().copied() { + let maybe_entries: Option> = range + .map(|(k, v)| (k == first_key).then(|| value_mapper(v))) + .collect(); + if let Some(entries) = maybe_entries { + PrefixResolution::SingleMatch(entries) + } else { + PrefixResolution::AmbiguousMatch + } + } else { + PrefixResolution::NoMatch + } + } + + /// Iterates over entries with the given prefix. + pub fn resolve_prefix_range<'a: 'b, 'b>( + &'a self, + prefix: &'b HexPrefix, + ) -> impl Iterator + 'b { + let min_bytes = prefix.min_prefix_bytes(); + let pos = self.0.partition_point(|(k, _)| k.as_bytes() < min_bytes); + self.0[pos..] + .iter() + .take_while(|(k, _)| prefix.matches(k)) + .map(|(k, v)| (k, v)) + } + + /// This function returns the shortest length of a prefix of `key` that + /// disambiguates it from every other key in the index. + /// + /// The length to be returned is a number of hexadecimal digits. + /// + /// This has some properties that we do not currently make much use of: + /// + /// - The algorithm works even if `key` itself is not in the index. + /// + /// - In the special case when there are keys in the trie for which our + /// `key` is an exact prefix, returns `key.len() + 1`. Conceptually, in + /// order to disambiguate, you need every letter of the key *and* the + /// additional fact that it's the entire key). This case is extremely + /// unlikely for hashes with 12+ hexadecimal characters. + pub fn shortest_unique_prefix_len(&self, key: &K) -> usize { + let pos = self.0.partition_point(|(k, _)| k < key); + let left = pos.checked_sub(1).map(|p| &self.0[p]); + let right = self.0[pos..].iter().find(|(k, _)| k != key); + itertools::chain(left, right) + .map(|(neighbor, _value)| { + backend::common_hex_len(key.as_bytes(), neighbor.as_bytes()) + 1 + }) + .max() + .unwrap_or(0) + } +} + struct EagerRevset<'index> { index_entries: Vec>, } @@ -712,6 +788,112 @@ mod tests { use crate::default_index_store::MutableIndexImpl; use crate::index::Index; + #[test] + fn test_id_index_resolve_prefix() { + fn sorted(resolution: PrefixResolution>) -> PrefixResolution> { + match resolution { + PrefixResolution::SingleMatch(mut xs) => { + xs.sort(); // order of values might not be preserved by IdIndex + PrefixResolution::SingleMatch(xs) + } + _ => resolution, + } + } + let id_index = IdIndex::from_vec(vec![ + (ChangeId::from_hex("0000"), 0), + (ChangeId::from_hex("0099"), 1), + (ChangeId::from_hex("0099"), 2), + (ChangeId::from_hex("0aaa"), 3), + (ChangeId::from_hex("0aab"), 4), + ]); + assert_eq!( + id_index.resolve_prefix_with(&HexPrefix::new("0").unwrap(), |&v| v), + PrefixResolution::AmbiguousMatch, + ); + assert_eq!( + id_index.resolve_prefix_with(&HexPrefix::new("00").unwrap(), |&v| v), + PrefixResolution::AmbiguousMatch, + ); + assert_eq!( + id_index.resolve_prefix_with(&HexPrefix::new("000").unwrap(), |&v| v), + PrefixResolution::SingleMatch(vec![0]), + ); + assert_eq!( + id_index.resolve_prefix_with(&HexPrefix::new("0001").unwrap(), |&v| v), + PrefixResolution::NoMatch, + ); + assert_eq!( + sorted(id_index.resolve_prefix_with(&HexPrefix::new("009").unwrap(), |&v| v)), + PrefixResolution::SingleMatch(vec![1, 2]), + ); + assert_eq!( + id_index.resolve_prefix_with(&HexPrefix::new("0aa").unwrap(), |&v| v), + PrefixResolution::AmbiguousMatch, + ); + assert_eq!( + id_index.resolve_prefix_with(&HexPrefix::new("0aab").unwrap(), |&v| v), + PrefixResolution::SingleMatch(vec![4]), + ); + assert_eq!( + id_index.resolve_prefix_with(&HexPrefix::new("f").unwrap(), |&v| v), + PrefixResolution::NoMatch, + ); + } + + #[test] + fn test_id_index_shortest_unique_prefix_len() { + // No crash if empty + let id_index = IdIndex::from_vec(vec![] as Vec<(ChangeId, ())>); + assert_eq!( + id_index.shortest_unique_prefix_len(&ChangeId::from_hex("00")), + 0 + ); + + let id_index = IdIndex::from_vec(vec![ + (ChangeId::from_hex("ab"), ()), + (ChangeId::from_hex("acd0"), ()), + (ChangeId::from_hex("acd0"), ()), // duplicated key is allowed + ]); + assert_eq!( + id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")), + 2 + ); + assert_eq!( + id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ac")), + 3 + ); + + let id_index = IdIndex::from_vec(vec![ + (ChangeId::from_hex("ab"), ()), + (ChangeId::from_hex("acd0"), ()), + (ChangeId::from_hex("acf0"), ()), + (ChangeId::from_hex("a0"), ()), + (ChangeId::from_hex("ba"), ()), + ]); + + assert_eq!( + id_index.shortest_unique_prefix_len(&ChangeId::from_hex("a0")), + 2 + ); + assert_eq!( + id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ba")), + 1 + ); + assert_eq!( + id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ab")), + 2 + ); + assert_eq!( + id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")), + 3 + ); + // If it were there, the length would be 1. + assert_eq!( + id_index.shortest_unique_prefix_len(&ChangeId::from_hex("c0")), + 1 + ); + } + /// Generator of unique 16-byte ChangeId excluding root id fn change_id_generator() -> impl FnMut() -> ChangeId { let mut iter = (1_u128..).map(|n| ChangeId::new(n.to_le_bytes().into())); diff --git a/lib/src/repo.rs b/lib/src/repo.rs index 80ee50b95..80684f3bc 100644 --- a/lib/src/repo.rs +++ b/lib/src/repo.rs @@ -1265,190 +1265,3 @@ mod dirty_cell { } } } - -#[derive(Debug, Clone)] -pub struct IdIndex(Vec<(K, V)>); - -impl IdIndex -where - K: ObjectId + Ord, -{ - /// Creates new index from the given entries. Multiple values can be - /// associated with a single key. - pub fn from_vec(mut vec: Vec<(K, V)>) -> Self { - vec.sort_unstable_by(|(k0, _), (k1, _)| k0.cmp(k1)); - IdIndex(vec) - } - - /// Looks up entries with the given prefix, and collects values if matched - /// entries have unambiguous keys. - pub fn resolve_prefix_with( - &self, - prefix: &HexPrefix, - mut value_mapper: impl FnMut(&V) -> U, - ) -> PrefixResolution> { - let mut range = self.resolve_prefix_range(prefix).peekable(); - if let Some((first_key, _)) = range.peek().copied() { - let maybe_entries: Option> = range - .map(|(k, v)| (k == first_key).then(|| value_mapper(v))) - .collect(); - if let Some(entries) = maybe_entries { - PrefixResolution::SingleMatch(entries) - } else { - PrefixResolution::AmbiguousMatch - } - } else { - PrefixResolution::NoMatch - } - } - - /// Iterates over entries with the given prefix. - pub fn resolve_prefix_range<'a: 'b, 'b>( - &'a self, - prefix: &'b HexPrefix, - ) -> impl Iterator + 'b { - let min_bytes = prefix.min_prefix_bytes(); - let pos = self.0.partition_point(|(k, _)| k.as_bytes() < min_bytes); - self.0[pos..] - .iter() - .take_while(|(k, _)| prefix.matches(k)) - .map(|(k, v)| (k, v)) - } - - /// This function returns the shortest length of a prefix of `key` that - /// disambiguates it from every other key in the index. - /// - /// The length to be returned is a number of hexadecimal digits. - /// - /// This has some properties that we do not currently make much use of: - /// - /// - The algorithm works even if `key` itself is not in the index. - /// - /// - In the special case when there are keys in the trie for which our - /// `key` is an exact prefix, returns `key.len() + 1`. Conceptually, in - /// order to disambiguate, you need every letter of the key *and* the - /// additional fact that it's the entire key). This case is extremely - /// unlikely for hashes with 12+ hexadecimal characters. - pub fn shortest_unique_prefix_len(&self, key: &K) -> usize { - let pos = self.0.partition_point(|(k, _)| k < key); - let left = pos.checked_sub(1).map(|p| &self.0[p]); - let right = self.0[pos..].iter().find(|(k, _)| k != key); - itertools::chain(left, right) - .map(|(neighbor, _value)| { - backend::common_hex_len(key.as_bytes(), neighbor.as_bytes()) + 1 - }) - .max() - .unwrap_or(0) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_id_index_resolve_prefix() { - fn sorted(resolution: PrefixResolution>) -> PrefixResolution> { - match resolution { - PrefixResolution::SingleMatch(mut xs) => { - xs.sort(); // order of values might not be preserved by IdIndex - PrefixResolution::SingleMatch(xs) - } - _ => resolution, - } - } - let id_index = IdIndex::from_vec(vec![ - (ChangeId::from_hex("0000"), 0), - (ChangeId::from_hex("0099"), 1), - (ChangeId::from_hex("0099"), 2), - (ChangeId::from_hex("0aaa"), 3), - (ChangeId::from_hex("0aab"), 4), - ]); - assert_eq!( - id_index.resolve_prefix_with(&HexPrefix::new("0").unwrap(), |&v| v), - PrefixResolution::AmbiguousMatch, - ); - assert_eq!( - id_index.resolve_prefix_with(&HexPrefix::new("00").unwrap(), |&v| v), - PrefixResolution::AmbiguousMatch, - ); - assert_eq!( - id_index.resolve_prefix_with(&HexPrefix::new("000").unwrap(), |&v| v), - PrefixResolution::SingleMatch(vec![0]), - ); - assert_eq!( - id_index.resolve_prefix_with(&HexPrefix::new("0001").unwrap(), |&v| v), - PrefixResolution::NoMatch, - ); - assert_eq!( - sorted(id_index.resolve_prefix_with(&HexPrefix::new("009").unwrap(), |&v| v)), - PrefixResolution::SingleMatch(vec![1, 2]), - ); - assert_eq!( - id_index.resolve_prefix_with(&HexPrefix::new("0aa").unwrap(), |&v| v), - PrefixResolution::AmbiguousMatch, - ); - assert_eq!( - id_index.resolve_prefix_with(&HexPrefix::new("0aab").unwrap(), |&v| v), - PrefixResolution::SingleMatch(vec![4]), - ); - assert_eq!( - id_index.resolve_prefix_with(&HexPrefix::new("f").unwrap(), |&v| v), - PrefixResolution::NoMatch, - ); - } - - #[test] - fn test_id_index_shortest_unique_prefix_len() { - // No crash if empty - let id_index = IdIndex::from_vec(vec![] as Vec<(ChangeId, ())>); - assert_eq!( - id_index.shortest_unique_prefix_len(&ChangeId::from_hex("00")), - 0 - ); - - let id_index = IdIndex::from_vec(vec![ - (ChangeId::from_hex("ab"), ()), - (ChangeId::from_hex("acd0"), ()), - (ChangeId::from_hex("acd0"), ()), // duplicated key is allowed - ]); - assert_eq!( - id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")), - 2 - ); - assert_eq!( - id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ac")), - 3 - ); - - let id_index = IdIndex::from_vec(vec![ - (ChangeId::from_hex("ab"), ()), - (ChangeId::from_hex("acd0"), ()), - (ChangeId::from_hex("acf0"), ()), - (ChangeId::from_hex("a0"), ()), - (ChangeId::from_hex("ba"), ()), - ]); - - assert_eq!( - id_index.shortest_unique_prefix_len(&ChangeId::from_hex("a0")), - 2 - ); - assert_eq!( - id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ba")), - 1 - ); - assert_eq!( - id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ab")), - 2 - ); - assert_eq!( - id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")), - 3 - ); - // If it were there, the length would be 1. - assert_eq!( - id_index.shortest_unique_prefix_len(&ChangeId::from_hex("c0")), - 1 - ); - } -}