repo: move IdIndex to revset engine

This commit is contained in:
Martin von Zweigbergk 2023-03-22 14:08:39 -07:00 committed by Martin von Zweigbergk
parent 13a000caa7
commit d4e1156957
2 changed files with 185 additions and 190 deletions

View file

@ -18,17 +18,17 @@ use std::iter::Peekable;
use itertools::Itertools; use itertools::Itertools;
use crate::backend::{ChangeId, CommitId}; use crate::backend::{ChangeId, CommitId, ObjectId};
use crate::default_index_store::{CompositeIndex, IndexEntry, IndexPosition}; use crate::default_index_store::{CompositeIndex, IndexEntry, IndexPosition};
use crate::default_revset_graph_iterator::RevsetGraphIterator; use crate::default_revset_graph_iterator::RevsetGraphIterator;
use crate::index::{HexPrefix, PrefixResolution}; use crate::index::{HexPrefix, PrefixResolution};
use crate::matchers::{EverythingMatcher, Matcher, PrefixMatcher}; use crate::matchers::{EverythingMatcher, Matcher, PrefixMatcher};
use crate::repo::{IdIndex, Repo}; use crate::repo::Repo;
use crate::revset::{ use crate::revset::{
ChangeIdIndex, Revset, RevsetError, RevsetExpression, RevsetFilterPredicate, RevsetGraphEdge, ChangeIdIndex, Revset, RevsetError, RevsetExpression, RevsetFilterPredicate, RevsetGraphEdge,
RevsetIteratorExt, GENERATION_RANGE_FULL, RevsetIteratorExt, GENERATION_RANGE_FULL,
}; };
use crate::rewrite; use crate::{backend, rewrite};
trait ToPredicateFn<'index> { trait ToPredicateFn<'index> {
/// Creates function that tests if the given entry is included in the set. /// Creates function that tests if the given entry is included in the set.
@ -111,6 +111,82 @@ impl ChangeIdIndex for ChangeIdIndexImpl<'_> {
} }
} }
#[derive(Debug, Clone)]
struct IdIndex<K, V>(Vec<(K, V)>);
impl<K, V> IdIndex<K, V>
where
K: ObjectId + Ord,
{
/// Creates new index from the given entries. Multiple values can be
/// associated with a single key.
pub fn from_vec(mut vec: Vec<(K, V)>) -> Self {
vec.sort_unstable_by(|(k0, _), (k1, _)| k0.cmp(k1));
IdIndex(vec)
}
/// Looks up entries with the given prefix, and collects values if matched
/// entries have unambiguous keys.
pub fn resolve_prefix_with<U>(
&self,
prefix: &HexPrefix,
mut value_mapper: impl FnMut(&V) -> U,
) -> PrefixResolution<Vec<U>> {
let mut range = self.resolve_prefix_range(prefix).peekable();
if let Some((first_key, _)) = range.peek().copied() {
let maybe_entries: Option<Vec<_>> = range
.map(|(k, v)| (k == first_key).then(|| value_mapper(v)))
.collect();
if let Some(entries) = maybe_entries {
PrefixResolution::SingleMatch(entries)
} else {
PrefixResolution::AmbiguousMatch
}
} else {
PrefixResolution::NoMatch
}
}
/// Iterates over entries with the given prefix.
pub fn resolve_prefix_range<'a: 'b, 'b>(
&'a self,
prefix: &'b HexPrefix,
) -> impl Iterator<Item = (&'a K, &'a V)> + 'b {
let min_bytes = prefix.min_prefix_bytes();
let pos = self.0.partition_point(|(k, _)| k.as_bytes() < min_bytes);
self.0[pos..]
.iter()
.take_while(|(k, _)| prefix.matches(k))
.map(|(k, v)| (k, v))
}
/// This function returns the shortest length of a prefix of `key` that
/// disambiguates it from every other key in the index.
///
/// The length to be returned is a number of hexadecimal digits.
///
/// This has some properties that we do not currently make much use of:
///
/// - The algorithm works even if `key` itself is not in the index.
///
/// - In the special case when there are keys in the trie for which our
/// `key` is an exact prefix, returns `key.len() + 1`. Conceptually, in
/// order to disambiguate, you need every letter of the key *and* the
/// additional fact that it's the entire key). This case is extremely
/// unlikely for hashes with 12+ hexadecimal characters.
pub fn shortest_unique_prefix_len(&self, key: &K) -> usize {
let pos = self.0.partition_point(|(k, _)| k < key);
let left = pos.checked_sub(1).map(|p| &self.0[p]);
let right = self.0[pos..].iter().find(|(k, _)| k != key);
itertools::chain(left, right)
.map(|(neighbor, _value)| {
backend::common_hex_len(key.as_bytes(), neighbor.as_bytes()) + 1
})
.max()
.unwrap_or(0)
}
}
struct EagerRevset<'index> { struct EagerRevset<'index> {
index_entries: Vec<IndexEntry<'index>>, index_entries: Vec<IndexEntry<'index>>,
} }
@ -712,6 +788,112 @@ mod tests {
use crate::default_index_store::MutableIndexImpl; use crate::default_index_store::MutableIndexImpl;
use crate::index::Index; use crate::index::Index;
#[test]
fn test_id_index_resolve_prefix() {
fn sorted(resolution: PrefixResolution<Vec<i32>>) -> PrefixResolution<Vec<i32>> {
match resolution {
PrefixResolution::SingleMatch(mut xs) => {
xs.sort(); // order of values might not be preserved by IdIndex
PrefixResolution::SingleMatch(xs)
}
_ => resolution,
}
}
let id_index = IdIndex::from_vec(vec![
(ChangeId::from_hex("0000"), 0),
(ChangeId::from_hex("0099"), 1),
(ChangeId::from_hex("0099"), 2),
(ChangeId::from_hex("0aaa"), 3),
(ChangeId::from_hex("0aab"), 4),
]);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0").unwrap(), |&v| v),
PrefixResolution::AmbiguousMatch,
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("00").unwrap(), |&v| v),
PrefixResolution::AmbiguousMatch,
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("000").unwrap(), |&v| v),
PrefixResolution::SingleMatch(vec![0]),
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0001").unwrap(), |&v| v),
PrefixResolution::NoMatch,
);
assert_eq!(
sorted(id_index.resolve_prefix_with(&HexPrefix::new("009").unwrap(), |&v| v)),
PrefixResolution::SingleMatch(vec![1, 2]),
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0aa").unwrap(), |&v| v),
PrefixResolution::AmbiguousMatch,
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0aab").unwrap(), |&v| v),
PrefixResolution::SingleMatch(vec![4]),
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("f").unwrap(), |&v| v),
PrefixResolution::NoMatch,
);
}
#[test]
fn test_id_index_shortest_unique_prefix_len() {
// No crash if empty
let id_index = IdIndex::from_vec(vec![] as Vec<(ChangeId, ())>);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("00")),
0
);
let id_index = IdIndex::from_vec(vec![
(ChangeId::from_hex("ab"), ()),
(ChangeId::from_hex("acd0"), ()),
(ChangeId::from_hex("acd0"), ()), // duplicated key is allowed
]);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
2
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ac")),
3
);
let id_index = IdIndex::from_vec(vec![
(ChangeId::from_hex("ab"), ()),
(ChangeId::from_hex("acd0"), ()),
(ChangeId::from_hex("acf0"), ()),
(ChangeId::from_hex("a0"), ()),
(ChangeId::from_hex("ba"), ()),
]);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("a0")),
2
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ba")),
1
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ab")),
2
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
3
);
// If it were there, the length would be 1.
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("c0")),
1
);
}
/// Generator of unique 16-byte ChangeId excluding root id /// Generator of unique 16-byte ChangeId excluding root id
fn change_id_generator() -> impl FnMut() -> ChangeId { fn change_id_generator() -> impl FnMut() -> ChangeId {
let mut iter = (1_u128..).map(|n| ChangeId::new(n.to_le_bytes().into())); let mut iter = (1_u128..).map(|n| ChangeId::new(n.to_le_bytes().into()));

View file

@ -1265,190 +1265,3 @@ mod dirty_cell {
} }
} }
} }
#[derive(Debug, Clone)]
pub struct IdIndex<K, V>(Vec<(K, V)>);
impl<K, V> IdIndex<K, V>
where
K: ObjectId + Ord,
{
/// Creates new index from the given entries. Multiple values can be
/// associated with a single key.
pub fn from_vec(mut vec: Vec<(K, V)>) -> Self {
vec.sort_unstable_by(|(k0, _), (k1, _)| k0.cmp(k1));
IdIndex(vec)
}
/// Looks up entries with the given prefix, and collects values if matched
/// entries have unambiguous keys.
pub fn resolve_prefix_with<U>(
&self,
prefix: &HexPrefix,
mut value_mapper: impl FnMut(&V) -> U,
) -> PrefixResolution<Vec<U>> {
let mut range = self.resolve_prefix_range(prefix).peekable();
if let Some((first_key, _)) = range.peek().copied() {
let maybe_entries: Option<Vec<_>> = range
.map(|(k, v)| (k == first_key).then(|| value_mapper(v)))
.collect();
if let Some(entries) = maybe_entries {
PrefixResolution::SingleMatch(entries)
} else {
PrefixResolution::AmbiguousMatch
}
} else {
PrefixResolution::NoMatch
}
}
/// Iterates over entries with the given prefix.
pub fn resolve_prefix_range<'a: 'b, 'b>(
&'a self,
prefix: &'b HexPrefix,
) -> impl Iterator<Item = (&'a K, &'a V)> + 'b {
let min_bytes = prefix.min_prefix_bytes();
let pos = self.0.partition_point(|(k, _)| k.as_bytes() < min_bytes);
self.0[pos..]
.iter()
.take_while(|(k, _)| prefix.matches(k))
.map(|(k, v)| (k, v))
}
/// This function returns the shortest length of a prefix of `key` that
/// disambiguates it from every other key in the index.
///
/// The length to be returned is a number of hexadecimal digits.
///
/// This has some properties that we do not currently make much use of:
///
/// - The algorithm works even if `key` itself is not in the index.
///
/// - In the special case when there are keys in the trie for which our
/// `key` is an exact prefix, returns `key.len() + 1`. Conceptually, in
/// order to disambiguate, you need every letter of the key *and* the
/// additional fact that it's the entire key). This case is extremely
/// unlikely for hashes with 12+ hexadecimal characters.
pub fn shortest_unique_prefix_len(&self, key: &K) -> usize {
let pos = self.0.partition_point(|(k, _)| k < key);
let left = pos.checked_sub(1).map(|p| &self.0[p]);
let right = self.0[pos..].iter().find(|(k, _)| k != key);
itertools::chain(left, right)
.map(|(neighbor, _value)| {
backend::common_hex_len(key.as_bytes(), neighbor.as_bytes()) + 1
})
.max()
.unwrap_or(0)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_id_index_resolve_prefix() {
fn sorted(resolution: PrefixResolution<Vec<i32>>) -> PrefixResolution<Vec<i32>> {
match resolution {
PrefixResolution::SingleMatch(mut xs) => {
xs.sort(); // order of values might not be preserved by IdIndex
PrefixResolution::SingleMatch(xs)
}
_ => resolution,
}
}
let id_index = IdIndex::from_vec(vec![
(ChangeId::from_hex("0000"), 0),
(ChangeId::from_hex("0099"), 1),
(ChangeId::from_hex("0099"), 2),
(ChangeId::from_hex("0aaa"), 3),
(ChangeId::from_hex("0aab"), 4),
]);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0").unwrap(), |&v| v),
PrefixResolution::AmbiguousMatch,
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("00").unwrap(), |&v| v),
PrefixResolution::AmbiguousMatch,
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("000").unwrap(), |&v| v),
PrefixResolution::SingleMatch(vec![0]),
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0001").unwrap(), |&v| v),
PrefixResolution::NoMatch,
);
assert_eq!(
sorted(id_index.resolve_prefix_with(&HexPrefix::new("009").unwrap(), |&v| v)),
PrefixResolution::SingleMatch(vec![1, 2]),
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0aa").unwrap(), |&v| v),
PrefixResolution::AmbiguousMatch,
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0aab").unwrap(), |&v| v),
PrefixResolution::SingleMatch(vec![4]),
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("f").unwrap(), |&v| v),
PrefixResolution::NoMatch,
);
}
#[test]
fn test_id_index_shortest_unique_prefix_len() {
// No crash if empty
let id_index = IdIndex::from_vec(vec![] as Vec<(ChangeId, ())>);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("00")),
0
);
let id_index = IdIndex::from_vec(vec![
(ChangeId::from_hex("ab"), ()),
(ChangeId::from_hex("acd0"), ()),
(ChangeId::from_hex("acd0"), ()), // duplicated key is allowed
]);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
2
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ac")),
3
);
let id_index = IdIndex::from_vec(vec![
(ChangeId::from_hex("ab"), ()),
(ChangeId::from_hex("acd0"), ()),
(ChangeId::from_hex("acf0"), ()),
(ChangeId::from_hex("a0"), ()),
(ChangeId::from_hex("ba"), ()),
]);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("a0")),
2
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ba")),
1
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ab")),
2
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
3
);
// If it were there, the length would be 1.
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("c0")),
1
);
}
}