mirror of
https://github.com/martinvonz/jj.git
synced 2025-01-18 02:04:19 +00:00
index: make IdIndex store raw bytes, not hex bytes
This helps us to migrate commit_id index to ReadonlyIndex. For large repositories, this also reduces initialization cost, but that's not the main intent of this change. https://github.com/martinvonz/jj/pull/1041#issuecomment-1399225876 common_hex_len() and iter_half_bytes() are added to backend.rs since more call sites will be added to index.rs, and I feel index.rs isn't a good place to host this kind of utility functions.
This commit is contained in:
parent
65a659347e
commit
1a4b5c5ee6
3 changed files with 75 additions and 26 deletions
|
@ -330,6 +330,26 @@ impl Tree {
|
|||
}
|
||||
}
|
||||
|
||||
/// Calculates common prefix length of two bytes. The length to be returned is
|
||||
/// a number of hexadecimal digits.
|
||||
pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize {
|
||||
iter_half_bytes(bytes_a)
|
||||
.zip(iter_half_bytes(bytes_b))
|
||||
.take_while(|(a, b)| a == b)
|
||||
.count()
|
||||
}
|
||||
|
||||
fn iter_half_bytes(bytes: &[u8]) -> impl ExactSizeIterator<Item = u8> + '_ {
|
||||
(0..bytes.len() * 2).map(|i| {
|
||||
let v = bytes[i / 2];
|
||||
if i & 1 == 0 {
|
||||
v >> 4
|
||||
} else {
|
||||
v & 0xf
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn root_change_id() -> &'static ChangeId {
|
||||
static ROOT_CHANGE_ID: Lazy<ChangeId> =
|
||||
Lazy::new(|| ChangeId::new(vec![0; CHANGE_ID_HASH_LENGTH]));
|
||||
|
|
|
@ -250,7 +250,7 @@ impl ReadonlyRepo {
|
|||
// We need to account for rewritten commits as well
|
||||
let mut id_index = IdIndex::new();
|
||||
for entry in self.as_repo_ref().index().iter() {
|
||||
id_index.insert(entry.commit_id().hex().as_bytes(), ());
|
||||
id_index.insert(entry.commit_id().as_bytes(), ());
|
||||
}
|
||||
id_index
|
||||
})
|
||||
|
@ -263,22 +263,22 @@ impl ReadonlyRepo {
|
|||
.unwrap();
|
||||
let mut id_index = IdIndex::new();
|
||||
for entry in all_visible_revisions.iter() {
|
||||
id_index.insert(entry.change_id().hex().as_bytes(), ());
|
||||
id_index.insert(entry.change_id().as_bytes(), ());
|
||||
}
|
||||
id_index
|
||||
})
|
||||
}
|
||||
|
||||
pub fn shortest_unique_prefix_length(&self, target_id_hex: &str) -> usize {
|
||||
pub fn shortest_unique_prefix_length(&self, target_id_bytes: &[u8]) -> usize {
|
||||
// For `len = index.shortest(id)`, a prefix of length `len` will disambiguate
|
||||
// `id` from all other ids in the index. This will be just as true for
|
||||
// `max(len, anything_else)`, so a max of such lengths will disambiguate in all
|
||||
// indices.
|
||||
cmp::max(
|
||||
self.commit_id_index()
|
||||
.shortest_unique_prefix_len(target_id_hex.as_bytes()),
|
||||
.shortest_unique_prefix_len(target_id_bytes),
|
||||
self.change_id_index()
|
||||
.shortest_unique_prefix_len(target_id_hex.as_bytes()),
|
||||
.shortest_unique_prefix_len(target_id_bytes),
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -1194,6 +1194,7 @@ impl IdIndex {
|
|||
Self::default()
|
||||
}
|
||||
|
||||
/// Inserts a bytes key to the index.
|
||||
pub fn insert(&mut self, key: &[u8], value: IdIndexValue) -> Option<IdIndexValue> {
|
||||
self.0.insert(key.to_vec(), value)
|
||||
}
|
||||
|
@ -1201,6 +1202,9 @@ impl IdIndex {
|
|||
/// This function returns the shortest length of a prefix of `key` that
|
||||
/// disambiguates it from every other key in the index.
|
||||
///
|
||||
/// The given `key` must be provided as bytes, not as ASCII hexadecimal
|
||||
/// digits. The length to be returned is a number of hexadecimal digits.
|
||||
///
|
||||
/// This has some properties that we do not currently make much use of:
|
||||
///
|
||||
/// - The algorithm works even if `key` itself is not in the index.
|
||||
|
@ -1217,10 +1221,7 @@ impl IdIndex {
|
|||
.next_back();
|
||||
let right = self.0.range::<[u8], _>((Excluded(key), Unbounded)).next();
|
||||
itertools::chain(left, right)
|
||||
.map(|(neighbor, _value)| {
|
||||
let common_len = key.iter().zip(neighbor).take_while(|(a, b)| a == b).count();
|
||||
common_len + 1
|
||||
})
|
||||
.map(|(neighbor, _value)| backend::common_hex_len(key, neighbor) + 1)
|
||||
.max()
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
@ -1233,23 +1234,44 @@ mod tests {
|
|||
#[test]
|
||||
fn test_id_index() {
|
||||
let mut id_index = IdIndex::new();
|
||||
id_index.insert(b"ab", ());
|
||||
id_index.insert(b"acd0", ());
|
||||
assert_eq!(id_index.shortest_unique_prefix_len(b"acd0"), 2);
|
||||
assert_eq!(id_index.shortest_unique_prefix_len(b"ac"), 3);
|
||||
id_index.insert(&hex::decode("ab").unwrap(), ());
|
||||
id_index.insert(&hex::decode("acd0").unwrap(), ());
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&hex::decode("acd0").unwrap()),
|
||||
2
|
||||
);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&hex::decode("ac").unwrap()),
|
||||
3
|
||||
);
|
||||
|
||||
let mut id_index = IdIndex::new();
|
||||
id_index.insert(b"ab", ());
|
||||
id_index.insert(b"acd0", ());
|
||||
id_index.insert(b"acf0", ());
|
||||
id_index.insert(b"a0", ());
|
||||
id_index.insert(b"ba", ());
|
||||
id_index.insert(&hex::decode("ab").unwrap(), ());
|
||||
id_index.insert(&hex::decode("acd0").unwrap(), ());
|
||||
id_index.insert(&hex::decode("acf0").unwrap(), ());
|
||||
id_index.insert(&hex::decode("a0").unwrap(), ());
|
||||
id_index.insert(&hex::decode("ba").unwrap(), ());
|
||||
|
||||
assert_eq!(id_index.shortest_unique_prefix_len(b"a0"), 2);
|
||||
assert_eq!(id_index.shortest_unique_prefix_len(b"ba"), 1);
|
||||
assert_eq!(id_index.shortest_unique_prefix_len(b"ab"), 2);
|
||||
assert_eq!(id_index.shortest_unique_prefix_len(b"acd0"), 3);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&hex::decode("a0").unwrap()),
|
||||
2
|
||||
);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&hex::decode("ba").unwrap()),
|
||||
1
|
||||
);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&hex::decode("ab").unwrap()),
|
||||
2
|
||||
);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&hex::decode("acd0").unwrap()),
|
||||
3
|
||||
);
|
||||
// If it were there, the length would be 1.
|
||||
assert_eq!(id_index.shortest_unique_prefix_len(b"c0"), 1);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&hex::decode("c0").unwrap()),
|
||||
1
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -412,6 +412,10 @@ impl<'a, C, I, O> TemplateProperty<C, O> for TemplateFunction<'a, C, I, O> {
|
|||
pub struct CommitOrChangeId(Vec<u8>);
|
||||
|
||||
impl CommitOrChangeId {
|
||||
pub fn as_bytes(&self) -> &[u8] {
|
||||
&self.0
|
||||
}
|
||||
|
||||
pub fn hex(&self) -> String {
|
||||
hex::encode(&self.0)
|
||||
}
|
||||
|
@ -423,12 +427,15 @@ impl CommitOrChangeId {
|
|||
}
|
||||
|
||||
pub fn short_prefix_and_brackets(&self, repo: RepoRef) -> String {
|
||||
highlight_shortest_prefix(self.hex(), 12, repo)
|
||||
highlight_shortest_prefix(self, 12, repo)
|
||||
}
|
||||
}
|
||||
|
||||
fn highlight_shortest_prefix(mut hex: String, total_len: usize, repo: RepoRef) -> String {
|
||||
let prefix_len = repo.base_repo().shortest_unique_prefix_length(&hex);
|
||||
fn highlight_shortest_prefix(id: &CommitOrChangeId, total_len: usize, repo: RepoRef) -> String {
|
||||
let prefix_len = repo
|
||||
.base_repo()
|
||||
.shortest_unique_prefix_length(id.as_bytes());
|
||||
let mut hex = id.hex();
|
||||
if prefix_len < total_len - 2 {
|
||||
format!(
|
||||
"{}[{}]",
|
||||
|
|
Loading…
Reference in a new issue