From 1a4b5c5ee6e549a4e73a47e9b3ae43a0b3400e20 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Wed, 18 Jan 2023 20:18:57 +0900 Subject: [PATCH] index: make IdIndex store raw bytes, not hex bytes This helps us to migrate commit_id index to ReadonlyIndex. For large repositories, this also reduces initialization cost, but that's not the main intent of this change. https://github.com/martinvonz/jj/pull/1041#issuecomment-1399225876 common_hex_len() and iter_half_bytes() are added to backend.rs since more call sites will be added to index.rs, and I feel index.rs isn't a good place to host this kind of utility functions. --- lib/src/backend.rs | 20 ++++++++++++++ lib/src/repo.rs | 68 ++++++++++++++++++++++++++++++---------------- src/templater.rs | 13 +++++++-- 3 files changed, 75 insertions(+), 26 deletions(-) diff --git a/lib/src/backend.rs b/lib/src/backend.rs index 64d60ce08..b1bbff7b7 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -330,6 +330,26 @@ impl Tree { } } +/// Calculates common prefix length of two bytes. The length to be returned is +/// a number of hexadecimal digits. +pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize { + iter_half_bytes(bytes_a) + .zip(iter_half_bytes(bytes_b)) + .take_while(|(a, b)| a == b) + .count() +} + +fn iter_half_bytes(bytes: &[u8]) -> impl ExactSizeIterator + '_ { + (0..bytes.len() * 2).map(|i| { + let v = bytes[i / 2]; + if i & 1 == 0 { + v >> 4 + } else { + v & 0xf + } + }) +} + pub fn root_change_id() -> &'static ChangeId { static ROOT_CHANGE_ID: Lazy = Lazy::new(|| ChangeId::new(vec![0; CHANGE_ID_HASH_LENGTH])); diff --git a/lib/src/repo.rs b/lib/src/repo.rs index ea9f2e11b..91778446f 100644 --- a/lib/src/repo.rs +++ b/lib/src/repo.rs @@ -250,7 +250,7 @@ impl ReadonlyRepo { // We need to account for rewritten commits as well let mut id_index = IdIndex::new(); for entry in self.as_repo_ref().index().iter() { - id_index.insert(entry.commit_id().hex().as_bytes(), ()); + id_index.insert(entry.commit_id().as_bytes(), ()); } id_index }) @@ -263,22 +263,22 @@ impl ReadonlyRepo { .unwrap(); let mut id_index = IdIndex::new(); for entry in all_visible_revisions.iter() { - id_index.insert(entry.change_id().hex().as_bytes(), ()); + id_index.insert(entry.change_id().as_bytes(), ()); } id_index }) } - pub fn shortest_unique_prefix_length(&self, target_id_hex: &str) -> usize { + pub fn shortest_unique_prefix_length(&self, target_id_bytes: &[u8]) -> usize { // For `len = index.shortest(id)`, a prefix of length `len` will disambiguate // `id` from all other ids in the index. This will be just as true for // `max(len, anything_else)`, so a max of such lengths will disambiguate in all // indices. cmp::max( self.commit_id_index() - .shortest_unique_prefix_len(target_id_hex.as_bytes()), + .shortest_unique_prefix_len(target_id_bytes), self.change_id_index() - .shortest_unique_prefix_len(target_id_hex.as_bytes()), + .shortest_unique_prefix_len(target_id_bytes), ) } @@ -1194,6 +1194,7 @@ impl IdIndex { Self::default() } + /// Inserts a bytes key to the index. pub fn insert(&mut self, key: &[u8], value: IdIndexValue) -> Option { self.0.insert(key.to_vec(), value) } @@ -1201,6 +1202,9 @@ impl IdIndex { /// This function returns the shortest length of a prefix of `key` that /// disambiguates it from every other key in the index. /// + /// The given `key` must be provided as bytes, not as ASCII hexadecimal + /// digits. The length to be returned is a number of hexadecimal digits. + /// /// This has some properties that we do not currently make much use of: /// /// - The algorithm works even if `key` itself is not in the index. @@ -1217,10 +1221,7 @@ impl IdIndex { .next_back(); let right = self.0.range::<[u8], _>((Excluded(key), Unbounded)).next(); itertools::chain(left, right) - .map(|(neighbor, _value)| { - let common_len = key.iter().zip(neighbor).take_while(|(a, b)| a == b).count(); - common_len + 1 - }) + .map(|(neighbor, _value)| backend::common_hex_len(key, neighbor) + 1) .max() .unwrap_or(0) } @@ -1233,23 +1234,44 @@ mod tests { #[test] fn test_id_index() { let mut id_index = IdIndex::new(); - id_index.insert(b"ab", ()); - id_index.insert(b"acd0", ()); - assert_eq!(id_index.shortest_unique_prefix_len(b"acd0"), 2); - assert_eq!(id_index.shortest_unique_prefix_len(b"ac"), 3); + id_index.insert(&hex::decode("ab").unwrap(), ()); + id_index.insert(&hex::decode("acd0").unwrap(), ()); + assert_eq!( + id_index.shortest_unique_prefix_len(&hex::decode("acd0").unwrap()), + 2 + ); + assert_eq!( + id_index.shortest_unique_prefix_len(&hex::decode("ac").unwrap()), + 3 + ); let mut id_index = IdIndex::new(); - id_index.insert(b"ab", ()); - id_index.insert(b"acd0", ()); - id_index.insert(b"acf0", ()); - id_index.insert(b"a0", ()); - id_index.insert(b"ba", ()); + id_index.insert(&hex::decode("ab").unwrap(), ()); + id_index.insert(&hex::decode("acd0").unwrap(), ()); + id_index.insert(&hex::decode("acf0").unwrap(), ()); + id_index.insert(&hex::decode("a0").unwrap(), ()); + id_index.insert(&hex::decode("ba").unwrap(), ()); - assert_eq!(id_index.shortest_unique_prefix_len(b"a0"), 2); - assert_eq!(id_index.shortest_unique_prefix_len(b"ba"), 1); - assert_eq!(id_index.shortest_unique_prefix_len(b"ab"), 2); - assert_eq!(id_index.shortest_unique_prefix_len(b"acd0"), 3); + assert_eq!( + id_index.shortest_unique_prefix_len(&hex::decode("a0").unwrap()), + 2 + ); + assert_eq!( + id_index.shortest_unique_prefix_len(&hex::decode("ba").unwrap()), + 1 + ); + assert_eq!( + id_index.shortest_unique_prefix_len(&hex::decode("ab").unwrap()), + 2 + ); + assert_eq!( + id_index.shortest_unique_prefix_len(&hex::decode("acd0").unwrap()), + 3 + ); // If it were there, the length would be 1. - assert_eq!(id_index.shortest_unique_prefix_len(b"c0"), 1); + assert_eq!( + id_index.shortest_unique_prefix_len(&hex::decode("c0").unwrap()), + 1 + ); } } diff --git a/src/templater.rs b/src/templater.rs index 989ae02b8..d6e691439 100644 --- a/src/templater.rs +++ b/src/templater.rs @@ -412,6 +412,10 @@ impl<'a, C, I, O> TemplateProperty for TemplateFunction<'a, C, I, O> { pub struct CommitOrChangeId(Vec); impl CommitOrChangeId { + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } + pub fn hex(&self) -> String { hex::encode(&self.0) } @@ -423,12 +427,15 @@ impl CommitOrChangeId { } pub fn short_prefix_and_brackets(&self, repo: RepoRef) -> String { - highlight_shortest_prefix(self.hex(), 12, repo) + highlight_shortest_prefix(self, 12, repo) } } -fn highlight_shortest_prefix(mut hex: String, total_len: usize, repo: RepoRef) -> String { - let prefix_len = repo.base_repo().shortest_unique_prefix_length(&hex); +fn highlight_shortest_prefix(id: &CommitOrChangeId, total_len: usize, repo: RepoRef) -> String { + let prefix_len = repo + .base_repo() + .shortest_unique_prefix_length(id.as_bytes()); + let mut hex = id.hex(); if prefix_len < total_len - 2 { format!( "{}[{}]",