repo: turn IdIndex into sorted Vec, use binary search

Since IdIndex is immutable, we don't need fast insertion provided by BTreeMap.
Let's simply use Vec for some speed up. More importantly, this allows us to
store multiple (ChangeId, CommitId) pairs for the same change id, and will
unblock the use of IdIndex in revset::resolve_symbol().

Some benchmark numbers (against my "linux" repo) follow.

Command:
    hyperfine --warmup 3 "jj log -r master \
      -T 'commit_id.short_prefix_and_brackets()' \
      --no-commit-working-copy --no-graph"

Original:
    Time (mean ± σ):      1.892 s ±  0.031 s    [User: 1.800 s, System: 0.092 s]
    Range (min … max):    1.833 s …  1.935 s    10 runs

This commit:
    Time (mean ± σ):     867.5 ms ±   2.7 ms    [User: 809.9 ms, System: 57.7 ms]
    Range (min … max):   862.3 ms … 871.0 ms    10 runs
This commit is contained in:
Yuya Nishihara 2023-01-21 18:32:58 +09:00
parent 1a6e71170e
commit c82a62cf99

View file

@ -12,10 +12,9 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use std::collections::{BTreeMap, HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::fmt::{Debug, Formatter}; use std::fmt::{Debug, Formatter};
use std::io::ErrorKind; use std::io::ErrorKind;
use std::ops::Bound::{Excluded, Unbounded};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Arc; use std::sync::Arc;
use std::{cmp, fs, io}; use std::{cmp, fs, io};
@ -248,11 +247,12 @@ impl ReadonlyRepo {
let all_visible_revisions = crate::revset::RevsetExpression::all() let all_visible_revisions = crate::revset::RevsetExpression::all()
.evaluate(self.as_repo_ref(), None) .evaluate(self.as_repo_ref(), None)
.unwrap(); .unwrap();
let mut id_index = IdIndex::new(); IdIndex::from_vec(
for entry in all_visible_revisions.iter() { all_visible_revisions
id_index.insert(entry.change_id().as_bytes(), ()); .iter()
} .map(|entry| (entry.change_id().to_bytes(), ()))
id_index .collect(),
)
}) })
} }
@ -1186,17 +1186,14 @@ mod dirty_cell {
// This value would be used to find divergent changes, for example, or if it is // This value would be used to find divergent changes, for example, or if it is
// necessary to mark whether an id is a Change or a Commit id. // necessary to mark whether an id is a Change or a Commit id.
type IdIndexValue = (); type IdIndexValue = ();
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone)]
pub struct IdIndex(BTreeMap<Vec<u8>, IdIndexValue>); pub struct IdIndex(Vec<(Vec<u8>, IdIndexValue)>);
impl IdIndex { impl IdIndex {
pub fn new() -> Self { /// Creates new index from the given keys. Keys may have duplicates.
Self::default() pub fn from_vec(mut vec: Vec<(Vec<u8>, IdIndexValue)>) -> Self {
} vec.sort_unstable_by(|(k0, _), (k1, _)| k0.cmp(k1));
IdIndex(vec)
/// Inserts a bytes key to the index.
pub fn insert(&mut self, key: &[u8], value: IdIndexValue) -> Option<IdIndexValue> {
self.0.insert(key.to_vec(), value)
} }
/// This function returns the shortest length of a prefix of `key` that /// This function returns the shortest length of a prefix of `key` that
@ -1215,11 +1212,9 @@ impl IdIndex {
/// additional fact that it's the entire key). This case is extremely /// additional fact that it's the entire key). This case is extremely
/// unlikely for hashes with 12+ hexadecimal characters. /// unlikely for hashes with 12+ hexadecimal characters.
pub fn shortest_unique_prefix_len(&self, key: &[u8]) -> usize { pub fn shortest_unique_prefix_len(&self, key: &[u8]) -> usize {
let left = self let pos = self.0.partition_point(|(k, _)| k.as_slice() < key);
.0 let left = pos.checked_sub(1).map(|p| &self.0[p]);
.range::<[u8], _>((Unbounded, Excluded(key))) let right = self.0[pos..].iter().find(|(k, _)| k.as_slice() != key);
.next_back();
let right = self.0.range::<[u8], _>((Excluded(key), Unbounded)).next();
itertools::chain(left, right) itertools::chain(left, right)
.map(|(neighbor, _value)| backend::common_hex_len(key, neighbor) + 1) .map(|(neighbor, _value)| backend::common_hex_len(key, neighbor) + 1)
.max() .max()
@ -1233,9 +1228,18 @@ mod tests {
#[test] #[test]
fn test_id_index() { fn test_id_index() {
let mut id_index = IdIndex::new(); // No crash if empty
id_index.insert(&hex::decode("ab").unwrap(), ()); let id_index = IdIndex::from_vec(vec![]);
id_index.insert(&hex::decode("acd0").unwrap(), ()); assert_eq!(
id_index.shortest_unique_prefix_len(&hex::decode("00").unwrap()),
0
);
let id_index = IdIndex::from_vec(vec![
(hex::decode("ab").unwrap(), ()),
(hex::decode("acd0").unwrap(), ()),
(hex::decode("acd0").unwrap(), ()), // duplicated key is allowed
]);
assert_eq!( assert_eq!(
id_index.shortest_unique_prefix_len(&hex::decode("acd0").unwrap()), id_index.shortest_unique_prefix_len(&hex::decode("acd0").unwrap()),
2 2
@ -1245,12 +1249,13 @@ mod tests {
3 3
); );
let mut id_index = IdIndex::new(); let id_index = IdIndex::from_vec(vec![
id_index.insert(&hex::decode("ab").unwrap(), ()); (hex::decode("ab").unwrap(), ()),
id_index.insert(&hex::decode("acd0").unwrap(), ()); (hex::decode("acd0").unwrap(), ()),
id_index.insert(&hex::decode("acf0").unwrap(), ()); (hex::decode("acf0").unwrap(), ()),
id_index.insert(&hex::decode("a0").unwrap(), ()); (hex::decode("a0").unwrap(), ()),
id_index.insert(&hex::decode("ba").unwrap(), ()); (hex::decode("ba").unwrap(), ()),
]);
assert_eq!( assert_eq!(
id_index.shortest_unique_prefix_len(&hex::decode("a0").unwrap()), id_index.shortest_unique_prefix_len(&hex::decode("a0").unwrap()),