mirror of
https://github.com/martinvonz/jj.git
synced 2024-12-04 05:28:02 +00:00
index: merge overlapped generation ranges to be enqueued
Before, the number of generations to track would increase at each merge point. This was really bad for queries like ':@--' in merge-heavy history, but I didn't notice the problem because the ancestors query is lazy and the default log template is slow. Since I'm going to reuse RevWalk for 'roots++:' queries, which can't be lazy, I need to fix this problem first. As we don't have a revset expression to specify an exact generation range, gen.end is initialized to either 1 or close to u32::MAX. So, this change means long-lived generation ranges will eventually be merged into one.
This commit is contained in:
parent
9a6a7c50db
commit
38e7eff09f
2 changed files with 99 additions and 21 deletions
|
@ -13,7 +13,7 @@
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::any::Any;
|
use std::any::Any;
|
||||||
use std::cmp::{max, min, Ordering};
|
use std::cmp::{max, min, Ordering, Reverse};
|
||||||
use std::collections::{BTreeMap, BTreeSet, BinaryHeap, Bound, HashMap, HashSet};
|
use std::collections::{BTreeMap, BTreeSet, BinaryHeap, Bound, HashMap, HashSet};
|
||||||
use std::fmt::{Debug, Formatter};
|
use std::fmt::{Debug, Formatter};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
@ -1332,7 +1332,8 @@ impl<'a> Iterator for RevWalk<'a> {
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct RevWalkGenerationRange<'a> {
|
pub struct RevWalkGenerationRange<'a> {
|
||||||
queue: RevWalkQueue<'a, RevWalkItemGenerationRange>,
|
// Sort item generations in ascending order
|
||||||
|
queue: RevWalkQueue<'a, Reverse<RevWalkItemGenerationRange>>,
|
||||||
generation_end: u32,
|
generation_end: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1351,7 +1352,7 @@ impl<'a> RevWalkGenerationRange<'a> {
|
||||||
end: u32::saturating_sub(generation_range.end, generation_range.start),
|
end: u32::saturating_sub(generation_range.end, generation_range.start),
|
||||||
};
|
};
|
||||||
RevWalkGenerationRange {
|
RevWalkGenerationRange {
|
||||||
queue: queue.map_wanted(|()| item_range),
|
queue: queue.map_wanted(|()| Reverse(item_range)),
|
||||||
generation_end: generation_range.end,
|
generation_end: generation_range.end,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1365,7 +1366,7 @@ impl<'a> RevWalkGenerationRange<'a> {
|
||||||
start: gen.start + 1,
|
start: gen.start + 1,
|
||||||
end: gen.end.saturating_add(1),
|
end: gen.end.saturating_add(1),
|
||||||
};
|
};
|
||||||
self.queue.push_wanted_parents(entry, succ_gen);
|
self.queue.push_wanted_parents(entry, Reverse(succ_gen));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1374,25 +1375,26 @@ impl<'a> Iterator for RevWalkGenerationRange<'a> {
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
while let Some(item) = self.queue.pop() {
|
while let Some(item) = self.queue.pop() {
|
||||||
if let RevWalkWorkItemState::Wanted(mut known_gen) = item.state {
|
if let RevWalkWorkItemState::Wanted(Reverse(mut pending_gen)) = item.state {
|
||||||
let mut some_in_range = known_gen.contains_end(self.generation_end);
|
let mut some_in_range = pending_gen.contains_end(self.generation_end);
|
||||||
self.enqueue_wanted_parents(&item.entry.0, known_gen);
|
|
||||||
while let Some(x) = self.queue.pop_eq(&item.entry.0) {
|
while let Some(x) = self.queue.pop_eq(&item.entry.0) {
|
||||||
// For wanted item, simply track all generation chains. This can
|
// Merge overlapped ranges to reduce number of the queued items.
|
||||||
// be optimized if the wanted range is just upper/lower bounded.
|
// For queries like `:(heads-)`, `gen.end` is close to `u32::MAX`, so
|
||||||
// If the range is fully bounded and if the range is wide, we
|
// ranges can be merged into one. If this is still slow, maybe we can add
|
||||||
// can instead extend 'gen' to a range of the same width, and
|
// special case for upper/lower bounded ranges.
|
||||||
// merge overlapping generation ranges.
|
if let RevWalkWorkItemState::Wanted(Reverse(gen)) = x.state {
|
||||||
match x.state {
|
some_in_range |= gen.contains_end(self.generation_end);
|
||||||
RevWalkWorkItemState::Wanted(gen) if known_gen != gen => {
|
pending_gen = if let Some(merged) = pending_gen.try_merge_end(gen) {
|
||||||
some_in_range |= gen.contains_end(self.generation_end);
|
merged
|
||||||
self.enqueue_wanted_parents(&item.entry.0, gen);
|
} else {
|
||||||
known_gen = gen;
|
self.enqueue_wanted_parents(&item.entry.0, pending_gen);
|
||||||
}
|
gen
|
||||||
RevWalkWorkItemState::Wanted(_) => {}
|
};
|
||||||
RevWalkWorkItemState::Unwanted => unreachable!(),
|
} else {
|
||||||
|
unreachable!("no more unwanted items of the same entry");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
self.enqueue_wanted_parents(&item.entry.0, pending_gen);
|
||||||
if some_in_range {
|
if some_in_range {
|
||||||
return Some(item.entry.0);
|
return Some(item.entry.0);
|
||||||
}
|
}
|
||||||
|
@ -1421,6 +1423,15 @@ struct RevWalkItemGenerationRange {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RevWalkItemGenerationRange {
|
impl RevWalkItemGenerationRange {
|
||||||
|
/// Given sorted ranges `self, other`, merges them if they overlap.
|
||||||
|
#[must_use]
|
||||||
|
fn try_merge_end(self, other: Self) -> Option<Self> {
|
||||||
|
(other.start <= self.end).then(|| RevWalkItemGenerationRange {
|
||||||
|
start: self.start,
|
||||||
|
end: max(self.end, other.end),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[must_use]
|
#[must_use]
|
||||||
fn contains_end(self, end: u32) -> bool {
|
fn contains_end(self, end: u32) -> bool {
|
||||||
self.start < end && end <= self.end
|
self.start < end && end <= self.end
|
||||||
|
@ -2701,6 +2712,62 @@ mod tests {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[allow(clippy::redundant_clone)] // allow id_n.clone()
|
||||||
|
fn test_walk_revs_filter_by_generation_range_merging() {
|
||||||
|
let mut new_change_id = change_id_generator();
|
||||||
|
let mut index = MutableIndexImpl::full(3, 16);
|
||||||
|
// Long linear history with some short branches
|
||||||
|
let ids = (0..11)
|
||||||
|
.map(|n| CommitId::from_hex(&format!("{n:06x}")))
|
||||||
|
.collect_vec();
|
||||||
|
index.add_commit_data(ids[0].clone(), new_change_id(), &[]);
|
||||||
|
for i in 1..ids.len() {
|
||||||
|
index.add_commit_data(ids[i].clone(), new_change_id(), &[ids[i - 1].clone()]);
|
||||||
|
}
|
||||||
|
let id_branch5_0 = CommitId::from_hex("050000");
|
||||||
|
let id_branch5_1 = CommitId::from_hex("050001");
|
||||||
|
index.add_commit_data(id_branch5_0.clone(), new_change_id(), &[ids[5].clone()]);
|
||||||
|
index.add_commit_data(
|
||||||
|
id_branch5_1.clone(),
|
||||||
|
new_change_id(),
|
||||||
|
&[id_branch5_0.clone()],
|
||||||
|
);
|
||||||
|
|
||||||
|
let walk_commit_ids = |wanted: &[CommitId], range: Range<u32>| {
|
||||||
|
index
|
||||||
|
.walk_revs(wanted, &[])
|
||||||
|
.filter_by_generation(range)
|
||||||
|
.map(|entry| entry.commit_id())
|
||||||
|
.collect_vec()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Multiple non-overlapping generation ranges to track:
|
||||||
|
// 9->6: 3..5, 6: 0..2
|
||||||
|
assert_eq!(
|
||||||
|
walk_commit_ids(&[&ids[9], &ids[6]].map(Clone::clone), 4..6),
|
||||||
|
[&ids[5], &ids[4], &ids[2], &ids[1]].map(Clone::clone)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Multiple non-overlapping generation ranges to track, and merged later:
|
||||||
|
// 10->7: 3..5, 7: 0..2
|
||||||
|
// 10->6: 4..6, 7->6: 1..3, 6: 0..2
|
||||||
|
assert_eq!(
|
||||||
|
walk_commit_ids(&[&ids[10], &ids[7], &ids[6]].map(Clone::clone), 5..7),
|
||||||
|
[&ids[5], &ids[4], &ids[2], &ids[1], &ids[0]].map(Clone::clone)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Merge range with sub-range (1..4 + 2..3 should be 1..4, not 1..3):
|
||||||
|
// 8,7,6->5:1..4, B5_1->5:2..3
|
||||||
|
assert_eq!(
|
||||||
|
walk_commit_ids(
|
||||||
|
&[&ids[8], &ids[7], &ids[6], &id_branch5_1].map(Clone::clone),
|
||||||
|
5..6
|
||||||
|
),
|
||||||
|
[&ids[3], &ids[2], &ids[1]].map(Clone::clone)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_heads() {
|
fn test_heads() {
|
||||||
let mut new_change_id = change_id_generator();
|
let mut new_change_id = change_id_generator();
|
||||||
|
|
|
@ -38,8 +38,19 @@ heads(author(peff))
|
||||||
# Roots and heads of range
|
# Roots and heads of range
|
||||||
roots(:v2.40.0)
|
roots(:v2.40.0)
|
||||||
heads(:v2.40.0)
|
heads(:v2.40.0)
|
||||||
# Parents and children of small subset
|
# Parents and ancestors of old commit
|
||||||
|
v1.0.0-
|
||||||
|
v1.0.0---
|
||||||
|
:v1.0.0---
|
||||||
|
# Parents and ancestors of recent commit
|
||||||
|
v2.40.0-
|
||||||
|
v2.40.0---
|
||||||
|
:v2.40.0---
|
||||||
|
# Parents and ancestors of small subset
|
||||||
tags()-
|
tags()-
|
||||||
|
tags()---
|
||||||
|
:tags()---
|
||||||
|
# Children of small subset
|
||||||
tags()+
|
tags()+
|
||||||
# Filter that doesn't read commit object
|
# Filter that doesn't read commit object
|
||||||
merges()
|
merges()
|
||||||
|
|
Loading…
Reference in a new issue