mirror of
https://github.com/martinvonz/jj.git
synced 2025-02-08 05:30:25 +00:00
diff: reuse result vec when recurse into unchanged_ranges()
It's silly that we build new Vec for each recursion stack and merge elements back. I don't see a measurable performance difference in the diff bench, but this change will help simplify the next patch. If a result vec were created for each unchanged_ranges() invocation, it would probably make more sense to return a list of "local" word positions. Then, callers would have to translate the returned positions to the caller's local positions.
This commit is contained in:
parent
f6277bbdb8
commit
483db9d7d2
1 changed files with 36 additions and 26 deletions
|
@ -190,15 +190,20 @@ fn find_lcs(input: &[usize]) -> Vec<(usize, usize)> {
|
||||||
|
|
||||||
/// Finds unchanged ranges among the ones given as arguments. The data between
|
/// Finds unchanged ranges among the ones given as arguments. The data between
|
||||||
/// those ranges is ignored.
|
/// those ranges is ignored.
|
||||||
fn unchanged_ranges(left: &DiffSource, right: &DiffSource) -> Vec<(Range<usize>, Range<usize>)> {
|
fn collect_unchanged_ranges(
|
||||||
|
found_ranges: &mut Vec<(Range<usize>, Range<usize>)>,
|
||||||
|
left: &DiffSource,
|
||||||
|
right: &DiffSource,
|
||||||
|
) {
|
||||||
if left.ranges.is_empty() || right.ranges.is_empty() {
|
if left.ranges.is_empty() || right.ranges.is_empty() {
|
||||||
return vec![];
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prioritize LCS-based algorithm than leading/trailing matches
|
// Prioritize LCS-based algorithm than leading/trailing matches
|
||||||
let result = unchanged_ranges_lcs(left, right);
|
let old_len = found_ranges.len();
|
||||||
if !result.is_empty() {
|
collect_unchanged_ranges_lcs(found_ranges, left, right);
|
||||||
return result;
|
if found_ranges.len() != old_len {
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Trim leading common ranges (i.e. grow previous unchanged region)
|
// Trim leading common ranges (i.e. grow previous unchanged region)
|
||||||
|
@ -215,7 +220,7 @@ fn unchanged_ranges(left: &DiffSource, right: &DiffSource) -> Vec<(Range<usize>,
|
||||||
let left_trailing_ranges = &left_ranges[(left_ranges.len() - common_trailing_len)..];
|
let left_trailing_ranges = &left_ranges[(left_ranges.len() - common_trailing_len)..];
|
||||||
let right_trailing_ranges = &right_ranges[(right_ranges.len() - common_trailing_len)..];
|
let right_trailing_ranges = &right_ranges[(right_ranges.len() - common_trailing_len)..];
|
||||||
|
|
||||||
itertools::chain(
|
found_ranges.extend(itertools::chain(
|
||||||
iter::zip(
|
iter::zip(
|
||||||
left_leading_ranges.iter().cloned(),
|
left_leading_ranges.iter().cloned(),
|
||||||
right_leading_ranges.iter().cloned(),
|
right_leading_ranges.iter().cloned(),
|
||||||
|
@ -224,20 +229,20 @@ fn unchanged_ranges(left: &DiffSource, right: &DiffSource) -> Vec<(Range<usize>,
|
||||||
left_trailing_ranges.iter().cloned(),
|
left_trailing_ranges.iter().cloned(),
|
||||||
right_trailing_ranges.iter().cloned(),
|
right_trailing_ranges.iter().cloned(),
|
||||||
),
|
),
|
||||||
)
|
));
|
||||||
.collect()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn unchanged_ranges_lcs(
|
fn collect_unchanged_ranges_lcs(
|
||||||
|
found_ranges: &mut Vec<(Range<usize>, Range<usize>)>,
|
||||||
left: &DiffSource,
|
left: &DiffSource,
|
||||||
right: &DiffSource,
|
right: &DiffSource,
|
||||||
) -> Vec<(Range<usize>, Range<usize>)> {
|
) {
|
||||||
let max_occurrences = 100;
|
let max_occurrences = 100;
|
||||||
let left_histogram = Histogram::calculate(left, max_occurrences);
|
let left_histogram = Histogram::calculate(left, max_occurrences);
|
||||||
let left_count_to_entries = left_histogram.build_count_to_entries();
|
let left_count_to_entries = left_histogram.build_count_to_entries();
|
||||||
if *left_count_to_entries.keys().next().unwrap() > max_occurrences {
|
if *left_count_to_entries.keys().next().unwrap() > max_occurrences {
|
||||||
// If there are very many occurrences of all words, then we just give up.
|
// If there are very many occurrences of all words, then we just give up.
|
||||||
return vec![];
|
return;
|
||||||
}
|
}
|
||||||
let right_histogram = Histogram::calculate(right, max_occurrences);
|
let right_histogram = Histogram::calculate(right, max_occurrences);
|
||||||
// Look for words with few occurrences in `left` (could equally well have picked
|
// Look for words with few occurrences in `left` (could equally well have picked
|
||||||
|
@ -256,7 +261,7 @@ fn unchanged_ranges_lcs(
|
||||||
both_positions.peek().is_some().then_some(both_positions)
|
both_positions.peek().is_some().then_some(both_positions)
|
||||||
})
|
})
|
||||||
else {
|
else {
|
||||||
return vec![];
|
return;
|
||||||
};
|
};
|
||||||
|
|
||||||
// [(index into ranges, serial to identify {word, occurrence #})]
|
// [(index into ranges, serial to identify {word, occurrence #})]
|
||||||
|
@ -283,31 +288,26 @@ fn unchanged_ranges_lcs(
|
||||||
|
|
||||||
// Produce output ranges, recursing into the modified areas between the elements
|
// Produce output ranges, recursing into the modified areas between the elements
|
||||||
// in the LCS.
|
// in the LCS.
|
||||||
let mut result = vec![];
|
|
||||||
let mut previous_left_position = WordPosition(0);
|
let mut previous_left_position = WordPosition(0);
|
||||||
let mut previous_right_position = WordPosition(0);
|
let mut previous_right_position = WordPosition(0);
|
||||||
for (left_index, right_index) in lcs {
|
for (left_index, right_index) in lcs {
|
||||||
let (left_position, _) = left_positions[left_index];
|
let (left_position, _) = left_positions[left_index];
|
||||||
let (right_position, _) = right_positions[right_index];
|
let (right_position, _) = right_positions[right_index];
|
||||||
for unchanged_nested_range in unchanged_ranges(
|
collect_unchanged_ranges(
|
||||||
|
found_ranges,
|
||||||
&left.narrowed(previous_left_position..left_position),
|
&left.narrowed(previous_left_position..left_position),
|
||||||
&right.narrowed(previous_right_position..right_position),
|
&right.narrowed(previous_right_position..right_position),
|
||||||
) {
|
);
|
||||||
result.push(unchanged_nested_range);
|
found_ranges.push((left.range_at(left_position), right.range_at(right_position)));
|
||||||
}
|
|
||||||
result.push((left.range_at(left_position), right.range_at(right_position)));
|
|
||||||
previous_left_position = WordPosition(left_position.0 + 1);
|
previous_left_position = WordPosition(left_position.0 + 1);
|
||||||
previous_right_position = WordPosition(right_position.0 + 1);
|
previous_right_position = WordPosition(right_position.0 + 1);
|
||||||
}
|
}
|
||||||
// Also recurse into range at end (after common ranges).
|
// Also recurse into range at end (after common ranges).
|
||||||
for unchanged_nested_range in unchanged_ranges(
|
collect_unchanged_ranges(
|
||||||
|
found_ranges,
|
||||||
&left.narrowed(previous_left_position..WordPosition(left.ranges.len())),
|
&left.narrowed(previous_left_position..WordPosition(left.ranges.len())),
|
||||||
&right.narrowed(previous_right_position..WordPosition(right.ranges.len())),
|
&right.narrowed(previous_right_position..WordPosition(right.ranges.len())),
|
||||||
) {
|
);
|
||||||
result.push(unchanged_nested_range);
|
|
||||||
}
|
|
||||||
|
|
||||||
result
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, PartialEq, Eq, Debug)]
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||||
|
@ -451,8 +451,9 @@ impl<'input> Diff<'input> {
|
||||||
}];
|
}];
|
||||||
for (other_input, other_token_ranges) in iter::zip(&other_inputs, other_token_ranges) {
|
for (other_input, other_token_ranges) in iter::zip(&other_inputs, other_token_ranges) {
|
||||||
let other_source = DiffSource::new(other_input, other_token_ranges);
|
let other_source = DiffSource::new(other_input, other_token_ranges);
|
||||||
let unchanged_diff_ranges = unchanged_ranges(&base_source, &other_source);
|
let mut new_ranges = Vec::new();
|
||||||
unchanged_regions = intersect_regions(unchanged_regions, &unchanged_diff_ranges);
|
collect_unchanged_ranges(&mut new_ranges, &base_source, &other_source);
|
||||||
|
unchanged_regions = intersect_regions(unchanged_regions, &new_ranges);
|
||||||
}
|
}
|
||||||
// Add an empty range at the end to make life easier for hunks().
|
// Add an empty range at the end to make life easier for hunks().
|
||||||
let offsets = other_inputs
|
let offsets = other_inputs
|
||||||
|
@ -777,6 +778,15 @@ mod tests {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn unchanged_ranges(
|
||||||
|
left: &DiffSource,
|
||||||
|
right: &DiffSource,
|
||||||
|
) -> Vec<(Range<usize>, Range<usize>)> {
|
||||||
|
let mut found_ranges = Vec::new();
|
||||||
|
collect_unchanged_ranges(&mut found_ranges, left, right);
|
||||||
|
found_ranges
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_unchanged_ranges_insert_in_middle() {
|
fn test_unchanged_ranges_insert_in_middle() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|
Loading…
Reference in a new issue