mirror of
https://github.com/martinvonz/jj.git
synced 2025-01-20 03:20:08 +00:00
diff: match up leading/trailing ranges if no match found by uncommon LCS
This is adapted from Breezy/Python patiencediff. AFAICT, Git's implementation is
slightly different (and maybe more efficient?), but it's not super easy to
integrate with our diff logic. I'm not sure which one is better overall, but I
think the result is good so long as "uncommon LCS" matching is attempted first.
a9a3e4edc3/patiencediff/_patiencediff_py.py (L108)
This patch prevents some weird test changes that would otherwise be introduced
by the next patch.
This commit is contained in:
parent
ba087f9350
commit
831bbc0b11
1 changed files with 105 additions and 1 deletions
106
lib/src/diff.rs
106
lib/src/diff.rs
|
@ -18,7 +18,7 @@ use std::cmp::{max, min, Ordering};
|
|||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::ops::Range;
|
||||
use std::slice;
|
||||
use std::{iter, slice};
|
||||
|
||||
use itertools::Itertools;
|
||||
|
||||
|
@ -168,6 +168,56 @@ pub(crate) fn unchanged_ranges(
|
|||
return vec![];
|
||||
}
|
||||
|
||||
// Prioritize the LCS-based algorithm over leading/trailing matches
|
||||
let result = unchanged_ranges_lcs(left, right, left_ranges, right_ranges);
|
||||
if !result.is_empty() {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Trim leading common ranges (i.e. grow previous unchanged region)
|
||||
let common_leading_len = iter::zip(left_ranges, right_ranges)
|
||||
.take_while(|&(l, r)| left[l.clone()] == right[r.clone()])
|
||||
.count();
|
||||
if common_leading_len > 0 {
|
||||
let (left_leading_ranges, left_ranges) = left_ranges.split_at(common_leading_len);
|
||||
let (right_leading_ranges, right_ranges) = right_ranges.split_at(common_leading_len);
|
||||
let mut result = unchanged_ranges(left, right, left_ranges, right_ranges);
|
||||
result.splice(
|
||||
0..0,
|
||||
iter::zip(
|
||||
left_leading_ranges.iter().cloned(),
|
||||
right_leading_ranges.iter().cloned(),
|
||||
),
|
||||
);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Trim trailing common ranges (i.e. grow next unchanged region)
|
||||
let common_trailing_len = iter::zip(left_ranges.iter().rev(), right_ranges.iter().rev())
|
||||
.take_while(|&(l, r)| left[l.clone()] == right[r.clone()])
|
||||
.count();
|
||||
if common_trailing_len > 0 {
|
||||
let (left_ranges, left_trailing_ranges) =
|
||||
left_ranges.split_at(left_ranges.len() - common_trailing_len);
|
||||
let (right_ranges, right_trailing_ranges) =
|
||||
right_ranges.split_at(right_ranges.len() - common_trailing_len);
|
||||
let mut result = unchanged_ranges(left, right, left_ranges, right_ranges);
|
||||
result.extend(iter::zip(
|
||||
left_trailing_ranges.iter().cloned(),
|
||||
right_trailing_ranges.iter().cloned(),
|
||||
));
|
||||
return result;
|
||||
}
|
||||
|
||||
vec![]
|
||||
}
|
||||
|
||||
fn unchanged_ranges_lcs(
|
||||
left: &[u8],
|
||||
right: &[u8],
|
||||
left_ranges: &[Range<usize>],
|
||||
right_ranges: &[Range<usize>],
|
||||
) -> Vec<(Range<usize>, Range<usize>)> {
|
||||
let max_occurrences = 100;
|
||||
let left_histogram = Histogram::calculate(left, left_ranges, max_occurrences);
|
||||
if *left_histogram.count_to_words.keys().next().unwrap() > max_occurrences {
|
||||
|
@ -759,6 +809,33 @@ mod tests {
|
|||
),
|
||||
vec![(0..1, 0..1), (2..3, 4..5)]
|
||||
);
|
||||
assert_eq!(
|
||||
unchanged_ranges(
|
||||
b"a a a a",
|
||||
b"b a c a",
|
||||
&[0..1, 2..3, 4..5, 6..7],
|
||||
&[0..1, 2..3, 4..5, 6..7],
|
||||
),
|
||||
vec![(0..1, 2..3), (2..3, 6..7)]
|
||||
);
|
||||
assert_eq!(
|
||||
unchanged_ranges(
|
||||
b"a a a a",
|
||||
b"b a a c",
|
||||
&[0..1, 2..3, 4..5, 6..7],
|
||||
&[0..1, 2..3, 4..5, 6..7],
|
||||
),
|
||||
vec![(0..1, 2..3), (2..3, 4..5)]
|
||||
);
|
||||
assert_eq!(
|
||||
unchanged_ranges(
|
||||
b"a a a a",
|
||||
b"a b c a",
|
||||
&[0..1, 2..3, 4..5, 6..7],
|
||||
&[0..1, 2..3, 4..5, 6..7],
|
||||
),
|
||||
vec![(0..1, 0..1), (2..3, 6..7)]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -772,6 +849,33 @@ mod tests {
|
|||
),
|
||||
vec![(0..1, 0..1), (4..5, 2..3)]
|
||||
);
|
||||
assert_eq!(
|
||||
unchanged_ranges(
|
||||
b"b a c a",
|
||||
b"a a a a",
|
||||
&[0..1, 2..3, 4..5, 6..7],
|
||||
&[0..1, 2..3, 4..5, 6..7],
|
||||
),
|
||||
vec![(2..3, 0..1), (6..7, 2..3)]
|
||||
);
|
||||
assert_eq!(
|
||||
unchanged_ranges(
|
||||
b"b a a c",
|
||||
b"a a a a",
|
||||
&[0..1, 2..3, 4..5, 6..7],
|
||||
&[0..1, 2..3, 4..5, 6..7],
|
||||
),
|
||||
vec![(2..3, 0..1), (4..5, 2..3)]
|
||||
);
|
||||
assert_eq!(
|
||||
unchanged_ranges(
|
||||
b"a b c a",
|
||||
b"a a a a",
|
||||
&[0..1, 2..3, 4..5, 6..7],
|
||||
&[0..1, 2..3, 4..5, 6..7],
|
||||
),
|
||||
vec![(0..1, 0..1), (6..7, 2..3)]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Reference in a new issue