diff: add short for Diff::for_tokenizer(_, find_line_ranges)

Line-by-line diff is common. Let's add a helper method for convenience.
This commit is contained in:
Yuya Nishihara 2024-07-08 19:59:42 +09:00
parent 44818e9cf8
commit 6a1d9262a0
5 changed files with 13 additions and 19 deletions

View file

@ -852,7 +852,7 @@ fn unified_diff_hunks<'content>(
right_line_range: 1..1, right_line_range: 1..1,
lines: vec![], lines: vec![],
}; };
let diff = Diff::for_tokenizer(&[left_content, right_content], diff::find_line_ranges); let diff = Diff::by_line(&[left_content, right_content]);
let mut diff_hunks = diff.hunks().peekable(); let mut diff_hunks = diff.hunks().peekable();
while let Some(hunk) = diff_hunks.next() { while let Some(hunk) = diff_hunks.next() {
match hunk { match hunk {
@ -1128,10 +1128,7 @@ fn get_diff_stat(
// TODO: this matches git's behavior, which is to count the number of newlines // TODO: this matches git's behavior, which is to count the number of newlines
// in the file. but that behavior seems unhelpful; no one really cares how // in the file. but that behavior seems unhelpful; no one really cares how
// many `0xa0` characters are in an image. // many `0xa0` characters are in an image.
let diff = Diff::for_tokenizer( let diff = Diff::by_line(&[&left_content.contents, &right_content.contents]);
&[&left_content.contents, &right_content.contents],
diff::find_line_ranges,
);
let mut added = 0; let mut added = 0;
let mut removed = 0; let mut removed = 0;
for hunk in diff.hunks() { for hunk in diff.hunks() {

View file

@ -6,7 +6,7 @@ use futures::{StreamExt, TryFutureExt, TryStreamExt};
use itertools::Itertools; use itertools::Itertools;
use jj_lib::backend::{BackendError, BackendResult, FileId, MergedTreeId, TreeValue}; use jj_lib::backend::{BackendError, BackendResult, FileId, MergedTreeId, TreeValue};
use jj_lib::conflicts::{materialize_tree_value, MaterializedTreeValue}; use jj_lib::conflicts::{materialize_tree_value, MaterializedTreeValue};
use jj_lib::diff::{find_line_ranges, Diff, DiffHunk}; use jj_lib::diff::{Diff, DiffHunk};
use jj_lib::files::{self, ContentHunk, MergeResult}; use jj_lib::files::{self, ContentHunk, MergeResult};
use jj_lib::matchers::Matcher; use jj_lib::matchers::Matcher;
use jj_lib::merge::Merge; use jj_lib::merge::Merge;
@ -225,10 +225,7 @@ fn make_diff_sections(
left_contents: &str, left_contents: &str,
right_contents: &str, right_contents: &str,
) -> Result<Vec<scm_record::Section<'static>>, BuiltinToolError> { ) -> Result<Vec<scm_record::Section<'static>>, BuiltinToolError> {
let diff = Diff::for_tokenizer( let diff = Diff::by_line(&[left_contents.as_bytes(), right_contents.as_bytes()]);
&[left_contents.as_bytes(), right_contents.as_bytes()],
find_line_ranges,
);
let mut sections = Vec::new(); let mut sections = Vec::new();
for hunk in diff.hunks() { for hunk in diff.hunks() {
match hunk { match hunk {

View file

@ -22,7 +22,7 @@ use itertools::Itertools;
use regex::bytes::Regex; use regex::bytes::Regex;
use crate::backend::{BackendError, BackendResult, CommitId, FileId, SymlinkId, TreeId, TreeValue}; use crate::backend::{BackendError, BackendResult, CommitId, FileId, SymlinkId, TreeId, TreeValue};
use crate::diff::{find_line_ranges, Diff, DiffHunk}; use crate::diff::{Diff, DiffHunk};
use crate::files; use crate::files;
use crate::files::{ContentHunk, MergeResult}; use crate::files::{ContentHunk, MergeResult};
use crate::merge::{Merge, MergeBuilder, MergedTreeValue}; use crate::merge::{Merge, MergeBuilder, MergedTreeValue};
@ -259,17 +259,12 @@ pub fn materialize_merge_result(
output.write_all(&left.0)?; output.write_all(&left.0)?;
continue; continue;
}; };
let diff1 = Diff::for_tokenizer(&[&left.0, &right1.0], find_line_ranges) let diff1 = Diff::by_line(&[&left.0, &right1.0]).hunks().collect_vec();
.hunks()
.collect_vec();
// Check if the diff against the next positive term is better. Since // Check if the diff against the next positive term is better. Since
// we want to preserve the order of the terms, we don't match against // we want to preserve the order of the terms, we don't match against
// any later positive terms. // any later positive terms.
if let Some(right2) = hunk.get_add(add_index + 1) { if let Some(right2) = hunk.get_add(add_index + 1) {
let diff2 = let diff2 = Diff::by_line(&[&left.0, &right2.0]).hunks().collect_vec();
Diff::for_tokenizer(&[&left.0, &right2.0], find_line_ranges)
.hunks()
.collect_vec();
if diff_size(&diff2) < diff_size(&diff1) { if diff_size(&diff2) < diff_size(&diff1) {
// If the next positive term is a better match, emit // If the next positive term is a better match, emit
// the current positive term as a snapshot and the next // the current positive term as a snapshot and the next

View file

@ -462,6 +462,11 @@ impl<'input> Diff<'input> {
Diff::for_tokenizer(inputs, |_| vec![]) Diff::for_tokenizer(inputs, |_| vec![])
} }
/// Compares `inputs` line by line.
pub fn by_line(inputs: &[&'input [u8]]) -> Self {
Diff::for_tokenizer(inputs, find_line_ranges)
}
// TODO: At least when merging, it's wasteful to refine the diff if e.g. if 2 // TODO: At least when merging, it's wasteful to refine the diff if e.g. if 2
// out of 3 inputs match in the differing regions. Perhaps the refine() // out of 3 inputs match in the differing regions. Perhaps the refine()
// method should be on the hunk instead (probably returning a new Diff)? // method should be on the hunk instead (probably returning a new Diff)?

View file

@ -165,7 +165,7 @@ pub fn merge(slices: &Merge<&[u8]>) -> MergeResult {
let num_diffs = slices.removes().len(); let num_diffs = slices.removes().len();
let diff_inputs = slices.removes().chain(slices.adds()).copied().collect_vec(); let diff_inputs = slices.removes().chain(slices.adds()).copied().collect_vec();
let diff = Diff::for_tokenizer(&diff_inputs, diff::find_line_ranges); let diff = Diff::by_line(&diff_inputs);
let mut resolved_hunk = ContentHunk(vec![]); let mut resolved_hunk = ContentHunk(vec![]);
let mut merge_hunks: Vec<Merge<ContentHunk>> = vec![]; let mut merge_hunks: Vec<Merge<ContentHunk>> = vec![];
for diff_hunk in diff.hunks() { for diff_hunk in diff.hunks() {