diff: add short for Diff::for_tokenizer(_, find_line_ranges)

Line-by-line diff is common. Let's add a helper method for convenience.
2025-02-01 00:50:57 +00:00 · 2024-07-08 19:59:42 +09:00 · 2024-07-08 19:59:42 +09:00 · 6a1d9262a0
commit 6a1d9262a0
parent 44818e9cf8
5 changed files with 13 additions and 19 deletions
--- a/cli/src/diff_util.rs
+++ b/cli/src/diff_util.rs
@ -852,7 +852,7 @@ fn unified_diff_hunks<'content>(
        right_line_range: 1..1,
        lines: vec![],
    };
-    let diff = Diff::for_tokenizer(&[left_content, right_content], diff::find_line_ranges);
+    let diff = Diff::by_line(&[left_content, right_content]);
    let mut diff_hunks = diff.hunks().peekable();
    while let Some(hunk) = diff_hunks.next() {
        match hunk {
@ -1128,10 +1128,7 @@ fn get_diff_stat(
    // TODO: this matches git's behavior, which is to count the number of newlines
    // in the file. but that behavior seems unhelpful; no one really cares how
    // many `0xa0` characters are in an image.
-    let diff = Diff::for_tokenizer(
+    let diff = Diff::by_line(&[&left_content.contents, &right_content.contents]);
        &[&left_content.contents, &right_content.contents],
        diff::find_line_ranges,
    );
    let mut added = 0;
    let mut removed = 0;
    for hunk in diff.hunks() {
--- a/cli/src/merge_tools/builtin.rs
+++ b/cli/src/merge_tools/builtin.rs
@ -6,7 +6,7 @@ use futures::{StreamExt, TryFutureExt, TryStreamExt};
 use itertools::Itertools;
 use jj_lib::backend::{BackendError, BackendResult, FileId, MergedTreeId, TreeValue};
 use jj_lib::conflicts::{materialize_tree_value, MaterializedTreeValue};
-use jj_lib::diff::{find_line_ranges, Diff, DiffHunk};
+use jj_lib::diff::{Diff, DiffHunk};
 use jj_lib::files::{self, ContentHunk, MergeResult};
 use jj_lib::matchers::Matcher;
 use jj_lib::merge::Merge;
@ -225,10 +225,7 @@ fn make_diff_sections(
    left_contents: &str,
    right_contents: &str,
 ) -> Result<Vec<scm_record::Section<'static>>, BuiltinToolError> {
-    let diff = Diff::for_tokenizer(
+    let diff = Diff::by_line(&[left_contents.as_bytes(), right_contents.as_bytes()]);
        &[left_contents.as_bytes(), right_contents.as_bytes()],
        find_line_ranges,
    );
    let mut sections = Vec::new();
    for hunk in diff.hunks() {
        match hunk {
--- a/lib/src/conflicts.rs
+++ b/lib/src/conflicts.rs
@ -22,7 +22,7 @@ use itertools::Itertools;
 use regex::bytes::Regex;
 use crate::backend::{BackendError, BackendResult, CommitId, FileId, SymlinkId, TreeId, TreeValue};
-use crate::diff::{find_line_ranges, Diff, DiffHunk};
+use crate::diff::{Diff, DiffHunk};
 use crate::files;
 use crate::files::{ContentHunk, MergeResult};
 use crate::merge::{Merge, MergeBuilder, MergedTreeValue};
@ -259,17 +259,12 @@ pub fn materialize_merge_result(
                            output.write_all(&left.0)?;
                            continue;
                        };
-                        let diff1 = Diff::for_tokenizer(&[&left.0, &right1.0], find_line_ranges)
+                        let diff1 = Diff::by_line(&[&left.0, &right1.0]).hunks().collect_vec();
                            .hunks()
                            .collect_vec();
                        // Check if the diff against the next positive term is better. Since
                        // we want to preserve the order of the terms, we don't match against
                        // any later positive terms.
                        if let Some(right2) = hunk.get_add(add_index + 1) {
-                            let diff2 =
+                            let diff2 = Diff::by_line(&[&left.0, &right2.0]).hunks().collect_vec();
                                Diff::for_tokenizer(&[&left.0, &right2.0], find_line_ranges)
                                    .hunks()
                                    .collect_vec();
                            if diff_size(&diff2) < diff_size(&diff1) {
                                // If the next positive term is a better match, emit
                                // the current positive term as a snapshot and the next
--- a/lib/src/diff.rs
+++ b/lib/src/diff.rs
@ -462,6 +462,11 @@ impl<'input> Diff<'input> {
        Diff::for_tokenizer(inputs, |_| vec![])
    }
    /// Compares `inputs` line by line.
    pub fn by_line(inputs: &[&'input [u8]]) -> Self {
        Diff::for_tokenizer(inputs, find_line_ranges)
    }
    // TODO: At least when merging, it's wasteful to refine the diff if e.g. if 2
    // out of 3 inputs match in the differing regions. Perhaps the refine()
    // method should be on the hunk instead (probably returning a new Diff)?
--- a/lib/src/files.rs
+++ b/lib/src/files.rs
@ -165,7 +165,7 @@ pub fn merge(slices: &Merge<&[u8]>) -> MergeResult {
    let num_diffs = slices.removes().len();
    let diff_inputs = slices.removes().chain(slices.adds()).copied().collect_vec();
-    let diff = Diff::for_tokenizer(&diff_inputs, diff::find_line_ranges);
+    let diff = Diff::by_line(&diff_inputs);
    let mut resolved_hunk = ContentHunk(vec![]);
    let mut merge_hunks: Vec<Merge<ContentHunk>> = vec![];
    for diff_hunk in diff.hunks() {