diff --git a/lib/src/annotate.rs b/lib/src/annotate.rs index fb382d427..e5142d98c 100644 --- a/lib/src/annotate.rs +++ b/lib/src/annotate.rs @@ -22,6 +22,7 @@ use std::collections::hash_map; use std::collections::HashMap; use bstr::BString; +use itertools::Itertools as _; use pollster::FutureExt; use crate::backend::BackendError; @@ -61,8 +62,8 @@ type CommitSourceMap = HashMap; #[derive(Clone, Debug)] struct Source { /// Mapping of line numbers in the file at the current commit to the - /// original file. - line_map: HashMap, + /// original file, sorted by the line numbers at the current commit. + line_map: Vec<(usize, usize)>, /// File content at the current commit. text: BString, } @@ -72,7 +73,7 @@ impl Source { let tree = commit.tree()?; let text = get_file_contents(commit.store(), file_path, &tree)?; Ok(Source { - line_map: HashMap::new(), + line_map: Vec::new(), text: text.into(), }) } @@ -194,20 +195,30 @@ fn process_commit( // commit B. Then, we update local line_map to say that "Commit B line 6 // goes to line 7 of the original file". We repeat this for all lines in // common in the two commits. + let mut current_lines = current_source.line_map.iter().copied().peekable(); + let mut new_current_line_map = Vec::new(); + let mut new_parent_line_map = Vec::new(); copy_same_lines_with( ¤t_source.text, &parent_source.text, - |current_line_number, parent_line_number| { - let Some(original_line_number) = - current_source.line_map.remove(¤t_line_number) - else { - return; - }; - parent_source - .line_map - .insert(parent_line_number, original_line_number); + |current_start, parent_start, count| { + new_current_line_map + .extend(current_lines.peeking_take_while(|&(cur, _)| cur < current_start)); + while let Some((current, original)) = + current_lines.next_if(|&(cur, _)| cur < current_start + count) + { + let parent = parent_start + (current - current_start); + new_parent_line_map.push((parent, original)); + } }, ); + new_current_line_map.extend(current_lines); + current_source.line_map = new_current_line_map; + parent_source.line_map = if parent_source.line_map.is_empty() { + new_parent_line_map + } else { + itertools::merge(parent_source.line_map.iter().copied(), new_parent_line_map).collect() + }; if parent_source.line_map.is_empty() { commit_source_map.remove(parent_commit_id); } @@ -216,19 +227,19 @@ fn process_commit( // Once we've looked at all parents of a commit, any leftover lines must be // original to the current commit, so we save this information in // original_line_map. - for &original_line_number in current_source.line_map.values() { + for (_, original_line_number) in current_source.line_map { original_line_map[original_line_number] = Some(current_commit_id.clone()); } Ok(()) } -/// For two files, calls `copy(current, parent)` for each line in common -/// (e.g. line 8 maps to line 9) +/// For two files, calls `copy(current_start, parent_start, count)` for each +/// range of contiguous lines in common (e.g. line 8-10 maps to line 9-11.) fn copy_same_lines_with( current_contents: &[u8], parent_contents: &[u8], - mut copy: impl FnMut(usize, usize), + mut copy: impl FnMut(usize, usize, usize), ) { let diff = Diff::by_line([current_contents, parent_contents]); let mut current_line_counter: usize = 0; @@ -236,11 +247,10 @@ fn copy_same_lines_with( for hunk in diff.hunks() { match hunk.kind { DiffHunkKind::Matching => { - for _ in hunk.contents[0].split_inclusive(|b| *b == b'\n') { - copy(current_line_counter, parent_line_counter); - current_line_counter += 1; - parent_line_counter += 1; - } + let count = hunk.contents[0].split_inclusive(|b| *b == b'\n').count(); + copy(current_line_counter, parent_line_counter, count); + current_line_counter += count; + parent_line_counter += count; } DiffHunkKind::Different => { let current_output = hunk.contents[0]; diff --git a/lib/tests/test_annotate.rs b/lib/tests/test_annotate.rs index 7e8f63728..25cd3dad3 100644 --- a/lib/tests/test_annotate.rs +++ b/lib/tests/test_annotate.rs @@ -183,7 +183,59 @@ fn test_annotate_merge_split() { } #[test] -#[should_panic] // FIXME: shouldn't panic because of duplicated "1"s +fn test_annotate_merge_split_interleaved() { + let settings = testutils::user_settings(); + let test_repo = TestRepo::init(); + let repo = &test_repo.repo; + + let root_commit_id = repo.store().root_commit_id(); + let file_path = RepoPath::from_internal_string("file"); + + // 6 "1a 4 1b 6 2a 5 2b" + // |\ + // | 5 "1b 5 2b" + // | | + // 4 | "1a 4 2a" + // |/ + // 3 "1a 1b 2a 2b" + // |\ + // | 2 "2a 2b" + // | + // 1 "1a 1b" + let mut tx = repo.start_transaction(&settings); + let mut create_commit = create_commit_fn(tx.repo_mut(), &settings); + let content1 = "1a\n1b\n"; + let content2 = "2a\n2b\n"; + let content3 = "1a\n1b\n2a\n2b\n"; + let content4 = "1a\n4\n2a\n"; + let content5 = "1b\n5\n2b\n"; + let content6 = "1a\n4\n1b\n6\n2a\n5\n2b\n"; + let tree1 = create_tree(repo, &[(file_path, content1)]); + let tree2 = create_tree(repo, &[(file_path, content2)]); + let tree3 = create_tree(repo, &[(file_path, content3)]); + let tree4 = create_tree(repo, &[(file_path, content4)]); + let tree5 = create_tree(repo, &[(file_path, content5)]); + let tree6 = create_tree(repo, &[(file_path, content6)]); + let commit1 = create_commit("commit1", &[root_commit_id], tree1.id()); + let commit2 = create_commit("commit2", &[root_commit_id], tree2.id()); + let commit3 = create_commit("commit3", &[commit1.id(), commit2.id()], tree3.id()); + let commit4 = create_commit("commit4", &[commit3.id()], tree4.id()); + let commit5 = create_commit("commit5", &[commit3.id()], tree5.id()); + let commit6 = create_commit("commit6", &[commit4.id(), commit5.id()], tree6.id()); + drop(create_commit); + + insta::assert_snapshot!(annotate(tx.repo(), &commit6, file_path), @r#" + commit1: 1a + commit4: 4 + commit1: 1b + commit6: 6 + commit2: 2a + commit5: 5 + commit2: 2b + "#); +} + +#[test] fn test_annotate_merge_dup() { let settings = testutils::user_settings(); let test_repo = TestRepo::init(); @@ -215,6 +267,9 @@ fn test_annotate_merge_dup() { let commit4 = create_commit("commit4", &[commit2.id(), commit3.id()], tree4.id()); drop(create_commit); + // Both "1"s can be propagated to commit1 through commit2 and commit3. + // Alternatively, it's also good to interpret that one of the "1"s was + // produced at commit2, commit3, or commit4. insta::assert_snapshot!(annotate(tx.repo(), &commit4, file_path), @r#" commit2: 2 commit1: 1