annotate: use sorted Vec<(usize, usize)> to propagate lines to ancestors
Some checks are pending
binaries / Build binary artifacts (linux-aarch64-gnu, ubuntu-24.04, aarch64-unknown-linux-gnu) (push) Waiting to run
binaries / Build binary artifacts (linux-aarch64-musl, ubuntu-24.04, aarch64-unknown-linux-musl) (push) Waiting to run
binaries / Build binary artifacts (linux-x86_64-gnu, ubuntu-24.04, x86_64-unknown-linux-gnu) (push) Waiting to run
binaries / Build binary artifacts (linux-x86_64-musl, ubuntu-24.04, x86_64-unknown-linux-musl) (push) Waiting to run
binaries / Build binary artifacts (macos-aarch64, macos-14, aarch64-apple-darwin) (push) Waiting to run
binaries / Build binary artifacts (macos-x86_64, macos-13, x86_64-apple-darwin) (push) Waiting to run
binaries / Build binary artifacts (win-x86_64, windows-2022, x86_64-pc-windows-msvc) (push) Waiting to run
nix / flake check (macos-14) (push) Waiting to run
nix / flake check (ubuntu-latest) (push) Waiting to run
build / build (, macos-13) (push) Waiting to run
build / build (, macos-14) (push) Waiting to run
build / build (, ubuntu-latest) (push) Waiting to run
build / build (, windows-latest) (push) Waiting to run
build / build (--all-features, ubuntu-latest) (push) Waiting to run
build / Build jj-lib without Git support (push) Waiting to run
build / Check protos (push) Waiting to run
build / Check formatting (push) Waiting to run
build / Check that MkDocs can build the docs (push) Waiting to run
build / Check that MkDocs can build the docs with Poetry 1.8 (push) Waiting to run
build / cargo-deny (advisories) (push) Waiting to run
build / cargo-deny (bans licenses sources) (push) Waiting to run
build / Clippy check (push) Waiting to run
Codespell / Codespell (push) Waiting to run
website / prerelease-docs-build-deploy (ubuntu-latest) (push) Waiting to run
Scorecards supply-chain security / Scorecards analysis (push) Waiting to run

This isn't so complicated compared to the HashMap version, and we can handle
multiple (cur, orig1), (cur, orig2) pairs. It's also cheaper to access.
This commit is contained in:
Yuya Nishihara 2024-10-27 15:39:30 +09:00
parent 1fd628a0cf
commit bd1024547d
2 changed files with 87 additions and 22 deletions

View file

@ -22,6 +22,7 @@ use std::collections::hash_map;
use std::collections::HashMap;
use bstr::BString;
use itertools::Itertools as _;
use pollster::FutureExt;
use crate::backend::BackendError;
@ -61,8 +62,8 @@ type CommitSourceMap = HashMap<CommitId, Source>;
#[derive(Clone, Debug)]
struct Source {
/// Mapping of line numbers in the file at the current commit to the
/// original file.
line_map: HashMap<usize, usize>,
/// original file, sorted by the line numbers at the current commit.
line_map: Vec<(usize, usize)>,
/// File content at the current commit.
text: BString,
}
@ -72,7 +73,7 @@ impl Source {
let tree = commit.tree()?;
let text = get_file_contents(commit.store(), file_path, &tree)?;
Ok(Source {
line_map: HashMap::new(),
line_map: Vec::new(),
text: text.into(),
})
}
@ -194,20 +195,30 @@ fn process_commit(
// commit B. Then, we update local line_map to say that "Commit B line 6
// goes to line 7 of the original file". We repeat this for all lines in
// common in the two commits.
let mut current_lines = current_source.line_map.iter().copied().peekable();
let mut new_current_line_map = Vec::new();
let mut new_parent_line_map = Vec::new();
copy_same_lines_with(
&current_source.text,
&parent_source.text,
|current_line_number, parent_line_number| {
let Some(original_line_number) =
current_source.line_map.remove(&current_line_number)
else {
return;
};
parent_source
.line_map
.insert(parent_line_number, original_line_number);
|current_start, parent_start, count| {
new_current_line_map
.extend(current_lines.peeking_take_while(|&(cur, _)| cur < current_start));
while let Some((current, original)) =
current_lines.next_if(|&(cur, _)| cur < current_start + count)
{
let parent = parent_start + (current - current_start);
new_parent_line_map.push((parent, original));
}
},
);
new_current_line_map.extend(current_lines);
current_source.line_map = new_current_line_map;
parent_source.line_map = if parent_source.line_map.is_empty() {
new_parent_line_map
} else {
itertools::merge(parent_source.line_map.iter().copied(), new_parent_line_map).collect()
};
if parent_source.line_map.is_empty() {
commit_source_map.remove(parent_commit_id);
}
@ -216,19 +227,19 @@ fn process_commit(
// Once we've looked at all parents of a commit, any leftover lines must be
// original to the current commit, so we save this information in
// original_line_map.
for &original_line_number in current_source.line_map.values() {
for (_, original_line_number) in current_source.line_map {
original_line_map[original_line_number] = Some(current_commit_id.clone());
}
Ok(())
}
/// For two files, calls `copy(current, parent)` for each line in common
/// (e.g. line 8 maps to line 9)
/// For two files, calls `copy(current_start, parent_start, count)` for each
/// range of contiguous lines in common (e.g. line 8-10 maps to line 9-11.)
fn copy_same_lines_with(
current_contents: &[u8],
parent_contents: &[u8],
mut copy: impl FnMut(usize, usize),
mut copy: impl FnMut(usize, usize, usize),
) {
let diff = Diff::by_line([current_contents, parent_contents]);
let mut current_line_counter: usize = 0;
@ -236,11 +247,10 @@ fn copy_same_lines_with(
for hunk in diff.hunks() {
match hunk.kind {
DiffHunkKind::Matching => {
for _ in hunk.contents[0].split_inclusive(|b| *b == b'\n') {
copy(current_line_counter, parent_line_counter);
current_line_counter += 1;
parent_line_counter += 1;
}
let count = hunk.contents[0].split_inclusive(|b| *b == b'\n').count();
copy(current_line_counter, parent_line_counter, count);
current_line_counter += count;
parent_line_counter += count;
}
DiffHunkKind::Different => {
let current_output = hunk.contents[0];

View file

@ -183,7 +183,59 @@ fn test_annotate_merge_split() {
}
#[test]
#[should_panic] // FIXME: shouldn't panic because of duplicated "1"s
fn test_annotate_merge_split_interleaved() {
let settings = testutils::user_settings();
let test_repo = TestRepo::init();
let repo = &test_repo.repo;
let root_commit_id = repo.store().root_commit_id();
let file_path = RepoPath::from_internal_string("file");
// 6 "1a 4 1b 6 2a 5 2b"
// |\
// | 5 "1b 5 2b"
// | |
// 4 | "1a 4 2a"
// |/
// 3 "1a 1b 2a 2b"
// |\
// | 2 "2a 2b"
// |
// 1 "1a 1b"
let mut tx = repo.start_transaction(&settings);
let mut create_commit = create_commit_fn(tx.repo_mut(), &settings);
let content1 = "1a\n1b\n";
let content2 = "2a\n2b\n";
let content3 = "1a\n1b\n2a\n2b\n";
let content4 = "1a\n4\n2a\n";
let content5 = "1b\n5\n2b\n";
let content6 = "1a\n4\n1b\n6\n2a\n5\n2b\n";
let tree1 = create_tree(repo, &[(file_path, content1)]);
let tree2 = create_tree(repo, &[(file_path, content2)]);
let tree3 = create_tree(repo, &[(file_path, content3)]);
let tree4 = create_tree(repo, &[(file_path, content4)]);
let tree5 = create_tree(repo, &[(file_path, content5)]);
let tree6 = create_tree(repo, &[(file_path, content6)]);
let commit1 = create_commit("commit1", &[root_commit_id], tree1.id());
let commit2 = create_commit("commit2", &[root_commit_id], tree2.id());
let commit3 = create_commit("commit3", &[commit1.id(), commit2.id()], tree3.id());
let commit4 = create_commit("commit4", &[commit3.id()], tree4.id());
let commit5 = create_commit("commit5", &[commit3.id()], tree5.id());
let commit6 = create_commit("commit6", &[commit4.id(), commit5.id()], tree6.id());
drop(create_commit);
insta::assert_snapshot!(annotate(tx.repo(), &commit6, file_path), @r#"
commit1: 1a
commit4: 4
commit1: 1b
commit6: 6
commit2: 2a
commit5: 5
commit2: 2b
"#);
}
#[test]
fn test_annotate_merge_dup() {
let settings = testutils::user_settings();
let test_repo = TestRepo::init();
@ -215,6 +267,9 @@ fn test_annotate_merge_dup() {
let commit4 = create_commit("commit4", &[commit2.id(), commit3.id()], tree4.id());
drop(create_commit);
// Both "1"s can be propagated to commit1 through commit2 and commit3.
// Alternatively, it's also good to interpret that one of the "1"s was
// produced at commit2, commit3, or commit4.
insta::assert_snapshot!(annotate(tx.repo(), &commit4, file_path), @r#"
commit2: 2
commit1: 1