merged_tree: add Stream-based version of diff(), delegating for now

I'm going to implement a `Stream`-based version optimized for
high-latency (RPC-based) commit backends. So far, that implementation
is about 20% slower in the Linux repo when running `jj diff
--ignore-working-copy -s --from v5.0 --to v6.0`. I think that's almost
only because the algorithm is different, not because it's async per
se.

This commit adds a `Stream`-based version of `MergedTree::diff()` that
just wraps the regular iterator in stream. I updated `jj diff` to use
it. I couldn't measure any difference on the command above in the
Linux repo. I think that means we can safely use the same
`Stream`-based interface regardless of backend, even if we end up
needing two different implementations of the `Stream`. We would then
be using the wrapped iterator from this commit for local backends, and
the new implementation for remote backends. But ideally we can make
the remote-friendly implementation fast enough that we don't need two
implementations.
This commit is contained in:
Martin von Zweigbergk 2023-11-01 10:42:52 -07:00 committed by Martin von Zweigbergk
parent 24b706641f
commit 72245cfac5
3 changed files with 211 additions and 168 deletions

View file

@ -61,7 +61,7 @@ pub(crate) fn cmd_status(
diff_util::show_diff_summary(
formatter,
&workspace_command,
parent_tree.diff(&tree, &EverythingMatcher),
parent_tree.diff_stream(&tree, &EverythingMatcher),
)?;
}

View file

@ -18,6 +18,7 @@ use std::io;
use std::ops::Range;
use std::sync::Arc;
use futures::StreamExt;
use itertools::Itertools;
use jj_lib::backend::{ObjectId, TreeValue};
use jj_lib::commit::Commit;
@ -25,7 +26,7 @@ use jj_lib::diff::{Diff, DiffHunk};
use jj_lib::files::DiffLine;
use jj_lib::matchers::Matcher;
use jj_lib::merge::{Merge, MergedTreeValue};
use jj_lib::merged_tree::{MergedTree, TreeDiffIterator};
use jj_lib::merged_tree::{MergedTree, TreeDiffStream};
use jj_lib::repo::{ReadonlyRepo, Repo};
use jj_lib::repo_path::RepoPath;
use jj_lib::settings::{ConfigResultExt as _, UserSettings};
@ -180,23 +181,23 @@ pub fn show_diff(
for format in formats {
match format {
DiffFormat::Summary => {
let tree_diff = from_tree.diff(to_tree, matcher);
let tree_diff = from_tree.diff_stream(to_tree, matcher);
show_diff_summary(formatter, workspace_command, tree_diff)?;
}
DiffFormat::Stat => {
let tree_diff = from_tree.diff(to_tree, matcher);
let tree_diff = from_tree.diff_stream(to_tree, matcher);
show_diff_stat(ui, formatter, workspace_command, tree_diff)?;
}
DiffFormat::Types => {
let tree_diff = from_tree.diff(to_tree, matcher);
let tree_diff = from_tree.diff_stream(to_tree, matcher);
show_types(formatter, workspace_command, tree_diff)?;
}
DiffFormat::Git => {
let tree_diff = from_tree.diff(to_tree, matcher);
let tree_diff = from_tree.diff_stream(to_tree, matcher);
show_git_diff(formatter, workspace_command, tree_diff)?;
}
DiffFormat::ColorWords => {
let tree_diff = from_tree.diff(to_tree, matcher);
let tree_diff = from_tree.diff_stream(to_tree, matcher);
show_color_words_diff(formatter, workspace_command, tree_diff)?;
}
DiffFormat::Tool(tool) => {
@ -400,83 +401,87 @@ fn basic_diff_file_type(values: &MergedTreeValue) -> String {
pub fn show_color_words_diff(
formatter: &mut dyn Formatter,
workspace_command: &WorkspaceCommandHelper,
tree_diff: TreeDiffIterator,
mut tree_diff: TreeDiffStream,
) -> Result<(), CommandError> {
let repo = workspace_command.repo();
formatter.push_label("diff")?;
for (path, diff) in tree_diff {
let ui_path = workspace_command.format_file_path(&path);
let (left_value, right_value) = diff?;
if left_value.is_absent() {
let right_content = diff_content(repo, &path, &right_value)?;
let description = basic_diff_file_type(&right_value);
writeln!(
formatter.labeled("header"),
"Added {description} {ui_path}:"
)?;
if right_content.is_empty() {
writeln!(formatter.labeled("empty"), " (empty)")?;
} else {
show_color_words_diff_hunks(&[], &right_content, formatter)?;
}
} else if right_value.is_present() {
let left_content = diff_content(repo, &path, &left_value)?;
let right_content = diff_content(repo, &path, &right_value)?;
let description = match (left_value.into_resolved(), right_value.into_resolved()) {
(
Ok(Some(TreeValue::File {
executable: left_executable,
..
})),
Ok(Some(TreeValue::File {
executable: right_executable,
..
})),
) => {
if left_executable && right_executable {
"Modified executable file".to_string()
} else if left_executable {
"Executable file became non-executable at".to_string()
} else if right_executable {
"Non-executable file became executable at".to_string()
} else {
"Modified regular file".to_string()
async {
while let Some((path, diff)) = tree_diff.next().await {
let ui_path = workspace_command.format_file_path(&path);
let (left_value, right_value) = diff?;
if left_value.is_absent() {
let right_content = diff_content(repo, &path, &right_value)?;
let description = basic_diff_file_type(&right_value);
writeln!(
formatter.labeled("header"),
"Added {description} {ui_path}:"
)?;
if right_content.is_empty() {
writeln!(formatter.labeled("empty"), " (empty)")?;
} else {
show_color_words_diff_hunks(&[], &right_content, formatter)?;
}
} else if right_value.is_present() {
let left_content = diff_content(repo, &path, &left_value)?;
let right_content = diff_content(repo, &path, &right_value)?;
let description = match (left_value.into_resolved(), right_value.into_resolved()) {
(
Ok(Some(TreeValue::File {
executable: left_executable,
..
})),
Ok(Some(TreeValue::File {
executable: right_executable,
..
})),
) => {
if left_executable && right_executable {
"Modified executable file".to_string()
} else if left_executable {
"Executable file became non-executable at".to_string()
} else if right_executable {
"Non-executable file became executable at".to_string()
} else {
"Modified regular file".to_string()
}
}
}
(Err(_), Err(_)) => "Modified conflict in".to_string(),
(Err(_), _) => "Resolved conflict in".to_string(),
(_, Err(_)) => "Created conflict in".to_string(),
(Ok(Some(TreeValue::Symlink(_))), Ok(Some(TreeValue::Symlink(_)))) => {
"Symlink target changed at".to_string()
}
(Ok(left_value), Ok(right_value)) => {
let left_type = basic_diff_file_type(&Merge::resolved(left_value));
let right_type = basic_diff_file_type(&Merge::resolved(right_value));
let (first, rest) = left_type.split_at(1);
format!(
"{}{} became {} at",
first.to_ascii_uppercase(),
rest,
right_type
)
}
};
writeln!(formatter.labeled("header"), "{description} {ui_path}:")?;
show_color_words_diff_hunks(&left_content, &right_content, formatter)?;
} else {
let left_content = diff_content(repo, &path, &left_value)?;
let description = basic_diff_file_type(&left_value);
writeln!(
formatter.labeled("header"),
"Removed {description} {ui_path}:"
)?;
if left_content.is_empty() {
writeln!(formatter.labeled("empty"), " (empty)")?;
(Err(_), Err(_)) => "Modified conflict in".to_string(),
(Err(_), _) => "Resolved conflict in".to_string(),
(_, Err(_)) => "Created conflict in".to_string(),
(Ok(Some(TreeValue::Symlink(_))), Ok(Some(TreeValue::Symlink(_)))) => {
"Symlink target changed at".to_string()
}
(Ok(left_value), Ok(right_value)) => {
let left_type = basic_diff_file_type(&Merge::resolved(left_value));
let right_type = basic_diff_file_type(&Merge::resolved(right_value));
let (first, rest) = left_type.split_at(1);
format!(
"{}{} became {} at",
first.to_ascii_uppercase(),
rest,
right_type
)
}
};
writeln!(formatter.labeled("header"), "{description} {ui_path}:")?;
show_color_words_diff_hunks(&left_content, &right_content, formatter)?;
} else {
show_color_words_diff_hunks(&left_content, &[], formatter)?;
let left_content = diff_content(repo, &path, &left_value)?;
let description = basic_diff_file_type(&left_value);
writeln!(
formatter.labeled("header"),
"Removed {description} {ui_path}:"
)?;
if left_content.is_empty() {
writeln!(formatter.labeled("empty"), " (empty)")?;
} else {
show_color_words_diff_hunks(&left_content, &[], formatter)?;
}
}
}
Ok::<(), CommandError>(())
}
.block_on()?;
formatter.pop_label()?;
Ok(())
}
@ -675,60 +680,64 @@ fn show_unified_diff_hunks(
pub fn show_git_diff(
formatter: &mut dyn Formatter,
workspace_command: &WorkspaceCommandHelper,
tree_diff: TreeDiffIterator,
mut tree_diff: TreeDiffStream,
) -> Result<(), CommandError> {
let repo = workspace_command.repo();
formatter.push_label("diff")?;
for (path, diff) in tree_diff {
let path_string = path.to_internal_file_string();
let (left_value, right_value) = diff?;
if left_value.is_absent() {
let right_part = git_diff_part(repo, &path, &right_value)?;
formatter.with_label("file_header", |formatter| {
writeln!(formatter, "diff --git a/{path_string} b/{path_string}")?;
writeln!(formatter, "new file mode {}", &right_part.mode)?;
writeln!(formatter, "index 0000000000..{}", &right_part.hash)?;
writeln!(formatter, "--- /dev/null")?;
writeln!(formatter, "+++ b/{path_string}")
})?;
show_unified_diff_hunks(formatter, &[], &right_part.content)?;
} else if right_value.is_present() {
let left_part = git_diff_part(repo, &path, &left_value)?;
let right_part = git_diff_part(repo, &path, &right_value)?;
formatter.with_label("file_header", |formatter| {
writeln!(formatter, "diff --git a/{path_string} b/{path_string}")?;
if left_part.mode != right_part.mode {
writeln!(formatter, "old mode {}", &left_part.mode)?;
writeln!(formatter, "new mode {}", &right_part.mode)?;
if left_part.hash != right_part.hash {
writeln!(formatter, "index {}...{}", &left_part.hash, right_part.hash)?;
async {
while let Some((path, diff)) = tree_diff.next().await {
let path_string = path.to_internal_file_string();
let (left_value, right_value) = diff?;
if left_value.is_absent() {
let right_part = git_diff_part(repo, &path, &right_value)?;
formatter.with_label("file_header", |formatter| {
writeln!(formatter, "diff --git a/{path_string} b/{path_string}")?;
writeln!(formatter, "new file mode {}", &right_part.mode)?;
writeln!(formatter, "index 0000000000..{}", &right_part.hash)?;
writeln!(formatter, "--- /dev/null")?;
writeln!(formatter, "+++ b/{path_string}")
})?;
show_unified_diff_hunks(formatter, &[], &right_part.content)?;
} else if right_value.is_present() {
let left_part = git_diff_part(repo, &path, &left_value)?;
let right_part = git_diff_part(repo, &path, &right_value)?;
formatter.with_label("file_header", |formatter| {
writeln!(formatter, "diff --git a/{path_string} b/{path_string}")?;
if left_part.mode != right_part.mode {
writeln!(formatter, "old mode {}", &left_part.mode)?;
writeln!(formatter, "new mode {}", &right_part.mode)?;
if left_part.hash != right_part.hash {
writeln!(formatter, "index {}...{}", &left_part.hash, right_part.hash)?;
}
} else if left_part.hash != right_part.hash {
writeln!(
formatter,
"index {}...{} {}",
&left_part.hash, right_part.hash, left_part.mode
)?;
}
} else if left_part.hash != right_part.hash {
writeln!(
formatter,
"index {}...{} {}",
&left_part.hash, right_part.hash, left_part.mode
)?;
}
if left_part.content != right_part.content {
if left_part.content != right_part.content {
writeln!(formatter, "--- a/{path_string}")?;
writeln!(formatter, "+++ b/{path_string}")?;
}
Ok(())
})?;
show_unified_diff_hunks(formatter, &left_part.content, &right_part.content)?;
} else {
let left_part = git_diff_part(repo, &path, &left_value)?;
formatter.with_label("file_header", |formatter| {
writeln!(formatter, "diff --git a/{path_string} b/{path_string}")?;
writeln!(formatter, "deleted file mode {}", &left_part.mode)?;
writeln!(formatter, "index {}..0000000000", &left_part.hash)?;
writeln!(formatter, "--- a/{path_string}")?;
writeln!(formatter, "+++ b/{path_string}")?;
}
Ok(())
})?;
show_unified_diff_hunks(formatter, &left_part.content, &right_part.content)?;
} else {
let left_part = git_diff_part(repo, &path, &left_value)?;
formatter.with_label("file_header", |formatter| {
writeln!(formatter, "diff --git a/{path_string} b/{path_string}")?;
writeln!(formatter, "deleted file mode {}", &left_part.mode)?;
writeln!(formatter, "index {}..0000000000", &left_part.hash)?;
writeln!(formatter, "--- a/{path_string}")?;
writeln!(formatter, "+++ /dev/null")
})?;
show_unified_diff_hunks(formatter, &left_part.content, &[])?;
writeln!(formatter, "+++ /dev/null")
})?;
show_unified_diff_hunks(formatter, &left_part.content, &[])?;
}
}
Ok::<(), CommandError>(())
}
.block_on()?;
formatter.pop_label()?;
Ok(())
}
@ -737,32 +746,35 @@ pub fn show_git_diff(
pub fn show_diff_summary(
formatter: &mut dyn Formatter,
workspace_command: &WorkspaceCommandHelper,
tree_diff: TreeDiffIterator,
mut tree_diff: TreeDiffStream,
) -> io::Result<()> {
formatter.with_label("diff", |formatter| {
for (repo_path, diff) in tree_diff {
let (before, after) = diff.unwrap();
if before.is_present() && after.is_present() {
writeln!(
formatter.labeled("modified"),
"M {}",
workspace_command.format_file_path(&repo_path)
)?;
} else if before.is_absent() {
writeln!(
formatter.labeled("added"),
"A {}",
workspace_command.format_file_path(&repo_path)
)?;
} else {
writeln!(
formatter.labeled("removed"),
"R {}",
workspace_command.format_file_path(&repo_path)
)?;
formatter.with_label("diff", |formatter| -> io::Result<()> {
async {
while let Some((repo_path, diff)) = tree_diff.next().await {
let (before, after) = diff.unwrap();
if before.is_present() && after.is_present() {
writeln!(
formatter.labeled("modified"),
"M {}",
workspace_command.format_file_path(&repo_path)
)?;
} else if before.is_absent() {
writeln!(
formatter.labeled("added"),
"A {}",
workspace_command.format_file_path(&repo_path)
)?;
} else {
writeln!(
formatter.labeled("removed"),
"R {}",
workspace_command.format_file_path(&repo_path)
)?;
}
}
Ok(())
}
Ok(())
.block_on()
})
}
@ -796,21 +808,26 @@ pub fn show_diff_stat(
ui: &Ui,
formatter: &mut dyn Formatter,
workspace_command: &WorkspaceCommandHelper,
tree_diff: TreeDiffIterator,
mut tree_diff: TreeDiffStream,
) -> Result<(), CommandError> {
let mut stats: Vec<DiffStat> = vec![];
let mut max_path_width = 0;
let mut max_diffs = 0;
for (repo_path, diff) in tree_diff {
let (left, right) = diff?;
let path = workspace_command.format_file_path(&repo_path);
let left_content = diff_content(workspace_command.repo(), &repo_path, &left)?;
let right_content = diff_content(workspace_command.repo(), &repo_path, &right)?;
max_path_width = max(max_path_width, path.width());
let stat = get_diff_stat(path, &left_content, &right_content);
max_diffs = max(max_diffs, stat.added + stat.removed);
stats.push(stat);
async {
while let Some((repo_path, diff)) = tree_diff.next().await {
let (left, right) = diff?;
let path = workspace_command.format_file_path(&repo_path);
let left_content = diff_content(workspace_command.repo(), &repo_path, &left)?;
let right_content = diff_content(workspace_command.repo(), &repo_path, &right)?;
max_path_width = max(max_path_width, path.width());
let stat = get_diff_stat(path, &left_content, &right_content);
max_diffs = max(max_diffs, stat.added + stat.removed);
stats.push(stat);
}
Ok::<(), CommandError>(())
}
.block_on()?;
let number_padding = max_diffs.to_string().len();
// 4 characters padding for the graph
@ -866,20 +883,23 @@ pub fn show_diff_stat(
pub fn show_types(
formatter: &mut dyn Formatter,
workspace_command: &WorkspaceCommandHelper,
tree_diff: TreeDiffIterator,
mut tree_diff: TreeDiffStream,
) -> io::Result<()> {
formatter.with_label("diff", |formatter| {
for (repo_path, diff) in tree_diff {
let (before, after) = diff.unwrap();
writeln!(
formatter.labeled("modified"),
"{}{} {}",
diff_summary_char(&before),
diff_summary_char(&after),
workspace_command.format_file_path(&repo_path)
)?;
async {
while let Some((repo_path, diff)) = tree_diff.next().await {
let (before, after) = diff.unwrap();
writeln!(
formatter.labeled("modified"),
"{}{} {}",
diff_summary_char(&before),
diff_summary_char(&after),
workspace_command.format_file_path(&repo_path)
)?;
}
Ok(())
}
Ok(())
.block_on()
})
}

View file

@ -17,11 +17,12 @@
use std::cmp::max;
use std::collections::BTreeMap;
use std::iter::zip;
use std::pin::Pin;
use std::sync::Arc;
use std::{iter, vec};
use futures::stream::StreamExt;
use futures::TryStreamExt;
use futures::{Stream, TryStreamExt};
use itertools::Itertools;
use pollster::FutureExt;
@ -336,6 +337,19 @@ impl MergedTree {
TreeDiffIterator::new(self.clone(), other.clone(), matcher)
}
/// Stream of the differences between this tree and another tree.
pub fn diff_stream<'matcher>(
&self,
other: &MergedTree,
matcher: &'matcher dyn Matcher,
) -> TreeDiffStream<'matcher> {
Box::pin(futures::stream::iter(TreeDiffIterator::new(
self.clone(),
other.clone(),
matcher,
)))
}
/// Collects lists of modified, added, and removed files between this tree
/// and another tree.
pub fn diff_summary(
@ -413,6 +427,15 @@ impl MergedTree {
}
}
/// Type alias for the result from `MergedTree::diff_stream()`. We use a
/// `Stream` instead of an `Iterator` so high-latency backends (e.g. cloud-based
/// ones) can fetch trees asynchronously.
pub type TreeDiffStream<'matcher> = Pin<
Box<
dyn Stream<Item = (RepoPath, BackendResult<(MergedTreeValue, MergedTreeValue)>)> + 'matcher,
>,
>;
fn all_tree_conflict_names(trees: &Merge<Tree>) -> impl Iterator<Item = &RepoPathComponent> {
itertools::chain(trees.removes(), trees.adds())
.map(|tree| tree.data().names())