diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dd604831..f8c8cefce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -73,6 +73,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * `jj workspace root` was aliased to `jj root`, for ease of discoverability +* `jj diff` no longer shows the contents of binary files. + ### Fixed bugs * Fixed snapshots of symlinks in `gitignore`-d directory. diff --git a/cli/src/config/colors.toml b/cli/src/config/colors.toml index 2be8a32bd..6f9f2d6b0 100644 --- a/cli/src/config/colors.toml +++ b/cli/src/config/colors.toml @@ -60,6 +60,7 @@ "working_copy empty description placeholder" = "bright green" "diff header" = "yellow" "diff empty" = "cyan" +"diff binary" = "cyan" "diff file_header" = { bold = true } "diff hunk_header" = "cyan" "diff removed" = "red" diff --git a/cli/src/diff_util.rs b/cli/src/diff_util.rs index e7cca3650..52ded4013 100644 --- a/cli/src/diff_util.rs +++ b/cli/src/diff_util.rs @@ -346,19 +346,66 @@ fn show_color_words_diff_line( Ok(()) } -fn diff_content(path: &RepoPath, value: MaterializedTreeValue) -> Result, CommandError> { +struct FileContent { + /// false if this file is likely text; true if it is likely binary. + is_binary: bool, + contents: Vec, +} + +impl FileContent { + fn empty() -> Self { + Self { + is_binary: false, + contents: vec![], + } + } + + pub(crate) fn is_empty(&self) -> bool { + self.contents.is_empty() + } +} + +fn file_content_for_diff(reader: &mut dyn io::Read) -> io::Result { + // If this is a binary file, don't show the full contents. + // Determine whether it's binary by whether the first 8k bytes contain a null + // character; this is the same heuristic used by git as of writing: https://github.com/git/git/blob/eea0e59ffbed6e33d171ace5be13cde9faa41639/xdiff-interface.c#L192-L198 + const PEEK_SIZE: usize = 8000; + // TODO: currently we look at the whole file, even though for binary files we + // only need to know the file size. To change that we'd have to extend all + // the data backends to support getting the length. + let mut contents = vec![]; + reader.read_to_end(&mut contents)?; + + let start = &contents[..PEEK_SIZE.min(contents.len())]; + Ok(FileContent { + is_binary: start.contains(&b'\0'), + contents, + }) +} + +fn diff_content( + path: &RepoPath, + value: MaterializedTreeValue, +) -> Result { match value { - MaterializedTreeValue::Absent => Ok(vec![]), + MaterializedTreeValue::Absent => Ok(FileContent::empty()), MaterializedTreeValue::File { mut reader, .. } => { - let mut contents = vec![]; - reader.read_to_end(&mut contents)?; - Ok(contents) + file_content_for_diff(&mut reader).map_err(Into::into) } - MaterializedTreeValue::Symlink { id: _, target } => Ok(target.into_bytes()), - MaterializedTreeValue::GitSubmodule(id) => { - Ok(format!("Git submodule checked out at {}", id.hex()).into_bytes()) - } - MaterializedTreeValue::Conflict { id: _, contents } => Ok(contents), + MaterializedTreeValue::Symlink { id: _, target } => Ok(FileContent { + // Unix file paths can't contain null bytes. + is_binary: false, + contents: target.into_bytes(), + }), + MaterializedTreeValue::GitSubmodule(id) => Ok(FileContent { + is_binary: false, + contents: format!("Git submodule checked out at {}", id.hex()).into_bytes(), + }), + // TODO: are we sure this is never binary? + MaterializedTreeValue::Conflict { id: _, contents } => Ok(FileContent { + is_binary: false, + contents, + }), MaterializedTreeValue::Tree(id) => { panic!("Unexpected tree with id {id:?} in diff at path {path:?}"); } @@ -404,8 +451,10 @@ pub fn show_color_words_diff( let right_content = diff_content(&path, right_value)?; if right_content.is_empty() { writeln!(formatter.labeled("empty"), " (empty)")?; + } else if right_content.is_binary { + writeln!(formatter.labeled("binary"), " (binary)")?; } else { - show_color_words_diff_hunks(&[], &right_content, formatter)?; + show_color_words_diff_hunks(&[], &right_content.contents, formatter)?; } } else if right_value.is_present() { let description = match (&left_value, &right_value) { @@ -458,7 +507,15 @@ pub fn show_color_words_diff( let left_content = diff_content(&path, left_value)?; let right_content = diff_content(&path, right_value)?; writeln!(formatter.labeled("header"), "{description} {ui_path}:")?; - show_color_words_diff_hunks(&left_content, &right_content, formatter)?; + if left_content.is_binary || right_content.is_binary { + writeln!(formatter.labeled("binary"), " (binary)")?; + } else { + show_color_words_diff_hunks( + &left_content.contents, + &right_content.contents, + formatter, + )?; + } } else { let description = basic_diff_file_type(&left_value); writeln!( @@ -468,8 +525,10 @@ pub fn show_color_words_diff( let left_content = diff_content(&path, left_value)?; if left_content.is_empty() { writeln!(formatter.labeled("empty"), " (empty)")?; + } else if left_content.is_binary { + writeln!(formatter.labeled("binary"), " (binary)")?; } else { - show_color_words_diff_hunks(&left_content, &[], formatter)?; + show_color_words_diff_hunks(&left_content.contents, &[], formatter)?; } } } @@ -508,6 +567,7 @@ fn git_diff_part( "100644".to_string() }; hash = id.hex(); + // TODO: use `file_content_for_diff` instead of showing binary contents = vec![]; reader.read_to_end(&mut contents)?; } @@ -810,8 +870,15 @@ struct DiffStat { removed: usize, } -fn get_diff_stat(path: String, left_content: &[u8], right_content: &[u8]) -> DiffStat { - let hunks = unified_diff_hunks(left_content, right_content, 0); +fn get_diff_stat( + path: String, + left_content: &FileContent, + right_content: &FileContent, +) -> DiffStat { + // TODO: this matches git's behavior, which is to count the number of newlines + // in the file. but that behavior seems unhelpful; no one really cares how + // many `0xa0` characters are in an image. + let hunks = unified_diff_hunks(&left_content.contents, &right_content.contents, 0); let mut added = 0; let mut removed = 0; for hunk in hunks { diff --git a/cli/tests/common/mod.rs b/cli/tests/common/mod.rs index c9bad7ea6..4b768782e 100644 --- a/cli/tests/common/mod.rs +++ b/cli/tests/common/mod.rs @@ -181,6 +181,7 @@ impl TestEnvironment { } /// Run a `jj` command, check that it was successful, and return its stdout + #[track_caller] pub fn jj_cmd_success(&self, current_dir: &Path, args: &[&str]) -> String { if self.debug_allow_stderr { let (stdout, stderr) = self.jj_cmd_ok(current_dir, args); @@ -340,10 +341,12 @@ impl TestEnvironment { } } +#[track_caller] pub fn get_stdout_string(assert: &assert_cmd::assert::Assert) -> String { String::from_utf8(assert.get_output().stdout.clone()).unwrap() } +#[track_caller] pub fn get_stderr_string(assert: &assert_cmd::assert::Assert) -> String { String::from_utf8(assert.get_output().stderr.clone()).unwrap() } diff --git a/cli/tests/test_diff_command.rs b/cli/tests/test_diff_command.rs index 8b567b97c..ad3beb89a 100644 --- a/cli/tests/test_diff_command.rs +++ b/cli/tests/test_diff_command.rs @@ -941,3 +941,40 @@ fn test_diff_stat_long_name_or_stat() { 2 files changed, 20 insertions(+), 0 deletions(-) "###); } + +#[test] +fn test_diff_binary() { + let test_env = TestEnvironment::default(); + test_env.jj_cmd_ok(test_env.env_root(), &["init", "repo", "--git"]); + let repo_path = test_env.env_root().join("repo"); + + std::fs::write(repo_path.join("file1.png"), b"\x89PNG\r\n\x1a\nabcdefg\0").unwrap(); + std::fs::write(repo_path.join("file2.png"), b"\x89PNG\r\n\x1a\n0123456\0").unwrap(); + test_env.jj_cmd_ok(&repo_path, &["new"]); + std::fs::remove_file(repo_path.join("file1.png")).unwrap(); + std::fs::write(repo_path.join("file2.png"), "foo\nbar\n").unwrap(); + std::fs::write(repo_path.join("file3.png"), b"\x89PNG\r\n\x1a\nxyz\0").unwrap(); + // try a file that's valid UTF-8 but contains control characters + std::fs::write(repo_path.join("file4.png"), b"\0\0\0").unwrap(); + + let stdout = test_env.jj_cmd_success(&repo_path, &["diff"]); + insta::assert_snapshot!(stdout, @r###" + Removed regular file file1.png: + (binary) + Modified regular file file2.png: + (binary) + Added regular file file3.png: + (binary) + Added regular file file4.png: + (binary) + "###); + + let stdout = test_env.jj_cmd_success(&repo_path, &["diff", "--stat"]); + insta::assert_snapshot!(stdout, @r###" + file1.png | 3 --- + file2.png | 5 ++--- + file3.png | 3 +++ + file4.png | 1 + + 4 files changed, 6 insertions(+), 6 deletions(-) + "###); +}