merge_tools: create builtin diff editor

This commit is contained in:
Waleed Khan 2023-08-29 23:13:35 +02:00
parent ccd67e8156
commit 56c61fd047
6 changed files with 659 additions and 2 deletions

61
Cargo.lock generated
View file

@ -269,6 +269,12 @@ dependencies = [
"thiserror",
]
[[package]]
name = "cassowary"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53"
[[package]]
name = "cast"
version = "0.3.0"
@ -517,6 +523,22 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crossterm"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67"
dependencies = [
"bitflags 1.3.2",
"crossterm_winapi",
"libc",
"mio",
"parking_lot",
"signal-hook",
"signal-hook-mio",
"winapi",
]
[[package]]
name = "crossterm"
version = "0.26.1"
@ -1004,7 +1026,7 @@ dependencies = [
"clap_mangen",
"config",
"criterion",
"crossterm",
"crossterm 0.26.1",
"dirs",
"esl01-renderdag",
"git2",
@ -1021,6 +1043,7 @@ dependencies = [
"pest_derive",
"regex",
"rpassword",
"scm-record",
"serde",
"slab",
"tempfile",
@ -1834,6 +1857,23 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "scm-record"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5de5add99102ba58391e9c1e0c2ee5410654d8ffe7605a393db863a403cb5a2d"
dependencies = [
"cassowary",
"crossterm 0.26.1",
"num-traits",
"serde",
"serde_json",
"thiserror",
"tracing",
"tui",
"unicode-width",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
@ -2321,6 +2361,19 @@ dependencies = [
"tracing-log",
]
[[package]]
name = "tui"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccdd26cbd674007e649a272da4475fb666d3aa0ad0531da7136db6fab0e5bad1"
dependencies = [
"bitflags 1.3.2",
"cassowary",
"crossterm 0.25.0",
"unicode-segmentation",
"unicode-width",
]
[[package]]
name = "typenum"
version = "1.16.0"
@ -2360,6 +2413,12 @@ dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-segmentation"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
[[package]]
name = "unicode-width"
version = "0.1.10"

View file

@ -65,6 +65,7 @@ smallvec = { version = "1.11.0", features = [
"const_new",
"union",
] }
scm-record = "0.1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.105"
slab = "0.4.9"

View file

@ -49,6 +49,7 @@ pest = { workspace = true }
pest_derive = { workspace = true }
regex = { workspace = true }
rpassword = { workspace = true }
scm-record = { workspace = true }
serde = { workspace = true }
slab = { workspace = true }
tempfile = { workspace = true }

View file

@ -0,0 +1,575 @@
use std::borrow::Cow;
use std::path::Path;
use std::sync::Arc;
use itertools::Itertools;
use jj_lib::backend::{BackendError, FileId, MergedTreeId, ObjectId, TreeValue};
use jj_lib::diff::{find_line_ranges, Diff, DiffHunk};
use jj_lib::matchers::EverythingMatcher;
use jj_lib::merge::Merge;
use jj_lib::merged_tree::{MergedTree, MergedTreeBuilder};
use jj_lib::repo_path::RepoPath;
use jj_lib::store::Store;
use thiserror::Error;
#[derive(Debug, Error)]
pub enum BuiltinToolError {
#[error("Failed to record changes: {0}")]
Record(#[from] scm_record::RecordError),
#[error(transparent)]
ReadFileBackend(BackendError),
#[error("Failed to read file {path:?} with ID {id:?}: {source}")]
ReadFileIo {
path: RepoPath,
id: FileId,
source: std::io::Error,
},
#[error(transparent)]
ReadSymlink(BackendError),
#[error("Failed to decode UTF-8 text for item {item} (this should not happen): {source}")]
DecodeUtf8 {
source: std::str::Utf8Error,
item: &'static str,
},
#[error("Rendering {item} {id} is unimplemented for the builtin difftool/mergetool")]
Unimplemented { item: &'static str, id: String },
#[error("Backend error: {0:?}")]
BackendError(#[from] jj_lib::backend::BackendError),
}
#[derive(Clone, Debug)]
enum FileContents {
Absent,
Text {
contents: String,
hash: Option<String>,
num_bytes: u64,
},
Binary {
hash: Option<String>,
num_bytes: u64,
},
}
/// Information about a file that was read from disk. Note that the file may not
/// have existed, in which case its contents will be marked as absent.
#[derive(Clone, Debug)]
pub struct FileInfo {
file_mode: scm_record::FileMode,
contents: FileContents,
}
/// File modes according to the Git file mode conventions. used for display
/// purposes and equality comparison.
///
/// TODO: let `scm-record` accept strings instead of numbers for file modes? Or
/// figure out some other way to represent file mode changes in a jj-compatible
/// manner?
mod mode {
pub const NORMAL: usize = 0o100644;
pub const EXECUTABLE: usize = 0o100755;
pub const SYMLINK: usize = 0o120000;
}
fn describe_binary(hash: Option<&str>, num_bytes: u64) -> String {
match hash {
Some(hash) => {
format!("{hash} ({num_bytes}B)")
}
None => format!("({num_bytes}B)"),
}
}
fn buf_to_file_contents(hash: Option<String>, buf: Vec<u8>) -> FileContents {
let num_bytes: u64 = buf.len().try_into().unwrap();
let text = if buf.contains(&0) {
None
} else {
String::from_utf8(buf).ok()
};
match text {
Some(text) => FileContents::Text {
contents: text,
hash,
num_bytes,
},
None => FileContents::Binary { hash, num_bytes },
}
}
fn read_file_contents(
store: &Store,
tree: &MergedTree,
path: &RepoPath,
) -> Result<FileInfo, BuiltinToolError> {
match tree.path_value(path).into_resolved() {
Ok(None) => Ok(FileInfo {
file_mode: scm_record::FileMode::absent(),
contents: FileContents::Absent,
}),
Ok(Some(TreeValue::File { id, executable })) => {
let mut reader = store
.read_file(path, &id)
.map_err(BuiltinToolError::ReadFileBackend)?;
let mut buf = Vec::new();
reader
.read_to_end(&mut buf)
.map_err(|err| BuiltinToolError::ReadFileIo {
path: path.clone(),
id: id.clone(),
source: err,
})?;
let file_mode = if executable {
scm_record::FileMode(mode::EXECUTABLE)
} else {
scm_record::FileMode(mode::NORMAL)
};
let contents = buf_to_file_contents(Some(id.hex()), buf);
Ok(FileInfo {
file_mode,
contents,
})
}
Ok(Some(TreeValue::Symlink(id))) => {
let value = store
.read_symlink(path, &id)
.map_err(BuiltinToolError::ReadSymlink)?;
let file_mode = scm_record::FileMode(mode::SYMLINK);
let num_bytes = value.len().try_into().unwrap();
Ok(FileInfo {
file_mode,
contents: FileContents::Text {
contents: value,
hash: Some(id.hex()),
num_bytes,
},
})
}
Ok(Some(TreeValue::Tree(tree_id))) => {
unreachable!("list of changed files included a tree: {tree_id:?}");
}
Ok(Some(TreeValue::GitSubmodule(id))) => Err(BuiltinToolError::Unimplemented {
item: "git submodule",
id: id.hex(),
}),
Ok(Some(TreeValue::Conflict(id))) => {
unreachable!("list of changed files included a conflict: {id:?}");
}
Err(merge) => {
unreachable!("list of changed files included a merge: {merge:?}");
}
}
}
fn make_section_changed_lines(
contents: &str,
change_type: scm_record::ChangeType,
) -> Vec<scm_record::SectionChangedLine<'static>> {
contents
.split_inclusive('\n')
.map(|line| scm_record::SectionChangedLine {
is_checked: false,
change_type,
line: Cow::Owned(line.to_owned()),
})
.collect()
}
fn make_diff_sections(
left_contents: &str,
right_contents: &str,
) -> Result<Vec<scm_record::Section<'static>>, BuiltinToolError> {
let diff = Diff::for_tokenizer(
&[left_contents.as_bytes(), right_contents.as_bytes()],
&find_line_ranges,
);
let mut sections = Vec::new();
for hunk in diff.hunks() {
match hunk {
DiffHunk::Matching(text) => {
let text =
std::str::from_utf8(text).map_err(|err| BuiltinToolError::DecodeUtf8 {
source: err,
item: "matching text in diff hunk",
})?;
sections.push(scm_record::Section::Unchanged {
lines: text
.split_inclusive('\n')
.map(|line| Cow::Owned(line.to_owned()))
.collect(),
})
}
DiffHunk::Different(sides) => {
assert_eq!(sides.len(), 2, "only two inputs were provided to the diff");
let left_side =
std::str::from_utf8(sides[0]).map_err(|err| BuiltinToolError::DecodeUtf8 {
source: err,
item: "left side of diff hunk",
})?;
let right_side =
std::str::from_utf8(sides[1]).map_err(|err| BuiltinToolError::DecodeUtf8 {
source: err,
item: "right side of diff hunk",
})?;
sections.push(scm_record::Section::Changed {
lines: [
make_section_changed_lines(left_side, scm_record::ChangeType::Removed),
make_section_changed_lines(right_side, scm_record::ChangeType::Added),
]
.concat(),
})
}
}
}
Ok(sections)
}
pub fn make_diff_files(
store: &Arc<Store>,
left_tree: &MergedTree,
right_tree: &MergedTree,
changed_files: &[RepoPath],
) -> Result<Vec<scm_record::File<'static>>, BuiltinToolError> {
let mut files = Vec::new();
for changed_path in changed_files {
let FileInfo {
file_mode: left_file_mode,
contents: left_contents,
} = read_file_contents(store, left_tree, changed_path)?;
let FileInfo {
file_mode: right_file_mode,
contents: right_contents,
} = read_file_contents(store, right_tree, changed_path)?;
let mut sections = Vec::new();
if left_file_mode != right_file_mode
&& left_file_mode != scm_record::FileMode::absent()
&& right_file_mode != scm_record::FileMode::absent()
{
sections.push(scm_record::Section::FileMode {
is_checked: false,
before: left_file_mode,
after: right_file_mode,
});
}
match (left_contents, right_contents) {
(FileContents::Absent, FileContents::Absent) => {}
(
FileContents::Absent,
FileContents::Text {
contents,
hash: _,
num_bytes: _,
},
) => sections.push(scm_record::Section::Changed {
lines: make_section_changed_lines(&contents, scm_record::ChangeType::Added),
}),
(FileContents::Absent, FileContents::Binary { hash, num_bytes }) => {
sections.push(scm_record::Section::Binary {
is_checked: false,
old_description: None,
new_description: Some(Cow::Owned(describe_binary(hash.as_deref(), num_bytes))),
})
}
(
FileContents::Text {
contents,
hash: _,
num_bytes: _,
},
FileContents::Absent,
) => sections.push(scm_record::Section::Changed {
lines: make_section_changed_lines(&contents, scm_record::ChangeType::Removed),
}),
(
FileContents::Text {
contents: old_contents,
hash: _,
num_bytes: _,
},
FileContents::Text {
contents: new_contents,
hash: _,
num_bytes: _,
},
) => {
sections.extend(make_diff_sections(&old_contents, &new_contents)?);
}
(
FileContents::Text {
contents: _,
hash: old_hash,
num_bytes: old_num_bytes,
}
| FileContents::Binary {
hash: old_hash,
num_bytes: old_num_bytes,
},
FileContents::Text {
contents: _,
hash: new_hash,
num_bytes: new_num_bytes,
}
| FileContents::Binary {
hash: new_hash,
num_bytes: new_num_bytes,
},
) => sections.push(scm_record::Section::Binary {
is_checked: false,
old_description: Some(Cow::Owned(describe_binary(
old_hash.as_deref(),
old_num_bytes,
))),
new_description: Some(Cow::Owned(describe_binary(
new_hash.as_deref(),
new_num_bytes,
))),
}),
(FileContents::Binary { hash, num_bytes }, FileContents::Absent) => {
sections.push(scm_record::Section::Binary {
is_checked: false,
old_description: Some(Cow::Owned(describe_binary(hash.as_deref(), num_bytes))),
new_description: None,
})
}
}
files.push(scm_record::File {
old_path: None,
path: Cow::Owned(changed_path.to_fs_path(Path::new(""))),
file_mode: None,
sections,
});
}
Ok(files)
}
pub fn apply_diff_builtin(
store: Arc<Store>,
left_tree: &MergedTree,
right_tree: &MergedTree,
changed_files: Vec<RepoPath>,
files: &[scm_record::File],
) -> Result<MergedTreeId, BackendError> {
let mut tree_builder = MergedTreeBuilder::new(left_tree.id().clone());
assert_eq!(
changed_files.len(),
files.len(),
"result had a different number of files"
);
for (path, file) in changed_files.into_iter().zip(files) {
let (selected, _unselected) = file.get_selected_contents();
match selected {
scm_record::SelectedContents::Absent => {
tree_builder.set_or_remove(path, Merge::absent());
}
scm_record::SelectedContents::Unchanged => {
// Do nothing.
}
scm_record::SelectedContents::Binary {
old_description: _,
new_description: _,
} => {
let value = right_tree.path_value(&path);
tree_builder.set_or_remove(path, value);
}
scm_record::SelectedContents::Present { contents } => {
let file_id = store.write_file(&path, &mut contents.as_bytes())?;
tree_builder.set_or_remove(
path,
Merge::normal(TreeValue::File {
id: file_id,
executable: file.get_file_mode()
== Some(scm_record::FileMode(mode::EXECUTABLE)),
}),
)
}
}
}
let tree_id = tree_builder.write_tree(left_tree.store())?;
Ok(tree_id)
}
pub fn edit_diff_builtin(
left_tree: &MergedTree,
right_tree: &MergedTree,
) -> Result<MergedTreeId, BuiltinToolError> {
let store = left_tree.store().clone();
let changed_files = left_tree
.diff(right_tree, &EverythingMatcher)
.map(|(path, _left, _right)| path)
.collect_vec();
let files = make_diff_files(&store, left_tree, right_tree, &changed_files)?;
let recorder = scm_record::Recorder::new(
scm_record::RecordState {
is_read_only: false,
files,
},
scm_record::EventSource::Crossterm,
);
let result = recorder.run().map_err(BuiltinToolError::Record)?;
let tree_id = apply_diff_builtin(store, left_tree, right_tree, changed_files, &result.files)
.map_err(BuiltinToolError::BackendError)?;
Ok(tree_id)
}
#[cfg(test)]
mod tests {
use jj_lib::repo::Repo;
use testutils::TestRepo;
use super::*;
#[test]
fn test_edit_diff_builtin() {
let test_repo = TestRepo::init(false);
let store = test_repo.repo.store();
let unused_path = RepoPath::from_internal_string("unused");
let unchanged = RepoPath::from_internal_string("unchanged");
let changed_path = RepoPath::from_internal_string("changed");
let added_path = RepoPath::from_internal_string("added");
let left_tree = testutils::create_tree(
&test_repo.repo,
&[
(&unused_path, "unused\n"),
(&unchanged, "unchanged\n"),
(&changed_path, "line1\nline2\nline3\n"),
],
);
let right_tree = testutils::create_tree(
&test_repo.repo,
&[
(&unused_path, "unused\n"),
(&unchanged, "unchanged\n"),
(&changed_path, "line1\nchanged1\nchanged2\nline3\nadded1\n"),
(&added_path, "added\n"),
],
);
let changed_files = vec![unchanged.clone(), changed_path.clone(), added_path.clone()];
let files = make_diff_files(store, &left_tree, &right_tree, &changed_files).unwrap();
insta::assert_debug_snapshot!(files, @r###"
[
File {
old_path: None,
path: "unchanged",
file_mode: None,
sections: [
Unchanged {
lines: [
"unchanged\n",
],
},
],
},
File {
old_path: None,
path: "changed",
file_mode: None,
sections: [
Unchanged {
lines: [
"line1\n",
],
},
Changed {
lines: [
SectionChangedLine {
is_checked: false,
change_type: Removed,
line: "line2\n",
},
SectionChangedLine {
is_checked: false,
change_type: Added,
line: "changed1\n",
},
SectionChangedLine {
is_checked: false,
change_type: Added,
line: "changed2\n",
},
],
},
Unchanged {
lines: [
"line3\n",
],
},
Changed {
lines: [
SectionChangedLine {
is_checked: false,
change_type: Added,
line: "added1\n",
},
],
},
],
},
File {
old_path: None,
path: "added",
file_mode: None,
sections: [
Changed {
lines: [
SectionChangedLine {
is_checked: false,
change_type: Added,
line: "added\n",
},
],
},
],
},
]
"###);
let no_changes_tree_id = apply_diff_builtin(
store.clone(),
&left_tree,
&right_tree,
changed_files.clone(),
&files,
)
.unwrap();
let no_changes_tree = store.get_root_tree(&no_changes_tree_id).unwrap();
assert_eq!(
no_changes_tree.id(),
left_tree.id(),
"no-changes tree was different",
);
let mut files = files;
for file in files.iter_mut() {
file.toggle_all();
}
let all_changes_tree_id = apply_diff_builtin(
store.clone(),
&left_tree,
&right_tree,
changed_files,
&files,
)
.unwrap();
let all_changes_tree = store.get_root_tree(&all_changes_tree_id).unwrap();
assert_eq!(
all_changes_tree.id(),
right_tree.id(),
"all-changes tree was different",
);
}
}

View file

@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod builtin;
mod external;
use std::sync::Arc;
@ -26,6 +27,7 @@ use jj_lib::settings::{ConfigResultExt as _, UserSettings};
use jj_lib::working_copy::SnapshotError;
use thiserror::Error;
use self::builtin::{edit_diff_builtin, BuiltinToolError};
use self::external::{edit_diff_external, DiffCheckoutError, ExternalToolError};
pub use self::external::{generate_diff, ExternalMergeTool};
use crate::config::CommandNameAndArgs;
@ -33,6 +35,8 @@ use crate::ui::Ui;
#[derive(Debug, Error)]
pub enum DiffEditError {
#[error(transparent)]
InternalTool(#[from] Box<BuiltinToolError>),
#[error(transparent)]
ExternalTool(#[from] ExternalToolError),
#[error(transparent)]
@ -53,6 +57,8 @@ pub enum DiffGenerateError {
#[derive(Debug, Error)]
pub enum ConflictResolveError {
#[error(transparent)]
InternalTool(#[from] Box<BuiltinToolError>),
#[error(transparent)]
ExternalTool(#[from] ExternalToolError),
#[error("Couldn't find the path {0:?} in this revision")]
@ -125,7 +131,10 @@ pub fn edit_diff(
// Start a diff editor on the two directories.
let editor = get_diff_editor_from_settings(ui, settings)?;
match editor {
MergeTool::Builtin => unimplemented!("run_mergetool with builtin mergetool"),
MergeTool::Builtin => {
let tree_id = edit_diff_builtin(left_tree, right_tree).map_err(Box::new)?;
Ok(tree_id)
}
MergeTool::External(editor) => edit_diff_external(
editor,
left_tree,

View file

@ -289,6 +289,18 @@ pub enum TreeValue {
Conflict(ConflictId),
}
impl TreeValue {
pub fn hex(&self) -> String {
match self {
TreeValue::File { id, .. } => id.hex(),
TreeValue::Symlink(id) => id.hex(),
TreeValue::Tree(id) => id.hex(),
TreeValue::GitSubmodule(id) => id.hex(),
TreeValue::Conflict(id) => id.hex(),
}
}
}
impl ContentHash for TreeValue {
fn hash(&self, state: &mut impl digest::Update) {
use TreeValue::*;