working_copy: preserve conflicts in the working copy until markers are removed

I realized only recently that we can try to parse conflict markers in
files and leave them as conflicted if they haven't changed. If they
have changed and some conflict markers have been removed, we can even
update the conflict with that partial resolution.

This change teaches the working copy to write conflicts to the working
copy. It used to expect that the caller had already updated the tree
by materializing conflicts. With this change, we also start parsing
the conflict markers and leave the conflicts unresolved in the working
copy if the conflict markers remain.

There are some cases that we don't handle yet. For example, we don't
even try to set the executable bit correctly when we write
conflicts. OTOH, we didn't do that even before this change.

We still never actually write conflicts to the working copy (outside
of tests) because we currently materialize conflicts in
`MutRepo::check_out()`. I'll change that next.
This commit is contained in:
Martin von Zweigbergk 2021-10-28 20:55:36 -07:00 committed by Martin von Zweigbergk
parent cea6061f3d
commit ea82340654
5 changed files with 327 additions and 40 deletions

View file

@ -18,12 +18,15 @@ enum FileType {
Normal = 0;
Symlink = 1;
Executable = 2;
Conflict = 3;
}
message FileState {
uint64 mtime_millis_since_epoch = 1;
uint64 size = 2;
FileType file_type = 3;
// Set only if file_type is Conflict
bytes conflict_id = 4;
}
message TreeState {
@ -33,4 +36,4 @@ message TreeState {
message Checkout {
bytes commit_id = 1;
}
}

View file

@ -17,7 +17,7 @@ use std::io::{Cursor, Write};
use itertools::Itertools;
use crate::backend::{Conflict, ConflictPart, TreeValue};
use crate::backend::{BackendResult, Conflict, ConflictId, ConflictPart, TreeValue};
use crate::diff::{find_line_ranges, Diff, DiffHunk};
use crate::files;
use crate::files::{MergeHunk, MergeResult};
@ -143,14 +143,14 @@ pub fn materialize_conflict(
store: &Store,
path: &RepoPath,
conflict: &Conflict,
file: &mut dyn Write,
output: &mut dyn Write,
) -> std::io::Result<()> {
let file_adds = file_parts(&conflict.adds);
let file_removes = file_parts(&conflict.removes);
if file_adds.len() != conflict.adds.len() || file_removes.len() != conflict.removes.len() {
// Unless all parts are regular files, we can't do much better than to try to
// describe the conflict.
describe_conflict(conflict, file)?;
describe_conflict(conflict, output)?;
return Ok(());
}
@ -171,34 +171,34 @@ pub fn materialize_conflict(
let merge_result = files::merge(&removed_slices, &added_slices);
match merge_result {
MergeResult::Resolved(content) => {
file.write_all(&content)?;
output.write_all(&content)?;
}
MergeResult::Conflict(hunks) => {
for hunk in hunks {
match hunk {
MergeHunk::Resolved(content) => {
file.write_all(&content)?;
output.write_all(&content)?;
}
MergeHunk::Conflict { removes, adds } => {
let num_diffs = min(removes.len(), adds.len());
// TODO: Pair up a remove with an add in a way that minimizes the size of
// the diff
file.write_all(CONFLICT_START_LINE)?;
output.write_all(CONFLICT_START_LINE)?;
for i in 0..num_diffs {
file.write_all(CONFLICT_MINUS_LINE)?;
file.write_all(CONFLICT_PLUS_LINE)?;
write_diff_hunks(&removes[i], &adds[i], file)?;
output.write_all(CONFLICT_MINUS_LINE)?;
output.write_all(CONFLICT_PLUS_LINE)?;
write_diff_hunks(&removes[i], &adds[i], output)?;
}
for slice in removes.iter().skip(num_diffs) {
file.write_all(CONFLICT_MINUS_LINE)?;
file.write_all(slice)?;
output.write_all(CONFLICT_MINUS_LINE)?;
output.write_all(slice)?;
}
for slice in adds.iter().skip(num_diffs) {
file.write_all(CONFLICT_PLUS_LINE)?;
file.write_all(slice)?;
output.write_all(CONFLICT_PLUS_LINE)?;
output.write_all(slice)?;
}
file.write_all(CONFLICT_END_LINE)?;
output.write_all(CONFLICT_END_LINE)?;
}
}
}
@ -317,3 +317,74 @@ fn parse_conflict_hunk(input: &[u8]) -> MergeHunk {
MergeHunk::Conflict { removes, adds }
}
pub fn update_conflict_from_content(
store: &Store,
path: &RepoPath,
conflict_id: &ConflictId,
content: &[u8],
) -> BackendResult<Option<ConflictId>> {
let mut conflict = store.read_conflict(conflict_id)?;
// First check if the new content is unchanged compared to the old content. If
// it is, we don't need parse the content or write any new objects to the
// store. This is also a way of making sure that unchanged tree/file
// conflicts (for example) are not converted to regular files in the working
// copy.
let mut old_content = Vec::with_capacity(content.len());
materialize_conflict(store, path, &conflict, &mut old_content).unwrap();
if content == old_content {
return Ok(Some(conflict_id.clone()));
}
let mut removed_content = vec![vec![]; conflict.removes.len()];
let mut added_content = vec![vec![]; conflict.adds.len()];
if let Some(hunks) = parse_conflict(content, conflict.removes.len(), conflict.adds.len()) {
for hunk in hunks {
match hunk {
MergeHunk::Resolved(slice) => {
for buf in &mut removed_content {
buf.extend_from_slice(&slice);
}
for buf in &mut added_content {
buf.extend_from_slice(&slice);
}
}
MergeHunk::Conflict { removes, adds } => {
for (i, buf) in removes.iter().enumerate() {
removed_content[i].extend_from_slice(buf);
}
for (i, buf) in adds.iter().enumerate() {
added_content[i].extend_from_slice(buf);
}
}
}
}
// Now write the new files contents we found by parsing the file
// with conflict markers. Update the Conflict object with the new
// FileIds.
for (i, buf) in removed_content.iter().enumerate() {
let file_id = store.write_file(path, &mut Cursor::new(buf))?;
if let TreeValue::Normal { id, executable: _ } = &mut conflict.removes[i].value {
*id = file_id;
} else {
// TODO: This can actually happen. We should check earlier
// that the we only attempt to parse the conflicts if it's a
// file-only conflict.
panic!("Found conflict markers in merge of non-files");
}
}
for (i, buf) in added_content.iter().enumerate() {
let file_id = store.write_file(path, &mut Cursor::new(buf))?;
if let TreeValue::Normal { id, executable: _ } = &mut conflict.adds[i].value {
*id = file_id;
} else {
panic!("Found conflict markers in merge of non-files");
}
}
let new_conflict_id = store.write_conflict(&conflict)?;
Ok(Some(new_conflict_id))
} else {
Ok(None)
}
}

View file

@ -32,9 +32,10 @@ use tempfile::NamedTempFile;
use thiserror::Error;
use crate::backend::{
BackendError, CommitId, FileId, MillisSinceEpoch, SymlinkId, TreeId, TreeValue,
BackendError, CommitId, ConflictId, FileId, MillisSinceEpoch, SymlinkId, TreeId, TreeValue,
};
use crate::commit::Commit;
use crate::conflicts::{materialize_conflict, update_conflict_from_content};
use crate::gitignore::GitIgnoreFile;
use crate::lock::FileLock;
use crate::matchers::EverythingMatcher;
@ -47,6 +48,7 @@ use crate::tree_builder::TreeBuilder;
pub enum FileType {
Normal { executable: bool },
Symlink,
Conflict { id: ConflictId },
}
#[derive(Debug, PartialEq, Eq, Clone)]
@ -90,6 +92,10 @@ fn file_state_from_proto(proto: &crate::protos::working_copy::FileState) -> File
crate::protos::working_copy::FileType::Normal => FileType::Normal { executable: false },
crate::protos::working_copy::FileType::Executable => FileType::Normal { executable: true },
crate::protos::working_copy::FileType::Symlink => FileType::Symlink,
crate::protos::working_copy::FileType::Conflict => {
let id = ConflictId(proto.conflict_id.to_vec());
FileType::Conflict { id }
}
};
FileState {
file_type,
@ -104,6 +110,10 @@ fn file_state_to_proto(file_state: &FileState) -> crate::protos::working_copy::F
FileType::Normal { executable: false } => crate::protos::working_copy::FileType::Normal,
FileType::Normal { executable: true } => crate::protos::working_copy::FileType::Executable,
FileType::Symlink => crate::protos::working_copy::FileType::Symlink,
FileType::Conflict { id } => {
proto.conflict_id = id.0.to_vec();
crate::protos::working_copy::FileType::Conflict
}
};
proto.file_type = file_type;
proto.mtime_millis_since_epoch = file_state.mtime.0;
@ -371,8 +381,8 @@ impl TreeState {
git_ignore: &GitIgnoreFile,
tree_builder: &mut TreeBuilder,
) {
let current_file_state = self.file_states.get_mut(&repo_path);
if current_file_state.is_none()
let maybe_current_file_state = self.file_states.get_mut(&repo_path);
if maybe_current_file_state.is_none()
&& git_ignore.matches_file(&repo_path.to_internal_file_string())
{
// If it wasn't already tracked and it matches the ignored paths, then
@ -381,7 +391,7 @@ impl TreeState {
}
#[cfg_attr(unix, allow(unused_mut))]
let mut new_file_state = file_state(&disk_path).unwrap();
match current_file_state {
match maybe_current_file_state {
None => {
// untracked
let file_type = new_file_state.file_type.clone();
@ -389,23 +399,66 @@ impl TreeState {
let file_value = self.write_path_to_store(&repo_path, &disk_path, file_type);
tree_builder.set(repo_path, file_value);
}
Some(current_entry) => {
Some(current_file_state) => {
#[cfg(windows)]
{
// On Windows, we preserve the state we had recorded
// when we wrote the file.
new_file_state.mark_executable(current_entry.is_executable());
new_file_state.mark_executable(current_file_state.is_executable());
}
// If the file's mtime was set at the same time as this state file's own mtime,
// then we don't know if the file was modified before or after this state file.
// We set the file's mtime to 0 to simplify later code.
if current_entry.mtime >= self.own_mtime {
current_entry.mtime = MillisSinceEpoch(0);
if current_file_state.mtime >= self.own_mtime {
current_file_state.mtime = MillisSinceEpoch(0);
}
let mut clean = current_file_state == &new_file_state;
// Because the file system doesn't have a built-in way of indicating a conflict,
// we look at the current state instead. If that indicates that the path has a
// conflict and the contents are now a file, then we take interpret that as if
// it is still a conflict.
if !clean
&& matches!(current_file_state.file_type, FileType::Conflict { .. })
&& matches!(new_file_state.file_type, FileType::Normal { .. })
{
// If the only change is that the type changed from conflict to regular file,
// then we consider it clean (the same as a regular file being clean, it's
// just that the file system doesn't have a conflict type).
if new_file_state.mtime == current_file_state.mtime
&& new_file_state.size == current_file_state.size
{
clean = true;
} else {
// If the file contained a conflict before and is now a normal file on disk
// (new_file_state cannot be a Conflict at this point), we try to parse
// any conflict markers in the file into a conflict.
if let (FileType::Conflict { id }, FileType::Normal { executable: _ }) =
(&current_file_state.file_type, &new_file_state.file_type)
{
let mut file = File::open(&disk_path).unwrap();
let mut content = vec![];
file.read_to_end(&mut content).unwrap();
if let Some(new_conflict_id) = update_conflict_from_content(
self.store.as_ref(),
&repo_path,
id,
&content,
)
.unwrap()
{
new_file_state.file_type = FileType::Conflict {
id: new_conflict_id.clone(),
};
*current_file_state = new_file_state;
tree_builder.set(repo_path, TreeValue::Conflict(new_conflict_id));
return;
}
}
}
}
let clean = current_entry == &new_file_state;
if !clean {
let file_type = new_file_state.file_type.clone();
*current_entry = new_file_state;
*current_file_state = new_file_state;
let file_value = self.write_path_to_store(&repo_path, &disk_path, file_type);
tree_builder.set(repo_path, file_value);
}
@ -428,6 +481,7 @@ impl TreeState {
let id = self.write_symlink_to_store(repo_path, disk_path);
TreeValue::Symlink(id)
}
FileType::Conflict { .. } => panic!("conflicts should be handled by the caller"),
}
}
@ -478,6 +532,25 @@ impl TreeState {
file_state(disk_path).unwrap()
}
fn write_conflict(&self, disk_path: &Path, path: &RepoPath, id: &ConflictId) -> FileState {
create_parent_dirs(disk_path);
let conflict = self.store.read_conflict(id).unwrap();
// TODO: Check that we're not overwriting an un-ignored file here (which might
// be created by a concurrent process).
let mut file = OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(disk_path)
.unwrap_or_else(|err| panic!("failed to open {:?} for write: {:?}", &disk_path, err));
materialize_conflict(self.store.as_ref(), path, &conflict, &mut file).unwrap();
// TODO: Set the executable bit correctly (when possible) and preserve that on
// Windows like we do with the executable bit for regular files.
let mut result = file_state(disk_path).unwrap();
result.file_type = FileType::Conflict { id: id.clone() };
result
}
#[cfg_attr(windows, allow(unused_variables))]
fn set_executable(&self, disk_path: &Path, executable: bool) {
#[cfg(windows)]
@ -536,6 +609,7 @@ impl TreeState {
self.write_file(&disk_path, &path, &id, executable)
}
TreeValue::Symlink(id) => self.write_symlink(&disk_path, &path, &id),
TreeValue::Conflict(id) => self.write_conflict(&disk_path, &path, &id),
TreeValue::GitSubmodule(_id) => {
println!("ignoring git submodule at {:?}", path);
continue;
@ -543,12 +617,6 @@ impl TreeState {
TreeValue::Tree(_id) => {
panic!("unexpected tree entry in diff at {:?}", path);
}
TreeValue::Conflict(_id) => {
panic!(
"conflicts cannot be represented in the working copy: {:?}",
path
);
}
};
self.file_states.insert(path.clone(), file_state);
stats.added_files += 1;
@ -574,6 +642,7 @@ impl TreeState {
self.write_file(&disk_path, &path, &id, executable)
}
(_, TreeValue::Symlink(id)) => self.write_symlink(&disk_path, &path, &id),
(_, TreeValue::Conflict(id)) => self.write_conflict(&disk_path, &path, &id),
(_, TreeValue::GitSubmodule(_id)) => {
println!("ignoring git submodule at {:?}", path);
self.file_states.remove(&path);
@ -582,12 +651,6 @@ impl TreeState {
(_, TreeValue::Tree(_id)) => {
panic!("unexpected tree entry in diff at {:?}", path);
}
(_, TreeValue::Conflict(_id)) => {
panic!(
"conflicts cannot be represented in the working copy: {:?}",
path
);
}
};
self.file_states.insert(path.clone(), file_state);

View file

@ -13,7 +13,7 @@
// limitations under the License.
use jujutsu_lib::backend::{Conflict, ConflictPart, TreeValue};
use jujutsu_lib::conflicts::{materialize_conflict, parse_conflict};
use jujutsu_lib::conflicts::{materialize_conflict, parse_conflict, update_conflict_from_content};
use jujutsu_lib::files::MergeHunk;
use jujutsu_lib::repo_path::RepoPath;
use jujutsu_lib::testutils;
@ -416,3 +416,93 @@ line 5
None
)
}
#[test]
fn test_update_conflict_from_content() {
let settings = testutils::user_settings();
let (_temp_dir, repo) = testutils::init_repo(&settings, false);
let path = RepoPath::from_internal_string("dir/file");
let base_file_id = testutils::write_file(repo.store(), &path, "line 1\nline 2\nline 3\n");
let left_file_id = testutils::write_file(repo.store(), &path, "left 1\nline 2\nleft 3\n");
let right_file_id = testutils::write_file(repo.store(), &path, "right 1\nline 2\nright 3\n");
let conflict = Conflict {
removes: vec![ConflictPart {
value: TreeValue::Normal {
id: base_file_id,
executable: false,
},
}],
adds: vec![
ConflictPart {
value: TreeValue::Normal {
id: left_file_id,
executable: false,
},
},
ConflictPart {
value: TreeValue::Normal {
id: right_file_id,
executable: false,
},
},
],
};
let conflict_id = repo.store().write_conflict(&conflict).unwrap();
// If the content is unchanged compared to the materialized value, we get the
// old conflict id back.
let mut materialized = vec![];
materialize_conflict(repo.store(), &path, &conflict, &mut materialized).unwrap();
let result =
update_conflict_from_content(repo.store(), &path, &conflict_id, &materialized).unwrap();
assert_eq!(result, Some(conflict_id.clone()));
// If the conflict is resolved, we None back to indicate that.
let result = update_conflict_from_content(
repo.store(),
&path,
&conflict_id,
b"resolved 1\nline 2\nresolved 3\n",
)
.unwrap();
assert_eq!(result, None);
// If the conflict is partially resolved, we get a new conflict back.
let result = update_conflict_from_content(repo.store(), &path, &conflict_id, b"resolved 1\nline 2\n<<<<<<<\n-------\n+++++++\n-line 3\n+left 3\n+++++++\nright 3\n>>>>>>>\n").unwrap();
assert_ne!(result, None);
assert_ne!(result, Some(conflict_id));
let new_conflict = repo.store().read_conflict(&result.unwrap()).unwrap();
// Calculate expected new FileIds
let new_base_file_id =
testutils::write_file(repo.store(), &path, "resolved 1\nline 2\nline 3\n");
let new_left_file_id =
testutils::write_file(repo.store(), &path, "resolved 1\nline 2\nleft 3\n");
let new_right_file_id =
testutils::write_file(repo.store(), &path, "resolved 1\nline 2\nright 3\n");
assert_eq!(
new_conflict,
Conflict {
removes: vec![ConflictPart {
value: TreeValue::Normal {
id: new_base_file_id,
executable: false
}
}],
adds: vec![
ConflictPart {
value: TreeValue::Normal {
id: new_left_file_id,
executable: false
}
},
ConflictPart {
value: TreeValue::Normal {
id: new_right_file_id,
executable: false
}
}
]
}
)
}

View file

@ -19,7 +19,7 @@ use std::os::unix::fs::PermissionsExt;
use std::sync::Arc;
use itertools::Itertools;
use jujutsu_lib::backend::TreeValue;
use jujutsu_lib::backend::{Conflict, ConflictPart, TreeValue};
use jujutsu_lib::commit_builder::CommitBuilder;
use jujutsu_lib::repo::ReadonlyRepo;
use jujutsu_lib::repo_path::{RepoPath, RepoPathComponent};
@ -61,6 +61,7 @@ fn test_checkout_file_transitions(use_git: bool) {
Missing,
Normal,
Executable,
Conflict,
#[cfg_attr(windows, allow(dead_code))]
Symlink,
Tree,
@ -101,6 +102,47 @@ fn test_checkout_file_transitions(use_git: bool) {
executable: true,
}
}
Kind::Conflict => {
let base_file_id = testutils::write_file(
store,
&RepoPath::from_internal_string(path),
"base file contents",
);
let left_file_id = testutils::write_file(
store,
&RepoPath::from_internal_string(path),
"left file contents",
);
let right_file_id = testutils::write_file(
store,
&RepoPath::from_internal_string(path),
"right file contents",
);
let conflict = Conflict {
removes: vec![ConflictPart {
value: TreeValue::Normal {
id: base_file_id,
executable: false,
},
}],
adds: vec![
ConflictPart {
value: TreeValue::Normal {
id: left_file_id,
executable: false,
},
},
ConflictPart {
value: TreeValue::Normal {
id: right_file_id,
executable: false,
},
},
],
};
let conflict_id = store.write_conflict(&conflict).unwrap();
TreeValue::Conflict(conflict_id)
}
Kind::Symlink => {
let id = store
.write_symlink(&RepoPath::from_internal_string(path), "target")
@ -133,7 +175,13 @@ fn test_checkout_file_transitions(use_git: bool) {
tree_builder.set(RepoPath::from_internal_string(path), value);
}
let mut kinds = vec![Kind::Missing, Kind::Normal, Kind::Executable, Kind::Tree];
let mut kinds = vec![
Kind::Missing,
Kind::Normal,
Kind::Executable,
Kind::Conflict,
Kind::Tree,
];
#[cfg(unix)]
kinds.push(Kind::Symlink);
if use_git {
@ -212,6 +260,18 @@ fn test_checkout_file_transitions(use_git: bool) {
path
);
}
Kind::Conflict => {
assert!(maybe_metadata.is_ok(), "{:?} should exist", path);
let metadata = maybe_metadata.unwrap();
assert!(metadata.is_file(), "{:?} should be a file", path);
#[cfg(unix)]
assert_eq!(
metadata.permissions().mode() & 0o111,
0,
"{:?} should not be executable",
path
);
}
Kind::Symlink => {
assert!(maybe_metadata.is_ok(), "{:?} should exist", path);
let metadata = maybe_metadata.unwrap();