From cea6061f3d4277a9ae0939ccfcdae513e06d0d82 Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Sun, 31 Oct 2021 11:57:12 -0700 Subject: [PATCH] conflicts: add a function for parsing a materialized conflict I initially made the working copy materialize conflicts in its `check_out()` method. Then I changed it later (exactly a year ago, on Halloween of 2020, actually) so that the working copy expected conflicts to already have been materalized, which happens in `MutableRepo::check_out`(). I think my reasoning then was that the file system cannot represent a conflict. While it's true that the file system itself doesn't have information to know whether a file represents a conflict, we can record that ourselves. We already record whether a file is executable or not and then preserve that if we're on a file system that isn't able to record it. It's not that different to do the same for conflicts if we're on a file system that doesn't understand conflicts (i.e. all file systems). The plan is to have the working copy remember whether a file represents a conflict. When we check if it has changed, we parse the file, including conflict markers, and recreate the conflict from it. We should be able to do that losslessly (and we should adjust formats to make it possible if we find cases where it's not). Having the working copy preserve conflict states has several advantages: * Because conflicts are not materialized in the working copy, you can rebase the conflicted commit and the working copy without causing more conflicts (that's currently a UX bug I run into every now and then). * If you don't change anything in the working copy, it will be unchanged compared to its parent, which means we'll automatically abandon it if you update away from it. * The user can choose to resolve only some of the conflicts in a file and squash those in, and it'll work they way you'd hope. * It should make it easier to implement support for external merge tools (#18) without having them treat the working copy differently. This patch prepares for that work by adding support for parsing materialized conflicts. --- lib/src/conflicts.rs | 114 ++++++++++++++++++++++-- lib/tests/test_conflicts.rs | 168 +++++++++++++++++++++++++++++++++++- 2 files changed, 275 insertions(+), 7 deletions(-) diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index a0bc70ac1..13158778f 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -24,6 +24,11 @@ use crate::files::{MergeHunk, MergeResult}; use crate::repo_path::RepoPath; use crate::store::Store; +const CONFLICT_START_LINE: &[u8] = b"<<<<<<<\n"; +const CONFLICT_END_LINE: &[u8] = b">>>>>>>\n"; +const CONFLICT_MINUS_LINE: &[u8] = b"-------\n"; +const CONFLICT_PLUS_LINE: &[u8] = b"+++++++\n"; + fn describe_conflict_part(part: &ConflictPart) -> String { match &part.value { TreeValue::Normal { @@ -179,21 +184,21 @@ pub fn materialize_conflict( // TODO: Pair up a remove with an add in a way that minimizes the size of // the diff - file.write_all(b"<<<<<<<\n")?; + file.write_all(CONFLICT_START_LINE)?; for i in 0..num_diffs { - file.write_all(b"-------\n")?; - file.write_all(b"+++++++\n")?; + file.write_all(CONFLICT_MINUS_LINE)?; + file.write_all(CONFLICT_PLUS_LINE)?; write_diff_hunks(&removes[i], &adds[i], file)?; } for slice in removes.iter().skip(num_diffs) { - file.write_all(b"-------\n")?; + file.write_all(CONFLICT_MINUS_LINE)?; file.write_all(slice)?; } for slice in adds.iter().skip(num_diffs) { - file.write_all(b"+++++++\n")?; + file.write_all(CONFLICT_PLUS_LINE)?; file.write_all(slice)?; } - file.write_all(b">>>>>>>\n")?; + file.write_all(CONFLICT_END_LINE)?; } } } @@ -215,3 +220,100 @@ pub fn conflict_to_materialized_value( executable: false, } } + +/// Parses conflict markers from a slice. Returns None if there were no valid +/// conflict markers. The caller has to provide the expected number of removed +/// and added inputs to the conflicts. Conflict markers that are otherwise valid +/// will be considered invalid if they don't have the expected arity. +// TODO: "parse" is not usually the opposite of "materialize", so maybe we +// should rename them to "serialize" and "deserialize"? +pub fn parse_conflict(input: &[u8], num_removes: usize, num_adds: usize) -> Option> { + if input.is_empty() { + return None; + } + let mut hunks = vec![]; + let mut pos = 0; + let mut resolved_start = 0; + let mut conflict_start = None; + for line in input.split_inclusive(|b| *b == b'\n') { + if line == CONFLICT_START_LINE { + conflict_start = Some(pos); + } else if conflict_start.is_some() && line == CONFLICT_END_LINE { + let conflict_body = &input[conflict_start.unwrap() + CONFLICT_START_LINE.len()..pos]; + let hunk = parse_conflict_hunk(conflict_body); + match &hunk { + MergeHunk::Conflict { removes, adds } + if removes.len() == num_removes && adds.len() == num_adds => + { + let resolved_slice = &input[resolved_start..conflict_start.unwrap()]; + if !resolved_slice.is_empty() { + hunks.push(MergeHunk::Resolved(resolved_slice.to_vec())); + } + hunks.push(hunk); + resolved_start = pos + line.len(); + } + _ => {} + } + conflict_start = None; + } + pos += line.len(); + } + + if hunks.is_empty() { + None + } else { + if resolved_start < input.len() { + hunks.push(MergeHunk::Resolved(input[resolved_start..].to_vec())); + } + Some(hunks) + } +} + +fn parse_conflict_hunk(input: &[u8]) -> MergeHunk { + let mut minus_seen = false; + let mut plus_seen = false; + let mut body_seen = false; + let mut removes = vec![]; + let mut adds = vec![]; + for line in input.split_inclusive(|b| *b == b'\n') { + if line == CONFLICT_MINUS_LINE { + minus_seen = true; + if body_seen { + plus_seen = false; + body_seen = false; + } + removes.push(vec![]); + } else if line == CONFLICT_PLUS_LINE { + plus_seen = true; + if body_seen { + minus_seen = false; + body_seen = false; + } + adds.push(vec![]); + } else if minus_seen && plus_seen { + body_seen = true; + if let Some(rest) = line.strip_prefix(b"-") { + removes.last_mut().unwrap().extend_from_slice(rest); + } else if let Some(rest) = line.strip_prefix(b"+") { + adds.last_mut().unwrap().extend_from_slice(rest); + } else if let Some(rest) = line.strip_prefix(b" ") { + removes.last_mut().unwrap().extend_from_slice(rest); + adds.last_mut().unwrap().extend_from_slice(rest); + } else { + // Doesn't look like a conflict + return MergeHunk::Resolved(vec![]); + } + } else if minus_seen { + body_seen = true; + removes.last_mut().unwrap().extend_from_slice(line); + } else if plus_seen { + body_seen = true; + adds.last_mut().unwrap().extend_from_slice(line); + } else { + // Doesn't look like a conflict + return MergeHunk::Resolved(vec![]); + } + } + + MergeHunk::Conflict { removes, adds } +} diff --git a/lib/tests/test_conflicts.rs b/lib/tests/test_conflicts.rs index 4b727f46e..094c85b5c 100644 --- a/lib/tests/test_conflicts.rs +++ b/lib/tests/test_conflicts.rs @@ -13,7 +13,8 @@ // limitations under the License. use jujutsu_lib::backend::{Conflict, ConflictPart, TreeValue}; -use jujutsu_lib::conflicts::materialize_conflict; +use jujutsu_lib::conflicts::{materialize_conflict, parse_conflict}; +use jujutsu_lib::files::MergeHunk; use jujutsu_lib::repo_path::RepoPath; use jujutsu_lib::testutils; @@ -250,3 +251,168 @@ line 5 " ); } + +#[test] +fn test_parse_conflict_resolved() { + assert_eq!( + parse_conflict( + b"line 1 +line 2 +line 3 +line 4 +line 5 +", + 1, + 2 + ), + None + ) +} + +#[test] +fn test_parse_conflict_simple() { + assert_eq!( + parse_conflict( + b"line 1 +<<<<<<< +------- ++++++++ + line 2 +-line 3 ++left + line 4 ++++++++ +right +>>>>>>> +line 5 +", + 1, + 2 + ), + Some(vec![ + MergeHunk::Resolved(b"line 1\n".to_vec()), + MergeHunk::Conflict { + removes: vec![b"line 2\nline 3\nline 4\n".to_vec()], + adds: vec![b"line 2\nleft\nline 4\n".to_vec(), b"right\n".to_vec()] + }, + MergeHunk::Resolved(b"line 5\n".to_vec()) + ]) + ) +} + +#[test] +fn test_parse_conflict_multi_way() { + assert_eq!( + parse_conflict( + b"line 1 +<<<<<<< +------- ++++++++ + line 2 +-line 3 ++left + line 4 ++++++++ +right +------- ++++++++ + line 2 ++forward + line 3 + line 4 +>>>>>>> +line 5 +", + 2, + 3 + ), + Some(vec![ + MergeHunk::Resolved(b"line 1\n".to_vec()), + MergeHunk::Conflict { + removes: vec![ + b"line 2\nline 3\nline 4\n".to_vec(), + b"line 2\nline 3\nline 4\n".to_vec() + ], + adds: vec![ + b"line 2\nleft\nline 4\n".to_vec(), + b"right\n".to_vec(), + b"line 2\nforward\nline 3\nline 4\n".to_vec() + ] + }, + MergeHunk::Resolved(b"line 5\n".to_vec()) + ]) + ) +} + +#[test] +fn test_parse_conflict_different_wrong_arity() { + assert_eq!( + parse_conflict( + b"line 1 +<<<<<<< +------- ++++++++ + line 2 +-line 3 ++left + line 4 ++++++++ +right +>>>>>>> +line 5 +", + 2, + 3 + ), + None + ) +} + +#[test] +fn test_parse_conflict_malformed_marker() { + // The conflict marker is missing `-------` and `+++++++` (it needs at least one + // of them) + assert_eq!( + parse_conflict( + b"line 1 +<<<<<<< + line 2 +-line 3 ++left + line 4 ++++++++ +right +>>>>>>> +line 5 +", + 1, + 2 + ), + None + ) +} + +#[test] +fn test_parse_conflict_malformed_diff() { + // The diff part is invalid (missing space before "line 4") + assert_eq!( + parse_conflict( + b"line 1 +<<<<<<< +------- ++++++++ + line 2 +-line 3 ++left +line 4 ++++++++ +right +>>>>>>> +line 5 +", + 1, + 2 + ), + None + ) +}