conflicts: refactor conflict marker writing and parsing

These changes make the code a bit more readable, and they will make it
easier to have conflict markers of different lengths in the next commit.
This commit is contained in:
Scott Taylor 2024-11-24 15:20:18 -06:00 committed by Scott Taylor
parent 75ce7f6b7f
commit 369e8ea057

View file

@ -28,8 +28,6 @@ use futures::StreamExt;
use futures::TryStreamExt;
use itertools::Itertools;
use pollster::FutureExt;
use regex::bytes::Regex;
use regex::bytes::RegexBuilder;
use crate::backend::BackendError;
use crate::backend::BackendResult;
@ -51,49 +49,25 @@ use crate::merge::MergedTreeValue;
use crate::repo_path::RepoPath;
use crate::store::Store;
// Marker strings that begin each kind of conflict marker line. Every marker is
// a single ASCII character repeated 7 times.
/// Marker for the first line of a conflict.
const CONFLICT_START_LINE: &str = "<<<<<<<";
/// Marker for the last line of a conflict.
const CONFLICT_END_LINE: &str = ">>>>>>>";
/// Marker introducing a diff section ("Changes from ... to side #N").
const CONFLICT_DIFF_LINE: &str = "%%%%%%%";
/// Marker introducing a negative snapshot ("Contents of <base>").
const CONFLICT_MINUS_LINE: &str = "-------";
/// Marker introducing a positive snapshot ("Contents of side #N").
const CONFLICT_PLUS_LINE: &str = "+++++++";
/// Git-style marker introducing the base contents.
const CONFLICT_GIT_ANCESTOR_LINE: &str = "|||||||";
/// Git-style marker separating the base contents from the second side.
const CONFLICT_GIT_SEPARATOR_LINE: &str = "=======";
// First byte of each marker string above, used to tell marker lines apart
// while parsing.
const CONFLICT_START_LINE_CHAR: u8 = CONFLICT_START_LINE.as_bytes()[0];
const CONFLICT_END_LINE_CHAR: u8 = CONFLICT_END_LINE.as_bytes()[0];
const CONFLICT_DIFF_LINE_CHAR: u8 = CONFLICT_DIFF_LINE.as_bytes()[0];
const CONFLICT_MINUS_LINE_CHAR: u8 = CONFLICT_MINUS_LINE.as_bytes()[0];
const CONFLICT_PLUS_LINE_CHAR: u8 = CONFLICT_PLUS_LINE.as_bytes()[0];
const CONFLICT_GIT_ANCESTOR_LINE_CHAR: u8 = CONFLICT_GIT_ANCESTOR_LINE.as_bytes()[0];
const CONFLICT_GIT_SEPARATOR_LINE_CHAR: u8 = CONFLICT_GIT_SEPARATOR_LINE.as_bytes()[0];
/// A conflict marker is one of the separators, optionally followed by a space
/// and some text.
///
/// The regex is built lazily, the first time the static is dereferenced.
// TODO: All the `{7}` could be replaced with `{7,}` to allow longer
// separators. This could be useful to make it possible to allow conflict
// markers inside the text of the conflicts.
static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy<Regex> = once_cell::sync::Lazy::new(|| {
RegexBuilder::new(r"^(<{7}|>{7}|%{7}|\-{7}|\+{7}|\|{7}|={7})( .*)?$")
// Multi-line mode: `^` and `$` match at line boundaries, not only at the
// start and end of the input
.multi_line(true)
.build()
// The pattern is a fixed literal, so failing to compile it would be a bug
.unwrap()
});
/// Length of conflict markers, i.e. the number of times the marker character
/// is repeated at the start of a conflict marker line.
pub const CONFLICT_MARKER_LEN: usize = 7;
fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
for hunk in hunks {
match hunk.kind {
DiffHunkKind::Matching => {
debug_assert!(hunk.contents.iter().all_equal());
for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') {
for line in hunk.contents[0].lines_with_terminator() {
file.write_all(b" ")?;
file.write_all(line)?;
}
}
DiffHunkKind::Different => {
for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') {
for line in hunk.contents[0].lines_with_terminator() {
file.write_all(b"-")?;
file.write_all(line)?;
}
for line in hunk.contents[1].split_inclusive(|b| *b == b'\n') {
for line in hunk.contents[1].lines_with_terminator() {
file.write_all(b"+")?;
file.write_all(line)?;
}
@ -250,6 +224,77 @@ pub enum ConflictMarkerStyle {
Git,
}
/// Characters which can be repeated to form a conflict marker line when
/// materializing and parsing conflicts.
///
/// The discriminant of each variant is the ASCII byte of its marker
/// character, so a variant can be converted to that byte with `as u8`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
enum ConflictMarkerLineChar {
    /// `<`: first line of a conflict.
    ConflictStart = b'<',
    /// `>`: last line of a conflict.
    ConflictEnd = b'>',
    /// `+`: introduces a positive snapshot ("Contents of side #N").
    Add = b'+',
    /// `-`: introduces a negative snapshot ("Contents of <base>").
    Remove = b'-',
    /// `%`: introduces a diff ("Changes from <base> to side #N").
    Diff = b'%',
    /// `|`: introduces the base contents in a Git-style conflict.
    GitAncestor = b'|',
    /// `=`: separates the base from the second side in a Git-style conflict.
    GitSeparator = b'=',
}
impl ConflictMarkerLineChar {
/// Get the ASCII byte used for this conflict marker.
fn to_byte(self) -> u8 {
self as u8
}
/// Parse a byte to see if it corresponds with any kind of conflict marker.
fn parse_byte(byte: u8) -> Option<Self> {
match byte {
b'<' => Some(Self::ConflictStart),
b'>' => Some(Self::ConflictEnd),
b'+' => Some(Self::Add),
b'-' => Some(Self::Remove),
b'%' => Some(Self::Diff),
b'|' => Some(Self::GitAncestor),
b'=' => Some(Self::GitSeparator),
_ => None,
}
}
}
/// Emit a single conflict marker line to `output`.
///
/// The line is the byte for `kind` repeated `CONFLICT_MARKER_LEN` times,
/// followed by a space and `suffix_text` when `suffix_text` is non-empty, and
/// is always terminated by a newline. Any error from the writer is returned.
fn write_conflict_marker(
    output: &mut dyn Write,
    kind: ConflictMarkerLineChar,
    suffix_text: &str,
) -> io::Result<()> {
    let marker_bytes = vec![kind.to_byte(); CONFLICT_MARKER_LEN];
    let marker = BString::new(marker_bytes);
    match suffix_text {
        // An empty suffix produces a bare marker with no trailing space
        "" => writeln!(output, "{marker}"),
        text => writeln!(output, "{marker} {text}"),
    }
}
/// Recognize a conflict marker at the start of `line`.
///
/// Returns the kind of marker if `line` begins with exactly
/// `CONFLICT_MARKER_LEN` repetitions of a marker character and that run is
/// either the whole line or is followed by an ASCII whitespace byte (which
/// covers a space before trailing text as well as `\n` and `\r\n` line
/// endings). Returns `None` for anything else, including an empty line.
fn parse_conflict_marker(line: &[u8]) -> Option<ConflictMarkerLineChar> {
    let (&marker_byte, _) = line.split_first()?;
    let kind = ConflictMarkerLineChar::parse_byte(marker_byte)?;
    // Length of the leading run of the marker byte
    let run_len = line
        .iter()
        .position(|&b| b != marker_byte)
        .unwrap_or(line.len());
    // Whatever follows the run must start with ASCII whitespace, if present
    let properly_terminated = line[run_len..]
        .first()
        .map_or(true, |b| b.is_ascii_whitespace());
    (run_len == CONFLICT_MARKER_LEN && properly_terminated).then_some(kind)
}
pub fn materialize_merge_result<T: AsRef<[u8]>>(
single_hunk: &Merge<T>,
conflict_marker_style: ConflictMarkerStyle,
@ -323,14 +368,22 @@ fn materialize_git_style_conflict(
conflict_info: &str,
output: &mut dyn Write,
) -> io::Result<()> {
writeln!(output, "{CONFLICT_START_LINE} Side #1 ({conflict_info})")?;
write_conflict_marker(
output,
ConflictMarkerLineChar::ConflictStart,
&format!("Side #1 ({conflict_info})"),
)?;
output.write_all(left)?;
writeln!(output, "{CONFLICT_GIT_ANCESTOR_LINE} Base")?;
write_conflict_marker(output, ConflictMarkerLineChar::GitAncestor, "Base")?;
output.write_all(base)?;
// VS Code doesn't seem to support any trailing text on the separator line
writeln!(output, "{CONFLICT_GIT_SEPARATOR_LINE}")?;
write_conflict_marker(output, ConflictMarkerLineChar::GitSeparator, "")?;
output.write_all(right)?;
writeln!(output, "{CONFLICT_END_LINE} Side #2 ({conflict_info} ends)")?;
write_conflict_marker(
output,
ConflictMarkerLineChar::ConflictEnd,
&format!("Side #2 ({conflict_info} ends)"),
)?;
Ok(())
}
@ -343,17 +396,21 @@ fn materialize_jj_style_conflict(
) -> io::Result<()> {
// Write a positive snapshot (side) of a conflict
fn write_side(add_index: usize, data: &[u8], output: &mut dyn Write) -> io::Result<()> {
writeln!(
write_conflict_marker(
output,
"{CONFLICT_PLUS_LINE} Contents of side #{}",
add_index + 1
ConflictMarkerLineChar::Add,
&format!("Contents of side #{}", add_index + 1),
)?;
output.write_all(data)
}
// Write a negative snapshot (base) of a conflict
fn write_base(base_str: &str, data: &[u8], output: &mut dyn Write) -> io::Result<()> {
writeln!(output, "{CONFLICT_MINUS_LINE} Contents of {base_str}")?;
write_conflict_marker(
output,
ConflictMarkerLineChar::Remove,
&format!("Contents of {base_str}"),
)?;
output.write_all(data)
}
@ -364,15 +421,15 @@ fn materialize_jj_style_conflict(
diff: &[DiffHunk],
output: &mut dyn Write,
) -> io::Result<()> {
writeln!(
write_conflict_marker(
output,
"{CONFLICT_DIFF_LINE} Changes from {base_str} to side #{}",
add_index + 1
ConflictMarkerLineChar::Diff,
&format!("Changes from {base_str} to side #{}", add_index + 1),
)?;
write_diff_hunks(diff, output)
}
writeln!(output, "{CONFLICT_START_LINE} {conflict_info}")?;
write_conflict_marker(output, ConflictMarkerLineChar::ConflictStart, conflict_info)?;
let mut add_index = 0;
for (base_index, left) in hunk.removes().enumerate() {
// The vast majority of conflicts one actually tries to resolve manually have 1
@ -422,7 +479,11 @@ fn materialize_jj_style_conflict(
for (add_index, slice) in hunk.adds().enumerate().skip(add_index) {
write_side(add_index, slice, output)?;
}
writeln!(output, "{CONFLICT_END_LINE} {conflict_info} ends")?;
write_conflict_marker(
output,
ConflictMarkerLineChar::ConflictEnd,
&format!("{conflict_info} ends"),
)?;
Ok(())
}
@ -480,24 +541,27 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<BStrin
let mut resolved_start = 0;
let mut conflict_start = None;
let mut conflict_start_len = 0;
for line in input.split_inclusive(|b| *b == b'\n') {
if is_conflict_marker_line(line) {
if line[0] == CONFLICT_START_LINE_CHAR {
for line in input.lines_with_terminator() {
match parse_conflict_marker(line) {
Some(ConflictMarkerLineChar::ConflictStart) => {
conflict_start = Some(pos);
conflict_start_len = line.len();
} else if conflict_start.is_some() && line[0] == CONFLICT_END_LINE_CHAR {
let conflict_body = &input[conflict_start.unwrap() + conflict_start_len..pos];
let hunk = parse_conflict_hunk(conflict_body);
if hunk.num_sides() == num_sides {
let resolved_slice = &input[resolved_start..conflict_start.unwrap()];
if !resolved_slice.is_empty() {
hunks.push(Merge::resolved(BString::from(resolved_slice)));
}
hunks.push(hunk);
resolved_start = pos + line.len();
}
conflict_start = None;
}
Some(ConflictMarkerLineChar::ConflictEnd) => {
if let Some(conflict_start_index) = conflict_start.take() {
let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
let hunk = parse_conflict_hunk(conflict_body);
if hunk.num_sides() == num_sides {
let resolved_slice = &input[resolved_start..conflict_start_index];
if !resolved_slice.is_empty() {
hunks.push(Merge::resolved(BString::from(resolved_slice)));
}
hunks.push(hunk);
resolved_start = pos + line.len();
}
}
}
_ => {}
}
pos += line.len();
}
@ -519,20 +583,21 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<BStrin
/// line of the hunk.
fn parse_conflict_hunk(input: &[u8]) -> Merge<BString> {
// If the hunk starts with a conflict marker, find its first character
let initial_conflict_marker_char = input
let initial_conflict_marker = input
.lines_with_terminator()
.next()
.filter(|line| is_conflict_marker_line(line))
.map(|line| line[0]);
.and_then(parse_conflict_marker);
match initial_conflict_marker_char {
match initial_conflict_marker {
// JJ-style conflicts must start with one of these 3 conflict marker lines
Some(CONFLICT_DIFF_LINE_CHAR | CONFLICT_MINUS_LINE_CHAR | CONFLICT_PLUS_LINE_CHAR) => {
parse_jj_style_conflict_hunk(input)
}
Some(
ConflictMarkerLineChar::Diff
| ConflictMarkerLineChar::Remove
| ConflictMarkerLineChar::Add,
) => parse_jj_style_conflict_hunk(input),
// Git-style conflicts either must not start with a conflict marker line, or must start with
// the "|||||||" conflict marker line (if the first side was empty)
None | Some(CONFLICT_GIT_ANCESTOR_LINE_CHAR) => parse_git_style_conflict_hunk(input),
None | Some(ConflictMarkerLineChar::GitAncestor) => parse_git_style_conflict_hunk(input),
// No other conflict markers are allowed at the start of a hunk
Some(_) => Merge::resolved(BString::new(vec![])),
}
@ -541,34 +606,32 @@ fn parse_conflict_hunk(input: &[u8]) -> Merge<BString> {
fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
enum State {
Diff,
Minus,
Plus,
Remove,
Add,
Unknown,
}
let mut state = State::Unknown;
let mut removes = vec![];
let mut adds = vec![];
for line in input.lines_with_terminator() {
if is_conflict_marker_line(line) {
match line[0] {
CONFLICT_DIFF_LINE_CHAR => {
state = State::Diff;
removes.push(BString::new(vec![]));
adds.push(BString::new(vec![]));
continue;
}
CONFLICT_MINUS_LINE_CHAR => {
state = State::Minus;
removes.push(BString::new(vec![]));
continue;
}
CONFLICT_PLUS_LINE_CHAR => {
state = State::Plus;
adds.push(BString::new(vec![]));
continue;
}
_ => {}
match parse_conflict_marker(line) {
Some(ConflictMarkerLineChar::Diff) => {
state = State::Diff;
removes.push(BString::new(vec![]));
adds.push(BString::new(vec![]));
continue;
}
Some(ConflictMarkerLineChar::Remove) => {
state = State::Remove;
removes.push(BString::new(vec![]));
continue;
}
Some(ConflictMarkerLineChar::Add) => {
state = State::Add;
adds.push(BString::new(vec![]));
continue;
}
_ => {}
}
match state {
State::Diff => {
@ -590,10 +653,10 @@ fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
return Merge::resolved(BString::new(vec![]));
}
}
State::Minus => {
State::Remove => {
removes.last_mut().unwrap().extend_from_slice(line);
}
State::Plus => {
State::Add => {
adds.last_mut().unwrap().extend_from_slice(line);
}
State::Unknown => {
@ -623,28 +686,26 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
let mut base = BString::new(vec![]);
let mut right = BString::new(vec![]);
for line in input.lines_with_terminator() {
if is_conflict_marker_line(line) {
match line[0] {
CONFLICT_GIT_ANCESTOR_LINE_CHAR => {
if state == State::Left {
state = State::Base;
continue;
} else {
// Base must come after left
return Merge::resolved(BString::new(vec![]));
}
match parse_conflict_marker(line) {
Some(ConflictMarkerLineChar::GitAncestor) => {
if state == State::Left {
state = State::Base;
continue;
} else {
// Base must come after left
return Merge::resolved(BString::new(vec![]));
}
CONFLICT_GIT_SEPARATOR_LINE_CHAR => {
if state == State::Base {
state = State::Right;
continue;
} else {
// Right must come after base
return Merge::resolved(BString::new(vec![]));
}
}
_ => {}
}
Some(ConflictMarkerLineChar::GitSeparator) => {
if state == State::Base {
state = State::Right;
continue;
} else {
// Right must come after base
return Merge::resolved(BString::new(vec![]));
}
}
_ => {}
}
match state {
State::Left => left.extend_from_slice(line),
@ -661,13 +722,6 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
}
}
/// Returns `true` if `line` is a conflict marker line.
///
/// Trailing ASCII whitespace is stripped before the line is matched against
/// `CONFLICT_MARKER_REGEX`, so that lines with CRLF endings are recognized
/// correctly.
fn is_conflict_marker_line(line: &[u8]) -> bool {
    let trimmed = line.trim_end_with(|ch| ch.is_ascii_whitespace());
    CONFLICT_MARKER_REGEX.is_match(trimmed)
}
/// Parses conflict markers in `content` and returns an updated version of
/// `file_ids` with the new contents. If no (valid) conflict markers remain, a
/// single resolves `FileId` will be returned.