conflicts: refactor conflict marker writing and parsing

These changes make the code a bit more readable, and they will make it easier to have conflict markers of different lengths in the next commit.
2025-01-18 18:27:38 +00:00 · 2024-11-24 15:20:18 -06:00 · 2024-11-24 15:20:18 -06:00 · 369e8ea057
commit 369e8ea057
parent 75ce7f6b7f
1 changed files with 171 additions and 117 deletions
--- a/lib/src/conflicts.rs
+++ b/lib/src/conflicts.rs
@ -28,8 +28,6 @@ use futures::StreamExt;
 use futures::TryStreamExt;
 use itertools::Itertools;
 use pollster::FutureExt;
-use regex::bytes::Regex;
-use regex::bytes::RegexBuilder;

 use crate::backend::BackendError;
 use crate::backend::BackendResult;
@ -51,49 +49,25 @@ use crate::merge::MergedTreeValue;
 use crate::repo_path::RepoPath;
 use crate::store::Store;

-const CONFLICT_START_LINE: &str = "<<<<<<<";
-const CONFLICT_END_LINE: &str = ">>>>>>>";
-const CONFLICT_DIFF_LINE: &str = "%%%%%%%";
-const CONFLICT_MINUS_LINE: &str = "-------";
-const CONFLICT_PLUS_LINE: &str = "+++++++";
-const CONFLICT_GIT_ANCESTOR_LINE: &str = "|||||||";
-const CONFLICT_GIT_SEPARATOR_LINE: &str = "=======";
-const CONFLICT_START_LINE_CHAR: u8 = CONFLICT_START_LINE.as_bytes()[0];
-const CONFLICT_END_LINE_CHAR: u8 = CONFLICT_END_LINE.as_bytes()[0];
-const CONFLICT_DIFF_LINE_CHAR: u8 = CONFLICT_DIFF_LINE.as_bytes()[0];
-const CONFLICT_MINUS_LINE_CHAR: u8 = CONFLICT_MINUS_LINE.as_bytes()[0];
-const CONFLICT_PLUS_LINE_CHAR: u8 = CONFLICT_PLUS_LINE.as_bytes()[0];
-const CONFLICT_GIT_ANCESTOR_LINE_CHAR: u8 = CONFLICT_GIT_ANCESTOR_LINE.as_bytes()[0];
-const CONFLICT_GIT_SEPARATOR_LINE_CHAR: u8 = CONFLICT_GIT_SEPARATOR_LINE.as_bytes()[0];
-
-/// A conflict marker is one of the separators, optionally followed by a space
-/// and some text.
-// TODO: All the `{7}` could be replaced with `{7,}` to allow longer
-// separators. This could be useful to make it possible to allow conflict
-// markers inside the text of the conflicts.
-static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy<Regex> = once_cell::sync::Lazy::new(|| {
-    RegexBuilder::new(r"^(<{7}|>{7}|%{7}|\-{7}|\+{7}|\|{7}|={7})( .*)?$")
-        .multi_line(true)
-        .build()
-        .unwrap()
-});
+/// Length of conflict markers.
+pub const CONFLICT_MARKER_LEN: usize = 7;

 fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
    for hunk in hunks {
        match hunk.kind {
            DiffHunkKind::Matching => {
                debug_assert!(hunk.contents.iter().all_equal());
-                for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') {
+                for line in hunk.contents[0].lines_with_terminator() {
                    file.write_all(b" ")?;
                    file.write_all(line)?;
                }
            }
            DiffHunkKind::Different => {
-                for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') {
+                for line in hunk.contents[0].lines_with_terminator() {
                    file.write_all(b"-")?;
                    file.write_all(line)?;
                }
-                for line in hunk.contents[1].split_inclusive(|b| *b == b'\n') {
+                for line in hunk.contents[1].lines_with_terminator() {
                    file.write_all(b"+")?;
                    file.write_all(line)?;
                }
@ -250,6 +224,77 @@ pub enum ConflictMarkerStyle {
    Git,
 }

+/// Characters which can be repeated to form a conflict marker line when
+/// materializing and parsing conflicts.
+#[derive(Clone, Copy, PartialEq, Eq)]
+#[repr(u8)]
+enum ConflictMarkerLineChar {
+    ConflictStart = b'<',
+    ConflictEnd = b'>',
+    Add = b'+',
+    Remove = b'-',
+    Diff = b'%',
+    GitAncestor = b'|',
+    GitSeparator = b'=',
+}
+
+impl ConflictMarkerLineChar {
+    /// Get the ASCII byte used for this conflict marker.
+    fn to_byte(self) -> u8 {
+        self as u8
+    }
+
+    /// Parse a byte to see if it corresponds with any kind of conflict marker.
+    fn parse_byte(byte: u8) -> Option<Self> {
+        match byte {
+            b'<' => Some(Self::ConflictStart),
+            b'>' => Some(Self::ConflictEnd),
+            b'+' => Some(Self::Add),
+            b'-' => Some(Self::Remove),
+            b'%' => Some(Self::Diff),
+            b'|' => Some(Self::GitAncestor),
+            b'=' => Some(Self::GitSeparator),
+            _ => None,
+        }
+    }
+}
+
+/// Write a conflict marker to an output file.
+fn write_conflict_marker(
+    output: &mut dyn Write,
+    kind: ConflictMarkerLineChar,
+    suffix_text: &str,
+) -> io::Result<()> {
+    let conflict_marker = BString::new(vec![kind.to_byte(); CONFLICT_MARKER_LEN]);
+
+    if suffix_text.is_empty() {
+        writeln!(output, "{conflict_marker}")
+    } else {
+        writeln!(output, "{conflict_marker} {suffix_text}")
+    }
+}
+
+/// Parse a conflict marker from a line of a file. The conflict marker must have
+/// the correct length (CONFLICT_MARKER_LEN).
+fn parse_conflict_marker(line: &[u8]) -> Option<ConflictMarkerLineChar> {
+    let first_byte = *line.first()?;
+    let kind = ConflictMarkerLineChar::parse_byte(first_byte)?;
+    let len = line.iter().take_while(|&&b| b == first_byte).count();
+
+    if len != CONFLICT_MARKER_LEN {
+        return None;
+    }
+
+    if let Some(next_byte) = line.get(len) {
+        // If there is a character after the marker, it must be ASCII whitespace
+        if !next_byte.is_ascii_whitespace() {
+            return None;
+        }
+    }
+
+    Some(kind)
+}
+
 pub fn materialize_merge_result<T: AsRef<[u8]>>(
    single_hunk: &Merge<T>,
    conflict_marker_style: ConflictMarkerStyle,
@ -323,14 +368,22 @@ fn materialize_git_style_conflict(
    conflict_info: &str,
    output: &mut dyn Write,
 ) -> io::Result<()> {
-    writeln!(output, "{CONFLICT_START_LINE} Side #1 ({conflict_info})")?;
+    write_conflict_marker(
+        output,
+        ConflictMarkerLineChar::ConflictStart,
+        &format!("Side #1 ({conflict_info})"),
+    )?;
    output.write_all(left)?;
-    writeln!(output, "{CONFLICT_GIT_ANCESTOR_LINE} Base")?;
+    write_conflict_marker(output, ConflictMarkerLineChar::GitAncestor, "Base")?;
    output.write_all(base)?;
    // VS Code doesn't seem to support any trailing text on the separator line
-    writeln!(output, "{CONFLICT_GIT_SEPARATOR_LINE}")?;
+    write_conflict_marker(output, ConflictMarkerLineChar::GitSeparator, "")?;
    output.write_all(right)?;
-    writeln!(output, "{CONFLICT_END_LINE} Side #2 ({conflict_info} ends)")?;
+    write_conflict_marker(
+        output,
+        ConflictMarkerLineChar::ConflictEnd,
+        &format!("Side #2 ({conflict_info} ends)"),
+    )?;

    Ok(())
 }
@ -343,17 +396,21 @@ fn materialize_jj_style_conflict(
 ) -> io::Result<()> {
    // Write a positive snapshot (side) of a conflict
    fn write_side(add_index: usize, data: &[u8], output: &mut dyn Write) -> io::Result<()> {
-        writeln!(
+        write_conflict_marker(
            output,
-            "{CONFLICT_PLUS_LINE} Contents of side #{}",
-            add_index + 1
+            ConflictMarkerLineChar::Add,
+            &format!("Contents of side #{}", add_index + 1),
        )?;
        output.write_all(data)
    }

    // Write a negative snapshot (base) of a conflict
    fn write_base(base_str: &str, data: &[u8], output: &mut dyn Write) -> io::Result<()> {
-        writeln!(output, "{CONFLICT_MINUS_LINE} Contents of {base_str}")?;
+        write_conflict_marker(
+            output,
+            ConflictMarkerLineChar::Remove,
+            &format!("Contents of {base_str}"),
+        )?;
        output.write_all(data)
    }

@ -364,15 +421,15 @@ fn materialize_jj_style_conflict(
        diff: &[DiffHunk],
        output: &mut dyn Write,
    ) -> io::Result<()> {
-        writeln!(
+        write_conflict_marker(
            output,
-            "{CONFLICT_DIFF_LINE} Changes from {base_str} to side #{}",
-            add_index + 1
+            ConflictMarkerLineChar::Diff,
+            &format!("Changes from {base_str} to side #{}", add_index + 1),
        )?;
        write_diff_hunks(diff, output)
    }

-    writeln!(output, "{CONFLICT_START_LINE} {conflict_info}")?;
+    write_conflict_marker(output, ConflictMarkerLineChar::ConflictStart, conflict_info)?;
    let mut add_index = 0;
    for (base_index, left) in hunk.removes().enumerate() {
        // The vast majority of conflicts one actually tries to resolve manually have 1
@ -422,7 +479,11 @@ fn materialize_jj_style_conflict(
    for (add_index, slice) in hunk.adds().enumerate().skip(add_index) {
        write_side(add_index, slice, output)?;
    }
-    writeln!(output, "{CONFLICT_END_LINE} {conflict_info} ends")?;
+    write_conflict_marker(
+        output,
+        ConflictMarkerLineChar::ConflictEnd,
+        &format!("{conflict_info} ends"),
+    )?;
    Ok(())
 }

@ -480,24 +541,27 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<BStrin
    let mut resolved_start = 0;
    let mut conflict_start = None;
    let mut conflict_start_len = 0;
-    for line in input.split_inclusive(|b| *b == b'\n') {
-        if is_conflict_marker_line(line) {
-            if line[0] == CONFLICT_START_LINE_CHAR {
+    for line in input.lines_with_terminator() {
+        match parse_conflict_marker(line) {
+            Some(ConflictMarkerLineChar::ConflictStart) => {
                conflict_start = Some(pos);
                conflict_start_len = line.len();
-            } else if conflict_start.is_some() && line[0] == CONFLICT_END_LINE_CHAR {
-                let conflict_body = &input[conflict_start.unwrap() + conflict_start_len..pos];
-                let hunk = parse_conflict_hunk(conflict_body);
-                if hunk.num_sides() == num_sides {
-                    let resolved_slice = &input[resolved_start..conflict_start.unwrap()];
-                    if !resolved_slice.is_empty() {
-                        hunks.push(Merge::resolved(BString::from(resolved_slice)));
-                    }
-                    hunks.push(hunk);
-                    resolved_start = pos + line.len();
-                }
-                conflict_start = None;
            }
+            Some(ConflictMarkerLineChar::ConflictEnd) => {
+                if let Some(conflict_start_index) = conflict_start.take() {
+                    let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
+                    let hunk = parse_conflict_hunk(conflict_body);
+                    if hunk.num_sides() == num_sides {
+                        let resolved_slice = &input[resolved_start..conflict_start_index];
+                        if !resolved_slice.is_empty() {
+                            hunks.push(Merge::resolved(BString::from(resolved_slice)));
+                        }
+                        hunks.push(hunk);
+                        resolved_start = pos + line.len();
+                    }
+                }
+            }
+            _ => {}
        }
        pos += line.len();
    }
@ -519,20 +583,21 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<BStrin
 /// line of the hunk.
 fn parse_conflict_hunk(input: &[u8]) -> Merge<BString> {
    // If the hunk starts with a conflict marker, find its first character
-    let initial_conflict_marker_char = input
+    let initial_conflict_marker = input
        .lines_with_terminator()
        .next()
-        .filter(|line| is_conflict_marker_line(line))
-        .map(|line| line[0]);
+        .and_then(parse_conflict_marker);

-    match initial_conflict_marker_char {
+    match initial_conflict_marker {
        // JJ-style conflicts must start with one of these 3 conflict marker lines
-        Some(CONFLICT_DIFF_LINE_CHAR | CONFLICT_MINUS_LINE_CHAR | CONFLICT_PLUS_LINE_CHAR) => {
-            parse_jj_style_conflict_hunk(input)
-        }
+        Some(
+            ConflictMarkerLineChar::Diff
+            | ConflictMarkerLineChar::Remove
+            | ConflictMarkerLineChar::Add,
+        ) => parse_jj_style_conflict_hunk(input),
        // Git-style conflicts either must not start with a conflict marker line, or must start with
        // the "|||||||" conflict marker line (if the first side was empty)
-        None | Some(CONFLICT_GIT_ANCESTOR_LINE_CHAR) => parse_git_style_conflict_hunk(input),
+        None | Some(ConflictMarkerLineChar::GitAncestor) => parse_git_style_conflict_hunk(input),
        // No other conflict markers are allowed at the start of a hunk
        Some(_) => Merge::resolved(BString::new(vec![])),
    }
@ -541,34 +606,32 @@ fn parse_conflict_hunk(input: &[u8]) -> Merge<BString> {
 fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
    enum State {
        Diff,
-        Minus,
-        Plus,
+        Remove,
+        Add,
        Unknown,
    }
    let mut state = State::Unknown;
    let mut removes = vec![];
    let mut adds = vec![];
    for line in input.lines_with_terminator() {
-        if is_conflict_marker_line(line) {
-            match line[0] {
-                CONFLICT_DIFF_LINE_CHAR => {
-                    state = State::Diff;
-                    removes.push(BString::new(vec![]));
-                    adds.push(BString::new(vec![]));
-                    continue;
-                }
-                CONFLICT_MINUS_LINE_CHAR => {
-                    state = State::Minus;
-                    removes.push(BString::new(vec![]));
-                    continue;
-                }
-                CONFLICT_PLUS_LINE_CHAR => {
-                    state = State::Plus;
-                    adds.push(BString::new(vec![]));
-                    continue;
-                }
-                _ => {}
+        match parse_conflict_marker(line) {
+            Some(ConflictMarkerLineChar::Diff) => {
+                state = State::Diff;
+                removes.push(BString::new(vec![]));
+                adds.push(BString::new(vec![]));
+                continue;
            }
+            Some(ConflictMarkerLineChar::Remove) => {
+                state = State::Remove;
+                removes.push(BString::new(vec![]));
+                continue;
+            }
+            Some(ConflictMarkerLineChar::Add) => {
+                state = State::Add;
+                adds.push(BString::new(vec![]));
+                continue;
+            }
+            _ => {}
        }
        match state {
            State::Diff => {
@ -590,10 +653,10 @@ fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
                    return Merge::resolved(BString::new(vec![]));
                }
            }
-            State::Minus => {
+            State::Remove => {
                removes.last_mut().unwrap().extend_from_slice(line);
            }
-            State::Plus => {
+            State::Add => {
                adds.last_mut().unwrap().extend_from_slice(line);
            }
            State::Unknown => {
@ -623,28 +686,26 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
    let mut base = BString::new(vec![]);
    let mut right = BString::new(vec![]);
    for line in input.lines_with_terminator() {
-        if is_conflict_marker_line(line) {
-            match line[0] {
-                CONFLICT_GIT_ANCESTOR_LINE_CHAR => {
-                    if state == State::Left {
-                        state = State::Base;
-                        continue;
-                    } else {
-                        // Base must come after left
-                        return Merge::resolved(BString::new(vec![]));
-                    }
+        match parse_conflict_marker(line) {
+            Some(ConflictMarkerLineChar::GitAncestor) => {
+                if state == State::Left {
+                    state = State::Base;
+                    continue;
+                } else {
+                    // Base must come after left
+                    return Merge::resolved(BString::new(vec![]));
                }
-                CONFLICT_GIT_SEPARATOR_LINE_CHAR => {
-                    if state == State::Base {
-                        state = State::Right;
-                        continue;
-                    } else {
-                        // Right must come after base
-                        return Merge::resolved(BString::new(vec![]));
-                    }
-                }
-                _ => {}
            }
+            Some(ConflictMarkerLineChar::GitSeparator) => {
+                if state == State::Base {
+                    state = State::Right;
+                    continue;
+                } else {
+                    // Right must come after base
+                    return Merge::resolved(BString::new(vec![]));
+                }
+            }
+            _ => {}
        }
        match state {
            State::Left => left.extend_from_slice(line),
@ -661,13 +722,6 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
    }
 }

-/// Check whether a line is a conflict marker. Removes trailing whitespace
-/// before checking against regex to ensure it parses CRLF endings correctly.
-fn is_conflict_marker_line(line: &[u8]) -> bool {
-    let line = line.trim_end_with(|ch| ch.is_ascii_whitespace());
-    CONFLICT_MARKER_REGEX.is_match_at(line, 0)
-}
-
 /// Parses conflict markers in `content` and returns an updated version of
 /// `file_ids` with the new contents. If no (valid) conflict markers remain, a
 /// single resolves `FileId` will be returned.