conflicts: replace ContentHunk with BString

ContentHunk is basically a thin wrapper around Vec<u8>. I think it gives
little benefit in terms of type safety.
This commit is contained in:
Yuya Nishihara 2024-08-26 01:16:01 +09:00
parent dd8ec3dece
commit 1ba581b37c
5 changed files with 57 additions and 81 deletions

View file

@ -2,6 +2,7 @@ use std::borrow::Cow;
use std::path::Path; use std::path::Path;
use std::sync::Arc; use std::sync::Arc;
use bstr::BString;
use futures::StreamExt; use futures::StreamExt;
use futures::TryFutureExt; use futures::TryFutureExt;
use futures::TryStreamExt; use futures::TryStreamExt;
@ -16,7 +17,6 @@ use jj_lib::conflicts::MaterializedTreeValue;
use jj_lib::diff::Diff; use jj_lib::diff::Diff;
use jj_lib::diff::DiffHunk; use jj_lib::diff::DiffHunk;
use jj_lib::files; use jj_lib::files;
use jj_lib::files::ContentHunk;
use jj_lib::files::MergeResult; use jj_lib::files::MergeResult;
use jj_lib::matchers::Matcher; use jj_lib::matchers::Matcher;
use jj_lib::merge::Merge; use jj_lib::merge::Merge;
@ -534,8 +534,8 @@ fn make_merge_sections(
) -> Result<Vec<scm_record::Section<'static>>, BuiltinToolError> { ) -> Result<Vec<scm_record::Section<'static>>, BuiltinToolError> {
let mut sections = Vec::new(); let mut sections = Vec::new();
match merge_result { match merge_result {
MergeResult::Resolved(ContentHunk(buf)) => { MergeResult::Resolved(buf) => {
let contents = buf_to_file_contents(None, buf); let contents = buf_to_file_contents(None, buf.into());
let section = match contents { let section = match contents {
FileContents::Absent => None, FileContents::Absent => None,
FileContents::Text { FileContents::Text {
@ -561,7 +561,7 @@ fn make_merge_sections(
MergeResult::Conflict(hunks) => { MergeResult::Conflict(hunks) => {
for hunk in hunks { for hunk in hunks {
let section = match hunk.into_resolved() { let section = match hunk.into_resolved() {
Ok(ContentHunk(contents)) => { Ok(contents) => {
let contents = std::str::from_utf8(&contents).map_err(|err| { let contents = std::str::from_utf8(&contents).map_err(|err| {
BuiltinToolError::DecodeUtf8 { BuiltinToolError::DecodeUtf8 {
source: err, source: err,
@ -587,7 +587,6 @@ fn make_merge_sections(
.cycle(), .cycle(),
) )
.map(|(contents, change_type)| -> Result<_, BuiltinToolError> { .map(|(contents, change_type)| -> Result<_, BuiltinToolError> {
let ContentHunk(contents) = contents;
let contents = std::str::from_utf8(contents).map_err(|err| { let contents = std::str::from_utf8(contents).map_err(|err| {
BuiltinToolError::DecodeUtf8 { BuiltinToolError::DecodeUtf8 {
source: err, source: err,
@ -613,7 +612,7 @@ fn make_merge_sections(
pub fn edit_merge_builtin( pub fn edit_merge_builtin(
tree: &MergedTree, tree: &MergedTree,
path: &RepoPath, path: &RepoPath,
content: Merge<ContentHunk>, content: Merge<BString>,
) -> Result<MergedTreeId, BuiltinToolError> { ) -> Result<MergedTreeId, BuiltinToolError> {
let merge_result = files::merge(&content); let merge_result = files::merge(&content);
let sections = make_merge_sections(merge_result)?; let sections = make_merge_sections(merge_result)?;

View file

@ -6,6 +6,7 @@ use std::process::ExitStatus;
use std::process::Stdio; use std::process::Stdio;
use std::sync::Arc; use std::sync::Arc;
use bstr::BString;
use itertools::Itertools; use itertools::Itertools;
use jj_lib::backend::FileId; use jj_lib::backend::FileId;
use jj_lib::backend::MergedTreeId; use jj_lib::backend::MergedTreeId;
@ -151,7 +152,7 @@ pub enum ExternalToolError {
pub fn run_mergetool_external( pub fn run_mergetool_external(
editor: &ExternalMergeTool, editor: &ExternalMergeTool,
file_merge: Merge<Option<FileId>>, file_merge: Merge<Option<FileId>>,
content: Merge<jj_lib::files::ContentHunk>, content: Merge<BString>,
repo_path: &RepoPath, repo_path: &RepoPath,
conflict: MergedTreeValue, conflict: MergedTreeValue,
tree: &MergedTree, tree: &MergedTree,
@ -166,9 +167,9 @@ pub fn run_mergetool_external(
}; };
assert_eq!(content.num_sides(), 2); assert_eq!(content.num_sides(), 2);
let files: HashMap<&str, &[u8]> = maplit::hashmap! { let files: HashMap<&str, &[u8]> = maplit::hashmap! {
"base" => content.get_remove(0).unwrap().0.as_slice(), "base" => content.get_remove(0).unwrap().as_slice(),
"left" => content.get_add(0).unwrap().0.as_slice(), "left" => content.get_add(0).unwrap().as_slice(),
"right" => content.get_add(1).unwrap().0.as_slice(), "right" => content.get_add(1).unwrap().as_slice(),
"output" => initial_output_content.as_slice(), "output" => initial_output_content.as_slice(),
}; };

View file

@ -18,6 +18,7 @@ use std::io::Read;
use std::io::Write; use std::io::Write;
use std::iter::zip; use std::iter::zip;
use bstr::BString;
use futures::stream::BoxStream; use futures::stream::BoxStream;
use futures::try_join; use futures::try_join;
use futures::Stream; use futures::Stream;
@ -39,7 +40,6 @@ use crate::copies::CopiesTreeDiffEntryPath;
use crate::diff::Diff; use crate::diff::Diff;
use crate::diff::DiffHunk; use crate::diff::DiffHunk;
use crate::files; use crate::files;
use crate::files::ContentHunk;
use crate::files::MergeResult; use crate::files::MergeResult;
use crate::merge::Merge; use crate::merge::Merge;
use crate::merge::MergeBuilder; use crate::merge::MergeBuilder;
@ -98,7 +98,7 @@ async fn get_file_contents(
store: &Store, store: &Store,
path: &RepoPath, path: &RepoPath,
term: &Option<FileId>, term: &Option<FileId>,
) -> BackendResult<ContentHunk> { ) -> BackendResult<BString> {
match term { match term {
Some(id) => { Some(id) => {
let mut content = vec![]; let mut content = vec![];
@ -111,11 +111,11 @@ async fn get_file_contents(
id: id.clone(), id: id.clone(),
source: err.into(), source: err.into(),
})?; })?;
Ok(ContentHunk(content)) Ok(BString::new(content))
} }
// If the conflict had removed the file on one side, we pretend that the file // If the conflict had removed the file on one side, we pretend that the file
// was empty there. // was empty there.
None => Ok(ContentHunk(vec![])), None => Ok(BString::new(vec![])),
} }
} }
@ -123,8 +123,8 @@ pub async fn extract_as_single_hunk(
merge: &Merge<Option<FileId>>, merge: &Merge<Option<FileId>>,
store: &Store, store: &Store,
path: &RepoPath, path: &RepoPath,
) -> BackendResult<Merge<ContentHunk>> { ) -> BackendResult<Merge<BString>> {
let builder: MergeBuilder<ContentHunk> = futures::stream::iter(merge.iter()) let builder: MergeBuilder<BString> = futures::stream::iter(merge.iter())
.then(|term| get_file_contents(store, path, term)) .then(|term| get_file_contents(store, path, term))
.try_collect() .try_collect()
.await?; .await?;
@ -232,13 +232,13 @@ async fn materialize_tree_value_no_access_denied(
} }
pub fn materialize_merge_result( pub fn materialize_merge_result(
single_hunk: &Merge<ContentHunk>, single_hunk: &Merge<BString>,
output: &mut dyn Write, output: &mut dyn Write,
) -> std::io::Result<()> { ) -> std::io::Result<()> {
let merge_result = files::merge(single_hunk); let merge_result = files::merge(single_hunk);
match merge_result { match merge_result {
MergeResult::Resolved(content) => { MergeResult::Resolved(content) => {
output.write_all(&content.0)?; output.write_all(&content)?;
} }
MergeResult::Conflict(hunks) => { MergeResult::Conflict(hunks) => {
let num_conflicts = hunks let num_conflicts = hunks
@ -248,7 +248,7 @@ pub fn materialize_merge_result(
let mut conflict_index = 0; let mut conflict_index = 0;
for hunk in hunks { for hunk in hunks {
if let Some(content) = hunk.as_resolved() { if let Some(content) = hunk.as_resolved() {
output.write_all(&content.0)?; output.write_all(content)?;
} else { } else {
conflict_index += 1; conflict_index += 1;
output.write_all(CONFLICT_START_LINE)?; output.write_all(CONFLICT_START_LINE)?;
@ -272,15 +272,15 @@ pub fn materialize_merge_result(
// terms as snapshots. // terms as snapshots.
output.write_all(CONFLICT_MINUS_LINE)?; output.write_all(CONFLICT_MINUS_LINE)?;
output.write_all(format!(" Contents of {base_str}\n").as_bytes())?; output.write_all(format!(" Contents of {base_str}\n").as_bytes())?;
output.write_all(&left.0)?; output.write_all(left)?;
continue; continue;
}; };
let diff1 = Diff::by_line([&left.0, &right1.0]).hunks().collect_vec(); let diff1 = Diff::by_line([&left, &right1]).hunks().collect_vec();
// Check if the diff against the next positive term is better. Since // Check if the diff against the next positive term is better. Since
// we want to preserve the order of the terms, we don't match against // we want to preserve the order of the terms, we don't match against
// any later positive terms. // any later positive terms.
if let Some(right2) = hunk.get_add(add_index + 1) { if let Some(right2) = hunk.get_add(add_index + 1) {
let diff2 = Diff::by_line([&left.0, &right2.0]).hunks().collect_vec(); let diff2 = Diff::by_line([&left, &right2]).hunks().collect_vec();
if diff_size(&diff2) < diff_size(&diff1) { if diff_size(&diff2) < diff_size(&diff1) {
// If the next positive term is a better match, emit // If the next positive term is a better match, emit
// the current positive term as a snapshot and the next // the current positive term as a snapshot and the next
@ -289,7 +289,7 @@ pub fn materialize_merge_result(
output.write_all( output.write_all(
format!(" Contents of side #{}\n", add_index + 1).as_bytes(), format!(" Contents of side #{}\n", add_index + 1).as_bytes(),
)?; )?;
output.write_all(&right1.0)?; output.write_all(right1)?;
output.write_all(CONFLICT_DIFF_LINE)?; output.write_all(CONFLICT_DIFF_LINE)?;
output.write_all( output.write_all(
format!( format!(
@ -319,7 +319,7 @@ pub fn materialize_merge_result(
output.write_all( output.write_all(
format!(" Contents of side #{}\n", add_index + 1).as_bytes(), format!(" Contents of side #{}\n", add_index + 1).as_bytes(),
)?; )?;
output.write_all(&slice.0)?; output.write_all(slice)?;
} }
output.write_all(CONFLICT_END_LINE)?; output.write_all(CONFLICT_END_LINE)?;
output.write_all( output.write_all(
@ -375,7 +375,7 @@ pub fn materialized_diff_stream<'a>(
/// invalid if they don't have the expected arity. /// invalid if they don't have the expected arity.
// TODO: "parse" is not usually the opposite of "materialize", so maybe we // TODO: "parse" is not usually the opposite of "materialize", so maybe we
// should rename them to "serialize" and "deserialize"? // should rename them to "serialize" and "deserialize"?
pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<ContentHunk>>> { pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<BString>>> {
if input.is_empty() { if input.is_empty() {
return None; return None;
} }
@ -395,7 +395,7 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<Conten
if hunk.num_sides() == num_sides { if hunk.num_sides() == num_sides {
let resolved_slice = &input[resolved_start..conflict_start.unwrap()]; let resolved_slice = &input[resolved_start..conflict_start.unwrap()];
if !resolved_slice.is_empty() { if !resolved_slice.is_empty() {
hunks.push(Merge::resolved(ContentHunk(resolved_slice.to_vec()))); hunks.push(Merge::resolved(BString::from(resolved_slice)));
} }
hunks.push(hunk); hunks.push(hunk);
resolved_start = pos + line.len(); resolved_start = pos + line.len();
@ -410,15 +410,13 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<Conten
None None
} else { } else {
if resolved_start < input.len() { if resolved_start < input.len() {
hunks.push(Merge::resolved(ContentHunk( hunks.push(Merge::resolved(BString::from(&input[resolved_start..])));
input[resolved_start..].to_vec(),
)));
} }
Some(hunks) Some(hunks)
} }
} }
fn parse_conflict_hunk(input: &[u8]) -> Merge<ContentHunk> { fn parse_conflict_hunk(input: &[u8]) -> Merge<BString> {
enum State { enum State {
Diff, Diff,
Minus, Minus,
@ -433,18 +431,18 @@ fn parse_conflict_hunk(input: &[u8]) -> Merge<ContentHunk> {
match line[0] { match line[0] {
CONFLICT_DIFF_LINE_CHAR => { CONFLICT_DIFF_LINE_CHAR => {
state = State::Diff; state = State::Diff;
removes.push(ContentHunk(vec![])); removes.push(BString::new(vec![]));
adds.push(ContentHunk(vec![])); adds.push(BString::new(vec![]));
continue; continue;
} }
CONFLICT_MINUS_LINE_CHAR => { CONFLICT_MINUS_LINE_CHAR => {
state = State::Minus; state = State::Minus;
removes.push(ContentHunk(vec![])); removes.push(BString::new(vec![]));
continue; continue;
} }
CONFLICT_PLUS_LINE_CHAR => { CONFLICT_PLUS_LINE_CHAR => {
state = State::Plus; state = State::Plus;
adds.push(ContentHunk(vec![])); adds.push(BString::new(vec![]));
continue; continue;
} }
_ => {} _ => {}
@ -453,26 +451,26 @@ fn parse_conflict_hunk(input: &[u8]) -> Merge<ContentHunk> {
match state { match state {
State::Diff => { State::Diff => {
if let Some(rest) = line.strip_prefix(b"-") { if let Some(rest) = line.strip_prefix(b"-") {
removes.last_mut().unwrap().0.extend_from_slice(rest); removes.last_mut().unwrap().extend_from_slice(rest);
} else if let Some(rest) = line.strip_prefix(b"+") { } else if let Some(rest) = line.strip_prefix(b"+") {
adds.last_mut().unwrap().0.extend_from_slice(rest); adds.last_mut().unwrap().extend_from_slice(rest);
} else if let Some(rest) = line.strip_prefix(b" ") { } else if let Some(rest) = line.strip_prefix(b" ") {
removes.last_mut().unwrap().0.extend_from_slice(rest); removes.last_mut().unwrap().extend_from_slice(rest);
adds.last_mut().unwrap().0.extend_from_slice(rest); adds.last_mut().unwrap().extend_from_slice(rest);
} else { } else {
// Doesn't look like a conflict // Doesn't look like a conflict
return Merge::resolved(ContentHunk(vec![])); return Merge::resolved(BString::new(vec![]));
} }
} }
State::Minus => { State::Minus => {
removes.last_mut().unwrap().0.extend_from_slice(line); removes.last_mut().unwrap().extend_from_slice(line);
} }
State::Plus => { State::Plus => {
adds.last_mut().unwrap().0.extend_from_slice(line); adds.last_mut().unwrap().extend_from_slice(line);
} }
State::Unknown => { State::Unknown => {
// Doesn't look like a conflict // Doesn't look like a conflict
return Merge::resolved(ContentHunk(vec![])); return Merge::resolved(BString::new(vec![]));
} }
} }
} }
@ -526,11 +524,11 @@ pub async fn update_from_content(
for hunk in hunks { for hunk in hunks {
if let Some(slice) = hunk.as_resolved() { if let Some(slice) = hunk.as_resolved() {
for content in contents.iter_mut() { for content in contents.iter_mut() {
content.extend_from_slice(&slice.0); content.extend_from_slice(slice);
} }
} else { } else {
for (content, slice) in zip(contents.iter_mut(), hunk.into_iter()) { for (content, slice) in zip(contents.iter_mut(), hunk.into_iter()) {
content.extend(slice.0); content.extend(Vec::from(slice));
} }
} }
} }

View file

@ -16,13 +16,11 @@
use std::borrow::Borrow; use std::borrow::Borrow;
use std::collections::VecDeque; use std::collections::VecDeque;
use std::fmt::Debug;
use std::fmt::Error;
use std::fmt::Formatter;
use std::iter; use std::iter;
use std::mem; use std::mem;
use bstr::BStr; use bstr::BStr;
use bstr::BString;
use crate::diff::Diff; use crate::diff::Diff;
use crate::diff::DiffHunk; use crate::diff::DiffHunk;
@ -184,26 +182,10 @@ where
} }
} }
// TODO: Maybe ContentHunk can be replaced with BString?
#[derive(PartialEq, Eq, Clone)]
pub struct ContentHunk(pub Vec<u8>);
impl AsRef<[u8]> for ContentHunk {
fn as_ref(&self) -> &[u8] {
self.0.as_ref()
}
}
impl Debug for ContentHunk {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
String::from_utf8_lossy(&self.0).fmt(f)
}
}
#[derive(PartialEq, Eq, Clone, Debug)] #[derive(PartialEq, Eq, Clone, Debug)]
pub enum MergeResult { pub enum MergeResult {
Resolved(ContentHunk), Resolved(BString),
Conflict(Vec<Merge<ContentHunk>>), Conflict(Vec<Merge<BString>>),
} }
pub fn merge<T: AsRef<[u8]>>(slices: &Merge<T>) -> MergeResult { pub fn merge<T: AsRef<[u8]>>(slices: &Merge<T>) -> MergeResult {
@ -216,28 +198,24 @@ pub fn merge<T: AsRef<[u8]>>(slices: &Merge<T>) -> MergeResult {
} }
fn merge_hunks(diff: &Diff, num_diffs: usize) -> MergeResult { fn merge_hunks(diff: &Diff, num_diffs: usize) -> MergeResult {
let mut resolved_hunk = ContentHunk(vec![]); let mut resolved_hunk = BString::new(vec![]);
let mut merge_hunks: Vec<Merge<ContentHunk>> = vec![]; let mut merge_hunks: Vec<Merge<BString>> = vec![];
for diff_hunk in diff.hunks() { for diff_hunk in diff.hunks() {
match diff_hunk { match diff_hunk {
DiffHunk::Matching(content) => { DiffHunk::Matching(content) => {
resolved_hunk.0.extend_from_slice(content); resolved_hunk.extend_from_slice(content);
} }
DiffHunk::Different(parts) => { DiffHunk::Different(parts) => {
if let Some(resolved) = trivial_merge(&parts[..num_diffs], &parts[num_diffs..]) { if let Some(resolved) = trivial_merge(&parts[..num_diffs], &parts[num_diffs..]) {
resolved_hunk.0.extend_from_slice(resolved); resolved_hunk.extend_from_slice(resolved);
} else { } else {
if !resolved_hunk.0.is_empty() { if !resolved_hunk.is_empty() {
merge_hunks.push(Merge::resolved(resolved_hunk)); merge_hunks.push(Merge::resolved(resolved_hunk));
resolved_hunk = ContentHunk(vec![]); resolved_hunk = BString::new(vec![]);
} }
merge_hunks.push(Merge::from_removes_adds( merge_hunks.push(Merge::from_removes_adds(
parts[..num_diffs] parts[..num_diffs].iter().copied().map(BString::from),
.iter() parts[num_diffs..].iter().copied().map(BString::from),
.map(|part| ContentHunk(part.to_vec())),
parts[num_diffs..]
.iter()
.map(|part| ContentHunk(part.to_vec())),
)); ));
} }
} }
@ -247,7 +225,7 @@ fn merge_hunks(diff: &Diff, num_diffs: usize) -> MergeResult {
if merge_hunks.is_empty() { if merge_hunks.is_empty() {
MergeResult::Resolved(resolved_hunk) MergeResult::Resolved(resolved_hunk)
} else { } else {
if !resolved_hunk.0.is_empty() { if !resolved_hunk.is_empty() {
merge_hunks.push(Merge::resolved(resolved_hunk)); merge_hunks.push(Merge::resolved(resolved_hunk));
} }
MergeResult::Conflict(merge_hunks) MergeResult::Conflict(merge_hunks)
@ -258,8 +236,8 @@ fn merge_hunks(diff: &Diff, num_diffs: usize) -> MergeResult {
mod tests { mod tests {
use super::*; use super::*;
fn hunk(data: &[u8]) -> ContentHunk { fn hunk(data: &[u8]) -> BString {
ContentHunk(data.to_vec()) data.into()
} }
fn merge(removes: &[&[u8]], adds: &[&[u8]]) -> MergeResult { fn merge(removes: &[&[u8]], adds: &[&[u8]]) -> MergeResult {

View file

@ -471,7 +471,7 @@ pub fn try_resolve_file_conflict(
let merge_result = files::merge(&contents); let merge_result = files::merge(&contents);
match merge_result { match merge_result {
MergeResult::Resolved(merged_content) => { MergeResult::Resolved(merged_content) => {
let id = store.write_file(filename, &mut merged_content.0.as_slice())?; let id = store.write_file(filename, &mut merged_content.as_slice())?;
Ok(Some(TreeValue::File { id, executable })) Ok(Some(TreeValue::File { id, executable }))
} }
MergeResult::Conflict(_) => Ok(None), MergeResult::Conflict(_) => Ok(None),