copy-tracking: adjust backend signature

- use a single commit instead of an array of them.  This simplifies the
  implementation.  A higher level api can wrap this when an array of
  commits is desired and those semantics are figured out.
- since this API is directly 1-1 on parents, there are no conflicts
- if we introduce a higher level API that handles lists of commits, we
  may need to restore the conflict/resolved distinction, but for now
  simplify
This commit is contained in:
Matt Kulukundis 2024-07-15 16:55:44 -04:00 committed by Matt Fowles Kulukundis
parent c9e147c425
commit e667a2b403
9 changed files with 72 additions and 130 deletions

View file

@ -178,10 +178,10 @@ impl Backend for JitBackend {
fn get_copy_records(
&self,
paths: &[RepoPathBuf],
roots: &[CommitId],
heads: &[CommitId],
root: &CommitId,
head: &CommitId,
) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
self.inner.get_copy_records(paths, roots, heads)
self.inner.get_copy_records(paths, root, head)
}
fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {

View file

@ -12,21 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#![allow(unused, dead_code)]
use std::fmt::Debug;
use std::io::Write as _;
use std::path::{Path, PathBuf};
use futures::executor::block_on_stream;
use futures::StreamExt;
use jj_lib::backend::{Backend, CopyRecord, CopySource, CopySources};
use jj_lib::default_index::{AsCompositeIndex as _, DefaultIndexStore};
use jj_lib::op_walk;
use jj_lib::repo_path::{RepoPath, RepoPathBuf};
use jj_lib::backend::{Backend, CopyRecord};
use crate::cli_util::{CommandHelper, RevisionArg};
use crate::command_error::{internal_error, user_error, CommandError};
use crate::command_error::CommandError;
use crate::ui::Ui;
/// Rebuild commit index
@ -48,32 +41,18 @@ pub fn cmd_debug_copy_detection(
return Ok(());
};
let commit = ws.resolve_single_rev(&args.revision)?;
let tree = commit.tree()?;
let paths: Vec<RepoPathBuf> = tree.entries().map(|(path, _)| path).collect();
let commits = [commit.id().clone()];
let parents = commit.parent_ids();
for copy_record in
block_on_stream(git.get_copy_records(&paths, parents, &commits)?).filter_map(|r| r.ok())
{
match copy_record.sources {
CopySources::Resolved(CopySource { path, .. }) => {
write!(ui.stdout(), "{}", path.as_internal_file_string());
}
CopySources::Conflict(conflicting) => {
let mut sorted: Vec<_> = conflicting
.iter()
.map(|s| s.path.as_internal_file_string())
.collect();
sorted.sort();
write!(ui.stdout(), "{{ {} }}", sorted.join(", "));
}
for parent_id in commit.parent_ids() {
for CopyRecord { target, source, .. } in
block_on_stream(git.get_copy_records(&[], parent_id, commit.id())?)
.filter_map(|r| r.ok())
{
writeln!(
ui.stdout(),
"{} -> {}",
source.as_internal_file_string(),
target.as_internal_file_string()
)?;
}
writeln!(
ui.stdout(),
" -> {}",
copy_record.target.as_internal_file_string()
);
}
Ok(())
}

View file

@ -15,7 +15,7 @@
#![allow(missing_docs)]
use std::any::Any;
use std::collections::{BTreeMap, HashSet};
use std::collections::BTreeMap;
use std::fmt::Debug;
use std::io::Read;
use std::time::SystemTime;
@ -153,18 +153,21 @@ pub struct Conflict {
pub adds: Vec<ConflictTerm>,
}
/// An individual copy source.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct CopySource {
/// An individual copy event, from file A -> B.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct CopyRecord {
/// The destination of the copy, B.
pub target: RepoPathBuf,
/// The CommitId where the copy took place.
pub target_commit: CommitId,
/// The source path a target was copied from.
///
/// It is not required that the source path is different than the target
/// path. A custom backend may choose to represent 'rollbacks' as copies
/// from a file unto itself, from a specific prior commit.
pub path: RepoPathBuf,
pub file: FileId,
/// The source commit the target was copied from. If not specified, then the
/// parent of the target commit is the source commit. Backends may use this
pub source: RepoPathBuf,
pub source_file: FileId,
/// The source commit the target was copied from. Backends may use this
/// field to implement 'integration' logic, where a source may be
/// periodically merged into a target, similar to a branch, but the
/// branching occurs at the file level rather than the repository level. It
@ -172,26 +175,9 @@ pub struct CopySource {
/// commit should avoid copy propagation on rebasing, which is desirable
/// for 'fork' style copies.
///
/// If specified, it is required that the commit id is an ancestor of the
/// commit with which this copy source is associated.
pub commit: Option<CommitId>,
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum CopySources {
Resolved(CopySource),
Conflict(HashSet<CopySource>),
}
/// An individual copy event, from file A -> B.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct CopyRecord {
/// The destination of the copy, B.
pub target: RepoPathBuf,
/// The CommitId where the copy took place.
pub id: CommitId,
/// The source of the copy, A.
pub sources: CopySources,
/// It is required that the commit id is an ancestor of the commit with
/// which this copy source is associated.
pub source_commit: CommitId,
}
/// Error that may occur during backend initialization.
@ -458,7 +444,8 @@ pub trait Backend: Send + Sync + Debug {
sign_with: Option<&mut SigningFn>,
) -> BackendResult<(CommitId, Commit)>;
/// Get all copy records for `paths` in the dag range `roots..heads`.
/// Get copy records for the dag range `root..head`. If `paths` is None
/// include all paths, otherwise restrict to only `paths`.
///
/// The exact order these are returned is unspecified, but it is guaranteed
/// to be reverse-topological. That is, for any two copy records with
@ -472,8 +459,8 @@ pub trait Backend: Send + Sync + Debug {
fn get_copy_records(
&self,
paths: &[RepoPathBuf],
roots: &[CommitId],
heads: &[CommitId],
root: &CommitId,
head: &CommitId,
) -> BackendResult<BoxStream<BackendResult<CopyRecord>>>;
/// Perform garbage collection.

View file

@ -26,7 +26,7 @@ use std::{fs, io, str};
use async_trait::async_trait;
use futures::stream::BoxStream;
use gix::bstr::{BStr, BString};
use gix::bstr::BString;
use gix::objs::{CommitRef, CommitRefIter, WriteTo};
use itertools::Itertools;
use pollster::FutureExt;
@ -36,9 +36,9 @@ use thiserror::Error;
use crate::backend::{
make_root_commit, Backend, BackendError, BackendInitError, BackendLoadError, BackendResult,
ChangeId, Commit, CommitId, Conflict, ConflictId, ConflictTerm, CopyRecord, CopySource,
CopySources, FileId, MergedTreeId, MillisSinceEpoch, SecureSig, Signature, SigningFn,
SymlinkId, Timestamp, Tree, TreeId, TreeValue,
ChangeId, Commit, CommitId, Conflict, ConflictId, ConflictTerm, CopyRecord, FileId,
MergedTreeId, MillisSinceEpoch, SecureSig, Signature, SigningFn, SymlinkId, Timestamp, Tree,
TreeId, TreeValue,
};
use crate::file_util::{IoResultExt as _, PathError};
use crate::index::Index;
@ -1256,28 +1256,13 @@ impl Backend for GitBackend {
fn get_copy_records(
&self,
paths: &[RepoPathBuf],
roots: &[CommitId],
heads: &[CommitId],
root_id: &CommitId,
head_id: &CommitId,
) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
if paths.is_empty() || roots.is_empty() || heads.is_empty() {
return Ok(Box::pin(futures::stream::empty()));
}
if roots.len() != 1 || heads.len() != 1 {
return Err(BackendError::Unsupported(
"GitBackend does not support getting copy records for multiple commits".into(),
));
}
let root_id = &roots[0];
let head_id = &heads[0];
let repo = self.git_repo();
let root_tree = self.read_tree_for_commit(&repo, root_id)?;
let head_tree = self.read_tree_for_commit(&repo, head_id)?;
let paths: HashSet<&BStr> = paths
.iter()
.map(|p| BStr::new(p.as_internal_file_string()))
.collect();
let change_to_copy_record =
|change: gix::object::tree::diff::Change| -> BackendResult<Option<CopyRecord>> {
let gix::object::tree::diff::Change {
@ -1293,22 +1278,23 @@ impl Backend for GitBackend {
else {
return Ok(None);
};
if !paths.contains(dest_location) {
return Ok(None);
}
let source = str::from_utf8(source_location)
.map_err(|err| to_invalid_utf8_err(err, root_id))?;
let dest = str::from_utf8(dest_location)
.map_err(|err| to_invalid_utf8_err(err, head_id))?;
let target = RepoPathBuf::from_internal_string(dest);
if !paths.is_empty() && !paths.contains(&target) {
return Ok(None);
}
Ok(Some(CopyRecord {
target: RepoPathBuf::from_internal_string(dest),
id: head_id.clone(),
sources: CopySources::Resolved(CopySource {
path: RepoPathBuf::from_internal_string(source),
file: FileId::from_bytes(source_id.as_bytes()),
commit: Some(root_id.clone()),
}),
target,
target_commit: head_id.clone(),
source: RepoPathBuf::from_internal_string(source),
source_file: FileId::from_bytes(source_id.as_bytes()),
source_commit: root_id.clone(),
}))
};

View file

@ -305,8 +305,8 @@ impl Backend for LocalBackend {
fn get_copy_records(
&self,
_paths: &[RepoPathBuf],
_roots: &[CommitId],
_heads: &[CommitId],
_root: &CommitId,
_head: &CommitId,
) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
Err(BackendError::Unsupported("get_copy_records".into()))
}

View file

@ -171,10 +171,10 @@ impl Backend for SecretBackend {
fn get_copy_records(
&self,
paths: &[RepoPathBuf],
roots: &[CommitId],
heads: &[CommitId],
root: &CommitId,
head: &CommitId,
) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
self.inner.get_copy_records(paths, roots, heads)
self.inner.get_copy_records(paths, root, head)
}
fn gc(&self, index: &dyn Index, keep_newer: SystemTime) -> BackendResult<()> {

View file

@ -86,10 +86,10 @@ impl Store {
pub fn get_copy_records(
&self,
paths: &[RepoPathBuf],
roots: &[CommitId],
heads: &[CommitId],
root: &CommitId,
head: &CommitId,
) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
self.backend.get_copy_records(paths, roots, heads)
self.backend.get_copy_records(paths, root, head)
}
pub fn commit_id_length(&self) -> usize {

View file

@ -19,7 +19,7 @@ use std::sync::Arc;
use std::time::{Duration, SystemTime};
use futures::executor::block_on_stream;
use jj_lib::backend::{CommitId, CopySource, CopySources};
use jj_lib::backend::{CommitId, CopyRecord};
use jj_lib::commit::Commit;
use jj_lib::git_backend::GitBackend;
use jj_lib::repo::{ReadonlyRepo, Repo};
@ -54,23 +54,13 @@ fn get_copy_records(
paths: &[RepoPathBuf],
a: &Commit,
b: &Commit,
) -> HashMap<String, Vec<String>> {
let stream = store
.get_copy_records(paths, &[a.id().clone()], &[b.id().clone()])
.unwrap();
let mut res: HashMap<String, Vec<String>> = HashMap::new();
for copy_record in block_on_stream(stream).map(|r| r.unwrap()) {
) -> HashMap<String, String> {
let stream = store.get_copy_records(paths, a.id(), b.id()).unwrap();
let mut res: HashMap<String, String> = HashMap::new();
for CopyRecord { target, source, .. } in block_on_stream(stream).filter_map(|r| r.ok()) {
res.insert(
copy_record.target.as_internal_file_string().into(),
match copy_record.sources {
CopySources::Resolved(CopySource { path, .. }) => {
vec![path.as_internal_file_string().into()]
}
CopySources::Conflict(conflicting) => conflicting
.iter()
.map(|s| s.path.as_internal_file_string().into())
.collect(),
},
target.as_internal_file_string().into(),
source.as_internal_file_string().into(),
);
}
res
@ -273,18 +263,18 @@ fn test_copy_detection() {
let store = repo.store();
assert_eq!(
get_copy_records(store, paths, &commit_a, &commit_b),
HashMap::from([("file1".to_string(), vec!["file0".to_string()])])
HashMap::from([("file1".to_string(), "file0".to_string())])
);
assert_eq!(
get_copy_records(store, paths, &commit_b, &commit_c),
HashMap::from([("file2".to_string(), vec!["file1".to_string()])])
HashMap::from([("file2".to_string(), "file1".to_string())])
);
assert_eq!(
get_copy_records(store, paths, &commit_a, &commit_c),
HashMap::from([("file2".to_string(), vec!["file0".to_string()])])
HashMap::from([("file2".to_string(), "file0".to_string())])
);
assert_eq!(
get_copy_records(store, &[], &commit_a, &commit_c),
get_copy_records(store, &[paths[1].clone()], &commit_a, &commit_c),
HashMap::default(),
);
assert_eq!(

View file

@ -304,8 +304,8 @@ impl Backend for TestBackend {
fn get_copy_records(
&self,
_paths: &[RepoPathBuf],
_roots: &[CommitId],
_heads: &[CommitId],
_root: &CommitId,
_head: &CommitId,
) -> BackendResult<BoxStream<BackendResult<CopyRecord>>> {
Err(BackendError::Unsupported("get_copy_records".into()))
}