git_backend: replace git2::Repository with gix::Repository

My gut feeling is that gitoxide aims to be more transparent than libgit2. We'll
need to know more about the underlying Git data model.

Random comments on gix API:

 * gix::Repository provides API similar to git2::Repository, but has less
   "convenient" functions. For example, we need to use .find_object() +
   .try_to/into_<kind>() instead of .find_<kind>().
 * gix::Object, Blob, etc. own raw data as bytes. gix::object and gix::objs
   types provide high-level views on such data.
 * Tree building is pretty low-level compared to git2.
 * gix leverages bstr (i.e. bytes) extensively.

It's probably not difficult to migrate git::import/export_refs(). It might
help eliminate the startup overhead of libssl initialization. The gix-based
GitBackend appears to be a bit faster, but that wouldn't practically matter.

#2316
This commit is contained in:
Yuya Nishihara 2023-10-29 16:39:39 +09:00
parent 9a86b77e38
commit c88e69ad6f
2 changed files with 252 additions and 196 deletions

View file

@ -104,7 +104,7 @@ fn test_edit_current_wc_commit_missing() {
.assert() .assert()
.code(255); .code(255);
insta::assert_snapshot!(get_stderr_string(&assert), @r###" insta::assert_snapshot!(get_stderr_string(&assert), @r###"
Internal error: Failed to edit a commit: Current working-copy commit not found: Object 69542c1984c1f9d91f7c6c9c9e6941782c944bd9 of type commit not found: object not found - no match for id (69542c1984c1f9d91f7c6c9c9e6941782c944bd9); class=Odb (9); code=NotFound (-3) Internal error: Failed to edit a commit: Current working-copy commit not found: Object 69542c1984c1f9d91f7c6c9c9e6941782c944bd9 of type commit not found: An object with id 69542c1984c1f9d91f7c6c9c9e6941782c944bd9 could not be found
"###); "###);
} }

View file

@ -17,13 +17,11 @@
use std::any::Any; use std::any::Any;
use std::fmt::{Debug, Error, Formatter}; use std::fmt::{Debug, Error, Formatter};
use std::io::{Cursor, Read}; use std::io::{Cursor, Read};
use std::ops::Deref; use std::path::Path;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex, MutexGuard}; use std::sync::{Arc, Mutex, MutexGuard};
use std::{fs, str}; use std::{fs, str};
use async_trait::async_trait; use async_trait::async_trait;
use git2::Oid;
use itertools::Itertools; use itertools::Itertools;
use prost::Message; use prost::Message;
use thiserror::Error; use thiserror::Error;
@ -50,9 +48,9 @@ const CONFLICT_SUFFIX: &str = ".jjconflict";
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum GitBackendInitError { pub enum GitBackendInitError {
#[error("Failed to initialize git repository: {0}")] #[error("Failed to initialize git repository: {0}")]
InitRepository(#[source] git2::Error), InitRepository(#[source] gix::init::Error),
#[error("Failed to open git repository: {0}")] #[error("Failed to open git repository: {0}")]
OpenRepository(#[source] git2::Error), OpenRepository(#[source] gix::open::Error),
#[error(transparent)] #[error(transparent)]
Path(PathError), Path(PathError),
} }
@ -66,7 +64,7 @@ impl From<Box<GitBackendInitError>> for BackendInitError {
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum GitBackendLoadError { pub enum GitBackendLoadError {
#[error("Failed to open git repository: {0}")] #[error("Failed to open git repository: {0}")]
OpenRepository(#[source] git2::Error), OpenRepository(#[source] gix::open::Error),
#[error(transparent)] #[error(transparent)]
Path(PathError), Path(PathError),
} }
@ -93,7 +91,12 @@ impl From<GitBackendError> for BackendError {
} }
pub struct GitBackend { pub struct GitBackend {
repo: Mutex<git2::Repository>, // While gix::Repository can be created from gix::ThreadSafeRepository, it's
// cheaper to cache the thread-local instance behind a mutex than creating
// one for each backend method call. Our GitBackend is most likely to be
// used in a single-threaded context.
base_repo: gix::ThreadSafeRepository,
repo: Mutex<gix::Repository>,
root_commit_id: CommitId, root_commit_id: CommitId,
root_change_id: ChangeId, root_change_id: ChangeId,
empty_tree_id: TreeId, empty_tree_id: TreeId,
@ -106,12 +109,14 @@ impl GitBackend {
"git" "git"
} }
fn new(repo: git2::Repository, extra_metadata_store: TableStore) -> Self { fn new(base_repo: gix::ThreadSafeRepository, extra_metadata_store: TableStore) -> Self {
let repo = Mutex::new(base_repo.to_thread_local());
let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]); let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]); let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904"); let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
GitBackend { GitBackend {
repo: Mutex::new(repo), base_repo,
repo,
root_commit_id, root_commit_id,
root_change_id, root_change_id,
empty_tree_id, empty_tree_id,
@ -121,8 +126,12 @@ impl GitBackend {
} }
pub fn init_internal(store_path: &Path) -> Result<Self, Box<GitBackendInitError>> { pub fn init_internal(store_path: &Path) -> Result<Self, Box<GitBackendInitError>> {
let git_repo = git2::Repository::init_bare(store_path.join("git")) let git_repo = gix::ThreadSafeRepository::init(
.map_err(GitBackendInitError::InitRepository)?; store_path.join("git"),
gix::create::Kind::Bare,
gix::create::Options::default(),
)
.map_err(GitBackendInitError::InitRepository)?;
let extra_path = store_path.join("extra"); let extra_path = store_path.join("extra");
fs::create_dir(&extra_path) fs::create_dir(&extra_path)
.context(&extra_path) .context(&extra_path)
@ -169,7 +178,7 @@ impl GitBackend {
.context(&path) .context(&path)
.map_err(GitBackendInitError::Path)? .map_err(GitBackendInitError::Path)?
}; };
let repo = git2::Repository::open(canonical_git_repo_path) let repo = gix::ThreadSafeRepository::open(canonical_git_repo_path)
.map_err(GitBackendInitError::OpenRepository)?; .map_err(GitBackendInitError::OpenRepository)?;
let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH); let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
Ok(GitBackend::new(repo, extra_metadata_store)) Ok(GitBackend::new(repo, extra_metadata_store))
@ -187,35 +196,37 @@ impl GitBackend {
.context(&git_repo_path) .context(&git_repo_path)
.map_err(GitBackendLoadError::Path)? .map_err(GitBackendLoadError::Path)?
}; };
let repo = let repo = gix::ThreadSafeRepository::open(git_repo_path)
git2::Repository::open(git_repo_path).map_err(GitBackendLoadError::OpenRepository)?; .map_err(GitBackendLoadError::OpenRepository)?;
let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH); let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
Ok(GitBackend::new(repo, extra_metadata_store)) Ok(GitBackend::new(repo, extra_metadata_store))
} }
fn git_repo(&self) -> MutexGuard<'_, git2::Repository> { fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
self.repo.lock().unwrap() self.repo.lock().unwrap()
} }
// TODO: add public API to obtain new gix::Repository from base_repo
/// Creates new owned git repository instance. /// Creates new owned git repository instance.
pub fn open_git_repo(&self) -> Result<git2::Repository, git2::Error> { pub fn open_git_repo(&self) -> Result<git2::Repository, git2::Error> {
let locked_repo = self.git_repo(); git2::Repository::open(self.git_repo_path())
git2::Repository::open(locked_repo.path())
} }
/// Git configuration for this repository. /// Git configuration for this repository.
pub fn git_config(&self) -> Result<git2::Config, git2::Error> { pub fn git_config(&self) -> Result<git2::Config, git2::Error> {
self.git_repo().config() // TODO: switch to gix config type
self.open_git_repo().and_then(|repo| repo.config())
} }
/// Path to the `.git` directory or the repository itself if it's bare. /// Path to the `.git` directory or the repository itself if it's bare.
pub fn git_repo_path(&self) -> PathBuf { pub fn git_repo_path(&self) -> &Path {
self.git_repo().path().to_owned() self.base_repo.path()
} }
/// Path to the working directory if the repository isn't bare. /// Path to the working directory if the repository isn't bare.
pub fn git_workdir(&self) -> Option<PathBuf> { pub fn git_workdir(&self) -> Option<&Path> {
self.git_repo().workdir().map(|path| path.to_owned()) self.base_repo.work_dir()
} }
fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> { fn cached_extra_metadata_table(&self) -> BackendResult<Arc<ReadonlyTable>> {
@ -278,7 +289,7 @@ impl GitBackend {
heads_count = missing_head_ids.len(), heads_count = missing_head_ids.len(),
"import extra metadata entries" "import extra metadata entries"
); );
let locked_repo = self.repo.lock().unwrap(); let locked_repo = self.lock_git_repo();
let (table, table_lock) = self.read_extra_metadata_table_locked()?; let (table, table_lock) = self.read_extra_metadata_table_locked()?;
let mut mut_table = table.start_mutation(); let mut mut_table = table.start_mutation();
// Concurrent write_commit() might have updated the table before taking a lock. // Concurrent write_commit() might have updated the table before taking a lock.
@ -298,17 +309,19 @@ impl GitBackend {
fn read_file_sync(&self, id: &FileId) -> BackendResult<Box<dyn Read>> { fn read_file_sync(&self, id: &FileId) -> BackendResult<Box<dyn Read>> {
let git_blob_id = validate_git_object_id(id)?; let git_blob_id = validate_git_object_id(id)?;
let locked_repo = self.repo.lock().unwrap(); let locked_repo = self.lock_git_repo();
let blob = locked_repo let mut blob = locked_repo
.find_blob(git_blob_id) .find_object(git_blob_id)
.map_err(|err| map_not_found_err(err, id))?; .map_err(|err| map_not_found_err(err, id))?
let content = blob.content().to_owned(); .try_into_blob()
Ok(Box::new(Cursor::new(content))) .map_err(|err| to_read_object_err(err, id))?;
Ok(Box::new(Cursor::new(blob.take_data())))
} }
} }
fn commit_from_git_without_root_parent( fn commit_from_git_without_root_parent(
commit: &git2::Commit, id: &CommitId,
commit: &gix::objs::CommitRef,
uses_tree_conflict_format: bool, uses_tree_conflict_format: bool,
) -> Commit { ) -> Commit {
// We reverse the bits of the commit id to create the change id. We don't want // We reverse the bits of the commit id to create the change id. We don't want
@ -318,17 +331,17 @@ fn commit_from_git_without_root_parent(
// leading 16 bytes to address that. We also reverse the bits to make it less // leading 16 bytes to address that. We also reverse the bits to make it less
// likely that users depend on any relationship between the two ids. // likely that users depend on any relationship between the two ids.
let change_id = ChangeId::new( let change_id = ChangeId::new(
commit.id().as_bytes()[4..HASH_LENGTH] id.as_bytes()[4..HASH_LENGTH]
.iter() .iter()
.rev() .rev()
.map(|b| b.reverse_bits()) .map(|b| b.reverse_bits())
.collect(), .collect(),
); );
let parents = commit let parents = commit
.parent_ids() .parents()
.map(|oid| CommitId::from_bytes(oid.as_bytes())) .map(|oid| CommitId::from_bytes(oid.as_bytes()))
.collect_vec(); .collect_vec();
let tree_id = TreeId::from_bytes(commit.tree_id().as_bytes()); let tree_id = TreeId::from_bytes(commit.tree().as_bytes());
// If this commit is a conflict, we'll update the root tree later, when we read // If this commit is a conflict, we'll update the root tree later, when we read
// the extra metadata. // the extra metadata.
let root_tree = if uses_tree_conflict_format { let root_tree = if uses_tree_conflict_format {
@ -338,7 +351,8 @@ fn commit_from_git_without_root_parent(
}; };
// Use lossy conversion as commit message with "mojibake" is still better than // Use lossy conversion as commit message with "mojibake" is still better than
// nothing. // nothing.
let description = String::from_utf8_lossy(commit.message_bytes()).into_owned(); // TODO: what should we do with commit.encoding?
let description = String::from_utf8_lossy(commit.message).into_owned();
let author = signature_from_git(commit.author()); let author = signature_from_git(commit.author());
let committer = signature_from_git(commit.committer()); let committer = signature_from_git(commit.committer());
@ -356,21 +370,21 @@ fn commit_from_git_without_root_parent(
const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING"; const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
fn signature_from_git(signature: git2::Signature) -> Signature { fn signature_from_git(signature: gix::actor::SignatureRef) -> Signature {
let name = signature.name().unwrap_or_default(); let name = signature.name;
let name = if name != EMPTY_STRING_PLACEHOLDER { let name = if name != EMPTY_STRING_PLACEHOLDER {
name.to_owned() String::from_utf8_lossy(name).into_owned()
} else { } else {
"".to_string() "".to_string()
}; };
let email = signature.email().unwrap_or_default(); let email = signature.email;
let email = if email != EMPTY_STRING_PLACEHOLDER { let email = if email != EMPTY_STRING_PLACEHOLDER {
email.to_owned() String::from_utf8_lossy(email).into_owned()
} else { } else {
"".to_string() "".to_string()
}; };
let timestamp = MillisSinceEpoch(signature.when().seconds() * 1000); let timestamp = MillisSinceEpoch(signature.time.seconds * 1000);
let tz_offset = signature.when().offset_minutes(); let tz_offset = signature.time.offset.div_euclid(60); // in minutes
Signature { Signature {
name, name,
email, email,
@ -381,7 +395,7 @@ fn signature_from_git(signature: git2::Signature) -> Signature {
} }
} }
fn signature_to_git(signature: &Signature) -> git2::Signature<'static> { fn signature_to_git(signature: &Signature) -> gix::actor::SignatureRef<'_> {
// git does not support empty names or emails // git does not support empty names or emails
let name = if !signature.name.is_empty() { let name = if !signature.name.is_empty() {
&signature.name &signature.name
@ -393,11 +407,15 @@ fn signature_to_git(signature: &Signature) -> git2::Signature<'static> {
} else { } else {
EMPTY_STRING_PLACEHOLDER EMPTY_STRING_PLACEHOLDER
}; };
let time = git2::Time::new( let time = gix::date::Time::new(
signature.timestamp.timestamp.0.div_euclid(1000), signature.timestamp.timestamp.0.div_euclid(1000),
signature.timestamp.tz_offset, signature.timestamp.tz_offset * 60, // in seconds
); );
git2::Signature::new(name, email, &time).unwrap() gix::actor::SignatureRef {
name: name.into(),
email: email.into(),
time,
}
} }
fn serialize_extras(commit: &Commit) -> Vec<u8> { fn serialize_extras(commit: &Commit) -> Vec<u8> {
@ -450,19 +468,19 @@ fn create_no_gc_ref() -> String {
format!("{NO_GC_REF_NAMESPACE}{}", hex::encode(random_bytes)) format!("{NO_GC_REF_NAMESPACE}{}", hex::encode(random_bytes))
} }
fn prevent_gc(git_repo: &git2::Repository, id: &CommitId) -> Result<(), BackendError> { fn prevent_gc(git_repo: &gix::Repository, id: &CommitId) -> Result<(), BackendError> {
git_repo git_repo
.reference( .reference(
&format!("{NO_GC_REF_NAMESPACE}{}", id.hex()), format!("{NO_GC_REF_NAMESPACE}{}", id.hex()),
Oid::from_bytes(id.as_bytes()).unwrap(), validate_git_object_id(id).unwrap(),
true, gix::refs::transaction::PreviousValue::Any,
"used by jj", "used by jj",
) )
.map_err(|err| BackendError::Other(Box::new(err)))?; .map_err(|err| BackendError::Other(Box::new(err)))?;
Ok(()) Ok(())
} }
fn validate_git_object_id(id: &impl ObjectId) -> Result<git2::Oid, BackendError> { fn validate_git_object_id(id: &impl ObjectId) -> Result<gix::ObjectId, BackendError> {
if id.as_bytes().len() != HASH_LENGTH { if id.as_bytes().len() != HASH_LENGTH {
return Err(BackendError::InvalidHashLength { return Err(BackendError::InvalidHashLength {
expected: HASH_LENGTH, expected: HASH_LENGTH,
@ -471,22 +489,29 @@ fn validate_git_object_id(id: &impl ObjectId) -> Result<git2::Oid, BackendError>
hash: id.hex(), hash: id.hex(),
}); });
} }
Ok(git2::Oid::from_bytes(id.as_bytes()).unwrap()) Ok(id.as_bytes().into())
} }
fn map_not_found_err(err: git2::Error, id: &impl ObjectId) -> BackendError { fn map_not_found_err(err: gix::object::find::existing::Error, id: &impl ObjectId) -> BackendError {
if err.code() == git2::ErrorCode::NotFound { if matches!(err, gix::object::find::existing::Error::NotFound { .. }) {
BackendError::ObjectNotFound { BackendError::ObjectNotFound {
object_type: id.object_type(), object_type: id.object_type(),
hash: id.hex(), hash: id.hex(),
source: Box::new(err), source: Box::new(err),
} }
} else { } else {
BackendError::ReadObject { to_read_object_err(err, id)
object_type: id.object_type(), }
hash: id.hex(), }
source: Box::new(err),
} fn to_read_object_err(
err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
id: &impl ObjectId,
) -> BackendError {
BackendError::ReadObject {
object_type: id.object_type(),
hash: id.hex(),
source: err.into(),
} }
} }
@ -499,7 +524,7 @@ fn to_invalid_utf8_err(source: str::Utf8Error, id: &impl ObjectId) -> BackendErr
} }
fn import_extra_metadata_entries_from_heads( fn import_extra_metadata_entries_from_heads(
git_repo: &git2::Repository, git_repo: &gix::Repository,
mut_table: &mut MutableTable, mut_table: &mut MutableTable,
_table_lock: &FileLock, _table_lock: &FileLock,
missing_head_ids: &[&CommitId], missing_head_ids: &[&CommitId],
@ -507,13 +532,17 @@ fn import_extra_metadata_entries_from_heads(
) -> BackendResult<()> { ) -> BackendResult<()> {
let mut work_ids = missing_head_ids.iter().map(|&id| id.clone()).collect_vec(); let mut work_ids = missing_head_ids.iter().map(|&id| id.clone()).collect_vec();
while let Some(id) = work_ids.pop() { while let Some(id) = work_ids.pop() {
let git_commit = git_repo let git_object = git_repo
.find_commit(validate_git_object_id(&id)?) .find_object(validate_git_object_id(&id)?)
.map_err(|err| map_not_found_err(err, &id))?; .map_err(|err| map_not_found_err(err, &id))?;
let git_commit = git_object
.try_to_commit_ref()
.map_err(|err| to_read_object_err(err, &id))?;
// TODO(#1624): Should we read the root tree here and check if it has a // TODO(#1624): Should we read the root tree here and check if it has a
// `.jjconflict-...` entries? That could happen if the user used `git` to e.g. // `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
// change the description of a commit with tree-level conflicts. // change the description of a commit with tree-level conflicts.
let commit = commit_from_git_without_root_parent(&git_commit, uses_tree_conflict_format); let commit =
commit_from_git_without_root_parent(&id, &git_commit, uses_tree_conflict_format);
mut_table.add_entry(id.to_bytes(), serialize_extras(&commit)); mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
work_ids.extend( work_ids.extend(
commit commit
@ -527,8 +556,8 @@ fn import_extra_metadata_entries_from_heads(
impl Debug for GitBackend { impl Debug for GitBackend {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
f.debug_struct("GitStore") f.debug_struct("GitBackend")
.field("path", &self.repo.lock().unwrap().path()) .field("path", &self.git_repo_path())
.finish() .finish()
} }
} }
@ -570,9 +599,9 @@ impl Backend for GitBackend {
fn write_file(&self, _path: &RepoPath, contents: &mut dyn Read) -> BackendResult<FileId> { fn write_file(&self, _path: &RepoPath, contents: &mut dyn Read) -> BackendResult<FileId> {
let mut bytes = Vec::new(); let mut bytes = Vec::new();
contents.read_to_end(&mut bytes).unwrap(); contents.read_to_end(&mut bytes).unwrap();
let locked_repo = self.repo.lock().unwrap(); let locked_repo = self.lock_git_repo();
let oid = locked_repo let oid = locked_repo
.blob(&bytes) .write_blob(bytes)
.map_err(|err| BackendError::WriteObject { .map_err(|err| BackendError::WriteObject {
object_type: "file", object_type: "file",
source: Box::new(err), source: Box::new(err),
@ -582,24 +611,27 @@ impl Backend for GitBackend {
async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> Result<String, BackendError> { async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> Result<String, BackendError> {
let git_blob_id = validate_git_object_id(id)?; let git_blob_id = validate_git_object_id(id)?;
let locked_repo = self.repo.lock().unwrap(); let locked_repo = self.lock_git_repo();
let blob = locked_repo let mut blob = locked_repo
.find_blob(git_blob_id) .find_object(git_blob_id)
.map_err(|err| map_not_found_err(err, id))?; .map_err(|err| map_not_found_err(err, id))?
let target = str::from_utf8(blob.content()) .try_into_blob()
.map_err(|err| to_invalid_utf8_err(err, id))? .map_err(|err| to_read_object_err(err, id))?;
let target = String::from_utf8(blob.take_data())
.map_err(|err| to_invalid_utf8_err(err.utf8_error(), id))?
.to_owned(); .to_owned();
Ok(target) Ok(target)
} }
fn write_symlink(&self, _path: &RepoPath, target: &str) -> Result<SymlinkId, BackendError> { fn write_symlink(&self, _path: &RepoPath, target: &str) -> Result<SymlinkId, BackendError> {
let locked_repo = self.repo.lock().unwrap(); let locked_repo = self.lock_git_repo();
let oid = locked_repo let oid =
.blob(target.as_bytes()) locked_repo
.map_err(|err| BackendError::WriteObject { .write_blob(target.as_bytes())
object_type: "symlink", .map_err(|err| BackendError::WriteObject {
source: Box::new(err), object_type: "symlink",
})?; source: Box::new(err),
})?;
Ok(SymlinkId::new(oid.as_bytes().to_vec())) Ok(SymlinkId::new(oid.as_bytes().to_vec()))
} }
@ -609,25 +641,28 @@ impl Backend for GitBackend {
} }
let git_tree_id = validate_git_object_id(id)?; let git_tree_id = validate_git_object_id(id)?;
let locked_repo = self.repo.lock().unwrap(); let locked_repo = self.lock_git_repo();
let git_tree = locked_repo let git_tree = locked_repo
.find_tree(git_tree_id) .find_object(git_tree_id)
.map_err(|err| map_not_found_err(err, id))?; .map_err(|err| map_not_found_err(err, id))?
.try_into_tree()
.map_err(|err| to_read_object_err(err, id))?;
let mut tree = Tree::default(); let mut tree = Tree::default();
for entry in git_tree.iter() { for entry in git_tree.iter() {
let entry = entry.map_err(|err| to_read_object_err(err, id))?;
let name = let name =
str::from_utf8(entry.name_bytes()).map_err(|err| to_invalid_utf8_err(err, id))?; str::from_utf8(entry.filename()).map_err(|err| to_invalid_utf8_err(err, id))?;
let (name, value) = match entry.filemode() { let (name, value) = match entry.mode() {
0o040000 => { gix::object::tree::EntryMode::Tree => {
let id = TreeId::from_bytes(entry.id().as_bytes()); let id = TreeId::from_bytes(entry.oid().as_bytes());
(name, TreeValue::Tree(id)) (name, TreeValue::Tree(id))
} }
0o100644 => { gix::object::tree::EntryMode::Blob => {
let id = FileId::from_bytes(entry.id().as_bytes()); let id = FileId::from_bytes(entry.oid().as_bytes());
if let Some(basename) = name.strip_suffix(CONFLICT_SUFFIX) { if let Some(basename) = name.strip_suffix(CONFLICT_SUFFIX) {
( (
basename, basename,
TreeValue::Conflict(ConflictId::from_bytes(entry.id().as_bytes())), TreeValue::Conflict(ConflictId::from_bytes(entry.oid().as_bytes())),
) )
} else { } else {
( (
@ -639,8 +674,8 @@ impl Backend for GitBackend {
) )
} }
} }
0o100755 => { gix::object::tree::EntryMode::BlobExecutable => {
let id = FileId::from_bytes(entry.id().as_bytes()); let id = FileId::from_bytes(entry.oid().as_bytes());
( (
name, name,
TreeValue::File { TreeValue::File {
@ -649,15 +684,14 @@ impl Backend for GitBackend {
}, },
) )
} }
0o120000 => { gix::object::tree::EntryMode::Link => {
let id = SymlinkId::from_bytes(entry.id().as_bytes()); let id = SymlinkId::from_bytes(entry.oid().as_bytes());
(name, TreeValue::Symlink(id)) (name, TreeValue::Symlink(id))
} }
0o160000 => { gix::object::tree::EntryMode::Commit => {
let id = CommitId::from_bytes(entry.id().as_bytes()); let id = CommitId::from_bytes(entry.oid().as_bytes());
(name, TreeValue::GitSubmodule(id)) (name, TreeValue::GitSubmodule(id))
} }
mode => panic!("unexpected file mode {mode:?}"),
}; };
tree.set(RepoPathComponent::from(name), value); tree.set(RepoPathComponent::from(name), value);
} }
@ -665,36 +699,60 @@ impl Backend for GitBackend {
} }
fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> { fn write_tree(&self, _path: &RepoPath, contents: &Tree) -> BackendResult<TreeId> {
let locked_repo = self.repo.lock().unwrap(); // Tree entries to be written must be sorted by Entry::filename(), which
let mut builder = locked_repo.treebuilder(None).unwrap(); // is slightly different from the order of our backend::Tree.
for entry in contents.entries() { let entries = contents
let name = entry.name().string(); .entries()
let (name, id, filemode) = match entry.value() { .map(|entry| {
TreeValue::File { let name = entry.name().string();
id, match entry.value() {
executable: false, TreeValue::File {
} => (name, id.as_bytes(), 0o100644), id,
TreeValue::File { executable: false,
id, } => gix::objs::tree::Entry {
executable: true, mode: gix::object::tree::EntryMode::Blob,
} => (name, id.as_bytes(), 0o100755), filename: name.into(),
TreeValue::Symlink(id) => (name, id.as_bytes(), 0o120000), oid: id.as_bytes().into(),
TreeValue::Tree(id) => (name, id.as_bytes(), 0o040000), },
TreeValue::GitSubmodule(id) => (name, id.as_bytes(), 0o160000), TreeValue::File {
TreeValue::Conflict(id) => ( id,
entry.name().string() + CONFLICT_SUFFIX, executable: true,
id.as_bytes(), } => gix::objs::tree::Entry {
0o100644, mode: gix::object::tree::EntryMode::BlobExecutable,
), filename: name.into(),
}; oid: id.as_bytes().into(),
builder },
.insert(name, Oid::from_bytes(id).unwrap(), filemode) TreeValue::Symlink(id) => gix::objs::tree::Entry {
.unwrap(); mode: gix::object::tree::EntryMode::Link,
} filename: name.into(),
let oid = builder.write().map_err(|err| BackendError::WriteObject { oid: id.as_bytes().into(),
object_type: "tree", },
source: Box::new(err), TreeValue::Tree(id) => gix::objs::tree::Entry {
})?; mode: gix::object::tree::EntryMode::Tree,
filename: name.into(),
oid: id.as_bytes().into(),
},
TreeValue::GitSubmodule(id) => gix::objs::tree::Entry {
mode: gix::object::tree::EntryMode::Commit,
filename: name.into(),
oid: id.as_bytes().into(),
},
TreeValue::Conflict(id) => gix::objs::tree::Entry {
mode: gix::object::tree::EntryMode::Blob,
filename: (name + CONFLICT_SUFFIX).into(),
oid: id.as_bytes().into(),
},
}
})
.sorted_unstable()
.collect();
let locked_repo = self.lock_git_repo();
let oid = locked_repo
.write_object(gix::objs::Tree { entries })
.map_err(|err| BackendError::WriteObject {
object_type: "tree",
source: Box::new(err),
})?;
Ok(TreeId::from_bytes(oid.as_bytes())) Ok(TreeId::from_bytes(oid.as_bytes()))
} }
@ -721,9 +779,9 @@ impl Backend for GitBackend {
}); });
let json_string = json.to_string(); let json_string = json.to_string();
let bytes = json_string.as_bytes(); let bytes = json_string.as_bytes();
let locked_repo = self.repo.lock().unwrap(); let locked_repo = self.lock_git_repo();
let oid = locked_repo let oid = locked_repo
.blob(bytes) .write_blob(bytes)
.map_err(|err| BackendError::WriteObject { .map_err(|err| BackendError::WriteObject {
object_type: "conflict", object_type: "conflict",
source: Box::new(err), source: Box::new(err),
@ -742,11 +800,14 @@ impl Backend for GitBackend {
let git_commit_id = validate_git_object_id(id)?; let git_commit_id = validate_git_object_id(id)?;
let mut commit = { let mut commit = {
let locked_repo = self.repo.lock().unwrap(); let locked_repo = self.lock_git_repo();
let git_commit = locked_repo let git_object = locked_repo
.find_commit(git_commit_id) .find_object(git_commit_id)
.map_err(|err| map_not_found_err(err, id))?; .map_err(|err| map_not_found_err(err, id))?;
commit_from_git_without_root_parent(&git_commit, false) let git_commit = git_object
.try_to_commit_ref()
.map_err(|err| to_read_object_err(err, id))?;
commit_from_git_without_root_parent(id, &git_commit, false)
}; };
if commit.parents.is_empty() { if commit.parents.is_empty() {
commit.parents.push(self.root_commit_id.clone()); commit.parents.push(self.root_commit_id.clone());
@ -771,17 +832,14 @@ impl Backend for GitBackend {
} }
fn write_commit(&self, mut contents: Commit) -> BackendResult<(CommitId, Commit)> { fn write_commit(&self, mut contents: Commit) -> BackendResult<(CommitId, Commit)> {
let locked_repo = self.repo.lock().unwrap(); let locked_repo = self.lock_git_repo();
let git_tree_id = match &contents.root_tree { let git_tree_id = match &contents.root_tree {
MergedTreeId::Legacy(tree_id) => validate_git_object_id(tree_id)?, MergedTreeId::Legacy(tree_id) => validate_git_object_id(tree_id)?,
MergedTreeId::Merge(tree_ids) => match tree_ids.as_resolved() { MergedTreeId::Merge(tree_ids) => match tree_ids.as_resolved() {
Some(tree_id) => validate_git_object_id(tree_id)?, Some(tree_id) => validate_git_object_id(tree_id)?,
None => write_tree_conflict(locked_repo.deref(), tree_ids)?, None => write_tree_conflict(&locked_repo, tree_ids)?,
}, },
}; };
let git_tree = locked_repo
.find_tree(git_tree_id)
.map_err(|err| map_not_found_err(err, &TreeId::from_bytes(git_tree_id.as_bytes())))?;
let author = signature_to_git(&contents.author); let author = signature_to_git(&contents.author);
let mut committer = signature_to_git(&contents.committer); let mut committer = signature_to_git(&contents.committer);
let message = &contents.description; let message = &contents.description;
@ -805,14 +863,9 @@ impl Backend for GitBackend {
)); ));
} }
} else { } else {
let git_commit_id = validate_git_object_id(parent_id)?; parents.push(validate_git_object_id(parent_id)?);
let parent_git_commit = locked_repo
.find_commit(git_commit_id)
.map_err(|err| map_not_found_err(err, parent_id))?;
parents.push(parent_git_commit);
} }
} }
let parent_refs = parents.iter().collect_vec();
let extras = serialize_extras(&contents); let extras = serialize_extras(&contents);
// If two writers write commits of the same id with different metadata, they // If two writers write commits of the same id with different metadata, they
// will both succeed and the metadata entries will be "merged" later. Since // will both succeed and the metadata entries will be "merged" later. Since
@ -823,13 +876,13 @@ impl Backend for GitBackend {
let (table, table_lock) = self.read_extra_metadata_table_locked()?; let (table, table_lock) = self.read_extra_metadata_table_locked()?;
let id = loop { let id = loop {
let git_id = locked_repo let git_id = locked_repo
.commit( .commit_as(
Some(&create_no_gc_ref()), committer,
&author, author,
&committer, create_no_gc_ref(),
message, message,
&git_tree, git_tree_id,
&parent_refs, parents.iter().copied(),
) )
.map_err(|err| BackendError::WriteObject { .map_err(|err| BackendError::WriteObject {
object_type: "commit", object_type: "commit",
@ -840,16 +893,7 @@ impl Backend for GitBackend {
Some(existing_extras) if existing_extras != extras => { Some(existing_extras) if existing_extras != extras => {
// It's possible a commit already exists with the same commit id but different // It's possible a commit already exists with the same commit id but different
// change id. Adjust the timestamp until this is no longer the case. // change id. Adjust the timestamp until this is no longer the case.
let new_when = git2::Time::new( committer.time.seconds -= 1;
committer.when().seconds() - 1,
committer.when().offset_minutes(),
);
committer = git2::Signature::new(
committer.name().unwrap(),
committer.email().unwrap(),
&new_when,
)
.unwrap();
} }
_ => { _ => {
break id; break id;
@ -858,8 +902,7 @@ impl Backend for GitBackend {
}; };
// Update the signature to match the one that was actually written to the object // Update the signature to match the one that was actually written to the object
// store // store
contents.committer.timestamp.timestamp = contents.committer.timestamp.timestamp = MillisSinceEpoch(committer.time.seconds * 1000);
MillisSinceEpoch(committer.when().seconds() * 1000);
let mut mut_table = table.start_mutation(); let mut mut_table = table.start_mutation();
mut_table.add_entry(id.to_bytes(), extras); mut_table.add_entry(id.to_bytes(), extras);
self.save_extra_metadata_table(mut_table, &table_lock)?; self.save_extra_metadata_table(mut_table, &table_lock)?;
@ -870,24 +913,36 @@ impl Backend for GitBackend {
/// Write a tree conflict as a special tree with `.jjconflict-base-N` and /// Write a tree conflict as a special tree with `.jjconflict-base-N` and
/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd. /// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd.
fn write_tree_conflict( fn write_tree_conflict(
repo: &git2::Repository, repo: &gix::Repository,
conflict: &Merge<TreeId>, conflict: &Merge<TreeId>,
) -> Result<Oid, BackendError> { ) -> Result<gix::ObjectId, BackendError> {
let mut builder = repo.treebuilder(None).unwrap(); // Tree entries to be written must be sorted by Entry::filename().
let mut add_tree_entry = |name, tree_id: &TreeId| { let entries = itertools::chain(
let tree_oid = Oid::from_bytes(tree_id.as_bytes()).unwrap(); conflict
builder.insert(name, tree_oid, 0o040000).unwrap(); .removes()
}; .iter()
for (i, tree_id) in conflict.removes().iter().enumerate() { .enumerate()
add_tree_entry(format!(".jjconflict-base-{i}"), tree_id); .map(|(i, tree_id)| (format!(".jjconflict-base-{i}"), tree_id)),
} conflict
for (i, tree_id) in conflict.adds().iter().enumerate() { .adds()
add_tree_entry(format!(".jjconflict-side-{i}"), tree_id); .iter()
} .enumerate()
builder.write().map_err(|err| BackendError::WriteObject { .map(|(i, tree_id)| (format!(".jjconflict-side-{i}"), tree_id)),
object_type: "tree", )
source: Box::new(err), .map(|(name, tree_id)| gix::objs::tree::Entry {
mode: gix::object::tree::EntryMode::Tree,
filename: name.into(),
oid: tree_id.as_bytes().into(),
}) })
.sorted_unstable()
.collect();
let id = repo
.write_object(gix::objs::Tree { entries })
.map_err(|err| BackendError::WriteObject {
object_type: "tree",
source: Box::new(err),
})?;
Ok(id.detach())
} }
fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value { fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value {
@ -965,6 +1020,7 @@ fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
mod tests { mod tests {
use assert_matches::assert_matches; use assert_matches::assert_matches;
use futures::executor::block_on; use futures::executor::block_on;
use git2::Oid;
use test_case::test_case; use test_case::test_case;
use super::*; use super::*;
@ -1040,7 +1096,8 @@ mod tests {
.unwrap(); .unwrap();
// Ref should be created only for the head commit // Ref should be created only for the head commit
let git_refs = backend let git_refs = backend
.git_repo() .open_git_repo()
.unwrap()
.references_glob("refs/jj/keep/*") .references_glob("refs/jj/keep/*")
.unwrap() .unwrap()
.map(|git_ref| git_ref.unwrap().target().unwrap()) .map(|git_ref| git_ref.unwrap().target().unwrap())
@ -1167,21 +1224,19 @@ mod tests {
#[test] #[test]
fn read_empty_string_placeholder() { fn read_empty_string_placeholder() {
let git_signature1 = git2::Signature::new( let git_signature1 = gix::actor::SignatureRef {
EMPTY_STRING_PLACEHOLDER, name: EMPTY_STRING_PLACEHOLDER.into(),
"git.author@example.com", email: "git.author@example.com".into(),
&git2::Time::new(1000, 60), time: gix::date::Time::new(1000, 60 * 60),
) };
.unwrap();
let signature1 = signature_from_git(git_signature1); let signature1 = signature_from_git(git_signature1);
assert!(signature1.name.is_empty()); assert!(signature1.name.is_empty());
assert_eq!(signature1.email, "git.author@example.com"); assert_eq!(signature1.email, "git.author@example.com");
let git_signature2 = git2::Signature::new( let git_signature2 = gix::actor::SignatureRef {
"git committer", name: "git committer".into(),
EMPTY_STRING_PLACEHOLDER, email: EMPTY_STRING_PLACEHOLDER.into(),
&git2::Time::new(2000, -480), time: gix::date::Time::new(2000, -480 * 60),
) };
.unwrap();
let signature2 = signature_from_git(git_signature2); let signature2 = signature_from_git(git_signature2);
assert_eq!(signature2.name, "git committer"); assert_eq!(signature2.name, "git committer");
assert!(signature2.email.is_empty()); assert!(signature2.email.is_empty());
@ -1198,8 +1253,8 @@ mod tests {
}, },
}; };
let git_signature1 = signature_to_git(&signature1); let git_signature1 = signature_to_git(&signature1);
assert_eq!(git_signature1.name().unwrap(), EMPTY_STRING_PLACEHOLDER); assert_eq!(git_signature1.name, EMPTY_STRING_PLACEHOLDER);
assert_eq!(git_signature1.email().unwrap(), "someone@example.com"); assert_eq!(git_signature1.email, "someone@example.com");
let signature2 = Signature { let signature2 = Signature {
name: "Someone".to_string(), name: "Someone".to_string(),
email: "".to_string(), email: "".to_string(),
@ -1209,8 +1264,8 @@ mod tests {
}, },
}; };
let git_signature2 = signature_to_git(&signature2); let git_signature2 = signature_to_git(&signature2);
assert_eq!(git_signature2.name().unwrap(), "Someone"); assert_eq!(git_signature2.name, "Someone");
assert_eq!(git_signature2.email().unwrap(), EMPTY_STRING_PLACEHOLDER); assert_eq!(git_signature2.email, EMPTY_STRING_PLACEHOLDER);
} }
/// Test that parents get written correctly /// Test that parents get written correctly
@ -1374,7 +1429,8 @@ mod tests {
}; };
let commit_id = backend.write_commit(commit).unwrap().0; let commit_id = backend.write_commit(commit).unwrap().0;
let git_refs = backend let git_refs = backend
.git_repo() .open_git_repo()
.unwrap()
.references_glob("refs/jj/keep/*") .references_glob("refs/jj/keep/*")
.unwrap() .unwrap()
.map(|git_ref| git_ref.unwrap().target().unwrap()) .map(|git_ref| git_ref.unwrap().target().unwrap())