mirror of
https://github.com/martinvonz/jj.git
synced 2025-01-16 17:19:37 +00:00
1cc271441f
This adds an initial `jj util gc` command, which simply calls `git gc` when using the Git backend. That should already be useful in non-colocated repos because it's not obvious how to GC (repack) such repos. In my own jj repo, it shrank `.jj/repo/store/` from 2.4 GiB to 780 MiB, and `jj log --ignore-working-copy` was sped up from 157 ms to 86 ms. I haven't added any tests because the functionality depends on having a `git` binary on the PATH, which we don't yet depend on anywhere else. I think we'll still be able to test much of the future parts of garbage collection without a `git` binary because the interesting parts are about manipulating the Git repo before calling `git gc` on it.
558 lines
19 KiB
Rust
558 lines
19 KiB
Rust
// Copyright 2020 The Jujutsu Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#![allow(missing_docs)]
|
|
|
|
use std::any::Any;
|
|
use std::fmt::Debug;
|
|
use std::fs;
|
|
use std::fs::File;
|
|
use std::io::{Read, Write};
|
|
use std::path::{Path, PathBuf};
|
|
|
|
use async_trait::async_trait;
|
|
use blake2::{Blake2b512, Digest};
|
|
use prost::Message;
|
|
use tempfile::NamedTempFile;
|
|
|
|
use crate::backend::{
|
|
make_root_commit, Backend, BackendError, BackendResult, ChangeId, Commit, CommitId, Conflict,
|
|
ConflictId, ConflictTerm, FileId, MergedTreeId, MillisSinceEpoch, ObjectId, SecureSig,
|
|
Signature, SigningFn, SymlinkId, Timestamp, Tree, TreeId, TreeValue,
|
|
};
|
|
use crate::content_hash::blake2b_hash;
|
|
use crate::file_util::persist_content_addressed_temp_file;
|
|
use crate::merge::MergeBuilder;
|
|
use crate::repo_path::{RepoPath, RepoPathComponentBuf};
|
|
|
|
/// Number of bytes in a commit ID; the root commit ID is this many zero bytes.
const COMMIT_ID_LENGTH: usize = 64;
|
|
/// Number of bytes in a change ID; the root change ID is this many zero bytes.
const CHANGE_ID_LENGTH: usize = 16;
|
|
|
|
fn map_not_found_err(err: std::io::Error, id: &impl ObjectId) -> BackendError {
|
|
if err.kind() == std::io::ErrorKind::NotFound {
|
|
BackendError::ObjectNotFound {
|
|
object_type: id.object_type(),
|
|
hash: id.hex(),
|
|
source: Box::new(err),
|
|
}
|
|
} else {
|
|
BackendError::ReadObject {
|
|
object_type: id.object_type(),
|
|
hash: id.hex(),
|
|
source: Box::new(err),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn to_other_err(err: impl Into<Box<dyn std::error::Error + Send + Sync>>) -> BackendError {
|
|
BackendError::Other(err.into())
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub struct LocalBackend {
|
|
path: PathBuf,
|
|
root_commit_id: CommitId,
|
|
root_change_id: ChangeId,
|
|
empty_tree_id: TreeId,
|
|
}
|
|
|
|
impl LocalBackend {
|
|
pub fn name() -> &'static str {
|
|
"local"
|
|
}
|
|
|
|
pub fn init(store_path: &Path) -> Self {
|
|
fs::create_dir(store_path.join("commits")).unwrap();
|
|
fs::create_dir(store_path.join("trees")).unwrap();
|
|
fs::create_dir(store_path.join("files")).unwrap();
|
|
fs::create_dir(store_path.join("symlinks")).unwrap();
|
|
fs::create_dir(store_path.join("conflicts")).unwrap();
|
|
let backend = Self::load(store_path);
|
|
let empty_tree_id = backend
|
|
.write_tree(RepoPath::root(), &Tree::default())
|
|
.unwrap();
|
|
assert_eq!(empty_tree_id, backend.empty_tree_id);
|
|
backend
|
|
}
|
|
|
|
pub fn load(store_path: &Path) -> Self {
|
|
let root_commit_id = CommitId::from_bytes(&[0; COMMIT_ID_LENGTH]);
|
|
let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
|
|
let empty_tree_id = TreeId::from_hex("482ae5a29fbe856c7272f2071b8b0f0359ee2d89ff392b8a900643fbd0836eccd067b8bf41909e206c90d45d6e7d8b6686b93ecaee5fe1a9060d87b672101310");
|
|
LocalBackend {
|
|
path: store_path.to_path_buf(),
|
|
root_commit_id,
|
|
root_change_id,
|
|
empty_tree_id,
|
|
}
|
|
}
|
|
|
|
fn file_path(&self, id: &FileId) -> PathBuf {
|
|
self.path.join("files").join(id.hex())
|
|
}
|
|
|
|
fn symlink_path(&self, id: &SymlinkId) -> PathBuf {
|
|
self.path.join("symlinks").join(id.hex())
|
|
}
|
|
|
|
fn tree_path(&self, id: &TreeId) -> PathBuf {
|
|
self.path.join("trees").join(id.hex())
|
|
}
|
|
|
|
fn commit_path(&self, id: &CommitId) -> PathBuf {
|
|
self.path.join("commits").join(id.hex())
|
|
}
|
|
|
|
fn conflict_path(&self, id: &ConflictId) -> PathBuf {
|
|
self.path.join("conflicts").join(id.hex())
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Backend for LocalBackend {
|
|
fn as_any(&self) -> &dyn Any {
|
|
self
|
|
}
|
|
|
|
fn name(&self) -> &str {
|
|
Self::name()
|
|
}
|
|
|
|
fn commit_id_length(&self) -> usize {
|
|
COMMIT_ID_LENGTH
|
|
}
|
|
|
|
fn change_id_length(&self) -> usize {
|
|
CHANGE_ID_LENGTH
|
|
}
|
|
|
|
fn root_commit_id(&self) -> &CommitId {
|
|
&self.root_commit_id
|
|
}
|
|
|
|
fn root_change_id(&self) -> &ChangeId {
|
|
&self.root_change_id
|
|
}
|
|
|
|
fn empty_tree_id(&self) -> &TreeId {
|
|
&self.empty_tree_id
|
|
}
|
|
|
|
fn concurrency(&self) -> usize {
|
|
1
|
|
}
|
|
|
|
async fn read_file(&self, _path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>> {
|
|
let path = self.file_path(id);
|
|
let file = File::open(path).map_err(|err| map_not_found_err(err, id))?;
|
|
Ok(Box::new(zstd::Decoder::new(file).map_err(to_other_err)?))
|
|
}
|
|
|
|
fn write_file(&self, _path: &RepoPath, contents: &mut dyn Read) -> BackendResult<FileId> {
|
|
let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
|
|
let mut encoder = zstd::Encoder::new(temp_file.as_file(), 0).map_err(to_other_err)?;
|
|
let mut hasher = Blake2b512::new();
|
|
let mut buff: Vec<u8> = vec![0; 1 << 14];
|
|
loop {
|
|
let bytes_read = contents.read(&mut buff).map_err(to_other_err)?;
|
|
if bytes_read == 0 {
|
|
break;
|
|
}
|
|
let bytes = &buff[..bytes_read];
|
|
encoder.write_all(bytes).map_err(to_other_err)?;
|
|
hasher.update(bytes);
|
|
}
|
|
encoder.finish().map_err(to_other_err)?;
|
|
let id = FileId::new(hasher.finalize().to_vec());
|
|
|
|
persist_content_addressed_temp_file(temp_file, self.file_path(&id))
|
|
.map_err(to_other_err)?;
|
|
Ok(id)
|
|
}
|
|
|
|
async fn read_symlink(&self, _path: &RepoPath, id: &SymlinkId) -> Result<String, BackendError> {
|
|
let path = self.symlink_path(id);
|
|
let target = fs::read_to_string(path).map_err(|err| map_not_found_err(err, id))?;
|
|
Ok(target)
|
|
}
|
|
|
|
fn write_symlink(&self, _path: &RepoPath, target: &str) -> Result<SymlinkId, BackendError> {
|
|
let mut temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
|
|
temp_file
|
|
.write_all(target.as_bytes())
|
|
.map_err(to_other_err)?;
|
|
let mut hasher = Blake2b512::new();
|
|
hasher.update(target.as_bytes());
|
|
let id = SymlinkId::new(hasher.finalize().to_vec());
|
|
|
|
persist_content_addressed_temp_file(temp_file, self.symlink_path(&id))
|
|
.map_err(to_other_err)?;
|
|
Ok(id)
|
|
}
|
|
|
|
async fn read_tree(&self, _path: &RepoPath, id: &TreeId) -> BackendResult<Tree> {
|
|
let path = self.tree_path(id);
|
|
let buf = fs::read(path).map_err(|err| map_not_found_err(err, id))?;
|
|
|
|
let proto = crate::protos::local_store::Tree::decode(&*buf).map_err(to_other_err)?;
|
|
Ok(tree_from_proto(proto))
|
|
}
|
|
|
|
fn write_tree(&self, _path: &RepoPath, tree: &Tree) -> BackendResult<TreeId> {
|
|
let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
|
|
|
|
let proto = tree_to_proto(tree);
|
|
temp_file
|
|
.as_file()
|
|
.write_all(&proto.encode_to_vec())
|
|
.map_err(to_other_err)?;
|
|
|
|
let id = TreeId::new(blake2b_hash(tree).to_vec());
|
|
|
|
persist_content_addressed_temp_file(temp_file, self.tree_path(&id))
|
|
.map_err(to_other_err)?;
|
|
Ok(id)
|
|
}
|
|
|
|
fn read_conflict(&self, _path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict> {
|
|
let path = self.conflict_path(id);
|
|
let buf = fs::read(path).map_err(|err| map_not_found_err(err, id))?;
|
|
|
|
let proto = crate::protos::local_store::Conflict::decode(&*buf).map_err(to_other_err)?;
|
|
Ok(conflict_from_proto(proto))
|
|
}
|
|
|
|
fn write_conflict(&self, _path: &RepoPath, conflict: &Conflict) -> BackendResult<ConflictId> {
|
|
let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
|
|
|
|
let proto = conflict_to_proto(conflict);
|
|
temp_file
|
|
.as_file()
|
|
.write_all(&proto.encode_to_vec())
|
|
.map_err(to_other_err)?;
|
|
|
|
let id = ConflictId::new(blake2b_hash(conflict).to_vec());
|
|
|
|
persist_content_addressed_temp_file(temp_file, self.conflict_path(&id))
|
|
.map_err(to_other_err)?;
|
|
Ok(id)
|
|
}
|
|
|
|
async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
|
|
if *id == self.root_commit_id {
|
|
return Ok(make_root_commit(
|
|
self.root_change_id().clone(),
|
|
self.empty_tree_id.clone(),
|
|
));
|
|
}
|
|
|
|
let path = self.commit_path(id);
|
|
let buf = fs::read(path).map_err(|err| map_not_found_err(err, id))?;
|
|
|
|
let proto = crate::protos::local_store::Commit::decode(&*buf).map_err(to_other_err)?;
|
|
Ok(commit_from_proto(proto))
|
|
}
|
|
|
|
fn write_commit(
|
|
&self,
|
|
mut commit: Commit,
|
|
sign_with: Option<&mut SigningFn>,
|
|
) -> BackendResult<(CommitId, Commit)> {
|
|
assert!(commit.secure_sig.is_none(), "commit.secure_sig was set");
|
|
|
|
if commit.parents.is_empty() {
|
|
return Err(BackendError::Other(
|
|
"Cannot write a commit with no parents".into(),
|
|
));
|
|
}
|
|
let temp_file = NamedTempFile::new_in(&self.path).map_err(to_other_err)?;
|
|
|
|
let mut proto = commit_to_proto(&commit);
|
|
if let Some(sign) = sign_with {
|
|
let data = proto.encode_to_vec();
|
|
let sig = sign(&data).map_err(to_other_err)?;
|
|
proto.secure_sig = Some(sig.clone());
|
|
commit.secure_sig = Some(SecureSig { data, sig });
|
|
}
|
|
|
|
temp_file
|
|
.as_file()
|
|
.write_all(&proto.encode_to_vec())
|
|
.map_err(to_other_err)?;
|
|
|
|
let id = CommitId::new(blake2b_hash(&commit).to_vec());
|
|
|
|
persist_content_addressed_temp_file(temp_file, self.commit_path(&id))
|
|
.map_err(to_other_err)?;
|
|
Ok((id, commit))
|
|
}
|
|
|
|
fn gc(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
pub fn commit_to_proto(commit: &Commit) -> crate::protos::local_store::Commit {
|
|
let mut proto = crate::protos::local_store::Commit::default();
|
|
for parent in &commit.parents {
|
|
proto.parents.push(parent.to_bytes());
|
|
}
|
|
for predecessor in &commit.predecessors {
|
|
proto.predecessors.push(predecessor.to_bytes());
|
|
}
|
|
match &commit.root_tree {
|
|
MergedTreeId::Legacy(tree_id) => {
|
|
proto.root_tree = vec![tree_id.to_bytes()];
|
|
}
|
|
MergedTreeId::Merge(tree_ids) => {
|
|
proto.uses_tree_conflict_format = true;
|
|
proto.root_tree = tree_ids.iter().map(|id| id.to_bytes()).collect();
|
|
}
|
|
}
|
|
proto.change_id = commit.change_id.to_bytes();
|
|
proto.description = commit.description.clone();
|
|
proto.author = Some(signature_to_proto(&commit.author));
|
|
proto.committer = Some(signature_to_proto(&commit.committer));
|
|
proto
|
|
}
|
|
|
|
fn commit_from_proto(mut proto: crate::protos::local_store::Commit) -> Commit {
|
|
// Note how .take() sets the secure_sig field to None before we encode the data.
|
|
// Needs to be done first since proto is partially moved a bunch below
|
|
let secure_sig = proto.secure_sig.take().map(|sig| SecureSig {
|
|
data: proto.encode_to_vec(),
|
|
sig,
|
|
});
|
|
|
|
let parents = proto.parents.into_iter().map(CommitId::new).collect();
|
|
let predecessors = proto.predecessors.into_iter().map(CommitId::new).collect();
|
|
let root_tree = if proto.uses_tree_conflict_format {
|
|
let merge_builder: MergeBuilder<_> = proto.root_tree.into_iter().map(TreeId::new).collect();
|
|
MergedTreeId::Merge(merge_builder.build())
|
|
} else {
|
|
assert_eq!(proto.root_tree.len(), 1);
|
|
MergedTreeId::Legacy(TreeId::new(proto.root_tree[0].to_vec()))
|
|
};
|
|
let change_id = ChangeId::new(proto.change_id);
|
|
Commit {
|
|
parents,
|
|
predecessors,
|
|
root_tree,
|
|
change_id,
|
|
description: proto.description,
|
|
author: signature_from_proto(proto.author.unwrap_or_default()),
|
|
committer: signature_from_proto(proto.committer.unwrap_or_default()),
|
|
secure_sig,
|
|
}
|
|
}
|
|
|
|
fn tree_to_proto(tree: &Tree) -> crate::protos::local_store::Tree {
|
|
let mut proto = crate::protos::local_store::Tree::default();
|
|
for entry in tree.entries() {
|
|
proto.entries.push(crate::protos::local_store::tree::Entry {
|
|
name: entry.name().as_str().to_owned(),
|
|
value: Some(tree_value_to_proto(entry.value())),
|
|
});
|
|
}
|
|
proto
|
|
}
|
|
|
|
fn tree_from_proto(proto: crate::protos::local_store::Tree) -> Tree {
|
|
let mut tree = Tree::default();
|
|
for proto_entry in proto.entries {
|
|
let value = tree_value_from_proto(proto_entry.value.unwrap());
|
|
tree.set(RepoPathComponentBuf::from(proto_entry.name), value);
|
|
}
|
|
tree
|
|
}
|
|
|
|
fn tree_value_to_proto(value: &TreeValue) -> crate::protos::local_store::TreeValue {
|
|
let mut proto = crate::protos::local_store::TreeValue::default();
|
|
match value {
|
|
TreeValue::File { id, executable } => {
|
|
proto.value = Some(crate::protos::local_store::tree_value::Value::File(
|
|
crate::protos::local_store::tree_value::File {
|
|
id: id.to_bytes(),
|
|
executable: *executable,
|
|
},
|
|
));
|
|
}
|
|
TreeValue::Symlink(id) => {
|
|
proto.value = Some(crate::protos::local_store::tree_value::Value::SymlinkId(
|
|
id.to_bytes(),
|
|
));
|
|
}
|
|
TreeValue::GitSubmodule(_id) => {
|
|
panic!("cannot store git submodules");
|
|
}
|
|
TreeValue::Tree(id) => {
|
|
proto.value = Some(crate::protos::local_store::tree_value::Value::TreeId(
|
|
id.to_bytes(),
|
|
));
|
|
}
|
|
TreeValue::Conflict(id) => {
|
|
proto.value = Some(crate::protos::local_store::tree_value::Value::ConflictId(
|
|
id.to_bytes(),
|
|
));
|
|
}
|
|
}
|
|
proto
|
|
}
|
|
|
|
fn tree_value_from_proto(proto: crate::protos::local_store::TreeValue) -> TreeValue {
|
|
match proto.value.unwrap() {
|
|
crate::protos::local_store::tree_value::Value::TreeId(id) => {
|
|
TreeValue::Tree(TreeId::new(id))
|
|
}
|
|
crate::protos::local_store::tree_value::Value::File(
|
|
crate::protos::local_store::tree_value::File { id, executable, .. },
|
|
) => TreeValue::File {
|
|
id: FileId::new(id),
|
|
executable,
|
|
},
|
|
crate::protos::local_store::tree_value::Value::SymlinkId(id) => {
|
|
TreeValue::Symlink(SymlinkId::new(id))
|
|
}
|
|
crate::protos::local_store::tree_value::Value::ConflictId(id) => {
|
|
TreeValue::Conflict(ConflictId::new(id))
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Converts a `Signature` (name, email and timestamp) into its protobuf
/// message.
fn signature_to_proto(signature: &Signature) -> crate::protos::local_store::commit::Signature {
    let when = &signature.timestamp;
    let timestamp = crate::protos::local_store::commit::Timestamp {
        millis_since_epoch: when.timestamp.0,
        tz_offset: when.tz_offset,
    };
    crate::protos::local_store::commit::Signature {
        name: signature.name.clone(),
        email: signature.email.clone(),
        timestamp: Some(timestamp),
    }
}
|
|
|
|
fn signature_from_proto(proto: crate::protos::local_store::commit::Signature) -> Signature {
|
|
let timestamp = proto.timestamp.unwrap_or_default();
|
|
Signature {
|
|
name: proto.name,
|
|
email: proto.email,
|
|
timestamp: Timestamp {
|
|
timestamp: MillisSinceEpoch(timestamp.millis_since_epoch),
|
|
tz_offset: timestamp.tz_offset,
|
|
},
|
|
}
|
|
}
|
|
|
|
fn conflict_to_proto(conflict: &Conflict) -> crate::protos::local_store::Conflict {
|
|
let mut proto = crate::protos::local_store::Conflict::default();
|
|
for term in &conflict.removes {
|
|
proto.removes.push(conflict_term_to_proto(term));
|
|
}
|
|
for term in &conflict.adds {
|
|
proto.adds.push(conflict_term_to_proto(term));
|
|
}
|
|
proto
|
|
}
|
|
|
|
fn conflict_from_proto(proto: crate::protos::local_store::Conflict) -> Conflict {
|
|
let mut conflict = Conflict::default();
|
|
for term in proto.removes {
|
|
conflict.removes.push(conflict_term_from_proto(term))
|
|
}
|
|
for term in proto.adds {
|
|
conflict.adds.push(conflict_term_from_proto(term))
|
|
}
|
|
conflict
|
|
}
|
|
|
|
fn conflict_term_from_proto(proto: crate::protos::local_store::conflict::Term) -> ConflictTerm {
|
|
ConflictTerm {
|
|
value: tree_value_from_proto(proto.content.unwrap()),
|
|
}
|
|
}
|
|
|
|
fn conflict_term_to_proto(part: &ConflictTerm) -> crate::protos::local_store::conflict::Term {
|
|
crate::protos::local_store::conflict::Term {
|
|
content: Some(tree_value_to_proto(&part.value)),
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use assert_matches::assert_matches;
    use pollster::FutureExt;

    use super::*;
    use crate::backend::MillisSinceEpoch;

    /// Test that parents get written correctly
    #[test]
    fn write_commit_parents() {
        let temp_dir = testutils::new_temp_dir();
        let backend = LocalBackend::init(temp_dir.path());

        // Writes the candidate, reads it back, checks that it round-trips
        // unchanged, and returns the ID it was stored under.
        let write_and_read_back = |candidate: &Commit| -> CommitId {
            let id = backend.write_commit(candidate.clone(), None).unwrap().0;
            let stored = backend.read_commit(&id).block_on().unwrap();
            assert_eq!(stored, *candidate);
            id
        };

        let mut commit = Commit {
            parents: vec![],
            predecessors: vec![],
            root_tree: MergedTreeId::resolved(backend.empty_tree_id().clone()),
            change_id: ChangeId::from_hex("abc123"),
            description: "".to_string(),
            author: create_signature(),
            committer: create_signature(),
            secure_sig: None,
        };

        // No parents
        commit.parents = vec![];
        assert_matches!(
            backend.write_commit(commit.clone(), None),
            Err(BackendError::Other(err)) if err.to_string().contains("no parents")
        );

        // Only root commit as parent
        commit.parents = vec![backend.root_commit_id().clone()];
        let first_id = write_and_read_back(&commit);

        // Only non-root commit as parent
        commit.parents = vec![first_id.clone()];
        let second_id = write_and_read_back(&commit);

        // Merge commit
        commit.parents = vec![first_id.clone(), second_id.clone()];
        write_and_read_back(&commit);

        // Merge commit with root as one parent
        commit.parents = vec![first_id, backend.root_commit_id().clone()];
        write_and_read_back(&commit);
    }

    /// Builds a fixed signature dated at the epoch with a zero UTC offset.
    fn create_signature() -> Signature {
        let epoch = Timestamp {
            timestamp: MillisSinceEpoch(0),
            tz_offset: 0,
        };
        Signature {
            name: "Someone".to_string(),
            email: "someone@example.com".to_string(),
            timestamp: epoch,
        }
    }
}
|