gc: implement basic GC for Git backend

This adds an initial `jj util gc` command, which simply calls `git gc`
when using the Git backend. That should already be useful in
non-colocated repos because it's not obvious how to GC (repack) such
repos. In my own jj repo, it shrunk `.jj/repo/store/` from 2.4 GiB to
780 MiB, and `jj log --ignore-working-copy` was sped up from 157 ms to
86 ms.

I haven't added any tests because the functionality depends on having
a `git` binary on the PATH, which we don't yet depend on anywhere
else. I think we'll still be able to test much of the future parts of
garbage collection without a `git` binary because the interesting
parts are about manipulating the Git repo before calling `git gc` on
it.
This commit is contained in:
Martin von Zweigbergk 2023-12-01 14:00:22 -08:00 committed by Martin von Zweigbergk
parent 60c111645f
commit 1cc271441f
9 changed files with 66 additions and 3 deletions

View file

@ -35,6 +35,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* `jj rebase` now takes the flag `--skip-empty`, which doesn't copy over commits
that would become empty after a rebase.
* There is a new `jj util gc` command for cleaning up the repository storage.
For now, it simply runs `git gc` on the backing Git repo (when using the Git
backend).
### Fixed bugs
* Fixed another file conflict resolution issue where `jj status` would disagree

View file

@ -170,4 +170,8 @@ impl Backend for JitBackend {
) -> BackendResult<(CommitId, Commit)> {
self.inner.write_commit(contents, sign_with)
}
/// Forwards the garbage-collection request to the wrapped `inner` backend
/// and returns its result unchanged.
fn gc(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    let inner = &self.inner;
    inner.gc()
}
}

View file

@ -15,15 +15,17 @@
use std::io::Write;
use clap::Subcommand;
use jj_lib::repo::Repo;
use tracing::instrument;
use crate::cli_util::{CommandError, CommandHelper};
use crate::cli_util::{user_error, CommandError, CommandHelper};
use crate::ui::Ui;
/// Infrequently used commands such as for generating shell completions
// Each variant carries the argument struct of one `jj util` subcommand;
// `cmd_util` matches on the variant to pick the handler function.
#[derive(Subcommand, Clone, Debug)]
pub(crate) enum UtilCommand {
Completion(UtilCompletionArgs),
// Handled by `cmd_util_gc`.
Gc(UtilGcArgs),
Mangen(UtilMangenArgs),
ConfigSchema(UtilConfigSchemaArgs),
}
@ -56,6 +58,10 @@ pub(crate) struct UtilCompletionArgs {
zsh: bool,
}
/// Run backend-dependent garbage collection.
// No fields: the command currently accepts no options.
#[derive(clap::Args, Clone, Debug)]
pub(crate) struct UtilGcArgs {}
/// Print a ROFF (manpage)
// No fields: the command currently accepts no options.
#[derive(clap::Args, Clone, Debug)]
pub(crate) struct UtilMangenArgs {}
@ -72,6 +78,7 @@ pub(crate) fn cmd_util(
) -> Result<(), CommandError> {
match subcommand {
UtilCommand::Completion(args) => cmd_util_completion(ui, command, args),
UtilCommand::Gc(args) => cmd_util_gc(ui, command, args),
UtilCommand::Mangen(args) => cmd_util_mangen(ui, command, args),
UtilCommand::ConfigSchema(args) => cmd_util_config_schema(ui, command, args),
}
@ -96,6 +103,17 @@ fn cmd_util_completion(
Ok(())
}
/// Handler for the `Gc` util subcommand.
///
/// Loads the workspace, reaches through its repo to the store, and asks the
/// store to garbage-collect. A failure is reported through `user_error`
/// using the error's display text.
fn cmd_util_gc(
    ui: &mut Ui,
    command: &CommandHelper,
    _args: &UtilGcArgs,
) -> Result<(), CommandError> {
    let workspace_command = command.workspace_helper(ui)?;
    workspace_command
        .repo()
        .store()
        .gc()
        .map_err(|gc_err| user_error(gc_err.to_string()))?;
    Ok(())
}
fn cmd_util_mangen(
ui: &mut Ui,
command: &CommandHelper,

View file

@ -130,8 +130,8 @@ repos may require you to deal with more involved Jujutsu and Git concepts.
* In co-located repos with a very large number of branches or other refs, `jj`
commands can get noticeably slower because of the automatic `jj git import`
executed on each command. This can be mitigated by occasionally running `git
pack-refs --all` to speed up the import.
executed on each command. This can be mitigated by occasionally running `jj util
gc` to speed up the import (that command includes packing the Git refs).
* Git tools will have trouble with revisions that contain conflicted files. While
`jj` renders these files with conflict markers in the working copy, they are

View file

@ -533,4 +533,8 @@ pub trait Backend: Send + Sync + Debug {
contents: Commit,
sign_with: Option<&mut SigningFn>,
) -> BackendResult<(CommitId, Commit)>;
/// Perform garbage collection.
///
/// What is collected is up to the implementing backend. Returns `Ok(())`
/// on success, or a boxed error describing why collection failed.
// TODO: pass in the set of commits to keep here
fn gc(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>>;
}

View file

@ -18,6 +18,7 @@ use std::any::Any;
use std::fmt::{Debug, Error, Formatter};
use std::io::{Cursor, Read};
use std::path::Path;
use std::process::ExitStatus;
use std::sync::{Arc, Mutex, MutexGuard};
use std::{fs, str};
@ -94,6 +95,14 @@ impl From<GitBackendError> for BackendError {
}
}
/// Errors from invoking the external `git gc` command.
#[derive(Debug, Error)]
pub enum GitGcError {
/// The `git gc` process could not be run; wraps the underlying I/O error.
#[error("Failed to run git gc command: {0}")]
GcCommand(#[source] std::io::Error),
/// `git gc` ran, but its exit status did not indicate success.
#[error("git gc command exited with an error: {0}")]
GcCommandErrorStatus(ExitStatus),
}
pub struct GitBackend {
// While gix::Repository can be created from gix::ThreadSafeRepository, it's
// cheaper to cache the thread-local instance behind a mutex than creating
@ -1007,6 +1016,18 @@ impl Backend for GitBackend {
self.save_extra_metadata_table(mut_table, &table_lock)?;
Ok((id, contents))
}
/// Runs `git gc` on the backing Git repository.
///
/// The repository is selected by setting `GIT_DIR` to `git_repo_path()` in
/// the child process's environment.
///
/// # Errors
///
/// Returns `GitGcError::GcCommand` if the `git` process could not be run,
/// and `GitGcError::GcCommandErrorStatus` if it exited unsuccessfully.
fn gc(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    // TODO: pass output to UI layer instead of printing directly here
    let exit_status = std::process::Command::new("git")
        .env("GIT_DIR", self.git_repo_path())
        .arg("gc")
        .status()
        .map_err(GitGcError::GcCommand)?;
    if exit_status.success() {
        Ok(())
    } else {
        Err(Box::new(GitGcError::GcCommandErrorStatus(exit_status)))
    }
}
}
/// Write a tree conflict as a special tree with `.jjconflict-base-N` and

View file

@ -297,6 +297,10 @@ impl Backend for LocalBackend {
.map_err(to_other_err)?;
Ok((id, commit))
}
/// No-op: this backend does nothing for garbage collection and always
/// returns `Ok(())`.
fn gc(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
Ok(())
}
}
pub fn commit_to_proto(commit: &Commit) -> crate::protos::local_store::Commit {

View file

@ -265,4 +265,8 @@ impl Store {
/// Builds a `TreeBuilder` from a new shared handle to this store and
/// `base_tree_id`.
pub fn tree_builder(self: &Arc<Self>, base_tree_id: TreeId) -> TreeBuilder {
    let store = Arc::clone(self);
    TreeBuilder::new(store, base_tree_id)
}
/// Asks the underlying backend to perform garbage collection and returns
/// its result unchanged.
pub fn gc(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    let backend = &self.backend;
    backend.gc()
}
}

View file

@ -292,4 +292,8 @@ impl Backend for TestBackend {
.insert(id.clone(), contents.clone());
Ok((id, contents))
}
/// No-op: the test backend does nothing for garbage collection and always
/// returns `Ok(())`.
fn gc(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
Ok(())
}
}