cli: add "op abandon root..head" command that "reparents" operations

In order to implement GC (#12), we'll need to somehow prune old operations.
Perhaps the easiest implementation is to just remove the unwanted operation
files and put a tombstone file in their place (like git shallow). However, the
removed operations might still be referenced by another jj process running in
parallel. Since the parallel operation thinks all the historical head commits
are reachable, the removed operations would have to be resurrected (or the
index data would have to be fixed up, etc.) when the op heads get merged.

The idea behind this patch is to split the "op log" GC into two steps:
 1. recreate operations to be retained and make the old history unreachable,
 2. delete unreachable operations if the head was created e.g. 3 days ago.
The latter will be run by "jj util gc". I don't think GC can be made 100% safe
on top of lock-less append-only storage, and we'll probably need some
timestamp-based mechanism to avoid removing objects that might be referenced
by an uncommitted operation.

FWIW, another nice thing about this implementation is that the index is
automatically invalidated as the op id changes. The bad thing is that the
"undo" description would contain an old op id. It seems the performance is
pretty okay.
This commit is contained in:
Yuya Nishihara 2023-12-29 16:02:56 +09:00
parent e5255135bb
commit f169c99fb4
3 changed files with 196 additions and 1 deletions

View file

@ -11,6 +11,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### New features
* New `jj op abandon` command is added to clean up the operation history. If GC
is implemented, Git refs and commit objects can be compacted.
### Fixed bugs

View file

@ -12,12 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::io::Write as _;
use std::slice;
use clap::Subcommand;
use itertools::Itertools as _;
use jj_lib::backend::ObjectId;
use jj_lib::op_store::OperationId;
use jj_lib::op_walk;
use jj_lib::repo::Repo;
use crate::cli_util::{user_error, CommandError, CommandHelper, LogContentFormat};
use crate::cli_util::{
user_error, user_error_with_hint, CommandError, CommandHelper, LogContentFormat,
};
use crate::graphlog::{get_graphlog, Edge};
use crate::operation_templater;
use crate::templater::Template as _;
@ -29,6 +36,7 @@ use crate::ui::Ui;
/// https://github.com/martinvonz/jj/blob/main/docs/operation-log.md.
#[derive(Subcommand, Clone, Debug)]
pub enum OperationCommand {
Abandon(OperationAbandonArgs),
Log(OperationLogArgs),
Undo(OperationUndoArgs),
Restore(OperationRestoreArgs),
@ -89,6 +97,23 @@ pub struct OperationUndoArgs {
what: Vec<UndoWhatToRestore>,
}
// NOTE(review): this struct derives `clap::Args`, so the `///` comments on it
// and on its field are presumably rendered as the `jj op abandon --help`
// text. Treat them as user-facing strings when editing — confirm.
/// Abandon operation history
///
/// To discard old operation history, use `jj op abandon ..<operation ID>`. It
/// will abandon the specified operation and all its ancestors. The descendants
/// will be reparented onto the root operation.
///
/// To discard recent operations, use `jj op restore <operation ID>` followed
/// by `jj op abandon <operation ID>..@-`.
///
/// The abandoned operations, commits, and other unreachable objects can later
/// be garbage collected by using `jj util gc` command.
#[derive(clap::Args, Clone, Debug)]
pub struct OperationAbandonArgs {
// Interpreted by `cmd_op_abandon`: either a single operation expression, or
// a `root..head` range split on the first "..", where an empty `root` means
// the oldest ancestor of the current operation and an empty `head` means the
// current operation itself.
/// The operation or operation range to abandon
operation: String,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, clap::ValueEnum)]
enum UndoWhatToRestore {
/// The jj repo state and local branches
@ -249,12 +274,82 @@ fn cmd_op_restore(
Ok(())
}
// Handler for `jj op abandon`.
//
// Resolves the argument into an exclusive root and an inclusive head
// operation, reparents everything between the head and the current operation
// onto the root, then points both the op-heads store and the working copy at
// the rewritten head.
//
// Fails with a user error when asked to abandon the root operation, a merge
// operation (single-operation form only), or the current operation.
fn cmd_op_abandon(
    ui: &mut Ui,
    command: &CommandHelper,
    args: &OperationAbandonArgs,
) -> Result<(), CommandError> {
    let mut workspace_command = command.workspace_helper(ui)?;
    let repo = workspace_command.repo();
    let current_head_op = repo.operation();

    let (abandon_root_op, abandon_head_op) = match args.operation.split_once("..") {
        // Range form: `root..head`, where either side may be left empty.
        Some((root_expr, head_expr)) => {
            let range_root = if !root_expr.is_empty() {
                workspace_command.resolve_single_op(root_expr)?
            } else {
                // TODO: Introduce a virtual root operation and use it instead.
                op_walk::walk_ancestors(slice::from_ref(current_head_op))
                    .last()
                    .unwrap()?
            };
            let range_head = if !head_expr.is_empty() {
                workspace_command.resolve_single_op(head_expr)?
            } else {
                current_head_op.clone()
            };
            (range_root, range_head)
        }
        // Single-operation form: abandon exactly one operation by reparenting
        // onto its only parent.
        None => {
            let target_op = workspace_command.resolve_single_op(&args.operation)?;
            let mut target_parents: Vec<_> = target_op.parents().try_collect()?;
            if target_parents.is_empty() {
                return Err(user_error("Cannot abandon the root operation"));
            }
            if target_parents.len() > 1 {
                return Err(user_error("Cannot abandon a merge operation"));
            }
            // Exactly one parent remains at this point.
            let sole_parent = target_parents.pop().unwrap();
            (sole_parent, target_op)
        }
    };

    if abandon_head_op == *current_head_op {
        return Err(user_error_with_hint(
            "Cannot abandon the current operation",
            "Run `jj undo` to revert the current operation, then use `jj op abandon`",
        ));
    }

    // Rewrite the descendants of the abandoned range on top of the root. The
    // returned stats also carry the abandoned/rewritten operation counts.
    let stats = op_walk::reparent_range(
        repo.op_store().as_ref(),
        slice::from_ref(&abandon_head_op),
        slice::from_ref(current_head_op),
        &abandon_root_op,
    )?;
    // A single head was passed in, so a single rewritten head is expected.
    let [reparented_head_id]: [OperationId; 1] = stats.new_head_ids.try_into().unwrap();

    // An unchanged head id means nothing had to be rewritten.
    if current_head_op.id() == &reparented_head_id {
        writeln!(ui.stderr(), "Nothing changed.")?;
        return Ok(());
    }

    writeln!(
        ui.stderr(),
        "Abandoned {} operations and reparented {} descendant operations.",
        stats.unreachable_count,
        stats.rewritten_count,
    )?;

    // Swap the old head for the rewritten one in the op-heads store.
    repo.op_heads_store().update_op_heads(
        slice::from_ref(current_head_op.id()),
        &reparented_head_id,
    );

    // Remap the operation id of the current workspace. If there were any
    // concurrent operations, user will need to re-abandon their ancestors.
    let (workspace_lock, _) = workspace_command.start_working_copy_mutation()?;
    workspace_lock.finish(reparented_head_id)?;
    Ok(())
}
pub fn cmd_operation(
ui: &mut Ui,
command: &CommandHelper,
subcommand: &OperationCommand,
) -> Result<(), CommandError> {
match subcommand {
OperationCommand::Abandon(args) => cmd_op_abandon(ui, command, args),
OperationCommand::Log(args) => cmd_op_log(ui, command, args),
OperationCommand::Restore(args) => cmd_op_restore(ui, command, args),
OperationCommand::Undo(args) => cmd_op_undo(ui, command, args),

View file

@ -279,6 +279,103 @@ fn test_op_log_configurable() {
assert!(stdout.contains("my-username@my-hostname"));
}
// End-to-end check of `jj op abandon`. In order, it covers:
//   1. abandoning all ancestors of `@` (`..@-`),
//   2. abandoning an explicit range (`@---..@-`),
//   3. refusing to abandon the current operation (`..@`),
//   4. abandoning a single operation (`@-`) after `jj undo`,
//   5. an empty range (`@-..@-`), which must report "Nothing changed.".
#[test]
fn test_op_abandon_ancestors() {
let test_env = TestEnvironment::default();
test_env.jj_cmd_ok(test_env.env_root(), &["init", "repo", "--git"]);
let repo_path = test_env.env_root().join("repo");
test_env.jj_cmd_ok(&repo_path, &["commit", "-m", "commit 1"]);
test_env.jj_cmd_ok(&repo_path, &["commit", "-m", "commit 2"]);
// Baseline operation log: two commits on top of the two setup operations.
insta::assert_snapshot!(test_env.jj_cmd_success(&repo_path, &["op", "log"]), @r###"
@ bacc8030a969 test-username@host.example.com 2001-02-03 04:05:09.000 +07:00 - 2001-02-03 04:05:09.000 +07:00
commit a8ac27b29a157ae7dabc0deb524df68823505730
args: jj commit -m 'commit 2'
bb26fe31d66f test-username@host.example.com 2001-02-03 04:05:08.000 +07:00 - 2001-02-03 04:05:08.000 +07:00
commit 230dd059e1b059aefc0da06a2e5a7dbf22362f22
args: jj commit -m 'commit 1'
19b8089fc78b test-username@host.example.com 2001-02-03 04:05:07.000 +07:00 - 2001-02-03 04:05:07.000 +07:00
add workspace 'default'
f1c462c494be test-username@host.example.com 2001-02-03 04:05:07.000 +07:00 - 2001-02-03 04:05:07.000 +07:00
initialize repo
"###);
// Abandon old operations. The working-copy operation id should be updated.
// The root operation (f1c462c494be) is kept; only the two operations between
// it and `@` are dropped, and `@` is rewritten with a new id.
let (_stdout, stderr) = test_env.jj_cmd_ok(&repo_path, &["op", "abandon", "..@-"]);
insta::assert_snapshot!(stderr, @r###"
Abandoned 2 operations and reparented 1 descendant operations.
"###);
insta::assert_snapshot!(test_env.jj_cmd_success(&repo_path, &["debug", "workingcopy"]), @r###"
Current operation: OperationId("fb5252a68411468f5e3cf480a75b8b54d8ca9231406a3d0ddc4dfb31d851839a855aca5615ba4b09018fe45d11a04e1c051817a98de1c1ef5dd75cb6c2c09ba8")
Current tree: Merge(Resolved(TreeId("4b825dc642cb6eb9a060e54bf8d69288fbee4904")))
"###);
insta::assert_snapshot!(test_env.jj_cmd_success(&repo_path, &["op", "log"]), @r###"
@ fb5252a68411 test-username@host.example.com 2001-02-03 04:05:09.000 +07:00 - 2001-02-03 04:05:09.000 +07:00
commit a8ac27b29a157ae7dabc0deb524df68823505730
args: jj commit -m 'commit 2'
f1c462c494be test-username@host.example.com 2001-02-03 04:05:07.000 +07:00 - 2001-02-03 04:05:07.000 +07:00
initialize repo
"###);
// Abandon operation range.
// The range start (`@---`) is exclusive and the end (`@-`) is inclusive, so
// exactly the "commit 3" and "commit 4" operations disappear from the log.
test_env.jj_cmd_ok(&repo_path, &["commit", "-m", "commit 3"]);
test_env.jj_cmd_ok(&repo_path, &["commit", "-m", "commit 4"]);
test_env.jj_cmd_ok(&repo_path, &["commit", "-m", "commit 5"]);
let (_stdout, stderr) = test_env.jj_cmd_ok(&repo_path, &["op", "abandon", "@---..@-"]);
insta::assert_snapshot!(stderr, @r###"
Abandoned 2 operations and reparented 1 descendant operations.
"###);
insta::assert_snapshot!(test_env.jj_cmd_success(&repo_path, &["op", "log"]), @r###"
@ ee40c9ad806a test-username@host.example.com 2001-02-03 04:05:16.000 +07:00 - 2001-02-03 04:05:16.000 +07:00
commit e184d62c9ab118b0f62de91959b857550a9273a5
args: jj commit -m 'commit 5'
fb5252a68411 test-username@host.example.com 2001-02-03 04:05:09.000 +07:00 - 2001-02-03 04:05:09.000 +07:00
commit a8ac27b29a157ae7dabc0deb524df68823505730
args: jj commit -m 'commit 2'
f1c462c494be test-username@host.example.com 2001-02-03 04:05:07.000 +07:00 - 2001-02-03 04:05:07.000 +07:00
initialize repo
"###);
// Can't abandon the current operation.
let stderr = test_env.jj_cmd_failure(&repo_path, &["op", "abandon", "..@"]);
insta::assert_snapshot!(stderr, @r###"
Error: Cannot abandon the current operation
Hint: Run `jj undo` to revert the current operation, then use `jj op abandon`
"###);
// Abandon the current operation by undoing it first.
// After `jj undo`, the operation to drop is `@-`, which is no longer the
// current operation and can therefore be abandoned.
test_env.jj_cmd_ok(&repo_path, &["undo"]);
let (_stdout, stderr) = test_env.jj_cmd_ok(&repo_path, &["op", "abandon", "@-"]);
insta::assert_snapshot!(stderr, @r###"
Abandoned 1 operations and reparented 1 descendant operations.
"###);
insta::assert_snapshot!(test_env.jj_cmd_success(&repo_path, &["debug", "workingcopy"]), @r###"
Current operation: OperationId("05aebafee59813d56c0ea1576520b3074f5ba3e128f2b31df7370284cee593bed5043475dc2cdd30a6f22662c1dfb6aba92b83806147e77c17ad14356c07079d")
Current tree: Merge(Resolved(TreeId("4b825dc642cb6eb9a060e54bf8d69288fbee4904")))
"###);
// Note that the description of the rewritten undo operation still names
// ee40c9ad806a, the operation that was just abandoned.
insta::assert_snapshot!(test_env.jj_cmd_success(&repo_path, &["op", "log"]), @r###"
@ 05aebafee598 test-username@host.example.com 2001-02-03 04:05:20.000 +07:00 - 2001-02-03 04:05:20.000 +07:00
undo operation ee40c9ad806a7d42f351beab5aa81a8ac38d926d02711c059229bf6a7388b7b4a7c04c004067ee6c5b6253e8398fa82bc74d0d621f8bc2c8c11f33d445f90b77
args: jj undo
fb5252a68411 test-username@host.example.com 2001-02-03 04:05:09.000 +07:00 - 2001-02-03 04:05:09.000 +07:00
commit a8ac27b29a157ae7dabc0deb524df68823505730
args: jj commit -m 'commit 2'
f1c462c494be test-username@host.example.com 2001-02-03 04:05:07.000 +07:00 - 2001-02-03 04:05:07.000 +07:00
initialize repo
"###);
// Abandon empty range.
// Root and head are the same operation, so the head id must stay unchanged.
let (_stdout, stderr) = test_env.jj_cmd_ok(&repo_path, &["op", "abandon", "@-..@-"]);
insta::assert_snapshot!(stderr, @r###"
Nothing changed.
"###);
insta::assert_snapshot!(test_env.jj_cmd_success(&repo_path, &["op", "log", "-l1"]), @r###"
@ 05aebafee598 test-username@host.example.com 2001-02-03 04:05:20.000 +07:00 - 2001-02-03 04:05:20.000 +07:00
undo operation ee40c9ad806a7d42f351beab5aa81a8ac38d926d02711c059229bf6a7388b7b4a7c04c004067ee6c5b6253e8398fa82bc74d0d621f8bc2c8c11f33d445f90b77
args: jj undo
"###);
}
fn get_log_output(test_env: &TestEnvironment, repo_path: &Path, op_id: &str) -> String {
test_env.jj_cmd_success(
repo_path,