2022-11-26 23:57:50 +00:00
|
|
|
// Copyright 2020 The Jujutsu Authors
|
2021-09-12 06:52:38 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2023-07-10 15:17:00 +00:00
|
|
|
#![allow(missing_docs)]
|
|
|
|
|
2023-05-12 13:05:32 +00:00
|
|
|
use std::any::Any;
|
2021-09-12 06:52:38 +00:00
|
|
|
use std::collections::BTreeMap;
|
2023-07-26 18:39:43 +00:00
|
|
|
use std::fmt::Debug;
|
2021-09-12 06:52:38 +00:00
|
|
|
use std::io::Read;
|
|
|
|
use std::result::Result;
|
|
|
|
use std::vec::Vec;
|
|
|
|
|
backend: make read functions async
The commit backend at Google is cloud-based (and so are the other
backends); it reads and writes commits from/to a server, which stores
them in a database. That makes latency much higher than for disk-based
backends. To reduce the latency, we have a local daemon process that
caches and prefetches objects. There are still many cases where
latency is high, such as when diffing two uncached commits. We can
improve that by changing some of our (jj's) algorithms to read many
objects concurrently from the backend. In the case of tree-diffing, we
can fetch one level (depth) of the tree at a time. There are several
ways of doing that:
* Make the backend methods `async`
* Use many threads for reading from the backend
* Add backend methods for batch reading
I don't think we typically need CPU parallelism, so it's wasteful to
have hundreds of threads running in order to fetch hundreds of objects
in parallel (especially when using a synchronous backend like the Git
backend). Batching would work well for the tree-diffing case, but it's
not as composable as `async`. For example, if we wanted to fetch some
commits at the same time as we were doing a diff, it's hard to see how
to do that with batching. Using async seems like our best bet.
I didn't make the backend interface's write functions async because
writes are already async with the daemon we have at Google. That
daemon will hash the object and immediately return, and then send the
object to the server in the background. I think any cloud-based
solution will need a similar daemon process. However, we may need to
reconsider this if/when jj gets used on a server with a custom backend
that writes directly to a database (i.e. no async daemon in between).
I've tried to measure the performance impact. The largest
difference I've been able to measure was on `jj diff
--ignore-working-copy -s --from v5.0 --to v6.0` in the Linux repo,
which increases from 749 ms to 773 ms (3.3%). In most cases I've
tested, there's no measurable difference. I've tried diffing from the
root commit, as well as `jj --ignore-working-copy log --no-graph -r
'::v3.0 & author(torvalds)' -T 'commit_id ++ "\n"'` (to test a
commit-heavy load).
2023-09-06 19:59:17 +00:00
|
|
|
use async_trait::async_trait;
|
2021-09-12 06:52:38 +00:00
|
|
|
use thiserror::Error;
|
|
|
|
|
2022-11-12 19:19:03 +00:00
|
|
|
use crate::content_hash::ContentHash;
|
2023-08-06 16:21:35 +00:00
|
|
|
use crate::merge::Merge;
|
2023-11-25 08:46:17 +00:00
|
|
|
use crate::repo_path::{RepoPath, RepoPathComponent, RepoPathComponentBuf};
|
2023-11-24 21:08:16 +00:00
|
|
|
use crate::signing::SignResult;
|
2021-09-12 06:52:38 +00:00
|
|
|
|
2023-01-01 03:24:32 +00:00
|
|
|
/// Common interface of the byte-string ids declared in this file
/// (`CommitId`, `ChangeId`, `TreeId`, ...).
pub trait ObjectId {
    /// Wraps an owned byte vector as an id.
    fn new(value: Vec<u8>) -> Self;
    /// Name of the kind of object the id refers to. The `impl_id_type!`
    /// implementation derives it from the type name (e.g. `"commit"` for
    /// `CommitId`).
    fn object_type(&self) -> String;
    /// Builds an id from a borrowed byte slice.
    fn from_bytes(bytes: &[u8]) -> Self;
    /// Borrows the raw bytes of the id.
    fn as_bytes(&self) -> &[u8];
    /// Returns an owned copy of the raw bytes.
    fn to_bytes(&self) -> Vec<u8>;
    /// Builds an id from a hexadecimal string. The `impl_id_type!`
    /// implementation panics if `hex` cannot be decoded.
    fn from_hex(hex: &str) -> Self;
    /// Returns the id encoded as a hexadecimal string.
    fn hex(&self) -> String;
}
|
|
|
|
|
2022-12-21 12:00:07 +00:00
|
|
|
/// Declares a tuple struct `$name(Vec<u8>)` with the given visibility inside
/// `content_hash!`, then hands it to `impl_id_type!` for its `Debug` and
/// `ObjectId` implementations.
macro_rules! id_type {
    ($vis:vis $name:ident) => {
        content_hash! {
            #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
            $vis struct $name(Vec<u8>);
        }

        $crate::backend::impl_id_type!($name);
    };
}
|
|
|
|
|
2022-12-21 11:40:36 +00:00
|
|
|
/// Implements `Debug` and `ObjectId` for a tuple struct whose only field is a
/// `Vec<u8>`.
///
/// The type name must end in `Id`: `object_type()` strips that suffix and
/// lowercases the rest (`CommitId` -> `"commit"`).
macro_rules! impl_id_type {
    ($name:ident) => {
        impl std::fmt::Debug for $name {
            // Spelled as `std::fmt::Result` so the expansion does not depend on
            // what `Result` means in the invoking module.
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                f.debug_tuple(stringify!($name)).field(&self.hex()).finish()
            }
        }

        impl crate::backend::ObjectId for $name {
            fn new(value: Vec<u8>) -> Self {
                Self(value)
            }

            fn object_type(&self) -> String {
                // `to_ascii_lowercase()` already yields an owned `String`.
                stringify!($name)
                    .strip_suffix("Id")
                    .expect("id type names must end with \"Id\"")
                    .to_ascii_lowercase()
            }

            fn from_bytes(bytes: &[u8]) -> Self {
                Self(bytes.to_vec())
            }

            fn as_bytes(&self) -> &[u8] {
                &self.0
            }

            fn to_bytes(&self) -> Vec<u8> {
                self.0.clone()
            }

            // Panics if `hex` is not valid hexadecimal.
            fn from_hex(hex: &str) -> Self {
                Self(hex::decode(hex).unwrap())
            }

            fn hex(&self) -> String {
                hex::encode(&self.0)
            }
        }
    };
}
|
|
|
|
|
2023-07-26 18:39:43 +00:00
|
|
|
pub(crate) use {id_type, impl_id_type};
|
|
|
|
|
2022-12-21 12:00:07 +00:00
|
|
|
// Id of a commit (see `Backend::read_commit`); `object_type()` yields "commit".
id_type!(pub CommitId);
|
|
|
|
// Id stored in `Commit::change_id`; `object_type()` yields "change".
id_type!(pub ChangeId);
|
|
|
|
// Id of a tree (see `Backend::read_tree`); `object_type()` yields "tree".
id_type!(pub TreeId);
|
|
|
|
// Id of a file (see `Backend::read_file`); `object_type()` yields "file".
id_type!(pub FileId);
|
|
|
|
// Id of a symlink (see `Backend::read_symlink`); `object_type()` yields "symlink".
id_type!(pub SymlinkId);
|
|
|
|
// Id of a conflict (see `Backend::read_conflict`); `object_type()` yields "conflict".
id_type!(pub ConflictId);
|
2021-09-12 06:52:38 +00:00
|
|
|
|
2022-11-11 17:33:22 +00:00
|
|
|
// A point in time as a signed millisecond count. `Timestamp::from_datetime`
// fills it from chrono's `timestamp_millis()`.
content_hash! {
    #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
    pub struct MillisSinceEpoch(pub i64);
}
|
2021-09-12 06:52:38 +00:00
|
|
|
|
2022-11-11 17:33:22 +00:00
|
|
|
// An instant plus the UTC offset it was recorded with.
content_hash! {
    #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
    pub struct Timestamp {
        // the instant itself, in milliseconds
        pub timestamp: MillisSinceEpoch,
        // time zone offset in minutes
        pub tz_offset: i32,
    }
}
|
|
|
|
|
|
|
|
impl Timestamp {
|
|
|
|
pub fn now() -> Self {
|
2022-03-05 06:33:15 +00:00
|
|
|
Self::from_datetime(chrono::offset::Local::now())
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn from_datetime<Tz: chrono::TimeZone<Offset = chrono::offset::FixedOffset>>(
|
|
|
|
datetime: chrono::DateTime<Tz>,
|
|
|
|
) -> Self {
|
2021-09-12 06:52:38 +00:00
|
|
|
Self {
|
2022-09-30 04:29:45 +00:00
|
|
|
timestamp: MillisSinceEpoch(datetime.timestamp_millis()),
|
2022-03-05 06:33:15 +00:00
|
|
|
tz_offset: datetime.offset().local_minus_utc() / 60,
|
2021-09-12 06:52:38 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-12 19:19:03 +00:00
|
|
|
// Name, email and timestamp of a person; used for `Commit::author` and
// `Commit::committer`.
content_hash! {
    #[derive(Debug, PartialEq, Eq, Clone)]
    pub struct Signature {
        pub name: String,
        pub email: String,
        pub timestamp: Timestamp,
    }
}
|
|
|
|
|
2023-11-09 01:10:39 +00:00
|
|
|
// Value of `Commit::secure_sig`.
// NOTE(review): presumably `data` is the byte representation that was signed
// and `sig` is the resulting signature -- confirm against the signing code.
content_hash! {
    #[derive(Debug, PartialEq, Eq, Clone)]
    pub struct SecureSig {
        pub data: Vec<u8>,
        pub sig: Vec<u8>,
    }
}
|
|
|
|
|
2023-11-28 06:34:02 +00:00
|
|
|
/// Callback handed to `Backend::write_commit`: it receives a byte slice and
/// returns bytes wrapped in a `SignResult`.
pub type SigningFn<'a> = dyn FnMut(&[u8]) -> SignResult<Vec<u8>> + 'a;
|
2023-11-12 01:40:23 +00:00
|
|
|
|
2023-08-24 23:59:07 +00:00
|
|
|
/// Identifies a single legacy tree, which may have path-level conflicts, or a
/// merge of multiple trees, where the individual trees do not have conflicts.
///
/// Equality is implemented by hand (not derived): both sides are converted
/// with `to_merge()` before being compared.
// TODO(#1624): Delete this type at some point in the future, when we decide to drop
// support for conflicts in older repos, or maybe after we have provided an upgrade
// mechanism.
#[derive(Debug, Clone)]
pub enum MergedTreeId {
    /// The tree id of a legacy tree
    Legacy(TreeId),
    /// The tree id(s) of a merge tree
    Merge(Merge<TreeId>),
}
|
|
|
|
|
2023-08-29 01:09:05 +00:00
|
|
|
impl PartialEq for MergedTreeId {
|
|
|
|
/// Overridden to make conflict-free trees be considered equal even if their
|
|
|
|
/// `MergedTreeId` variant is different.
|
|
|
|
fn eq(&self, other: &Self) -> bool {
|
|
|
|
self.to_merge() == other.to_merge()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Marker impl to accompany the hand-written `PartialEq` for `MergedTreeId`.
impl Eq for MergedTreeId {}
|
|
|
|
|
2023-08-24 23:59:07 +00:00
|
|
|
impl ContentHash for MergedTreeId {
|
|
|
|
fn hash(&self, state: &mut impl digest::Update) {
|
|
|
|
match self {
|
|
|
|
MergedTreeId::Legacy(tree_id) => {
|
|
|
|
state.update(b"0");
|
|
|
|
ContentHash::hash(tree_id, state);
|
|
|
|
}
|
|
|
|
MergedTreeId::Merge(tree_ids) => {
|
|
|
|
state.update(b"1");
|
|
|
|
ContentHash::hash(tree_ids, state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl MergedTreeId {
|
2023-11-09 01:10:39 +00:00
|
|
|
/// Create a resolved `MergedTreeId` from a single regular tree.
|
2023-08-24 23:59:07 +00:00
|
|
|
pub fn resolved(tree_id: TreeId) -> Self {
|
|
|
|
MergedTreeId::Merge(Merge::resolved(tree_id))
|
|
|
|
}
|
|
|
|
|
2023-08-28 19:46:14 +00:00
|
|
|
/// Return this id as `Merge<TreeId>`
|
|
|
|
pub fn to_merge(&self) -> Merge<TreeId> {
|
|
|
|
match self {
|
|
|
|
MergedTreeId::Legacy(tree_id) => Merge::resolved(tree_id.clone()),
|
|
|
|
MergedTreeId::Merge(tree_ids) => tree_ids.clone(),
|
|
|
|
}
|
|
|
|
}
|
2023-08-24 23:59:07 +00:00
|
|
|
}
|
|
|
|
|
2022-11-12 19:19:03 +00:00
|
|
|
// The commit data exchanged with a `Backend` (`read_commit` / `write_commit`).
content_hash! {
    #[derive(Debug, PartialEq, Eq, Clone)]
    pub struct Commit {
        pub parents: Vec<CommitId>,
        pub predecessors: Vec<CommitId>,
        pub root_tree: MergedTreeId,
        pub change_id: ChangeId,
        pub description: String,
        pub author: Signature,
        pub committer: Signature,
        // `None` when there is no signature, e.g. in `make_root_commit`.
        pub secure_sig: Option<SecureSig>,
    }
}
|
|
|
|
|
2022-11-12 19:19:03 +00:00
|
|
|
// One term (an entry of `adds` or `removes`) of a `Conflict`.
content_hash! {
    #[derive(Debug, PartialEq, Eq, Clone)]
    pub struct ConflictTerm {
        pub value: TreeValue,
    }
}
|
|
|
|
|
2022-11-12 19:19:03 +00:00
|
|
|
content_hash! {
    #[derive(Default, Debug, PartialEq, Eq, Clone)]
    pub struct Conflict {
        // A conflict is represented by a list of positive and negative states that need to be
        // applied. In a simple 3-way merge of B and C with merge base A, the conflict will be
        // { adds: [B, C], removes: [A] }. Also note that a conflict of the form
        // { adds: [A], removes: [] } is the same as non-conflict A.
        pub removes: Vec<ConflictTerm>,
        pub adds: Vec<ConflictTerm>,
    }
}
|
|
|
|
|
2023-07-06 04:20:24 +00:00
|
|
|
/// Error that may occur during backend initialization.
///
/// Wraps an arbitrary boxed error; `#[error(transparent)]` forwards `Display`
/// and `source()` to it.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct BackendInitError(pub Box<dyn std::error::Error + Send + Sync>);
|
|
|
|
|
|
|
|
/// Error that may occur during backend loading.
///
/// Wraps an arbitrary boxed error; `#[error(transparent)]` forwards `Display`
/// and `source()` to it.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct BackendLoadError(pub Box<dyn std::error::Error + Send + Sync>);
|
|
|
|
|
|
|
|
/// Commit-backend error that may occur after the backend is loaded.
#[derive(Debug, Error)]
pub enum BackendError {
    /// An object hash did not have the expected number of bytes.
    #[error(
        "Invalid hash length for object of type {object_type} (expected {expected} bytes, got \
         {actual} bytes): {hash}"
    )]
    InvalidHashLength {
        expected: usize,
        actual: usize,
        object_type: String,
        hash: String,
    },
    /// An object's content was not valid UTF-8.
    #[error("Invalid UTF-8 for object {hash} of type {object_type}: {source}")]
    InvalidUtf8 {
        object_type: String,
        hash: String,
        source: std::str::Utf8Error,
    },
    /// The requested object was not found.
    #[error("Object {hash} of type {object_type} not found: {source}")]
    ObjectNotFound {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Reading an object failed.
    #[error("Error when reading object {hash} of type {object_type}: {source}")]
    ReadObject {
        object_type: String,
        hash: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Writing an object failed. Unlike the other variants, `object_type` is
    /// a `&'static str` here and no hash is recorded.
    #[error("Could not write object of type {object_type}: {source}")]
    WriteObject {
        object_type: &'static str,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    /// Any other error, carried as a boxed error.
    #[error("Error: {0}")]
    Other(Box<dyn std::error::Error + Send + Sync>),
}
|
|
|
|
|
|
|
|
/// `Result` alias whose error type is `BackendError`.
pub type BackendResult<T> = Result<T, BackendError>;
|
|
|
|
|
|
|
|
/// The value stored under a name in a `Tree`: the id of the referenced object,
/// tagged with its kind.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub enum TreeValue {
    /// A file id together with its executable flag.
    File { id: FileId, executable: bool },
    /// A symlink id.
    Symlink(SymlinkId),
    /// A tree id.
    Tree(TreeId),
    /// A Git submodule, identified by a commit id.
    GitSubmodule(CommitId),
    /// A conflict id.
    Conflict(ConflictId),
}
|
|
|
|
|
2023-08-29 21:13:35 +00:00
|
|
|
impl TreeValue {
|
|
|
|
pub fn hex(&self) -> String {
|
|
|
|
match self {
|
|
|
|
TreeValue::File { id, .. } => id.hex(),
|
|
|
|
TreeValue::Symlink(id) => id.hex(),
|
|
|
|
TreeValue::Tree(id) => id.hex(),
|
|
|
|
TreeValue::GitSubmodule(id) => id.hex(),
|
|
|
|
TreeValue::Conflict(id) => id.hex(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-12 19:19:03 +00:00
|
|
|
impl ContentHash for TreeValue {
|
|
|
|
fn hash(&self, state: &mut impl digest::Update) {
|
|
|
|
use TreeValue::*;
|
2023-01-14 17:51:13 +00:00
|
|
|
match self {
|
|
|
|
File { id, executable } => {
|
2022-11-12 19:19:03 +00:00
|
|
|
state.update(&0u32.to_le_bytes());
|
|
|
|
id.hash(state);
|
|
|
|
executable.hash(state);
|
|
|
|
}
|
2023-01-14 17:51:13 +00:00
|
|
|
Symlink(id) => {
|
2022-11-12 19:19:03 +00:00
|
|
|
state.update(&1u32.to_le_bytes());
|
|
|
|
id.hash(state);
|
|
|
|
}
|
2023-01-14 17:51:13 +00:00
|
|
|
Tree(id) => {
|
2022-11-12 19:19:03 +00:00
|
|
|
state.update(&2u32.to_le_bytes());
|
|
|
|
id.hash(state);
|
|
|
|
}
|
2023-01-14 17:51:13 +00:00
|
|
|
GitSubmodule(id) => {
|
2022-11-12 19:19:03 +00:00
|
|
|
state.update(&3u32.to_le_bytes());
|
|
|
|
id.hash(state);
|
|
|
|
}
|
2023-01-14 17:51:13 +00:00
|
|
|
Conflict(id) => {
|
2022-11-12 19:19:03 +00:00
|
|
|
state.update(&4u32.to_le_bytes());
|
|
|
|
id.hash(state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-12 06:52:38 +00:00
|
|
|
/// A borrowed (name, value) pair describing one entry of a `Tree`.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct TreeEntry<'a> {
    name: &'a RepoPathComponent,
    value: &'a TreeValue,
}
|
|
|
|
|
|
|
|
impl<'a> TreeEntry<'a> {
|
|
|
|
pub fn new(name: &'a RepoPathComponent, value: &'a TreeValue) -> Self {
|
|
|
|
TreeEntry { name, value }
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn name(&self) -> &'a RepoPathComponent {
|
|
|
|
self.name
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn value(&self) -> &'a TreeValue {
|
|
|
|
self.value
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-11-15 05:16:06 +00:00
|
|
|
/// Iterator over the entries of a single `Tree`, wrapping the iterator of the
/// tree's underlying `BTreeMap`.
pub struct TreeEntriesNonRecursiveIterator<'a> {
    iter: std::collections::btree_map::Iter<'a, RepoPathComponentBuf, TreeValue>,
}
|
|
|
|
|
2021-11-15 05:16:06 +00:00
|
|
|
impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> {
|
2021-09-12 06:52:38 +00:00
|
|
|
type Item = TreeEntry<'a>;
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
self.iter
|
|
|
|
.next()
|
|
|
|
.map(|(name, value)| TreeEntry { name, value })
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-12 19:19:03 +00:00
|
|
|
// A single tree level: a map from entry name to value, kept ordered by name
// by the `BTreeMap`.
content_hash! {
    #[derive(Default, PartialEq, Eq, Debug, Clone)]
    pub struct Tree {
        entries: BTreeMap<RepoPathComponentBuf, TreeValue>,
    }
}
|
|
|
|
|
|
|
|
impl Tree {
|
|
|
|
pub fn is_empty(&self) -> bool {
|
|
|
|
self.entries.is_empty()
|
|
|
|
}
|
|
|
|
|
2023-07-06 07:05:27 +00:00
|
|
|
pub fn names(&self) -> impl Iterator<Item = &RepoPathComponent> {
|
2023-11-25 09:22:09 +00:00
|
|
|
self.entries.keys().map(|name| name.as_ref())
|
2023-07-06 07:05:27 +00:00
|
|
|
}
|
|
|
|
|
2021-11-15 05:16:06 +00:00
|
|
|
pub fn entries(&self) -> TreeEntriesNonRecursiveIterator {
|
|
|
|
TreeEntriesNonRecursiveIterator {
|
2021-09-12 06:52:38 +00:00
|
|
|
iter: self.entries.iter(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-11-25 08:46:17 +00:00
|
|
|
pub fn set(&mut self, name: RepoPathComponentBuf, value: TreeValue) {
|
2021-09-12 06:52:38 +00:00
|
|
|
self.entries.insert(name, value);
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn remove(&mut self, name: &RepoPathComponent) {
|
|
|
|
self.entries.remove(name);
|
|
|
|
}
|
|
|
|
|
2023-06-28 10:09:54 +00:00
|
|
|
pub fn set_or_remove(&mut self, name: &RepoPathComponent, value: Option<TreeValue>) {
|
|
|
|
match value {
|
|
|
|
None => {
|
|
|
|
self.entries.remove(name);
|
|
|
|
}
|
|
|
|
Some(value) => {
|
2023-11-25 09:22:09 +00:00
|
|
|
self.entries.insert(name.to_owned(), value);
|
2023-06-28 10:09:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-12 06:52:38 +00:00
|
|
|
pub fn entry(&self, name: &RepoPathComponent) -> Option<TreeEntry> {
|
|
|
|
self.entries
|
|
|
|
.get_key_value(name)
|
|
|
|
.map(|(name, value)| TreeEntry { name, value })
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn value(&self, name: &RepoPathComponent) -> Option<&TreeValue> {
|
|
|
|
self.entries.get(name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-18 11:18:57 +00:00
|
|
|
/// Calculates common prefix length of two bytes. The length to be returned is
|
|
|
|
/// a number of hexadecimal digits.
|
|
|
|
pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize {
|
|
|
|
iter_half_bytes(bytes_a)
|
|
|
|
.zip(iter_half_bytes(bytes_b))
|
|
|
|
.take_while(|(a, b)| a == b)
|
|
|
|
.count()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Yields the 4-bit halves of every byte, high nibble before low nibble, so
/// the iterator produces exactly `2 * bytes.len()` items.
fn iter_half_bytes(bytes: &[u8]) -> impl ExactSizeIterator<Item = u8> + '_ {
    let nibble_count = bytes.len() * 2;
    (0..nibble_count).map(move |index| {
        let byte = bytes[index / 2];
        match index % 2 {
            0 => byte >> 4,
            _ => byte & 0x0f,
        }
    })
}
|
|
|
|
|
2023-02-06 18:15:01 +00:00
|
|
|
pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Commit {
|
2022-09-19 00:33:39 +00:00
|
|
|
let timestamp = Timestamp {
|
|
|
|
timestamp: MillisSinceEpoch(0),
|
|
|
|
tz_offset: 0,
|
|
|
|
};
|
|
|
|
let signature = Signature {
|
|
|
|
name: String::new(),
|
|
|
|
email: String::new(),
|
|
|
|
timestamp,
|
|
|
|
};
|
|
|
|
Commit {
|
|
|
|
parents: vec![],
|
|
|
|
predecessors: vec![],
|
2023-08-24 23:59:07 +00:00
|
|
|
root_tree: MergedTreeId::Legacy(empty_tree_id),
|
2023-02-06 18:15:01 +00:00
|
|
|
change_id: root_change_id,
|
2022-09-19 00:33:39 +00:00
|
|
|
description: String::new(),
|
|
|
|
author: signature.clone(),
|
|
|
|
committer: signature,
|
2023-11-09 01:10:39 +00:00
|
|
|
secure_sig: None,
|
2022-09-19 00:33:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
backend: make read functions async
The commit backend at Google is cloud-based (and so are the other
backends); it reads and writes commits from/to a server, which stores
them in a database. That makes latency much higher than for disk-based
backends. To reduce the latency, we have a local daemon process that
caches and prefetches objects. There are still many cases where
latency is high, such as when diffing two uncached commits. We can
improve that by changing some of our (jj's) algorithms to read many
objects concurrently from the backend. In the case of tree-diffing, we
can fetch one level (depth) of the tree at a time. There are several
ways of doing that:
* Make the backend methods `async`
* Use many threads for reading from the backend
* Add backend methods for batch reading
I don't think we typically need CPU parallelism, so it's wasteful to
have hundreds of threads running in order to fetch hundreds of objects
in parallel (especially when using a synchronous backend like the Git
backend). Batching would work well for the tree-diffing case, but it's
not as composable as `async`. For example, if we wanted to fetch some
commits at the same time as we were doing a diff, it's hard to see how
to do that with batching. Using async seems like our best bet.
I didn't make the backend interface's write functions async because
writes are already async with the daemon we have at Google. That
daemon will hash the object and immediately return, and then send the
object to the server in the background. I think any cloud-based
solution will need a similar daemon process. However, we may need to
reconsider this if/when jj gets used on a server with a custom backend
that writes directly to a database (i.e. no async daemon in between).
I've tried to measure the performance impact. The largest
difference I've been able to measure was on `jj diff
--ignore-working-copy -s --from v5.0 --to v6.0` in the Linux repo,
which increases from 749 ms to 773 ms (3.3%). In most cases I've
tested, there's no measurable difference. I've tried diffing from the
root commit, as well as `jj --ignore-working-copy log --no-graph -r
'::v3.0 & author(torvalds)' -T 'commit_id ++ "\n"'` (to test a
commit-heavy load).
2023-09-06 19:59:17 +00:00
|
|
|
/// Interface of a commit backend: storage for files, symlinks, trees,
/// conflicts and commits.
///
/// Most read methods are `async` (via `#[async_trait]`); the write methods and
/// `read_conflict` are synchronous.
#[async_trait]
pub trait Backend: Send + Sync + Debug {
    /// Exposes the backend as `&dyn Any`.
    // NOTE(review): presumably so callers can downcast to the concrete
    // backend type -- confirm against callers.
    fn as_any(&self) -> &dyn Any;

    /// A unique name that identifies this backend. Written to
    /// `.jj/repo/store/backend` when the repo is created.
    fn name(&self) -> &str;

    /// The length of commit IDs in bytes.
    fn commit_id_length(&self) -> usize;

    /// The length of change IDs in bytes.
    fn change_id_length(&self) -> usize;

    /// The id of the root commit.
    fn root_commit_id(&self) -> &CommitId;

    /// The change id of the root commit.
    fn root_change_id(&self) -> &ChangeId;

    /// The id of the empty tree.
    fn empty_tree_id(&self) -> &TreeId;

    /// An estimate of how many concurrent requests this backend handles well. A
    /// local backend like the Git backend (at least until it supports partial
    /// clones) may want to set this to 1. A cloud-backed backend may want to
    /// set it to 100 or so.
    ///
    /// It is not guaranteed that at most this number of concurrent requests are
    /// sent.
    fn concurrency(&self) -> usize;

    /// Reads the file with the given id and returns a reader over its
    /// contents.
    async fn read_file(&self, path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>>;

    /// Writes the data read from `contents` as a file and returns its id.
    fn write_file(&self, path: &RepoPath, contents: &mut dyn Read) -> BackendResult<FileId>;

    /// Reads the symlink with the given id and returns its target.
    async fn read_symlink(&self, path: &RepoPath, id: &SymlinkId) -> BackendResult<String>;

    /// Writes a symlink pointing at `target` and returns its id.
    fn write_symlink(&self, path: &RepoPath, target: &str) -> BackendResult<SymlinkId>;

    /// Reads the tree with the given id.
    async fn read_tree(&self, path: &RepoPath, id: &TreeId) -> BackendResult<Tree>;

    /// Writes a tree and returns its id.
    fn write_tree(&self, path: &RepoPath, contents: &Tree) -> BackendResult<TreeId>;

    /// Reads the conflict with the given id.
    // Not async because it would force `MergedTree::value()` to be async. We don't
    // need this to be async anyway because it's only used by legacy repos.
    fn read_conflict(&self, path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict>;

    /// Writes a conflict and returns its id.
    fn write_conflict(&self, path: &RepoPath, contents: &Conflict) -> BackendResult<ConflictId>;

    /// Reads the commit with the given id.
    async fn read_commit(&self, id: &CommitId) -> BackendResult<Commit>;

    /// Writes a commit and returns its ID and the commit itself. The commit
    /// should contain the data that was actually written, which may differ
    /// from the data passed in. For example, the backend may change the
    /// committer name to an authenticated user's name, or the backend's
    /// timestamps may have less precision than the millisecond precision in
    /// `Commit`.
    ///
    /// The `sign_with` parameter could contain a function to cryptographically
    /// sign some binary representation of the commit.
    /// If the backend supports it, it could call it and store the result in
    /// an implementation specific fashion, and both `read_commit` and the
    /// return of `write_commit` should read it back as the `secure_sig`
    /// field.
    fn write_commit(
        &self,
        contents: Commit,
        sign_with: Option<&mut SigningFn>,
    ) -> BackendResult<(CommitId, Commit)>;

    /// Perform garbage collection.
    // TODO: pass in the set of commits to keep here
    fn gc(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>>;
}
|