local_backend: use ContentHash rather than hashing protos

Insulates identifiers from the unstable serialized form.
This commit is contained in:
Benjamin Saunders 2022-11-12 11:19:03 -08:00
parent 2447dfeed8
commit c3bfe72754
4 changed files with 115 additions and 59 deletions

View file

@ -20,6 +20,7 @@ use std::vec::Vec;
use thiserror::Error;
use crate::content_hash::ContentHash;
use crate::repo_path::{RepoPath, RepoPathComponent};
content_hash! {
@ -59,8 +60,10 @@ impl CommitId {
}
}
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct ChangeId(Vec<u8>);
content_hash! {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct ChangeId(Vec<u8>);
}
impl Debug for ChangeId {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
@ -94,8 +97,10 @@ impl ChangeId {
}
}
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct TreeId(Vec<u8>);
content_hash! {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct TreeId(Vec<u8>);
}
impl Debug for TreeId {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
@ -129,8 +134,10 @@ impl TreeId {
}
}
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct FileId(Vec<u8>);
content_hash! {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct FileId(Vec<u8>);
}
impl Debug for FileId {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
@ -160,8 +167,10 @@ impl FileId {
}
}
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct SymlinkId(Vec<u8>);
content_hash! {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct SymlinkId(Vec<u8>);
}
impl Debug for SymlinkId {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
@ -191,8 +200,10 @@ impl SymlinkId {
}
}
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct ConflictId(Vec<u8>);
content_hash! {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct ConflictId(Vec<u8>);
}
impl Debug for ConflictId {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
@ -256,15 +267,18 @@ impl Timestamp {
}
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Signature {
content_hash! {
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Signature {
pub name: String,
pub email: String,
pub timestamp: Timestamp,
}
}
#[derive(Debug, Clone)]
pub struct Commit {
content_hash! {
#[derive(Debug, Clone)]
pub struct Commit {
pub parents: Vec<CommitId>,
pub predecessors: Vec<CommitId>,
pub root_tree: TreeId,
@ -272,23 +286,28 @@ pub struct Commit {
pub description: String,
pub author: Signature,
pub committer: Signature,
}
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct ConflictPart {
content_hash! {
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct ConflictPart {
// TODO: Store e.g. CommitId here too? Labels (theirs/ours/base)? Would those still be
// useful e.g. after rebasing this conflict?
pub value: TreeValue,
}
}
#[derive(Default, Debug, PartialEq, Eq, Clone)]
pub struct Conflict {
content_hash! {
#[derive(Default, Debug, PartialEq, Eq, Clone)]
pub struct Conflict {
// A conflict is represented by a list of positive and negative states that need to be applied.
// In a simple 3-way merge of B and C with merge base A, the conflict will be { add: [B, C],
// remove: [A] }. Also note that a conflict of the form { add: [A], remove: [] } is the
// same as non-conflict A.
pub removes: Vec<ConflictPart>,
pub adds: Vec<ConflictPart>,
}
}
#[derive(Debug, Error, PartialEq, Eq)]
@ -310,6 +329,35 @@ pub enum TreeValue {
Conflict(ConflictId),
}
impl ContentHash for TreeValue {
fn hash(&self, state: &mut impl digest::Update) {
use TreeValue::*;
match *self {
Normal { ref id, executable } => {
state.update(&0u32.to_le_bytes());
id.hash(state);
executable.hash(state);
}
Symlink(ref id) => {
state.update(&1u32.to_le_bytes());
id.hash(state);
}
Tree(ref id) => {
state.update(&2u32.to_le_bytes());
id.hash(state);
}
GitSubmodule(ref id) => {
state.update(&3u32.to_le_bytes());
id.hash(state);
}
Conflict(ref id) => {
state.update(&4u32.to_le_bytes());
id.hash(state);
}
}
}
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct TreeEntry<'a> {
name: &'a RepoPathComponent,
@ -344,9 +392,11 @@ impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> {
}
}
#[derive(Default, Debug, Clone)]
pub struct Tree {
content_hash! {
#[derive(Default, Debug, Clone)]
pub struct Tree {
entries: BTreeMap<RepoPathComponent, TreeValue>,
}
}
impl Tree {

View file

@ -15,6 +15,12 @@ impl ContentHash for () {
fn hash(&self, _: &mut impl digest::Update) {}
}
impl ContentHash for bool {
fn hash(&self, state: &mut impl digest::Update) {
u8::from(*self).hash(state);
}
}
impl ContentHash for u8 {
fn hash(&self, state: &mut impl digest::Update) {
state.update(&[*self]);

View file

@ -27,6 +27,7 @@ use crate::backend::{
ConflictId, ConflictPart, FileId, MillisSinceEpoch, Signature, SymlinkId, Timestamp, Tree,
TreeId, TreeValue,
};
use crate::content_hash::ContentHash;
use crate::file_util::persist_content_addressed_temp_file;
use crate::repo_path::{RepoPath, RepoPathComponent};
@ -72,7 +73,7 @@ impl LocalBackend {
pub fn load(store_path: &Path) -> Self {
let root_commit_id = CommitId::from_bytes(&[0; 64]);
let empty_tree_id = TreeId::from_hex("786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce");
let empty_tree_id = TreeId::from_hex("482ae5a29fbe856c7272f2071b8b0f0359ee2d89ff392b8a900643fbd0836eccd067b8bf41909e206c90d45d6e7d8b6686b93ecaee5fe1a9060d87b672101310");
LocalBackend {
path: store_path.to_path_buf(),
root_commit_id,
@ -192,12 +193,9 @@ impl Backend for LocalBackend {
let temp_file = NamedTempFile::new_in(&self.path)?;
let proto = tree_to_proto(tree);
let mut proto_bytes: Vec<u8> = Vec::new();
proto.write_to_writer(&mut proto_bytes)?;
proto.write_to_writer(&mut temp_file.as_file())?;
temp_file.as_file().write_all(&proto_bytes)?;
let id = TreeId::new(Blake2b512::digest(&proto_bytes).to_vec());
let id = TreeId::new(hash(tree).to_vec());
persist_content_addressed_temp_file(temp_file, self.tree_path(&id))?;
Ok(id)
@ -215,12 +213,9 @@ impl Backend for LocalBackend {
let temp_file = NamedTempFile::new_in(&self.path)?;
let proto = conflict_to_proto(conflict);
let mut proto_bytes: Vec<u8> = Vec::new();
proto.write_to_writer(&mut proto_bytes)?;
proto.write_to_writer(&mut temp_file.as_file())?;
temp_file.as_file().write_all(&proto_bytes)?;
let id = ConflictId::new(Blake2b512::digest(&proto_bytes).to_vec());
let id = ConflictId::new(hash(conflict).to_vec());
persist_content_addressed_temp_file(temp_file, self.conflict_path(&id))?;
Ok(id)
@ -242,12 +237,9 @@ impl Backend for LocalBackend {
let temp_file = NamedTempFile::new_in(&self.path)?;
let proto = commit_to_proto(commit);
let mut proto_bytes: Vec<u8> = Vec::new();
proto.write_to_writer(&mut proto_bytes)?;
proto.write_to_writer(&mut temp_file.as_file())?;
temp_file.as_file().write_all(&proto_bytes)?;
let id = CommitId::new(Blake2b512::digest(&proto_bytes).to_vec());
let id = CommitId::new(hash(commit).to_vec());
persist_content_addressed_temp_file(temp_file, self.commit_path(&id))?;
Ok(id)
@ -412,3 +404,9 @@ fn conflict_part_to_proto(part: &ConflictPart) -> crate::protos::store::conflict
proto.content = MessageField::some(tree_value_to_proto(&part.value));
proto
}
fn hash(x: &impl ContentHash) -> digest::Output<Blake2b512> {
let mut hasher = Blake2b512::default();
x.hash(&mut hasher);
hasher.finalize()
}

View file

@ -20,9 +20,11 @@ use thiserror::Error;
use crate::file_util;
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
pub struct RepoPathComponent {
content_hash! {
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
pub struct RepoPathComponent {
value: String,
}
}
impl RepoPathComponent {