sign: Implement storage for digital commit signatures

Recognize signature metadata from git commit objects, implement
a basic version of that for the native backend.
Extract the signed data (a commit binary repr without the signature) to
be verified later.
This commit is contained in:
Anton Bulakh 2023-11-09 03:10:39 +02:00 committed by Anton Bulakh
parent ffd688472e
commit e3a1e5b80e
8 changed files with 116 additions and 18 deletions

View file

@ -34,7 +34,7 @@
in
pkgs.lib.all (re: builtins.match re relPath == null) regexes;
};
rust-version = pkgs.rust-bin.stable."1.71.0".default;
ourRustPlatform = pkgs.makeRustPlatform {
@ -130,6 +130,9 @@
cargo-nextest
cargo-watch
# In case you need to run `cargo run --bin gen-protos`
protobuf
# For building the documentation website
poetry
] ++ darwinDeps;

View file

@ -139,6 +139,14 @@ content_hash! {
}
}
content_hash! {
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct SecureSig {
pub data: Vec<u8>,
pub sig: Vec<u8>,
}
}
/// Identifies a single legacy tree, which may have path-level conflicts, or a
/// merge of multiple trees, where the individual trees do not have conflicts.
// TODO(#1624): Delete this type at some point in the future, when we decide to drop
@ -178,7 +186,7 @@ impl ContentHash for MergedTreeId {
}
impl MergedTreeId {
/// Create a resolved `MergedTreeId` from a single regular tree.
/// Create a resolved `MergedTreeId` from a single regular tree.
pub fn resolved(tree_id: TreeId) -> Self {
MergedTreeId::Merge(Merge::resolved(tree_id))
}
@ -202,6 +210,7 @@ content_hash! {
pub description: String,
pub author: Signature,
pub committer: Signature,
pub secure_sig: Option<SecureSig>,
}
}
@ -450,6 +459,7 @@ pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Comm
description: String::new(),
author: signature.clone(),
committer: signature,
secure_sig: None,
}
}

View file

@ -48,6 +48,7 @@ impl CommitBuilder<'_> {
description: String::new(),
author: signature.clone(),
committer: signature,
secure_sig: None,
};
CommitBuilder {
mut_repo,

View file

@ -22,6 +22,7 @@ use std::sync::{Arc, Mutex, MutexGuard};
use std::{fs, str};
use async_trait::async_trait;
use gix::objs::CommitRefIter;
use itertools::Itertools;
use prost::Message;
use thiserror::Error;
@ -29,7 +30,8 @@ use thiserror::Error;
use crate::backend::{
make_root_commit, Backend, BackendError, BackendInitError, BackendLoadError, BackendResult,
ChangeId, Commit, CommitId, Conflict, ConflictId, ConflictTerm, FileId, MergedTreeId,
MillisSinceEpoch, ObjectId, Signature, SymlinkId, Timestamp, Tree, TreeId, TreeValue,
MillisSinceEpoch, ObjectId, SecureSig, Signature, SymlinkId, Timestamp, Tree, TreeId,
TreeValue,
};
use crate::file_util::{IoResultExt as _, PathError};
use crate::lock::FileLock;
@ -371,9 +373,13 @@ fn gix_open_opts_from_settings(settings: &UserSettings) -> gix::open::Options {
fn commit_from_git_without_root_parent(
id: &CommitId,
commit: &gix::objs::CommitRef,
git_object: &gix::Object,
uses_tree_conflict_format: bool,
) -> Commit {
) -> Result<Commit, BackendError> {
let commit = git_object
.try_to_commit_ref()
.map_err(|err| to_read_object_err(err, id))?;
// We reverse the bits of the commit id to create the change id. We don't want
// to use the first bytes unmodified because then it would be ambiguous
// if a given hash prefix refers to the commit id or the change id. It
@ -406,7 +412,27 @@ fn commit_from_git_without_root_parent(
let author = signature_from_git(commit.author());
let committer = signature_from_git(commit.committer());
Commit {
// If the commit is signed, extract both the signature and the signed data
// (which is the commit buffer with the gpgsig header omitted).
// We have to re-parse the raw commit data because gix CommitRef does not give
// us the sogned data, only the signature.
// Ideally, we could use try_to_commit_ref_iter at the beginning of this
// function and extract everything from that. For now, this works
let secure_sig = commit
.extra_headers
.iter()
// gix does not recognize gpgsig-sha256, but prevent future footguns by checking for it too
.any(|(k, _)| *k == "gpgsig" || *k == "gpgsig-sha256")
.then(|| CommitRefIter::signature(&git_object.data))
.transpose()
.map_err(|err| to_read_object_err(err, id))?
.flatten()
.map(|(sig, data)| SecureSig {
data: data.to_bstring().into(),
sig: sig.into_owned().into(),
});
Ok(Commit {
parents,
predecessors: vec![],
// If this commit has associated extra metadata, we may reset this later.
@ -415,7 +441,8 @@ fn commit_from_git_without_root_parent(
description,
author,
committer,
}
secure_sig,
})
}
const EMPTY_STRING_PLACEHOLDER: &str = "JJ_EMPTY_STRING";
@ -585,14 +612,11 @@ fn import_extra_metadata_entries_from_heads(
let git_object = git_repo
.find_object(validate_git_object_id(&id)?)
.map_err(|err| map_not_found_err(err, &id))?;
let git_commit = git_object
.try_to_commit_ref()
.map_err(|err| to_read_object_err(err, &id))?;
// TODO(#1624): Should we read the root tree here and check if it has a
// `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
// change the description of a commit with tree-level conflicts.
let commit =
commit_from_git_without_root_parent(&id, &git_commit, uses_tree_conflict_format);
commit_from_git_without_root_parent(&id, &git_object, uses_tree_conflict_format)?;
mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
work_ids.extend(
commit
@ -858,10 +882,7 @@ impl Backend for GitBackend {
let git_object = locked_repo
.find_object(git_commit_id)
.map_err(|err| map_not_found_err(err, id))?;
let git_commit = git_object
.try_to_commit_ref()
.map_err(|err| to_read_object_err(err, id))?;
commit_from_git_without_root_parent(id, &git_commit, false)
commit_from_git_without_root_parent(id, &git_object, false)?
};
if commit.parents.is_empty() {
commit.parents.push(self.root_commit_id.clone());
@ -1281,6 +1302,51 @@ mod tests {
);
}
#[test]
fn read_signed_git_commit() {
let settings = user_settings();
let temp_dir = testutils::new_temp_dir();
let store_path = temp_dir.path();
let git_repo_path = temp_dir.path().join("git");
let git_repo = git2::Repository::init(&git_repo_path).unwrap();
let signature = git2::Signature::now("Someone", "someone@example.com").unwrap();
let empty_tree_id = Oid::from_str("4b825dc642cb6eb9a060e54bf8d69288fbee4904").unwrap();
let empty_tree = git_repo.find_tree(empty_tree_id).unwrap();
let commit_buf = git_repo
.commit_create_buffer(
&signature,
&signature,
"git commit message",
&empty_tree,
&[],
)
.unwrap();
// libgit2-rs works with &strs here for some reason
let commit_buf = std::str::from_utf8(&commit_buf).unwrap();
let secure_sig =
"here are some ASCII bytes to be used as a test signature\n\ndefinitely not PGP";
let git_commit_id = git_repo
.commit_signed(commit_buf, secure_sig, None)
.unwrap();
let backend = GitBackend::init_external(&settings, store_path, &git_repo_path).unwrap();
let commit = backend
.read_commit(&CommitId::from_bytes(git_commit_id.as_bytes()))
.block_on()
.unwrap();
let sig = commit.secure_sig.expect("failed to read the signature");
// converting to string for nicer assert diff
assert_eq!(std::str::from_utf8(&sig.sig).unwrap(), secure_sig);
assert_eq!(std::str::from_utf8(&sig.data).unwrap(), commit_buf);
}
#[test]
fn read_empty_string_placeholder() {
let git_signature1 = gix::actor::SignatureRef {
@ -1345,6 +1411,7 @@ mod tests {
description: "".to_string(),
author: create_signature(),
committer: create_signature(),
secure_sig: None,
};
// No parents
@ -1422,6 +1489,7 @@ mod tests {
description: "".to_string(),
author: create_signature(),
committer: create_signature(),
secure_sig: None,
};
// When writing a tree-level conflict, the root tree on the git side has the
@ -1503,6 +1571,7 @@ mod tests {
description: "initial".to_string(),
author: signature.clone(),
committer: signature,
secure_sig: None,
};
let commit_id = backend.write_commit(commit).unwrap().0;
let git_refs = backend
@ -1528,6 +1597,7 @@ mod tests {
description: "initial".to_string(),
author: create_signature(),
committer: create_signature(),
secure_sig: None,
};
// libgit2 doesn't seem to preserve negative timestamps, so set it to at least 1
// second after the epoch, so the timestamp adjustment can remove 1

View file

@ -28,8 +28,8 @@ use tempfile::NamedTempFile;
use crate::backend::{
make_root_commit, Backend, BackendError, BackendResult, ChangeId, Commit, CommitId, Conflict,
ConflictId, ConflictTerm, FileId, MergedTreeId, MillisSinceEpoch, ObjectId, Signature,
SymlinkId, Timestamp, Tree, TreeId, TreeValue,
ConflictId, ConflictTerm, FileId, MergedTreeId, MillisSinceEpoch, ObjectId, SecureSig,
Signature, SymlinkId, Timestamp, Tree, TreeId, TreeValue,
};
use crate::content_hash::blake2b_hash;
use crate::file_util::persist_content_addressed_temp_file;
@ -307,10 +307,18 @@ pub fn commit_to_proto(commit: &Commit) -> crate::protos::local_store::Commit {
proto.description = commit.description.clone();
proto.author = Some(signature_to_proto(&commit.author));
proto.committer = Some(signature_to_proto(&commit.committer));
proto.secure_sig = commit.secure_sig.as_ref().map(|s| s.sig.clone());
proto
}
fn commit_from_proto(proto: crate::protos::local_store::Commit) -> Commit {
fn commit_from_proto(mut proto: crate::protos::local_store::Commit) -> Commit {
// Note how .take() sets the secure_sig field to None before we encode the data.
// Needs to be done first since proto is partially moved a bunch below
let secure_sig = proto.secure_sig.take().map(|sig| SecureSig {
data: proto.encode_to_vec(),
sig,
});
let parents = proto.parents.into_iter().map(CommitId::new).collect();
let predecessors = proto.predecessors.into_iter().map(CommitId::new).collect();
let root_tree = if proto.uses_tree_conflict_format {
@ -329,6 +337,7 @@ fn commit_from_proto(proto: crate::protos::local_store::Commit) -> Commit {
description: proto.description,
author: signature_from_proto(proto.author.unwrap_or_default()),
committer: signature_from_proto(proto.committer.unwrap_or_default()),
secure_sig,
}
}
@ -485,6 +494,7 @@ mod tests {
description: "".to_string(),
author: create_signature(),
committer: create_signature(),
secure_sig: None,
};
// No parents

View file

@ -60,6 +60,7 @@ message Commit {
}
Signature author = 6;
Signature committer = 7;
optional bytes secure_sig = 9;
}
message Conflict {

View file

@ -65,6 +65,8 @@ pub struct Commit {
pub author: ::core::option::Option<commit::Signature>,
#[prost(message, optional, tag = "7")]
pub committer: ::core::option::Option<commit::Signature>,
#[prost(bytes = "vec", optional, tag = "9")]
pub secure_sig: ::core::option::Option<::prost::alloc::vec::Vec<u8>>,
}
/// Nested message and enum types in `Commit`.
pub mod commit {

View file

@ -334,6 +334,7 @@ pub fn commit_with_tree(store: &Arc<Store>, tree_id: MergedTreeId) -> Commit {
description: "description".to_string(),
author: signature.clone(),
committer: signature,
secure_sig: None,
};
store.write_commit(commit).unwrap()
}