
Implement a procedural macro to derive the ContentHash trait for structs

This is a no-op in terms of function, but it provides a nicer way to derive the
ContentHash trait for structs, using the same `#[derive(ContentHash)]` syntax as
other derivable traits such as `Debug`.

This commit only adds the macro. A subsequent commit will replace uses of
`content_hash!{}` with `#[derive(ContentHash)]`.
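In use, the derive replaces wrapping a struct definition in `content_hash! {}`
with an ordinary attribute. A minimal before/after sketch, using the struct
shapes from the equivalence test added to `lib/src/content_hash.rs` in this
commit:

```
// Old: the declarative macro wraps the struct definition.
content_hash! {
    struct FooMacro {
        x: Vec<Option<i32>>,
        y: i64,
    }
}

// New: an ordinary struct definition with a derive attribute.
#[derive(ContentHash)]
struct FooDerive {
    x: Vec<Option<i32>>,
    y: i64,
}
```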

The new macro generates nice error messages, just like the old macro:

```
error[E0277]: the trait bound `NotImplemented: content_hash::ContentHash` is not satisfied
   --> lib/src/content_hash.rs:265:16
    |
265 |             z: NotImplemented,
    |                ^^^^^^^^^^^^^^ the trait `content_hash::ContentHash` is not implemented for `NotImplemented`
    |
    = help: the following other types implement trait `content_hash::ContentHash`:
              bool
              i32
              i64
              u8
              u32
              u64
              std::collections::HashMap<K, V>
              BTreeMap<K, V>
            and 38 others
```

This commit does two things to make the proc macros re-exported by jj_lib usable
by dependent crates (see the sketch after this list):

1. jj_lib needs to be able to refer to itself as `jj_lib`, which it does by
   adding an `extern crate self as jj_lib` declaration.

2. jj_lib::content_hash needs to re-export the `digest::Update` trait so that
   users of jj_lib can use the `#[derive(ContentHash)]` proc macro without
   directly depending on the digest crate. This is done by re-exporting it
   as `DigestUpdate`.
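Both pieces appear in the diff below; in sketch form:

```
// lib/src/lib.rs: lets the `::jj_lib::...` paths emitted by the derive macro
// resolve even when the macro is used inside jj_lib itself.
extern crate self as jj_lib;

// lib/src/content_hash.rs: re-export the digest trait under a local name so
// that downstream crates don't need a direct dependency on `digest`.
pub use digest::Update as DigestUpdate;
```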


#3054
Evan Mesterhazy 2024-02-13 18:10:30 -05:00 committed by Evan Mesterhazy
parent 106483ad6a
commit 965d6ce4e4
11 changed files with 163 additions and 25 deletions

Cargo.lock (generated)

@@ -1676,6 +1676,7 @@ dependencies = [
"hex",
"insta",
"itertools 0.12.1",
"jj-lib-proc-macros",
"maplit",
"num_cpus",
"once_cell",
@@ -1706,6 +1707,15 @@ dependencies = [
"zstd",
]
[[package]]
name = "jj-lib-proc-macros"
version = "0.14.0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "jobserver"
version = "0.1.27"
@@ -2224,9 +2234,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.76"
version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c"
checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
dependencies = [
"unicode-ident",
]

Cargo.toml

@@ -2,7 +2,7 @@ cargo-features = []
[workspace]
resolver = "2"
members = ["cli", "lib", "lib/testutils", "lib/gen-protos"]
members = ["cli", "lib", "lib/gen-protos", "lib/proc-macros", "lib/testutils"]
[workspace.package]
version = "0.14.0"
@@ -67,8 +67,10 @@ pest = "2.7.7"
pest_derive = "2.7.7"
pollster = "0.3.0"
pretty_assertions = "1.4.0"
proc-macro2 = "1.0.78"
prost = "0.12.3"
prost-build = "0.12.3"
quote = "1.0.35"
rand = "0.8.5"
rand_chacha = "0.3.1"
rayon = "1.8.1"
@@ -86,6 +88,7 @@ smallvec = { version = "1.13.0", features = [
"union",
] }
strsim = "0.11.0"
syn = "2.0.48"
tempfile = "3.10.0"
test-case = "3.3.1"
textwrap = "0.16.1"
@@ -111,6 +114,7 @@ zstd = "0.12.4"
# their own (alphabetically sorted) block
jj-lib = { path = "lib", version = "0.14.0" }
jj-lib-proc-macros = { path = "lib/proc-macros", version = "0.14.0" }
testutils = { path = "lib/testutils" }
# Insta suggests compiling these packages in opt mode for faster testing.

lib/Cargo.toml

@@ -37,6 +37,7 @@ gix = { workspace = true }
glob = { workspace = true }
hex = { workspace = true }
itertools = { workspace = true }
jj-lib-proc-macros = { workspace = true }
maplit = { workspace = true }
once_cell = { workspace = true }
pest = { workspace = true }

lib/proc-macros/Cargo.toml

@@ -0,0 +1,15 @@
[package]
name = "jj-lib-proc-macros"
publish = false
version = { workspace = true }
edition = { workspace = true }
license = { workspace = true }
[lib]
proc-macro = true
[dependencies]
proc-macro2 = { workspace = true }
quote = { workspace = true }
syn = { workspace = true }

lib/proc-macros/src/content_hash.rs

@@ -0,0 +1,40 @@
use proc_macro2::TokenStream;
use quote::{quote, quote_spanned};
use syn::spanned::Spanned;
use syn::{Data, Fields, Index};
pub fn generate_hash_impl(data: &Data) -> TokenStream {
match *data {
Data::Struct(ref data) => match data.fields {
Fields::Named(ref fields) => {
let hash_statements = fields.named.iter().map(|f| {
let field_name = &f.ident;
let ty = &f.ty;
quote_spanned! {ty.span()=>
<#ty as ::jj_lib::content_hash::ContentHash>::hash(
&self.#field_name, state);
}
});
quote! {
#(#hash_statements)*
}
}
Fields::Unnamed(ref fields) => {
let hash_statements = fields.unnamed.iter().enumerate().map(|(i, f)| {
let index = Index::from(i);
let ty = &f.ty;
quote_spanned! {ty.span() =>
<#ty as ::jj_lib::content_hash::ContentHash>::hash(&self.#index, state);
}
});
quote! {
#(#hash_statements)*
}
}
Fields::Unit => {
quote! {}
}
},
_ => unimplemented!("ContentHash can only be derived for structs."),
}
}
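The `quote_spanned!` calls above attach each generated
`<#ty as ContentHash>::hash(...)` call to the span of the field's type, which is
why the E0277 error quoted in the commit description points at the offending
field type rather than at the derive attribute. A hypothetical struct that would
trigger it (mirroring the `NotImplemented` type from the quoted error):

```
struct NotImplemented; // deliberately does not implement ContentHash

#[derive(ContentHash)]
struct Bad {
    z: NotImplemented, // error[E0277] is reported here, on the field's type
}
```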

lib/proc-macros/src/lib.rs

@@ -0,0 +1,30 @@
mod content_hash;
extern crate proc_macro;
use quote::quote;
use syn::{parse_macro_input, DeriveInput};
/// Derives the `ContentHash` trait for a struct by calling `ContentHash::hash`
/// on each of the struct members in the order that they're declared. All
/// members of the struct must implement the `ContentHash` trait.
#[proc_macro_derive(ContentHash)]
pub fn derive_content_hash(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
let input = parse_macro_input!(input as DeriveInput);
// The name of the struct.
let name = &input.ident;
// Generate an expression to hash each of the fields in the struct.
let hash_impl = content_hash::generate_hash_impl(&input.data);
let expanded = quote! {
#[automatically_derived]
impl ::jj_lib::content_hash::ContentHash for #name {
fn hash(&self, state: &mut impl ::jj_lib::content_hash::DigestUpdate) {
#hash_impl
}
}
};
expanded.into()
}
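For reference, a rough sketch of what this derive expands to for a hypothetical
two-field struct, following the `quote!` template above:

```
// Given:
//   #[derive(ContentHash)]
//   struct Foo { x: Vec<i32>, y: u64 }
//
// the macro emits approximately:
#[automatically_derived]
impl ::jj_lib::content_hash::ContentHash for Foo {
    fn hash(&self, state: &mut impl ::jj_lib::content_hash::DigestUpdate) {
        <Vec<i32> as ::jj_lib::content_hash::ContentHash>::hash(&self.x, state);
        <u64 as ::jj_lib::content_hash::ContentHash>::hash(&self.y, state);
    }
}
```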

lib/src/backend.rs

@@ -25,7 +25,7 @@ use std::vec::Vec;
use async_trait::async_trait;
use thiserror::Error;
use crate::content_hash::ContentHash;
use crate::content_hash::{ContentHash, DigestUpdate};
use crate::index::Index;
use crate::merge::Merge;
use crate::object_id::{id_type, ObjectId};
@@ -111,7 +111,7 @@ impl PartialEq for MergedTreeId {
impl Eq for MergedTreeId {}
impl ContentHash for MergedTreeId {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
match self {
MergedTreeId::Legacy(tree_id) => {
state.update(&0u32.to_le_bytes());
@@ -247,7 +247,7 @@ impl TreeValue {
}
impl ContentHash for TreeValue {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
use TreeValue::*;
match self {
File { id, executable } => {

lib/src/content_hash.rs

@@ -1,7 +1,11 @@
//! Portable, stable hashing suitable for identifying values
use blake2::Blake2b512;
// Re-export DigestUpdate so that the ContentHash proc macro can be used in
// external crates without directly depending on the digest crate.
pub use digest::Update as DigestUpdate;
use itertools::Itertools as _;
pub use jj_lib_proc_macros::ContentHash;
/// Portable, stable hashing suitable for identifying values
///
@@ -10,9 +14,11 @@ use itertools::Itertools as _;
/// order their elements according to their `Ord` implementation. Enums should
/// hash a 32-bit little-endian encoding of the ordinal number of the enum
/// variant, then the variant's fields in lexical order.
///
/// Structs can implement `ContentHash` by using `#[derive(ContentHash)]`.
pub trait ContentHash {
/// Update the hasher state with this object's content
fn hash(&self, state: &mut impl digest::Update);
fn hash(&self, state: &mut impl DigestUpdate);
}
/// The 512-bit BLAKE2b content hash
@@ -24,48 +30,48 @@ pub fn blake2b_hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> {
}
impl ContentHash for () {
fn hash(&self, _: &mut impl digest::Update) {}
fn hash(&self, _: &mut impl DigestUpdate) {}
}
impl ContentHash for bool {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
u8::from(*self).hash(state);
}
}
impl ContentHash for u8 {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&[*self]);
}
}
impl ContentHash for u32 {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&self.to_le_bytes());
}
}
impl ContentHash for i32 {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&self.to_le_bytes());
}
}
impl ContentHash for u64 {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&self.to_le_bytes());
}
}
impl ContentHash for i64 {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&self.to_le_bytes());
}
}
// TODO: Specialize for [u8] once specialization exists
impl<T: ContentHash> ContentHash for [T] {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&(self.len() as u64).to_le_bytes());
for x in self {
x.hash(state);
@@ -74,19 +80,19 @@ impl<T: ContentHash> ContentHash for [T] {
}
impl<T: ContentHash> ContentHash for Vec<T> {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
self.as_slice().hash(state)
}
}
impl ContentHash for String {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
self.as_bytes().hash(state);
}
}
impl<T: ContentHash> ContentHash for Option<T> {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
match self {
None => state.update(&0u32.to_le_bytes()),
Some(x) => {
@@ -102,7 +108,7 @@ where
K: ContentHash + Ord,
V: ContentHash,
{
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&(self.len() as u64).to_le_bytes());
let mut kv = self.iter().collect_vec();
kv.sort_unstable_by_key(|&(k, _)| k);
@@ -117,7 +123,7 @@ impl<K> ContentHash for std::collections::HashSet<K>
where
K: ContentHash + Ord,
{
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&(self.len() as u64).to_le_bytes());
for k in self.iter().sorted() {
k.hash(state);
@@ -130,7 +136,7 @@ where
K: ContentHash,
V: ContentHash,
{
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
state.update(&(self.len() as u64).to_le_bytes());
for (k, v) in self.iter() {
k.hash(state);
@@ -243,6 +249,31 @@ mod tests {
);
}
// This will be removed once all uses of content_hash! are replaced by the
// derive version.
#[test]
fn derive_is_equivalent_to_macro() {
content_hash! {
struct FooMacro { x: Vec<Option<i32>>, y: i64}
}
#[derive(ContentHash)]
struct FooDerive {
x: Vec<Option<i32>>,
y: i64,
}
let foo_macro = FooMacro {
x: vec![None, Some(42)],
y: 17,
};
let foo_derive = FooDerive {
x: vec![None, Some(42)],
y: 17,
};
assert_eq!(hash(&foo_macro), hash(&foo_derive));
}
fn hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> {
blake2b_hash(x)
}
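To make the hashing scheme documented on the `ContentHash` trait concrete, here
is a worked example (an illustration, not part of the diff) of the bytes fed to
the hasher for `vec![1u32, 2u32]`, following the `Vec<T>`/`[T]` and `u32` impls
above:

```
// vec![1u32, 2u32] updates the hasher state with:
//   02 00 00 00 00 00 00 00   length 2 as a little-endian u64
//   01 00 00 00               1u32, little-endian
//   02 00 00 00               2u32, little-endian
// blake2b_hash() then finalizes these updates into a 512-bit BLAKE2b digest.
```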

lib/src/lib.rs

@@ -18,6 +18,13 @@
#![deny(unused_must_use)]
#![forbid(unsafe_code)]
// Needed so that proc macros can be used inside jj_lib and by external crates
// that depend on it.
// See:
// - https://github.com/rust-lang/rust/issues/54647#issuecomment-432015102
// - https://github.com/rust-lang/rust/issues/54363
extern crate self as jj_lib;
#[macro_use]
pub mod content_hash;

lib/src/merge.rs

@@ -29,7 +29,7 @@ use smallvec::{smallvec_inline, SmallVec};
use crate::backend;
use crate::backend::{BackendError, FileId, TreeId, TreeValue};
use crate::content_hash::ContentHash;
use crate::content_hash::{ContentHash, DigestUpdate};
use crate::object_id::ObjectId;
use crate::repo_path::RepoPath;
use crate::store::Store;
@@ -457,7 +457,7 @@ impl<T> Merge<Merge<T>> {
}
impl<T: ContentHash> ContentHash for Merge<T> {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
self.values.hash(state)
}
}

lib/src/refs.rs

@@ -25,7 +25,7 @@ use once_cell::sync::Lazy;
use thiserror::Error;
use crate::backend::{CommitId, MillisSinceEpoch, Timestamp};
use crate::content_hash::ContentHash;
use crate::content_hash::{ContentHash, DigestUpdate};
use crate::merge::Merge;
use crate::object_id::{id_type, HexPrefix, ObjectId, PrefixResolution};
@@ -212,7 +212,7 @@ pub enum RemoteRefState {
}
impl ContentHash for RemoteRefState {
fn hash(&self, state: &mut impl digest::Update) {
fn hash(&self, state: &mut impl DigestUpdate) {
match self {
RemoteRefState::New => state.update(&0u32.to_le_bytes()),
RemoteRefState::Tracking => state.update(&1u32.to_le_bytes()),