2023-09-18 05:22:52 +00:00
|
|
|
//! Portable, stable hashing suitable for identifying values
|
|
|
|
|
2022-12-02 18:03:00 +00:00
|
|
|
use blake2::Blake2b512;
|
2024-02-13 23:10:30 +00:00
|
|
|
// Re-export DigestUpdate so that the ContentHash proc macro can be used in
|
|
|
|
// external crates without directly depending on the digest crate.
|
|
|
|
pub use digest::Update as DigestUpdate;
|
2022-11-11 17:33:22 +00:00
|
|
|
use itertools::Itertools as _;
|
2024-02-13 23:10:30 +00:00
|
|
|
pub use jj_lib_proc_macros::ContentHash;
|
2022-11-11 17:33:22 +00:00
|
|
|
|
|
|
|
/// Portable, stable hashing suitable for identifying values
|
|
|
|
///
|
|
|
|
/// Variable-length sequences should hash a 64-bit little-endian representation
|
|
|
|
/// of their length, then their elements in order. Unordered containers should
|
|
|
|
/// order their elements according to their `Ord` implementation. Enums should
|
|
|
|
/// hash a 32-bit little-endian encoding of the ordinal number of the enum
|
|
|
|
/// variant, then the variant's fields in lexical order.
|
2024-02-13 23:10:30 +00:00
|
|
|
///
|
|
|
|
/// Structs can implement `ContentHash` by using `#[derive(ContentHash)]`.
|
2022-11-11 17:33:22 +00:00
|
|
|
pub trait ContentHash {
|
2023-09-18 05:22:52 +00:00
|
|
|
/// Update the hasher state with this object's content
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate);
|
2022-11-11 17:33:22 +00:00
|
|
|
}
|
|
|
|
|
2023-09-18 05:22:52 +00:00
|
|
|
/// The 512-bit BLAKE2b content hash
|
2022-12-02 18:03:00 +00:00
|
|
|
pub fn blake2b_hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> {
|
|
|
|
use digest::Digest;
|
|
|
|
let mut hasher = Blake2b512::default();
|
|
|
|
x.hash(&mut hasher);
|
|
|
|
hasher.finalize()
|
|
|
|
}
|
|
|
|
|
2022-11-11 17:33:22 +00:00
|
|
|
impl ContentHash for () {
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, _: &mut impl DigestUpdate) {}
|
2022-11-11 17:33:22 +00:00
|
|
|
}
|
|
|
|
|
2022-11-12 19:19:03 +00:00
|
|
|
impl ContentHash for bool {
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate) {
|
2022-11-12 19:19:03 +00:00
|
|
|
u8::from(*self).hash(state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-11 17:33:22 +00:00
|
|
|
impl ContentHash for u8 {
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate) {
|
2022-11-11 17:33:22 +00:00
|
|
|
state.update(&[*self]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-02-16 14:28:30 +00:00
|
|
|
impl ContentHash for u32 {
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate) {
|
2024-02-16 14:28:30 +00:00
|
|
|
state.update(&self.to_le_bytes());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-11 17:33:22 +00:00
|
|
|
impl ContentHash for i32 {
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate) {
|
2022-11-11 17:33:22 +00:00
|
|
|
state.update(&self.to_le_bytes());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-02-16 14:28:30 +00:00
|
|
|
impl ContentHash for u64 {
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate) {
|
2024-02-16 14:28:30 +00:00
|
|
|
state.update(&self.to_le_bytes());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-11 17:33:22 +00:00
|
|
|
impl ContentHash for i64 {
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate) {
|
2022-11-11 17:33:22 +00:00
|
|
|
state.update(&self.to_le_bytes());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: Specialize for [u8] once specialization exists
|
|
|
|
impl<T: ContentHash> ContentHash for [T] {
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate) {
|
2022-11-11 17:33:22 +00:00
|
|
|
state.update(&(self.len() as u64).to_le_bytes());
|
|
|
|
for x in self {
|
|
|
|
x.hash(state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<T: ContentHash> ContentHash for Vec<T> {
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate) {
|
2022-11-11 17:33:22 +00:00
|
|
|
self.as_slice().hash(state)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl ContentHash for String {
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate) {
|
2022-11-11 17:33:22 +00:00
|
|
|
self.as_bytes().hash(state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<T: ContentHash> ContentHash for Option<T> {
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate) {
|
2023-01-14 17:51:13 +00:00
|
|
|
match self {
|
2024-02-16 03:33:36 +00:00
|
|
|
None => state.update(&0u32.to_le_bytes()),
|
2023-01-14 17:51:13 +00:00
|
|
|
Some(x) => {
|
2024-02-16 03:33:36 +00:00
|
|
|
state.update(&1u32.to_le_bytes());
|
2022-11-11 17:33:22 +00:00
|
|
|
x.hash(state)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<K, V> ContentHash for std::collections::HashMap<K, V>
|
|
|
|
where
|
|
|
|
K: ContentHash + Ord,
|
|
|
|
V: ContentHash,
|
|
|
|
{
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate) {
|
2022-11-11 17:33:22 +00:00
|
|
|
state.update(&(self.len() as u64).to_le_bytes());
|
|
|
|
let mut kv = self.iter().collect_vec();
|
|
|
|
kv.sort_unstable_by_key(|&(k, _)| k);
|
|
|
|
for (k, v) in kv {
|
|
|
|
k.hash(state);
|
|
|
|
v.hash(state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<K> ContentHash for std::collections::HashSet<K>
|
|
|
|
where
|
|
|
|
K: ContentHash + Ord,
|
|
|
|
{
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate) {
|
2022-11-11 17:33:22 +00:00
|
|
|
state.update(&(self.len() as u64).to_le_bytes());
|
|
|
|
for k in self.iter().sorted() {
|
|
|
|
k.hash(state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<K, V> ContentHash for std::collections::BTreeMap<K, V>
|
|
|
|
where
|
|
|
|
K: ContentHash,
|
|
|
|
V: ContentHash,
|
|
|
|
{
|
2024-02-13 23:10:30 +00:00
|
|
|
fn hash(&self, state: &mut impl DigestUpdate) {
|
2022-11-11 17:33:22 +00:00
|
|
|
state.update(&(self.len() as u64).to_le_bytes());
|
|
|
|
for (k, v) in self.iter() {
|
|
|
|
k.hash(state);
|
|
|
|
v.hash(state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Declares a struct and implements `ContentHash` for it.
///
/// Two forms are accepted: a struct with named fields, and a tuple struct
/// with exactly one field. The generated `hash` feeds each field, in
/// declaration order, into the hasher.
macro_rules! content_hash {
    // Struct with named fields (a trailing comma is accepted).
    (
        $(#[$outer:meta])*
        $vis:vis struct $name:ident {
            $($(#[$inner:meta])* $fvis:vis $fname:ident : $fty:ty),* $(,)?
        }
    ) => {
        $(#[$outer])*
        $vis struct $name {
            $($(#[$inner])* $fvis $fname : $fty),*
        }

        impl crate::content_hash::ContentHash for $name {
            fn hash(&self, state: &mut impl digest::Update) {
                $(<$fty as crate::content_hash::ContentHash>::hash(&self.$fname, state);)*
            }
        }
    };
    // Tuple struct with a single field.
    (
        $(#[$outer:meta])*
        $vis:vis struct $name:ident($fvis:vis $fty:ty);
    ) => {
        $(#[$outer])*
        $vis struct $name($fvis $fty);

        impl crate::content_hash::ContentHash for $name {
            fn hash(&self, state: &mut impl digest::Update) {
                <$fty as crate::content_hash::ContentHash>::hash(&self.0, state);
            }
        }
    };
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use std::collections::{BTreeMap, HashMap};

    use blake2::Blake2b512;

    use super::*;

    /// Shorthand for the BLAKE2b content hash used by the tests in this module.
    fn hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> {
        blake2b_hash(x)
    }

    #[test]
    fn test_string_sanity() {
        let first = String::from("a");
        let second = String::from("b");
        // Equal strings hash equally, different strings do not.
        assert_eq!(hash(&first), hash(&first.clone()));
        assert_ne!(hash(&first), hash(&second));
        // A trailing NUL byte must change the hash.
        assert_ne!(hash(&String::from("a")), hash(&String::from("a\0")));
    }

    #[test]
    fn test_hash_map_key_value_distinction() {
        // Both maps have the same concatenated key/value text; only the
        // boundary between key and value differs.
        let long_key: HashMap<String, String> =
            HashMap::from([("ab".to_string(), "cd".to_string())]);
        let short_key: HashMap<String, String> =
            HashMap::from([("a".to_string(), "bcd".to_string())]);

        assert_ne!(hash(&long_key), hash(&short_key));
    }

    #[test]
    fn test_btree_map_key_value_distinction() {
        // Same idea as the HashMap test, for the ordered map.
        let long_key: BTreeMap<String, String> =
            BTreeMap::from([("ab".to_string(), "cd".to_string())]);
        let short_key: BTreeMap<String, String> =
            BTreeMap::from([("a".to_string(), "bcd".to_string())]);

        assert_ne!(hash(&long_key), hash(&short_key));
    }

    #[test]
    fn test_struct_sanity() {
        content_hash! {
            struct Foo { x: i32 }
        }
        let forty_two = Foo { x: 42 };
        let twelve = Foo { x: 12 };
        assert_ne!(hash(&forty_two), hash(&twelve));
    }

    #[test]
    fn test_option_sanity() {
        // Wrapping a value in `Some` changes its hash, and `None` does not
        // collide with a plain value.
        assert_ne!(hash(&Some(42)), hash(&42));
        assert_ne!(hash(&None::<i32>), hash(&42i32));
    }

    #[test]
    fn test_slice_sanity() {
        let empty: &[i32] = &[];
        let one: &[i32] = &[42];
        let other: &[i32] = &[12];

        assert_ne!(hash(one), hash(other));
        assert_ne!(hash(empty), hash(one));
        assert_ne!(hash(empty), hash(&()));
        assert_ne!(hash(&42i32), hash(one));
    }

    #[test]
    fn test_consistent_hashing() {
        content_hash! {
            struct Foo { x: Vec<Option<i32>>, y: i64 }
        }
        let foo = Foo {
            x: vec![None, Some(42)],
            y: 17,
        };
        let foo_hash = hex::encode(hash(&foo));
        insta::assert_snapshot!(
            foo_hash,
            @"e33c423b4b774b1353c414e0f9ef108822fde2fd5113fcd53bf7bd9e74e3206690b96af96373f268ed95dd020c7cbe171c7b7a6947fcaf5703ff6c8e208cefd4"
        );

        // Try again with an equivalent generic struct deriving ContentHash.
        #[derive(ContentHash)]
        struct GenericFoo<X, Y> {
            x: X,
            y: Y,
        }
        let generic_foo = GenericFoo {
            x: vec![None, Some(42)],
            y: 17i64,
        };
        assert_eq!(hex::encode(hash(&generic_foo)), foo_hash);
    }

    // This will be removed once all uses of content_hash! are replaced by the
    // derive version.
    #[test]
    fn derive_is_equivalent_to_macro() {
        content_hash! {
            struct FooMacro { x: Vec<Option<i32>>, y: i64}
        }

        #[derive(ContentHash)]
        struct FooDerive {
            x: Vec<Option<i32>>,
            y: i64,
        }

        let via_macro = FooMacro {
            x: vec![None, Some(42)],
            y: 17,
        };
        let via_derive = FooDerive {
            x: vec![None, Some(42)],
            y: 17,
        };
        assert_eq!(hash(&via_macro), hash(&via_derive));
    }
}
|