1//! Low-level tree manipulations and other sharp tools
2//!
3//! The target audience for this module is projects like [Bao](https://github.com/oconnor663/bao),
4//! which work directly with the interior hashes ("chaining values") of BLAKE3 chunks and subtrees.
5//! For example, you could use these functions to implement a BitTorrent-like protocol using the
6//! BLAKE3 tree structure, or to hash an input that's distributed across different machines. These
7//! use cases are advanced, and most applications don't need this module. Also:
8//!
9//! <div class="warning">
10//!
11//! **Warning:** This module is *hazardous material*. If you've heard folks say *don't roll your
12//! own crypto,* this is the sort of thing they're talking about. These functions have complicated
13//! requirements, and any mistakes will give you garbage output and/or break the security
14//! properties that BLAKE3 is supposed to have. Read section 2.1 of [the BLAKE3
15//! paper](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf) to understand the
16//! tree structure you need to maintain. Test your code against [`blake3::hash`](../fn.hash.html)
17//! and make sure you can get the same outputs for [lots of different
18//! inputs](https://github.com/BLAKE3-team/BLAKE3/blob/master/test_vectors/test_vectors.json).
19//!
20//! </div>
21//!
22//! On the other hand:
23//!
24//! <div class="warning">
25//!
26//! **Encouragement:** Playing with these functions is a great way to learn how BLAKE3 works on the
27//! inside. Have fun!
28//!
29//! </div>
30//!
31//! The main entrypoint for this module is the [`HasherExt`] trait, particularly the
32//! [`set_input_offset`](HasherExt::set_input_offset) and
33//! [`finalize_non_root`](HasherExt::finalize_non_root) methods. These let you compute the chaining
34//! values of individual chunks or subtrees. You then combine these chaining values into larger
35//! subtrees using [`merge_subtrees_non_root`] and finally (once at the very top)
36//! [`merge_subtrees_root`] or [`merge_subtrees_root_xof`].
37//!
38//! # Examples
39//!
40//! Here's an example of computing all the interior hashes in a 3-chunk tree:
41//!
42//! ```text
43//!            root
44//!          /      \
45//!      parent      \
46//!    /       \      \
47//! chunk0  chunk1  chunk2
48//! ```
49//!
50//! ```
51//! # fn main() {
52//! use blake3::{Hasher, CHUNK_LEN};
53//! use blake3::hazmat::{merge_subtrees_non_root, merge_subtrees_root, Mode};
54//! use blake3::hazmat::HasherExt; // an extension trait for Hasher
55//!
56//! let chunk0 = [b'a'; CHUNK_LEN];
57//! let chunk1 = [b'b'; CHUNK_LEN];
58//! let chunk2 = [b'c'; 42]; // The final chunk can be short.
59//!
60//! // Compute the non-root hashes ("chaining values") of all three chunks. Chunks or subtrees
61//! // that don't begin at the start of the input use `set_input_offset` to say where they begin.
62//! let chunk0_cv = Hasher::new()
63//!     // .set_input_offset(0) is the default.
64//!     .update(&chunk0)
65//!     .finalize_non_root();
66//! let chunk1_cv = Hasher::new()
67//!     .set_input_offset(CHUNK_LEN as u64)
68//!     .update(&chunk1)
69//!     .finalize_non_root();
70//! let chunk2_cv = Hasher::new()
71//!     .set_input_offset(2 * CHUNK_LEN as u64)
72//!     .update(&chunk2)
73//!     .finalize_non_root();
74//!
75//! // Join the first two chunks with a non-root parent node and compute its chaining value.
76//! let parent_cv = merge_subtrees_non_root(&chunk0_cv, &chunk1_cv, Mode::Hash);
77//!
78//! // Join that parent node and the third chunk with a root parent node and compute the hash.
79//! let root_hash = merge_subtrees_root(&parent_cv, &chunk2_cv, Mode::Hash);
80//!
81//! // Double check that we got the right answer.
82//! let mut combined_input = Vec::new();
83//! combined_input.extend_from_slice(&chunk0);
84//! combined_input.extend_from_slice(&chunk1);
85//! combined_input.extend_from_slice(&chunk2);
86//! assert_eq!(root_hash, blake3::hash(&combined_input));
87//! # }
88//! ```
89//!
90//! Hashing many chunks together is important for performance, because it allows the implementation
91//! to use SIMD parallelism internally. ([AVX-512](https://en.wikipedia.org/wiki/AVX-512) for
92//! example needs 16 chunks to really get going.) We can reproduce `parent_cv` by hashing `chunk0`
93//! and `chunk1` at the same time:
94//!
95//! ```
96//! # fn main() {
97//! # use blake3::{Hasher, CHUNK_LEN};
98//! # use blake3::hazmat::{Mode, HasherExt, merge_subtrees_non_root, merge_subtrees_root};
99//! # let chunk0 = [b'a'; CHUNK_LEN];
100//! # let chunk1 = [b'b'; CHUNK_LEN];
101//! # let chunk0_cv = Hasher::new().update(&chunk0).finalize_non_root();
102//! # let chunk1_cv = Hasher::new().set_input_offset(CHUNK_LEN as u64).update(&chunk1).finalize_non_root();
103//! # let parent_cv = merge_subtrees_non_root(&chunk0_cv, &chunk1_cv, Mode::Hash);
104//! # let mut combined_input = Vec::new();
105//! # combined_input.extend_from_slice(&chunk0);
106//! # combined_input.extend_from_slice(&chunk1);
107//! let left_subtree_cv = Hasher::new()
108//!     // .set_input_offset(0) is the default.
109//!     .update(&combined_input[..2 * CHUNK_LEN])
110//!     .finalize_non_root();
111//! assert_eq!(left_subtree_cv, parent_cv);
112//!
113//! // Using multiple updates gives the same answer, though it's not as efficient.
114//! let mut subtree_hasher = Hasher::new();
115//! // Again, .set_input_offset(0) is the default.
116//! subtree_hasher.update(&chunk0);
117//! subtree_hasher.update(&chunk1);
118//! assert_eq!(left_subtree_cv, subtree_hasher.finalize_non_root());
119//! # }
120//! ```
121//!
122//! However, hashing multiple chunks together **must** respect the overall tree structure. Hashing
123//! `chunk0` and `chunk1` together is valid, but hashing `chunk1` and `chunk2` together is
124//! incorrect and gives a garbage result that will never match a standard BLAKE3 hash. The
125//! implementation includes a few best-effort asserts to catch some of these mistakes, but these
126//! checks aren't guaranteed. For example, this second call to `update` currently panics:
127//!
128//! ```should_panic
129//! # fn main() {
130//! # use blake3::{Hasher, CHUNK_LEN};
131//! # use blake3::hazmat::HasherExt;
132//! # let chunk0 = [b'a'; CHUNK_LEN];
133//! # let chunk1 = [b'b'; CHUNK_LEN];
134//! # let chunk2 = [b'c'; 42];
135//! let oops = Hasher::new()
136//!     .set_input_offset(CHUNK_LEN as u64)
137//!     .update(&chunk1)
138//!     // PANIC: "the subtree starting at 1024 contains at most 1024 bytes"
139//!     .update(&chunk2)
140//!     .finalize_non_root();
141//! # }
142//! ```
143//!
//! For more on valid tree structures, see the docs for [`left_subtree_len`] and
145//! [`max_subtree_len`], and see section 2.1 of [the BLAKE3
146//! paper](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf). Note that the
147//! merging functions ([`merge_subtrees_root`] and friends) don't know the shape of the left and
148//! right subtrees you're giving them, and they can't help you catch mistakes. The best way to
149//! catch mistakes with these is to compare your root output to the [`blake3::hash`](crate::hash)
150//! of the same input.
151
152use crate::platform::Platform;
153use crate::{CVWords, Hasher, CHUNK_LEN, IV, KEY_LEN, OUT_LEN};
154
/// Extension methods for [`Hasher`]. This is the main entrypoint to the `hazmat` module.
pub trait HasherExt {
    /// Similar to [`Hasher::new_derive_key`] but using a pre-hashed [`ContextKey`] from
    /// [`hash_derive_key_context`].
    ///
    /// The [`hash_derive_key_context`] function is the _only_ valid source of the [`ContextKey`].
    /// Any other source ([`hash`](crate::hash), [`keyed_hash`](crate::keyed_hash), arbitrary bytes
    /// from the caller) violates the security requirements.
    ///
    /// Calling [`derive_key`](crate::derive_key) or [`Hasher::new_derive_key`] in a loop will
    /// re-hash the context string every time. This constructor function is a performance
    /// optimization to avoid that repeated work. If you hardcode the [`ContextKey`], the
    /// derive-key mode becomes zero-overhead, like the keyed mode.
    ///
    /// # Example
    ///
    /// ```
    /// use blake3::Hasher;
    /// use blake3::hazmat::HasherExt;
    ///
    /// let context_key = blake3::hazmat::hash_derive_key_context("foo");
    /// let mut hasher = Hasher::new_from_context_key(&context_key);
    /// hasher.update(b"bar");
    /// let derived_key = *hasher.finalize().as_bytes();
    ///
    /// assert_eq!(derived_key, blake3::derive_key("foo", b"bar"));
    /// ```
    fn new_from_context_key(context_key: &ContextKey) -> Self;

    /// Configure the `Hasher` to process a chunk or subtree starting at `offset` bytes into the
    /// whole input. Returns `&mut self`, so calls can be chained.
    ///
    /// You must call this function before processing any input with [`update`](Hasher::update) or
    /// similar. This step isn't required for the first chunk, or for a subtree that includes the
    /// first chunk (i.e. when the `offset` is zero), but it's required for all other chunks and
    /// subtrees.
    ///
    /// The starting input offset of a subtree implies a maximum possible length for that subtree.
    /// See [`max_subtree_len`] and section 2.1 of [the BLAKE3
    /// paper](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf). Note that only
    /// subtrees along the right edge of the whole tree can have a length less than their maximum
    /// possible length.
    ///
    /// See the [module level examples](index.html#examples).
    ///
    /// # Panics
    ///
    /// This function panics if the `Hasher` has already accepted any input with
    /// [`update`](Hasher::update) or similar.
    ///
    /// This should always be paired with [`finalize_non_root`](HasherExt::finalize_non_root). It's
    /// never correct to use a non-zero input offset with [`finalize`](Hasher::finalize) or
    /// [`finalize_xof`](Hasher::finalize_xof). The `offset` must also be a multiple of
    /// `CHUNK_LEN`. Violating either of these rules will currently fail an assertion and panic,
    /// but this is not guaranteed.
    fn set_input_offset(&mut self, offset: u64) -> &mut Self;

    /// Finalize the non-root hash ("chaining value") of the current chunk or subtree.
    ///
    /// Afterwards you can merge subtree chaining values into parent nodes using
    /// [`merge_subtrees_non_root`] and ultimately into the root node with either
    /// [`merge_subtrees_root`] (similar to [`Hasher::finalize`]) or [`merge_subtrees_root_xof`]
    /// (similar to [`Hasher::finalize_xof`]).
    ///
    /// See the [module level examples](index.html#examples), particularly the discussion of valid
    /// tree structures.
    fn finalize_non_root(&self) -> ChainingValue;
}
223
224impl HasherExt for Hasher {
225    fn new_from_context_key(context_key: &[u8; KEY_LEN]) -> Hasher {
226        let context_key_words = crate::platform::words_from_le_bytes_32(context_key);
227        Hasher::new_internal(&context_key_words, crate::DERIVE_KEY_MATERIAL)
228    }
229
230    fn set_input_offset(&mut self, offset: u64) -> &mut Hasher {
231        assert_eq!(self.count(), 0, "hasher has already accepted input");
232        assert_eq!(
233            offset % CHUNK_LEN as u64,
234            0,
235            "offset ({offset}) must be a chunk boundary (divisible by {CHUNK_LEN})",
236        );
237        let counter = offset / CHUNK_LEN as u64;
238        self.chunk_state.chunk_counter = counter;
239        self.initial_chunk_counter = counter;
240        self
241    }
242
243    fn finalize_non_root(&self) -> ChainingValue {
244        assert_ne!(self.count(), 0, "empty subtrees are never valid");
245        self.final_output().chaining_value()
246    }
247}
248
249/// The maximum length of a subtree in bytes, given its starting offset in bytes
250///
251/// If you try to hash more than this many bytes as one subtree, you'll end up merging parent nodes
252/// that shouldn't be merged, and your output will be garbage. [`Hasher::update`] will currently
253/// panic in this case, but this is not guaranteed.
254///
255/// For input offset zero (the default), there is no maximum length, and this function returns
256/// `None`. For all other offsets it returns `Some`. Note that valid offsets must be a multiple of
257/// [`CHUNK_LEN`] (1024); it's not possible to start hashing a chunk in the middle.
258///
259/// In the example tree below, chunks are numbered by their _0-based index_. The subtree that
260/// _starts_ with chunk 3, i.e. `input_offset = 3 * CHUNK_LEN`, includes only that one chunk, so
261/// its max length is `Some(CHUNK_LEN)`. The subtree that starts with chunk 6 includes chunk 7 but
262/// not chunk 8, so its max length is `Some(2 * CHUNK_LEN)`. The subtree that starts with chunk 12
263/// includes chunks 13, 14, and 15, but if the tree were bigger it would not include chunk 16, so
264/// its max length is `Some(4 * CHUNK_LEN)`. One way to think about the rule here is that, if you
265/// go beyond the max subtree length from a given starting offset, you start dealing with subtrees
266/// that include chunks _to the left_ of where you started.
267///
268/// ```text
269///                           root
270///                 /                       \
271///              .                             .
272///        /           \                 /           \
273///       .             .               .             .
274///    /    \         /    \         /    \         /    \
275///   .      .       .      .       .      .       .      .
276///  / \    / \     / \    / \     / \    / \     / \    / \
277/// 0  1   2  3    4  5   6  7    8  9   10 11   12 13  14 15
278/// ```
279///
280/// The general rule turns out to be that for a subtree starting at a 0-based chunk index N greater
281/// than zero, the maximum number of chunks in that subtree is the largest power-of-two that
282/// divides N, which is given by `1 << N.trailing_zeros()`.
283///
284/// This function can be useful for writing tests or debug assertions, but it's actually rare to
285/// use this for real control flow. Callers who split their input recursively using
286/// [`left_subtree_len`] will automatically satisfy the `max_subtree_len` bound and don't
287/// necessarily need to check. It's also common to choose some fixed power-of-two subtree size, say
288/// 64 chunks, and divide your input up into slices of that fixed length (with the final slice
289/// possibly short). This approach also automatically satisfies the `max_subtree_len` bound and
290/// doesn't need to check. Proving that this is true can be an interesting exercise. Note that
291/// chunks 0, 4, 8, and 12 all begin subtrees of at least 4 chunks in the example tree above.
292///
293/// # Panics
294///
295/// This function currently panics if `input_offset` is not a multiple of `CHUNK_LEN`. This is not
296/// guaranteed.
297#[inline(always)]
298pub fn max_subtree_len(input_offset: u64) -> Option<u64> {
299    if input_offset == 0 {
300        return None;
301    }
302    assert_eq!(input_offset % CHUNK_LEN as u64, 0);
303    let counter = input_offset / CHUNK_LEN as u64;
304    let max_chunks = 1 << counter.trailing_zeros();
305    Some(max_chunks * CHUNK_LEN as u64)
306}
307
#[test]
fn test_max_subtree_len() {
    // Offset zero has no upper bound at all.
    assert_eq!(max_subtree_len(0), None);
    // Pairs of (0-based chunk index, max number of chunks in the subtree starting there).
    let cases = [(1, 1), (2, 2), (3, 1), (4, 4), (5, 1), (6, 2), (7, 1), (8, 8)];
    for (chunk_index, max_chunks) in cases {
        let input_offset = chunk_index * CHUNK_LEN as u64;
        let expected = Some(max_chunks * CHUNK_LEN as u64);
        assert_eq!(max_subtree_len(input_offset), expected);
    }
}
330
331/// Given the length in bytes of either a complete input or a subtree input, return the number of
332/// bytes that belong to its left child subtree. The rest belong to its right child subtree.
333///
334/// Concretely, this function returns the largest power-of-two number of bytes that's strictly less
335/// than `input_len`. This leads to a tree where all left subtrees are "complete" and at least as
336/// large as their sibling right subtrees, as specified in section 2.1 of [the BLAKE3
337/// paper](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf). For example, if an
338/// input is exactly two chunks, the left subtree gets the first chunk and the right subtree gets
339/// the second chunk. But if an input is two chunks plus one more byte, then its left subtree gets
340/// two chunks, and its right subtree only gets one byte.
341///
342/// This function isn't meaningful for one chunk of input, because chunks don't have children. It
343/// currently panics in debug mode if `input_len <= CHUNK_LEN`.
344///
345/// # Example
346///
347/// Hash a input of random length as two subtrees:
348///
349/// ```
350/// # #[cfg(feature = "std")] {
351/// use blake3::hazmat::{left_subtree_len, merge_subtrees_root, HasherExt, Mode};
352/// use blake3::{Hasher, CHUNK_LEN};
353///
354/// // Generate a random-length input. Note that to be split into two subtrees, the input length
355/// // must be greater than CHUNK_LEN.
356/// let input_len = rand::random_range(CHUNK_LEN + 1..1_000_000);
357/// let mut input = vec![0; input_len];
358/// rand::fill(&mut input[..]);
359///
360/// // Compute the left and right subtree hashes and then the root hash. left_subtree_len() tells
361/// // us exactly where to split the input. Any other split would either panic (if we're lucky) or
362/// // lead to an incorrect root hash.
363/// let left_len = left_subtree_len(input_len as u64) as usize;
364/// let left_subtree_cv = Hasher::new()
365///     .update(&input[..left_len])
366///     .finalize_non_root();
367/// let right_subtree_cv = Hasher::new()
368///     .set_input_offset(left_len as u64)
369///     .update(&input[left_len..])
370///     .finalize_non_root();
371/// let root_hash = merge_subtrees_root(&left_subtree_cv, &right_subtree_cv, Mode::Hash);
372///
373/// // Double check the answer.
374/// assert_eq!(root_hash, blake3::hash(&input));
375/// # }
376/// ```
377#[inline(always)]
378pub fn left_subtree_len(input_len: u64) -> u64 {
379    debug_assert!(input_len > CHUNK_LEN as u64);
380    // Note that .next_power_of_two() is greater than *or equal*.
381    ((input_len + 1) / 2).next_power_of_two()
382}
383
#[test]
fn test_left_subtree_len() {
    // One byte more than a chunk: the left child is exactly the first chunk.
    assert_eq!(left_subtree_len(1025), 1024);
    for chunks in [2, 4, 8, 16, 32, 64] {
        let boundary = chunks * CHUNK_LEN as u64;
        // Check just below, exactly on, and just above each power-of-two chunk boundary.
        assert_eq!(left_subtree_len(boundary - 1), boundary / 2);
        assert_eq!(left_subtree_len(boundary), boundary / 2);
        assert_eq!(left_subtree_len(boundary + 1), boundary);
    }
}
394
/// The `mode` argument to [`merge_subtrees_root`] and friends
///
/// See the [module level examples](index.html#examples).
#[derive(Copy, Clone, Debug)]
pub enum Mode<'a> {
    /// Corresponding to [`hash`](crate::hash)
    Hash,

    /// Corresponding to [`keyed_hash`](crate::keyed_hash)
    KeyedHash(&'a [u8; KEY_LEN]),

    /// Corresponding to [`derive_key`](crate::derive_key)
    ///
    /// The [`ContextKey`] comes from [`hash_derive_key_context`].
    DeriveKeyMaterial(&'a ContextKey),
}
411
412impl<'a> Mode<'a> {
413    fn key_words(&self) -> CVWords {
414        match self {
415            Mode::Hash => *IV,
416            Mode::KeyedHash(key) => crate::platform::words_from_le_bytes_32(key),
417            Mode::DeriveKeyMaterial(cx_key) => crate::platform::words_from_le_bytes_32(cx_key),
418        }
419    }
420
421    fn flags_byte(&self) -> u8 {
422        match self {
423            Mode::Hash => 0,
424            Mode::KeyedHash(_) => crate::KEYED_HASH,
425            Mode::DeriveKeyMaterial(_) => crate::DERIVE_KEY_MATERIAL,
426        }
427    }
428}
429
/// "Chaining value" is the academic term for a non-root or non-final hash.
///
/// Besides just sounding fancy, it turns out there are [security
/// reasons](https://jacko.io/tree_hashing.html) to be careful about the difference between
/// (root/final) hashes and (non-root/non-final) chaining values.
// A plain `OUT_LEN`-byte array, deliberately distinct from the root `Hash` type, so that root
// and non-root outputs are harder to mix up at call sites.
pub type ChainingValue = [u8; OUT_LEN];
436
437fn merge_subtrees_inner(
438    left_child: &ChainingValue,
439    right_child: &ChainingValue,
440    mode: Mode,
441) -> crate::Output {
442    crate::parent_node_output(
443        &left_child,
444        &right_child,
445        &mode.key_words(),
446        mode.flags_byte(),
447        Platform::detect(),
448    )
449}
450
451/// Compute a non-root parent node chaining value from two child chaining values.
452///
453/// See the [module level examples](index.html#examples), particularly the discussion of valid tree
454/// structures. The left and right child chaining values can come from either
455/// [`Hasher::finalize_non_root`](HasherExt::finalize_non_root) or other calls to
456/// `merge_subtrees_non_root`. "Chaining value" is the academic term for a non-root or non-final
457/// hash.
458pub fn merge_subtrees_non_root(
459    left_child: &ChainingValue,
460    right_child: &ChainingValue,
461    mode: Mode,
462) -> ChainingValue {
463    merge_subtrees_inner(left_child, right_child, mode).chaining_value()
464}
465
466/// Compute a root hash from two child chaining values.
467///
468/// See the [module level examples](index.html#examples), particularly the discussion of valid tree
469/// structures. The left and right child chaining values can come from either
470/// [`Hasher::finalize_non_root`](HasherExt::finalize_non_root) or [`merge_subtrees_non_root`].
471/// "Chaining value" is the academic term for a non-root or non-final hash.
472///
473/// Note that inputs of [`CHUNK_LEN`] or less don't produce any parent nodes and can't be hashed
474/// using this function. In that case you must get the root hash from [`Hasher::finalize`] (or just
475/// [`blake3::hash`](crate::hash)).
476pub fn merge_subtrees_root(
477    left_child: &ChainingValue,
478    right_child: &ChainingValue,
479    mode: Mode,
480) -> crate::Hash {
481    merge_subtrees_inner(left_child, right_child, mode).root_hash()
482}
483
484/// Build a root [`OutputReader`](crate::OutputReader) from two child chaining values.
485///
486/// See also the [module level examples](index.html#examples), particularly the discussion of valid
487/// tree structures. The left and right child chaining values can come from either
488/// [`Hasher::finalize_non_root`](HasherExt::finalize_non_root) or [`merge_subtrees_non_root`].
489/// "Chaining value" is the academic term for a non-root or non-final hash.
490///
491/// Note that inputs of [`CHUNK_LEN`] or less don't produce any parent nodes and can't be hashed
492/// using this function. In that case you must get the `OutputReader` from
493/// [`Hasher::finalize_xof`].
494///
495/// # Example
496///
497/// ```
498/// use blake3::hazmat::{merge_subtrees_root_xof, HasherExt, Mode};
499/// use blake3::{Hasher, CHUNK_LEN};
500///
501/// // Hash a 2-chunk subtree in steps. Note that only
502/// // the final chunk can be shorter than CHUNK_LEN.
503/// let chunk0 = &[42; CHUNK_LEN];
504/// let chunk1 = b"hello world";
505/// let chunk0_cv = Hasher::new()
506///     .update(chunk0)
507///     .finalize_non_root();
508/// let chunk1_cv = Hasher::new()
509///     .set_input_offset(CHUNK_LEN as u64)
510///     .update(chunk1)
511///     .finalize_non_root();
512///
513/// // Obtain a blake3::OutputReader at the root and extract 1000 bytes.
514/// let mut output_reader = merge_subtrees_root_xof(&chunk0_cv, &chunk1_cv, Mode::Hash);
515/// let mut output_bytes = [0; 1_000];
516/// output_reader.fill(&mut output_bytes);
517///
518/// // Double check the answer.
519/// let mut hasher = Hasher::new();
520/// hasher.update(chunk0);
521/// hasher.update(chunk1);
522/// let mut expected = [0; 1_000];
523/// hasher.finalize_xof().fill(&mut expected);
524/// assert_eq!(output_bytes, expected);
525/// ```
526pub fn merge_subtrees_root_xof(
527    left_child: &ChainingValue,
528    right_child: &ChainingValue,
529    mode: Mode,
530) -> crate::OutputReader {
531    crate::OutputReader::new(merge_subtrees_inner(left_child, right_child, mode))
532}
533
/// An alias to distinguish [`hash_derive_key_context`] outputs from other keys.
// Structurally identical to any other `[u8; KEY_LEN]` key, but the distinct name signals that
// the value must come from `hash_derive_key_context` and nowhere else.
pub type ContextKey = [u8; KEY_LEN];
536
537/// Hash a [`derive_key`](crate::derive_key) context string and return a [`ContextKey`].
538///
539/// This has the same security requirement as [`derive_key`](crate::derive_key). **The context
540/// string should be hardcoded, globally unique, and application-specific.**
541///
542/// The _only_ valid uses for the returned [`ContextKey`] are
543/// [`new_from_context_key`](HasherExt::new_from_context_key) and [`Mode::DeriveKeyMaterial`]
544/// (together with the merge subtree functions).
545///
546/// # Example
547///
548/// ```
549/// use blake3::Hasher;
550/// use blake3::hazmat::HasherExt;
551///
552/// let context_key = blake3::hazmat::hash_derive_key_context("foo");
553/// let mut hasher = Hasher::new_from_context_key(&context_key);
554/// hasher.update(b"bar");
555/// let derived_key = *hasher.finalize().as_bytes();
556///
557/// assert_eq!(derived_key, blake3::derive_key("foo", b"bar"));
558/// ```
559pub fn hash_derive_key_context(context: &str) -> ContextKey {
560    crate::hash_all_at_once::<crate::join::SerialJoin>(
561        context.as_bytes(),
562        IV,
563        crate::DERIVE_KEY_CONTEXT,
564    )
565    .root_hash()
566    .0
567}
568
#[cfg(test)]
mod test {
    use super::*;

    // `finalize_non_root` asserts that at least one input byte was consumed.
    #[test]
    #[should_panic]
    fn test_empty_subtree_should_panic() {
        Hasher::new().finalize_non_root();
    }

    // Offsets must be multiples of CHUNK_LEN; 1 is not.
    #[test]
    #[should_panic]
    fn test_unaligned_offset_should_panic() {
        Hasher::new().set_input_offset(1);
    }

    // Even an offset of zero is rejected once the hasher has seen input.
    #[test]
    #[should_panic]
    fn test_hasher_already_accepted_input_should_panic() {
        Hasher::new().update(b"x").set_input_offset(0);
    }

    // A subtree starting at chunk 1 can hold at most one chunk; one extra byte must panic.
    #[test]
    #[should_panic]
    fn test_too_much_input_should_panic() {
        Hasher::new()
            .set_input_offset(CHUNK_LEN as u64)
            .update(&[0; CHUNK_LEN + 1]);
    }

    // A non-zero input offset is only valid with finalize_non_root, never finalize...
    #[test]
    #[should_panic]
    fn test_set_input_offset_cant_finalize() {
        Hasher::new().set_input_offset(CHUNK_LEN as u64).finalize();
    }

    // ...and never finalize_xof either.
    #[test]
    #[should_panic]
    fn test_set_input_offset_cant_finalize_xof() {
        Hasher::new()
            .set_input_offset(CHUNK_LEN as u64)
            .finalize_xof();
    }

    // End-to-end check: split every test-vector input into fixed-size groups of chunks, hash
    // each group as a subtree, merge the chaining values layer by layer, and confirm the root
    // matches the plain all-at-once hash.
    #[test]
    fn test_grouped_hash() {
        const MAX_CHUNKS: usize = (crate::test::TEST_CASES_MAX + 1) / CHUNK_LEN;
        let mut input_buf = [0; crate::test::TEST_CASES_MAX];
        crate::test::paint_test_input(&mut input_buf);
        // Power-of-two group sizes automatically satisfy the max_subtree_len bound.
        for subtree_chunks in [1, 2, 4, 8, 16, 32] {
            #[cfg(feature = "std")]
            dbg!(subtree_chunks);
            let subtree_len = subtree_chunks * CHUNK_LEN;
            for &case in crate::test::TEST_CASES {
                // Only inputs larger than one group produce at least two chaining values.
                if case <= subtree_len {
                    continue;
                }
                #[cfg(feature = "std")]
                dbg!(case);
                let input = &input_buf[..case];
                let expected_hash = crate::hash(input);

                // Collect all the group chaining values.
                let mut chaining_values = arrayvec::ArrayVec::<ChainingValue, MAX_CHUNKS>::new();
                let mut subtree_offset = 0;
                while subtree_offset < input.len() {
                    // Only the final group may be shorter than subtree_len.
                    let take = core::cmp::min(subtree_len, input.len() - subtree_offset);
                    let subtree_input = &input[subtree_offset..][..take];
                    let subtree_cv = Hasher::new()
                        .set_input_offset(subtree_offset as u64)
                        .update(subtree_input)
                        .finalize_non_root();
                    chaining_values.push(subtree_cv);
                    subtree_offset += take;
                }

                // Compress all the chaining_values together, layer by layer.
                assert!(chaining_values.len() >= 2);
                while chaining_values.len() > 2 {
                    let n = chaining_values.len();
                    // Merge each side-by-side pair in place, overwriting the front half of the
                    // array with the merged results. This moves us "up one level" in the tree.
                    for i in 0..(n / 2) {
                        chaining_values[i] = merge_subtrees_non_root(
                            &chaining_values[2 * i],
                            &chaining_values[2 * i + 1],
                            Mode::Hash,
                        );
                    }
                    // If there's an odd CV out, it moves up.
                    if n % 2 == 1 {
                        chaining_values[n / 2] = chaining_values[n - 1];
                    }
                    chaining_values.truncate(n / 2 + n % 2);
                }
                // The final two chaining values merge at the root, with root finalization.
                assert_eq!(chaining_values.len(), 2);
                let root_hash =
                    merge_subtrees_root(&chaining_values[0], &chaining_values[1], Mode::Hash);
                assert_eq!(expected_hash, root_hash);
            }
        }
    }

    // Keyed mode: two subtrees merged with merge_subtrees_root_xof must produce the same XOF
    // stream as a single keyed hasher over the concatenated input.
    #[test]
    fn test_keyed_hash_xof() {
        let group0 = &[42; 4096];
        let group1 = &[43; 4095];
        let mut input = [0; 8191];
        input[..4096].copy_from_slice(group0);
        input[4096..].copy_from_slice(group1);
        let key = &[44; 32];

        let mut expected_output = [0; 100];
        Hasher::new_keyed(&key)
            .update(&input)
            .finalize_xof()
            .fill(&mut expected_output);

        let mut hazmat_output = [0; 100];
        let left = Hasher::new_keyed(key).update(group0).finalize_non_root();
        let right = Hasher::new_keyed(key)
            .set_input_offset(group0.len() as u64)
            .update(group1)
            .finalize_non_root();
        merge_subtrees_root_xof(&left, &right, Mode::KeyedHash(&key)).fill(&mut hazmat_output);
        assert_eq!(expected_output, hazmat_output);
    }

    // Derive-key mode: a pre-hashed context key plus Mode::DeriveKeyMaterial must reproduce
    // crate::derive_key exactly, across a chunk boundary (1025 bytes = 1024 + 1).
    #[test]
    fn test_derive_key() {
        let context = "foo";
        let mut input = [0; 1025];
        crate::test::paint_test_input(&mut input);
        let expected = crate::derive_key(context, &input);

        let cx_key = hash_derive_key_context(context);
        let left = Hasher::new_from_context_key(&cx_key)
            .update(&input[..1024])
            .finalize_non_root();
        let right = Hasher::new_from_context_key(&cx_key)
            .set_input_offset(1024)
            .update(&input[1024..])
            .finalize_non_root();
        let derived_key = merge_subtrees_root(&left, &right, Mode::DeriveKeyMaterial(&cx_key)).0;
        assert_eq!(expected, derived_key);
    }
}
715}