p3_poseidon1/
internal.rs

1//! Partial (internal) round layers for the Poseidon1 permutation.
2//!
3//! # Overview
4//!
5//! Partial rounds apply the S-box only to `state[0]`, leaving the other t-1 elements
6//! unchanged through the nonlinear layer. This is the key efficiency insight of the Poseidon1
7//! design: partial rounds are much cheaper than full rounds, yet they efficiently
8//! increase the algebraic degree to resist interpolation and Gröbner basis attacks.
9//!
10//! # Sparse Matrix Optimization
11//!
12//! In the textbook formulation, each partial round still multiplies by the full dense
13//! MDS matrix M (cost O(t^2)). The Poseidon1 paper (Appendix B) factors M into a product
14//! of sparse matrices, one per round. Each sparse matrix S_r has the structure:
15//!
16//! ```text
17//!   S_r = ┌─────────┬───────────┐
18//!         │ mds_0_0 │  ŵ_r      │    <- first row
19//!         ├─────────┼───────────┤
20//!         │  v_r    │    I      │    <- identity block
21//!         └─────────┴───────────┘
22//! ```
23//!
24//! where the top-left entry is a scalar, the first row holds a (t-1)-vector, the first
25//! column holds a (t-1)-vector, and the bottom-right block is the (t-1)x(t-1) identity.
26//!
27//! Multiplying by S_r costs only O(t) operations instead of O(t^2). Over RP partial
28//! rounds, this saves O(RP * t^2) -> O(RP * t).
29//!
30//! # Optimized Partial Round Structure
31//!
32//! After this transformation, the RP partial rounds become:
33//!
34//! ```text
35//!   1. Add first_round_constants (full WIDTH-vector)
36//!   2. Multiply by m_i (dense transition matrix, applied once)
37//!   3. For each of RP rounds:
38//!      a. S-box on state[0]:  state[0] = state[0]^D
39//!      b. Add scalar constant to state[0] (all rounds except the last)
40//!      c. Sparse matrix multiply via cheap_matmul
41//! ```
42//!
43//! # References
44//!
45//! - Grassi et al., "Poseidon1: A New Hash Function for Zero-Knowledge Proof Systems",
46//!   USENIX Security 2021. <https://eprint.iacr.org/2019/458>
47//! - HorizenLabs reference implementation: <https://github.com/HorizenLabs/poseidon2>
48
49use alloc::vec::Vec;
50
51use p3_field::{Algebra, Field, InjectiveMonomial, PrimeCharacteristicRing};
52use p3_symmetric::Permutation;
53
54use crate::external::mds_multiply;
55
56/// Pre-computed constants for the RP partial (internal) rounds.
57///
58/// These are produced by the sparse matrix decomposition in [`crate::utils`].
59#[derive(Debug, Clone)]
60pub struct PartialRoundConstants<F, const WIDTH: usize> {
61    /// Full WIDTH-vector of optimized round constants, added once before the
62    /// transition matrix m_i.
63    ///
64    /// This vector absorbs the original round constants from all RP partial rounds
65    /// via backward substitution through M^{-1}.
66    pub first_round_constants: [F; WIDTH],
67
68    /// Dense transition matrix m_i, applied once before the partial round loop.
69    ///
70    /// This is the accumulated product of sparse matrix factors from the
71    /// sparse matrix decomposition, transposed to match the HorizenLabs convention.
72    pub m_i: [[F; WIDTH]; WIDTH],
73
74    /// Per-round full first row of the sparse matrix, pre-assembled for
75    /// branch-free dot product computation.
76    ///
77    /// `sparse_first_row[r] = [mds_0_0, ŵ_r[0], ŵ_r[1], ..., ŵ_r[WIDTH-2]]`
78    ///
79    /// where `mds_0_0` is the top-left entry of the original MDS matrix (same for
80    /// all rounds) and `ŵ_r` is the per-round first-row vector from the sparse
81    /// factorization.
82    ///
83    /// Length = RP. Stored in forward application order.
84    pub sparse_first_row: Vec<[F; WIDTH]>,
85
86    /// Per-round first-column vectors for the sparse matrix multiply
87    /// (excluding the `[0,0]` entry).
88    ///
89    /// `v[r]` has WIDTH elements: `[v_r[0], v_r[1], ..., v_r[WIDTH-2], 0]`.
90    /// Only the first WIDTH-1 entries are meaningful; the last is padding.
91    ///
92    /// Length = RP. Stored in forward application order.
93    pub v: Vec<[F; WIDTH]>,
94
95    /// Optimized scalar round constants for partial rounds 0 through RP-2.
96    ///
97    /// The last partial round has no additive constant (it was absorbed by the
98    /// backward substitution). Length = RP - 1.
99    pub round_constants: Vec<F>,
100
101    /// Scalar constants for the textbook partial round path.
102    ///
103    /// Length = RP. Each entry is the optimized scalar to add to `state[0]` before
104    /// the S-box, computed via forward constant substitution.
105    pub textbook_scalar_constants: Vec<F>,
106
107    /// Residual accumulator for the textbook path.
108    ///
109    /// Added to the state after all partial rounds complete.
110    /// Accounts for the folded-forward `state[1..WIDTH]` constants.
111    pub textbook_residual: [F; WIDTH],
112}
113
114/// Construct a partial round layer from pre-computed constants.
115pub trait PartialRoundLayerConstructor<F: Field, const WIDTH: usize> {
116    /// Build the layer from the sparse-form optimized constants.
117    fn new_from_constants(constants: PartialRoundConstants<F, WIDTH>) -> Self;
118}
119
120/// The partial (internal) round layer of the Poseidon1 permutation.
121///
122/// Implementors apply all RP partial rounds to the state.
123///
124/// Field-specific implementations (e.g., NEON, AVX2) can override the generic behavior.
125pub trait PartialRoundLayer<R, const WIDTH: usize, const D: u64>: Sync + Clone
126where
127    R: PrimeCharacteristicRing,
128{
129    /// Apply all RP partial rounds to the state.
130    fn permute_state(&self, state: &mut [R; WIDTH]);
131}
132
133/// Sparse matrix-vector multiplication in O(WIDTH) operations.
134///
135/// Replaces the full O(t^2) MDS multiply in each partial round. Computes
136/// `state <- S * state` where S has the structure:
137///
138/// ```text
139///   S = ┌─────────┬──────┐
140///       │ mds_0_0 │  ŵ   │    state'[0] = mds_0_0 * s[0] + Σ ŵ[j] * s[j+1]
141///       ├─────────┼──────┤
142///       │    v    │   I  │    state'[i] = s[i] + v[i-1] * s[0],  for i ≥ 1
143///       └─────────┴──────┘
144/// ```
145///
146/// `first_row` contains the pre-assembled full first row `[mds_0_0, ŵ[0], ..., ŵ[WIDTH-2]]`,
147/// enabling a branch-free dot product for the new `state[0]`.
148///
149/// `v` contains the first-column vector (excluding `[0,0]`): `[v[0], ..., v[WIDTH-2], 0]`.
150///
151/// The identity block means `state[1..]` is updated by a simple rank-1 correction
152/// (add a multiple of the old `state[0]`), and `state[0]` is a dot product.
153#[inline(always)]
154pub fn cheap_matmul<F: Field, A: Algebra<F>, const WIDTH: usize>(
155    state: &mut [A; WIDTH],
156    first_row: &[F; WIDTH],
157    v: &[F; WIDTH],
158) {
159    // Save state[0] before it is overwritten.
160    let old_s0 = state[0].clone();
161
162    // Compute new state[0] = dot(first_row, state).
163    state[0] = A::mixed_dot_product(state, first_row);
164
165    // Rank-1 update: state[i] += v[i-1] * old_s0, for i = 1..WIDTH.
166    for i in 1..WIDTH {
167        state[i] += old_s0.clone() * v[i - 1];
168    }
169}
170
171/// Generic implementation of the partial round permutation.
172///
173/// See the module-level documentation for the algorithm structure.
174#[inline]
175pub fn partial_permute_state<
176    F: Field,
177    A: Algebra<F> + InjectiveMonomial<D>,
178    const WIDTH: usize,
179    const D: u64,
180>(
181    state: &mut [A; WIDTH],
182    constants: &PartialRoundConstants<F, WIDTH>,
183) {
184    // Add the full first-round constant vector.
185    for (s, &rc) in state.iter_mut().zip(constants.first_round_constants.iter()) {
186        *s += rc;
187    }
188
189    // Apply the dense transition matrix m_i (once).
190    mds_multiply(state, &constants.m_i);
191
192    let rounds_p = constants.sparse_first_row.len();
193
194    // Partial rounds 0..RP-2: S-box + scalar RC + sparse matmul.
195    // The last round is handled separately to avoid a branch per iteration.
196    for r in 0..rounds_p - 1 {
197        // S-box on state[0] only.
198        state[0] = state[0].injective_exp_n();
199
200        // Add scalar round constant.
201        state[0] += constants.round_constants[r];
202
203        // Sparse matrix multiply.
204        cheap_matmul(state, &constants.sparse_first_row[r], &constants.v[r]);
205    }
206
207    // Last partial round: S-box + sparse matmul (no round constant).
208    state[0] = state[0].injective_exp_n();
209    cheap_matmul(
210        state,
211        &constants.sparse_first_row[rounds_p - 1],
212        &constants.v[rounds_p - 1],
213    );
214}
215
216/// Textbook partial round permutation with forward-substituted scalar constants.
217///
218/// Instead of the sparse matrix decomposition, this applies the full MDS permutation
219/// per round but with only a scalar constant addition to `state[0]`. After all rounds,
220/// a residual vector is added to the state.
221///
222/// This is beneficial when the MDS permutation is very fast (e.g., Karatsuba
223/// convolution for power-of-2 circulant matrices), making the per-round MDS cost
224/// competitive with the sparse approach.
225#[inline]
226pub fn textbook_partial_permute_state<
227    F: Field,
228    A: Algebra<F> + InjectiveMonomial<D>,
229    Mds: Permutation<[A; WIDTH]>,
230    const WIDTH: usize,
231    const D: u64,
232>(
233    state: &mut [A; WIDTH],
234    constants: &PartialRoundConstants<F, WIDTH>,
235    mds: &Mds,
236) {
237    for &c in &constants.textbook_scalar_constants {
238        state[0] += c;
239        state[0] = state[0].injective_exp_n();
240        mds.permute_mut(state);
241    }
242    for (s, &r) in state.iter_mut().zip(constants.textbook_residual.iter()) {
243        *s += r;
244    }
245}
p3_poseidon1/internal.rs

p3_poseidon1/
internal.rs