p3_poseidon1/internal.rs
1//! Partial (internal) round layers for the Poseidon1 permutation.
2//!
3//! # Overview
4//!
5//! Partial rounds apply the S-box only to `state[0]`, leaving the other t-1 elements
6//! unchanged through the nonlinear layer. This is the key efficiency insight of the Poseidon1
7//! design: partial rounds are much cheaper than full rounds, yet they efficiently
8//! increase the algebraic degree to resist interpolation and Gröbner basis attacks.
9//!
10//! # Sparse Matrix Optimization
11//!
12//! In the textbook formulation, each partial round still multiplies by the full dense
13//! MDS matrix M (cost O(t^2)). The Poseidon1 paper (Appendix B) factors M into a product
14//! of sparse matrices, one per round. Each sparse matrix S_r has the structure:
15//!
16//! ```text
17//! S_r = ┌─────────┬───────────┐
18//! │ mds_0_0 │ ŵ_r │ <- first row
19//! ├─────────┼───────────┤
20//! │ v_r │ I │ <- identity block
21//! └─────────┴───────────┘
22//! ```
23//!
24//! where the top-left entry is a scalar, the first row holds a (t-1)-vector, the first
25//! column holds a (t-1)-vector, and the bottom-right block is the (t-1)x(t-1) identity.
26//!
27//! Multiplying by S_r costs only O(t) operations instead of O(t^2). Over RP partial
28//! rounds, this saves O(RP * t^2) -> O(RP * t).
29//!
30//! # Optimized Partial Round Structure
31//!
32//! After this transformation, the RP partial rounds become:
33//!
34//! ```text
35//! 1. Add first_round_constants (full WIDTH-vector)
36//! 2. Multiply by m_i (dense transition matrix, applied once)
37//! 3. For each of RP rounds:
38//! a. S-box on state[0]: state[0] = state[0]^D
39//! b. Add scalar constant to state[0] (all rounds except the last)
40//! c. Sparse matrix multiply via cheap_matmul
41//! ```
42//!
43//! # References
44//!
45//! - Grassi et al., "Poseidon1: A New Hash Function for Zero-Knowledge Proof Systems",
46//! USENIX Security 2021. <https://eprint.iacr.org/2019/458>
47//! - HorizenLabs reference implementation: <https://github.com/HorizenLabs/poseidon2>
48
49use alloc::vec::Vec;
50
51use p3_field::{Algebra, Field, InjectiveMonomial, PrimeCharacteristicRing};
52use p3_symmetric::Permutation;
53
54use crate::external::mds_multiply;
55
56/// Pre-computed constants for the RP partial (internal) rounds.
57///
58/// These are produced by the sparse matrix decomposition in [`crate::utils`].
59#[derive(Debug, Clone)]
60pub struct PartialRoundConstants<F, const WIDTH: usize> {
61 /// Full WIDTH-vector of optimized round constants, added once before the
62 /// transition matrix m_i.
63 ///
64 /// This vector absorbs the original round constants from all RP partial rounds
65 /// via backward substitution through M^{-1}.
66 pub first_round_constants: [F; WIDTH],
67
68 /// Dense transition matrix m_i, applied once before the partial round loop.
69 ///
70 /// This is the accumulated product of sparse matrix factors from the
71 /// sparse matrix decomposition, transposed to match the HorizenLabs convention.
72 pub m_i: [[F; WIDTH]; WIDTH],
73
74 /// Per-round full first row of the sparse matrix, pre-assembled for
75 /// branch-free dot product computation.
76 ///
77 /// `sparse_first_row[r] = [mds_0_0, ŵ_r[0], ŵ_r[1], ..., ŵ_r[WIDTH-2]]`
78 ///
79 /// where `mds_0_0` is the top-left entry of the original MDS matrix (same for
80 /// all rounds) and `ŵ_r` is the per-round first-row vector from the sparse
81 /// factorization.
82 ///
83 /// Length = RP. Stored in forward application order.
84 pub sparse_first_row: Vec<[F; WIDTH]>,
85
86 /// Per-round first-column vectors for the sparse matrix multiply
87 /// (excluding the `[0,0]` entry).
88 ///
89 /// `v[r]` has WIDTH elements: `[v_r[0], v_r[1], ..., v_r[WIDTH-2], 0]`.
90 /// Only the first WIDTH-1 entries are meaningful; the last is padding.
91 ///
92 /// Length = RP. Stored in forward application order.
93 pub v: Vec<[F; WIDTH]>,
94
95 /// Optimized scalar round constants for partial rounds 0 through RP-2.
96 ///
97 /// The last partial round has no additive constant (it was absorbed by the
98 /// backward substitution). Length = RP - 1.
99 pub round_constants: Vec<F>,
100
101 /// Scalar constants for the textbook partial round path.
102 ///
103 /// Length = RP. Each entry is the optimized scalar to add to `state[0]` before
104 /// the S-box, computed via forward constant substitution.
105 pub textbook_scalar_constants: Vec<F>,
106
107 /// Residual accumulator for the textbook path.
108 ///
109 /// Added to the state after all partial rounds complete.
110 /// Accounts for the folded-forward `state[1..WIDTH]` constants.
111 pub textbook_residual: [F; WIDTH],
112}
113
114/// Construct a partial round layer from pre-computed constants.
115pub trait PartialRoundLayerConstructor<F: Field, const WIDTH: usize> {
116 /// Build the layer from the sparse-form optimized constants.
117 fn new_from_constants(constants: PartialRoundConstants<F, WIDTH>) -> Self;
118}
119
120/// The partial (internal) round layer of the Poseidon1 permutation.
121///
122/// Implementors apply all RP partial rounds to the state.
123///
124/// Field-specific implementations (e.g., NEON, AVX2) can override the generic behavior.
125pub trait PartialRoundLayer<R, const WIDTH: usize, const D: u64>: Sync + Clone
126where
127 R: PrimeCharacteristicRing,
128{
129 /// Apply all RP partial rounds to the state.
130 fn permute_state(&self, state: &mut [R; WIDTH]);
131}
132
133/// Sparse matrix-vector multiplication in O(WIDTH) operations.
134///
135/// Replaces the full O(t^2) MDS multiply in each partial round. Computes
136/// `state <- S * state` where S has the structure:
137///
138/// ```text
139/// S = ┌─────────┬──────┐
140/// │ mds_0_0 │ ŵ │ state'[0] = mds_0_0 * s[0] + Σ ŵ[j] * s[j+1]
141/// ├─────────┼──────┤
142/// │ v │ I │ state'[i] = s[i] + v[i-1] * s[0], for i ≥ 1
143/// └─────────┴──────┘
144/// ```
145///
146/// `first_row` contains the pre-assembled full first row `[mds_0_0, ŵ[0], ..., ŵ[WIDTH-2]]`,
147/// enabling a branch-free dot product for the new `state[0]`.
148///
149/// `v` contains the first-column vector (excluding `[0,0]`): `[v[0], ..., v[WIDTH-2], 0]`.
150///
151/// The identity block means `state[1..]` is updated by a simple rank-1 correction
152/// (add a multiple of the old `state[0]`), and `state[0]` is a dot product.
153#[inline(always)]
154pub fn cheap_matmul<F: Field, A: Algebra<F>, const WIDTH: usize>(
155 state: &mut [A; WIDTH],
156 first_row: &[F; WIDTH],
157 v: &[F; WIDTH],
158) {
159 // Save state[0] before it is overwritten.
160 let old_s0 = state[0].clone();
161
162 // Compute new state[0] = dot(first_row, state).
163 state[0] = A::mixed_dot_product(state, first_row);
164
165 // Rank-1 update: state[i] += v[i-1] * old_s0, for i = 1..WIDTH.
166 for i in 1..WIDTH {
167 state[i] += old_s0.clone() * v[i - 1];
168 }
169}
170
171/// Generic implementation of the partial round permutation.
172///
173/// See the module-level documentation for the algorithm structure.
174#[inline]
175pub fn partial_permute_state<
176 F: Field,
177 A: Algebra<F> + InjectiveMonomial<D>,
178 const WIDTH: usize,
179 const D: u64,
180>(
181 state: &mut [A; WIDTH],
182 constants: &PartialRoundConstants<F, WIDTH>,
183) {
184 // Add the full first-round constant vector.
185 for (s, &rc) in state.iter_mut().zip(constants.first_round_constants.iter()) {
186 *s += rc;
187 }
188
189 // Apply the dense transition matrix m_i (once).
190 mds_multiply(state, &constants.m_i);
191
192 let rounds_p = constants.sparse_first_row.len();
193
194 // Partial rounds 0..RP-2: S-box + scalar RC + sparse matmul.
195 // The last round is handled separately to avoid a branch per iteration.
196 for r in 0..rounds_p - 1 {
197 // S-box on state[0] only.
198 state[0] = state[0].injective_exp_n();
199
200 // Add scalar round constant.
201 state[0] += constants.round_constants[r];
202
203 // Sparse matrix multiply.
204 cheap_matmul(state, &constants.sparse_first_row[r], &constants.v[r]);
205 }
206
207 // Last partial round: S-box + sparse matmul (no round constant).
208 state[0] = state[0].injective_exp_n();
209 cheap_matmul(
210 state,
211 &constants.sparse_first_row[rounds_p - 1],
212 &constants.v[rounds_p - 1],
213 );
214}
215
216/// Textbook partial round permutation with forward-substituted scalar constants.
217///
218/// Instead of the sparse matrix decomposition, this applies the full MDS permutation
219/// per round but with only a scalar constant addition to `state[0]`. After all rounds,
220/// a residual vector is added to the state.
221///
222/// This is beneficial when the MDS permutation is very fast (e.g., Karatsuba
223/// convolution for power-of-2 circulant matrices), making the per-round MDS cost
224/// competitive with the sparse approach.
225#[inline]
226pub fn textbook_partial_permute_state<
227 F: Field,
228 A: Algebra<F> + InjectiveMonomial<D>,
229 Mds: Permutation<[A; WIDTH]>,
230 const WIDTH: usize,
231 const D: u64,
232>(
233 state: &mut [A; WIDTH],
234 constants: &PartialRoundConstants<F, WIDTH>,
235 mds: &Mds,
236) {
237 for &c in &constants.textbook_scalar_constants {
238 state[0] += c;
239 state[0] = state[0].injective_exp_n();
240 mds.permute_mut(state);
241 }
242 for (s, &r) in state.iter_mut().zip(constants.textbook_residual.iter()) {
243 *s += r;
244 }
245}