p3_field/
helpers.rs

1use alloc::vec;
2use alloc::vec::Vec;
3use core::iter::Sum;
4use core::mem::MaybeUninit;
5use core::ops::{Add, Mul};
6
7use num_bigint::BigUint;
8use p3_maybe_rayon::prelude::*;
9
10use crate::field::Field;
11use crate::{PackedValue, PrimeCharacteristicRing, PrimeField, PrimeField32};
12
13/// Computes a multiplicative subgroup whose order is known in advance.
14pub fn cyclic_subgroup_known_order<F: Field>(
15    generator: F,
16    order: usize,
17) -> impl Iterator<Item = F> + Clone {
18    generator.powers().take(order)
19}
20
21/// Computes a coset of a multiplicative subgroup whose order is known in advance.
22pub fn cyclic_subgroup_coset_known_order<F: Field>(
23    generator: F,
24    shift: F,
25    order: usize,
26) -> impl Iterator<Item = F> + Clone {
27    generator.shifted_powers(shift).take(order)
28}
29
30/// Scales each element of the slice by `s` using packing.
31///
32/// # Performance
33/// For large slices, use [`par_scale_slice_in_place`].
34pub fn scale_slice_in_place_single_core<F: Field>(slice: &mut [F], s: F) {
35    let (packed, sfx) = F::Packing::pack_slice_with_suffix_mut(slice);
36    let packed_s: F::Packing = s.into();
37    packed.iter_mut().for_each(|x| *x *= packed_s);
38    sfx.iter_mut().for_each(|x| *x *= s);
39}
40
41/// Scales each element of the slice by `s` using packing and parallelization.
42///
43/// # Performance
44/// For small slices, use [`scale_slice_in_place_single_core`].
45/// Requires the `parallel` feature.
46#[inline]
47pub fn par_scale_slice_in_place<F: Field>(slice: &mut [F], s: F) {
48    let (packed, sfx) = F::Packing::pack_slice_with_suffix_mut(slice);
49    let packed_s: F::Packing = s.into();
50    packed.par_iter_mut().for_each(|x| *x *= packed_s);
51    sfx.iter_mut().for_each(|x| *x *= s);
52}
53
54/// Adds `other`, scaled by `s`, to the mutable `slice` using packing, or `slice += other * s`.
55///
56/// # Performance
57/// For large slices, use [`par_add_scaled_slice_in_place`].
58pub fn add_scaled_slice_in_place<F: Field>(slice: &mut [F], other: &[F], s: F) {
59    debug_assert_eq!(slice.len(), other.len(), "slices must have equal length");
60    let (slice_packed, slice_sfx) = F::Packing::pack_slice_with_suffix_mut(slice);
61    let (other_packed, other_sfx) = F::Packing::pack_slice_with_suffix(other);
62    let packed_s: F::Packing = s.into();
63    slice_packed
64        .iter_mut()
65        .zip(other_packed)
66        .for_each(|(x, y)| *x += *y * packed_s);
67    slice_sfx
68        .iter_mut()
69        .zip(other_sfx)
70        .for_each(|(x, y)| *x += *y * s);
71}
72
73/// Adds `other`, scaled by `s`, to the mutable `slice` using packing, or `slice += other * s`.
74///
75/// # Performance
76/// For small slices, use [`add_scaled_slice_in_place`].
77/// Requires the `parallel` feature.
78pub fn par_add_scaled_slice_in_place<F: Field>(slice: &mut [F], other: &[F], s: F) {
79    debug_assert_eq!(slice.len(), other.len(), "slices must have equal length");
80    let (slice_packed, slice_sfx) = F::Packing::pack_slice_with_suffix_mut(slice);
81    let (other_packed, other_sfx) = F::Packing::pack_slice_with_suffix(other);
82    let packed_s: F::Packing = s.into();
83    slice_packed
84        .par_iter_mut()
85        .zip(other_packed.par_iter())
86        .for_each(|(x, y)| *x += *y * packed_s);
87    slice_sfx
88        .iter_mut()
89        .zip(other_sfx)
90        .for_each(|(x, y)| *x += *y * s);
91}
92
93/// Extend a ring `R` element `x` to an array of length `D`
94/// by filling zeros.
95#[inline]
96#[must_use]
97pub const fn field_to_array<R: PrimeCharacteristicRing, const D: usize>(x: R) -> [R; D] {
98    let mut arr = [const { MaybeUninit::uninit() }; D];
99    arr[0] = MaybeUninit::new(x);
100    let mut i = 1;
101    while i < D {
102        arr[i] = MaybeUninit::new(R::ZERO);
103        i += 1;
104    }
105    unsafe { core::mem::transmute_copy::<_, [R; D]>(&arr) }
106}
107
108/// Given an element x from a 32 bit field F_P compute x/2.
109#[inline]
110#[must_use]
111pub const fn halve_u32<const P: u32>(x: u32) -> u32 {
112    let shift = (P + 1) >> 1;
113    let half = x >> 1;
114    if x & 1 == 0 { half } else { half + shift }
115}
116
117/// Given an element x from a 64 bit field F_P compute x/2.
118#[inline]
119#[must_use]
120pub const fn halve_u64<const P: u64>(x: u64) -> u64 {
121    let shift = (P + 1) >> 1;
122    let half = x >> 1;
123    if x & 1 == 0 { half } else { half + shift }
124}
125
126/// Reduce a slice of 32-bit field elements into a single element of a larger field.
127///
128/// Uses base-$2^{32}$ decomposition:
129///
130/// ```math
131/// \begin{equation}
132///     \text{reduce\_32}(vals) = \sum_{i=0}^{n-1} a_i \cdot 2^{32i}
133/// \end{equation}
134/// ```
135///
136/// Equivalent to [`reduce_packed`] with `radix_bits = 32`.
137#[must_use]
138pub fn reduce_32<SF: PrimeField32, TF: PrimeField>(vals: &[SF]) -> TF {
139    reduce_packed(vals, 32)
140}
141
142/// Horner-evaluate `vals` as base-$2^{radix\_bits}$ digits into `TF`, shifting each digit by `+1`.
143///
144/// This reserves zero as an out-of-band "no digit" value, so sequences of different lengths remain
145/// distinct when packed into a fixed-width slot.
146#[must_use]
147pub fn reduce_packed_shifted<SF: PrimeField32, TF: PrimeField>(vals: &[SF], radix_bits: u32) -> TF {
148    debug_assert!((radix_bits < 64) && ((SF::ORDER_U32 as u64) < (1u64 << radix_bits)));
149    let base = TF::from_int(1u64 << radix_bits);
150    vals.iter()
151        .map(|val| TF::from_int(val.as_canonical_u32() as u64 + 1))
152        .horner(base)
153}
154
155/// Bit length of `F::ORDER_U32 - 1`, i.e. the smallest `b` with `F::ORDER_U32 - 1 < 2^b`.
156///
157/// Used for tight base-$2^b$ absorb packing so canonical [`PrimeField32`] digits are always
158/// valid base-$2^b$ digits (more limbs per [`PrimeField`] slot than radix $2^{32}$ when
159/// `ORDER_U32 < 2^{32}`).
160#[inline]
161#[must_use]
162pub const fn absorb_radix_bits<F: PrimeField32>() -> u32 {
163    u32::BITS - (F::ORDER_U32 - 1).leading_zeros()
164}
165
166/// Horner-evaluate `vals` as base-$2^{radix\_bits}$ digits into `TF`.
167///
168/// Requires every canonical `SF` digit to be strictly less than `2^{radix\_bits}` (true when
169/// `radix_bits ≥ absorb_radix_bits::<SF>()`).
170#[must_use]
171pub fn reduce_packed<SF: PrimeField32, TF: PrimeField>(vals: &[SF], radix_bits: u32) -> TF {
172    debug_assert!((absorb_radix_bits::<SF>() <= radix_bits) && (radix_bits < 64));
173    let base = TF::from_int(1u64 << radix_bits);
174    vals.iter()
175        .map(|val| TF::from_int(val.as_canonical_u32()))
176        .horner(base)
177}
178
179/// Largest `b` such that every integer in `[0, 2^b)` maps injectively into `F` via `PrimeField32::from_int`.
180///
181/// Equivalently `b = floor(log2(p-1))` for prime `p = F::ORDER_U32`.
182#[inline]
183#[must_use]
184pub const fn injective_pack_bits<F: PrimeField32>() -> u32 {
185    (F::ORDER_U32 - 1).ilog2()
186}
187
188/// Maximum number of [`PrimeField32`] elements packable into [`PrimeField`] injectively via
189/// [`reduce_packed`] with the given `radix_bits` (base-$2^{radix\_bits}$ digits bounded by
190/// `F::ORDER_U32 - 1`).
191///
192/// Returns the largest `k` such that
193/// `(F::ORDER_U32 - 1) · ∑_{i=0}^{k-1} (2^{radix\_bits})^i < PF::order()`.
194#[must_use]
195pub fn max_packed_injective_limbs<F: PrimeField32, PF: PrimeField>(radix_bits: u32) -> usize {
196    max_packed_injective_limbs_with_max_digit::<PF>(radix_bits, F::ORDER_U32 - 1)
197}
198
199fn max_packed_injective_limbs_with_max_digit<PF: PrimeField>(
200    radix_bits: u32,
201    max_digit: u32,
202) -> usize {
203    debug_assert!((0 < radix_bits) && (radix_bits < 64));
204    let max_digit = BigUint::from(max_digit);
205    let base = BigUint::from(1u32) << (radix_bits as usize);
206    let pf_order = PF::order();
207    let mut k = 0usize;
208    let mut max_val = BigUint::ZERO;
209    let mut power = BigUint::from(1u32);
210    loop {
211        let new_max = &max_val + &max_digit * &power;
212        if new_max >= pf_order {
213            break k;
214        }
215        max_val = new_max;
216        power *= &base;
217        k += 1;
218    }
219}
220
221/// Maximum number of shifted [`PrimeField32`] elements packable into [`PrimeField`] injectively
222/// via [`reduce_packed_shifted`] with the given `radix_bits`.
223///
224/// Returns the largest `k` such that
225/// `F::ORDER_U32 · ∑_{i=0}^{k-1} (2^{radix\_bits})^i < PF::order()`.
226#[must_use]
227pub fn max_shifted_packed_injective_limbs<F: PrimeField32, PF: PrimeField>(
228    radix_bits: u32,
229) -> usize {
230    max_packed_injective_limbs_with_max_digit::<PF>(radix_bits, F::ORDER_U32)
231}
232
233/// Maximum limbs per [`PrimeField`] rate slot when absorbing with radix
234/// $2^{\texttt{absorb\\_radix\\_bits::\<F\>()}}$ (see [`reduce_packed`]).
235#[must_use]
236pub fn max_absorb_injective_limbs<F: PrimeField32, PF: PrimeField>() -> usize {
237    max_packed_injective_limbs::<F, PF>(absorb_radix_bits::<F>())
238}
239
240/// Maximum shifted limbs per [`PrimeField`] rate slot when absorbing with radix
241/// $2^{\texttt{absorb\\_radix\\_bits::\<F\>()}}$ (see [`reduce_packed_shifted`]).
242#[must_use]
243pub fn max_shifted_absorb_injective_limbs<F: PrimeField32, PF: PrimeField>() -> usize {
244    max_shifted_packed_injective_limbs::<F, PF>(absorb_radix_bits::<F>())
245}
246
247/// Returns true iff every integer in `[0, SF::order())` fits in `num_limbs` little-endian
248/// base-`2^radix_bits` digits without truncation, i.e. `2^{num_limbs · radix_bits} ≥ SF::order()`.
249#[must_use]
250pub fn pf_packed_limbs_cover_order<SF: PrimeField>(num_limbs: usize, radix_bits: u32) -> bool {
251    let Some(total_bits) = num_limbs.checked_mul(radix_bits as usize) else {
252        return false;
253    };
254    (BigUint::from(1u32) << total_bits) >= SF::order()
255}
256
257/// Split `val` into `num_limbs` little-endian base-`2^radix_bits` limbs, each mapped into `TF`.
258///
259/// Each output limb is in `[0, 2^radix_bits)`. Pads with zero limbs if the value has fewer
260/// non-zero digits than `num_limbs`.
261///
262/// **Parameter requirements**
263///
264/// - `radix_bits ≤ injective_pack_bits::<TF>()` so each limb maps injectively into `TF` via
265///   `PrimeField32::from_int`. If `radix_bits` is too large, distinct limbs can collide after
266///   reduction modulo `TF::ORDER`.
267/// - For a **lossless** transcript binding of arbitrary `SF` values, also require
268///   `pf_packed_limbs_cover_order::<SF>(num_limbs, radix_bits)`. Deliberately truncated
269///   splits (e.g. challengers that use `floor` limb counts for squeeze) omit high bits by design
270///   and do not satisfy that coverage check.
271#[must_use]
272pub fn split_pf_to_packed_limbs<SF: PrimeField, TF: PrimeField32>(
273    val: SF,
274    num_limbs: usize,
275    radix_bits: u32,
276) -> Vec<TF> {
277    debug_assert!((0 < radix_bits) && (radix_bits < 64));
278    debug_assert!(
279        radix_bits <= injective_pack_bits::<TF>(),
280        "radix_bits must be ≤ injective_pack_bits::<TF>() for injective limb embedding"
281    );
282
283    // Use a primitive u32 mask!
284    let mask_u32: u32 = (1u32 << radix_bits) - 1;
285    let mut rem = val.as_canonical_biguint();
286    let mut out = vec![TF::ZERO; num_limbs];
287
288    for item in out.iter_mut() {
289        // Look at the lowest limb directly, no allocations
290        let limb = rem.iter_u32_digits().next().unwrap_or(0) & mask_u32;
291        *item = TF::from_int(limb);
292
293        // In-place bitshift modifies the BigUint without allocating
294        rem >>= radix_bits;
295    }
296
297    out
298}
299
300/// Number of `TF` limbs with statistical bias `< 1/|TF|` when decomposing a uniformly random
301/// `PF` element in base `|TF|` (see [`split_pf_to_field_order_limbs`]).
302///
303/// Returns the largest `k` such that `TF::ORDER^{k+1} < PF::ORDER`. Each retained limb `c_i`
304/// (`i < k`) has bias `≈ 1/⌊PF::ORDER / TF::ORDER^{i+2}⌋ < 1/TF::ORDER`.
305///
306/// Unlike the power-of-two radix variant ([`split_pf_to_packed_limbs`] with
307/// `radix_bits = injective_pack_bits::<TF>()`), which confines each challenge to
308/// `[0, 2^{radix_bits})` (≈ 50% of `TF`'s domain for BabyBear), this gives limbs that are
309/// near-uniform over the **entire** `TF` domain.
310///
311/// # BabyBear concrete values
312/// | PF | Good limbs |
313/// |---|---|
314/// | Goldilocks (64-bit) | 1 |
315/// | BN254 (254-bit) | 7 |
316#[must_use]
317pub fn squeeze_field_order_num_limbs<PF: PrimeField, TF: PrimeField32>() -> usize {
318    let p = BigUint::from(TF::ORDER_U32);
319    let n = PF::order();
320    let mut count = 0usize;
321    let mut power = BigUint::from(1u32);
322    while &power * &p < n {
323        power *= &p;
324        count += 1;
325    }
326    count.saturating_sub(1)
327}
328
329/// Split `val` into `num_limbs` little-endian base-`|TF|` limbs, each mapped into `TF`.
330///
331/// Decomposes `val` as `c0 + c1·p + c2·p² + …` (p = `TF::ORDER_U32`), returning
332/// `[c0, c1, …, c_{num_limbs-1}]` with `0 ≤ ci < p`. Pads with `TF::ZERO` if `val` has
333/// fewer significant digits than `num_limbs`.
334///
335/// Use [`squeeze_field_order_num_limbs`] to choose `num_limbs` such that each retained limb
336/// is near-uniform over all of `TF` when `val` is uniformly random.
337#[must_use]
338pub fn split_pf_to_field_order_limbs<SF: PrimeField, TF: PrimeField32>(
339    val: SF,
340    num_limbs: usize,
341) -> Vec<TF> {
342    let p_u32 = TF::ORDER_U32;
343    let mut rem = val.as_canonical_biguint();
344    let mut out = Vec::with_capacity(num_limbs);
345
346    for _ in 0..num_limbs {
347        // Fast, primitive 32-bit modulo (no heap allocation!)
348        let limb = (&rem % p_u32).to_u32_digits().first().copied().unwrap_or(0);
349        out.push(TF::from_int(limb));
350
351        // Fast, primitive in-place 32-bit division
352        rem /= p_u32;
353    }
354    out
355}
356
357/// Split a large field element into `n` base-$2^{64}$ chunks and map each into a 32-bit field.
358///
359/// Converts:
360/// ```math
361/// \begin{equation}
362///     x = \sum_{i=0}^{n-1} d_i \cdot 2^{64i}
363/// \end{equation}
364/// ```
365///
366/// Pads with zeros if needed.
367#[must_use]
368pub fn split_32<SF: PrimeField, TF: PrimeField32>(val: SF, n: usize) -> Vec<TF> {
369    let mut result: Vec<TF> = val
370        .as_canonical_biguint()
371        .to_u64_digits()
372        .iter()
373        .take(n)
374        .map(|d| TF::from_u64(*d))
375        .collect();
376
377    // Pad with zeros if needed
378    result.resize_with(n, || TF::ZERO);
379    result
380}
381
382/// Maximally generic dot product.
383#[must_use]
384pub fn dot_product<S, LI, RI>(li: LI, ri: RI) -> S
385where
386    LI: Iterator,
387    RI: Iterator,
388    LI::Item: Mul<RI::Item>,
389    S: Sum<<LI::Item as Mul<RI::Item>>::Output>,
390{
391    li.zip(ri).map(|(l, r)| l * r).sum()
392}
393
394/// Horner-style polynomial evaluation over a [`DoubleEndedIterator`].
395///
396/// The iterator yields coefficients in **ascending degree order**
397/// `[c_0, c_1, …, c_{n-1}]`. Both methods walk the iterator back-to-front
398/// via [`DoubleEndedIterator::rfold`], avoiding any allocation.
399///
400/// # Convention
401///
402/// Given an evaluation point `x` and accumulator `acc`,
403/// [`HornerIter::horner_acc`] computes
404///
405/// ```text
406/// acc · xⁿ + Σ_{i=0..n} c_i · xⁱ
407///   = c_0 + x · (c_1 + x · (… + x · (c_{n-1} + x · acc)))
408/// ```
409///
410/// [`HornerIter::horner`] is the same with `acc = Acc::default()`, i.e. the
411/// polynomial evaluation `Σ_i c_i · xⁱ`.
412///
413/// For inputs in *descending* degree order, call `.rev()` on the iterator
414/// first.
415pub trait HornerIter: DoubleEndedIterator + Sized {
416    /// Horner fold with an explicit accumulator. See the trait docs for the
417    /// evaluation convention.
418    #[inline]
419    fn horner_acc<Acc, X>(self, acc: Acc, x: X) -> Acc
420    where
421        Acc: Mul<X, Output = Acc> + Add<Self::Item, Output = Acc>,
422        X: Clone,
423    {
424        self.rfold(acc, |a, v| a * x.clone() + v)
425    }
426
427    /// Horner fold starting from `Acc::default()`. See the trait docs for the
428    /// evaluation convention.
429    #[inline]
430    fn horner<Acc, X>(self, x: X) -> Acc
431    where
432        Acc: Default + Mul<X, Output = Acc> + Add<Self::Item, Output = Acc>,
433        X: Clone,
434    {
435        self.horner_acc(Acc::default(), x)
436    }
437}
438
439impl<I: DoubleEndedIterator> HornerIter for I {}
p3_field/helpers.rs

p3_field/
helpers.rs