1use crate::domain::DomainCoeff;
2use ark_ff::{FftField, Field};
3use ark_std::vec::*;
4#[cfg(feature = "parallel")]
5use rayon::prelude::*;
6
7#[allow(unused)]
9#[cfg(feature = "parallel")]
10const MIN_PARALLEL_CHUNK_SIZE: usize = 1 << 7;
11
12#[inline]
13pub(crate) fn bitreverse(mut n: u32, l: u32) -> u32 {
14 let mut r = 0;
15 for _ in 0..l {
16 r = (r << 1) | (n & 1);
17 n >>= 1;
18 }
19 r
20}
21
22pub(crate) fn compute_powers_serial<F: Field>(size: usize, root: F) -> Vec<F> {
23 compute_powers_and_mul_by_const_serial(size, root, F::one())
24}
25
26pub(crate) fn compute_powers_and_mul_by_const_serial<F: Field>(
27 size: usize,
28 root: F,
29 c: F,
30) -> Vec<F> {
31 let mut value = c;
32 (0..size)
33 .map(|_| {
34 let old_value = value;
35 value *= root;
36 old_value
37 })
38 .collect()
39}
40
41#[allow(unused)]
42#[cfg(feature = "parallel")]
43pub(crate) fn compute_powers<F: Field>(size: usize, g: F) -> Vec<F> {
44 if size < MIN_PARALLEL_CHUNK_SIZE {
45 return compute_powers_serial(size, g);
46 }
47 use ark_std::cmp::{max, min};
49 let num_cpus_available = rayon::current_num_threads();
50 let num_elem_per_thread = max(size / num_cpus_available, MIN_PARALLEL_CHUNK_SIZE);
51 let num_cpus_used = size / num_elem_per_thread;
52
53 let res: Vec<F> = (0..num_cpus_used)
55 .into_par_iter()
56 .flat_map(|i| {
57 let offset = g.pow(&[(i * num_elem_per_thread) as u64]);
58 let num_elements_to_compute = min(size - i * num_elem_per_thread, num_elem_per_thread);
61 let res = compute_powers_and_mul_by_const_serial(num_elements_to_compute, g, offset);
62 res
63 })
64 .collect();
65 res
66}
67
68#[cfg(feature = "parallel")]
69fn log2_floor(num: usize) -> u32 {
70 if num == 0 {
71 0
72 } else {
73 1usize.leading_zeros() - num.leading_zeros()
74 }
75}
76
77#[cfg(feature = "parallel")]
78pub(crate) fn best_fft<T: DomainCoeff<F>, F: FftField>(
79 a: &mut [T],
80 omega: F,
81 log_n: u32,
82 serial_fft: fn(&mut [T], F, u32),
83) {
84 let num_cpus = rayon::current_num_threads();
85 let log_cpus = log2_floor(num_cpus);
86 if log_n <= log_cpus {
87 serial_fft(a, omega, log_n);
88 } else {
89 parallel_fft(a, omega, log_n, log_cpus, serial_fft);
90 }
91}
92
93#[cfg(not(feature = "parallel"))]
94#[inline]
95pub(crate) fn best_fft<T: DomainCoeff<F>, F: FftField>(
96 a: &mut [T],
97 omega: F,
98 log_n: u32,
99 serial_fft: fn(&mut [T], F, u32),
100) {
101 serial_fft(a, omega, log_n)
102}
103
104#[cfg(feature = "parallel")]
105pub(crate) fn parallel_fft<T: DomainCoeff<F>, F: FftField>(
106 a: &mut [T],
107 omega: F,
108 log_n: u32,
109 log_cpus: u32,
110 serial_fft: fn(&mut [T], F, u32),
111) {
112 assert!(log_n >= log_cpus);
113 let m = a.len();
119 let num_threads = 1 << (log_cpus as usize);
120 let num_cosets = num_threads;
121 assert_eq!(m % num_threads, 0);
122 let coset_size = m / num_threads;
123
124 let mut tmp = vec![vec![T::zero(); coset_size]; num_cosets];
131 let new_omega = omega.pow(&[num_cosets as u64]);
132 let new_two_adicity = ark_ff::utils::k_adicity(2, coset_size as u64);
133
134 tmp.par_iter_mut()
138 .enumerate()
139 .for_each(|(k, kth_poly_coeffs)| {
140 let omega_k = omega.pow(&[k as u64]);
142 let omega_step = omega.pow(&[(k * coset_size) as u64]);
143
144 let mut elt = F::one();
145 for i in 0..coset_size {
163 for c in 0..num_threads {
164 let idx = i + (c * coset_size);
165 let mut t = a[idx];
167 t *= elt;
169 kth_poly_coeffs[i] += t;
170 elt *= &omega_step;
171 }
172 elt *= &omega_k;
173 }
174
175 serial_fft(kth_poly_coeffs, new_omega, new_two_adicity);
179 });
180
181 a.iter_mut()
184 .enumerate()
185 .for_each(|(i, a)| *a = tmp[i % num_cosets][i / num_cosets]);
186}
187
188pub struct Elements<F: FftField> {
190 pub(crate) cur_elem: F,
191 pub(crate) cur_pow: u64,
192 pub(crate) size: u64,
193 pub(crate) group_gen: F,
194}
195
196impl<F: FftField> Iterator for Elements<F> {
197 type Item = F;
198 fn next(&mut self) -> Option<F> {
199 if self.cur_pow == self.size {
200 None
201 } else {
202 let cur_elem = self.cur_elem;
203 self.cur_elem *= &self.group_gen;
204 self.cur_pow += 1;
205 Some(cur_elem)
206 }
207 }
208}