// ark_ff_macros/montgomery/sum_of_products.rs

1use quote::quote;
2
3pub(super) fn sum_of_products_impl(num_limbs: usize, modulus: &[u64]) -> proc_macro2::TokenStream {
4    let modulus_size =
5        (((num_limbs - 1) * 64) as u32 + (64 - modulus[num_limbs - 1].leading_zeros())) as usize;
6    let mut body = proc_macro2::TokenStream::new();
7    // Adapted from https://github.com/zkcrypto/bls12_381/pull/84 by @str4d.
8
9    // For a single `a x b` multiplication, operand scanning (schoolbook) takes each
10    // limb of `a` in turn, and multiplies it by all of the limbs of `b` to compute
11    // the result as a double-width intermediate representation, which is then fully
12    // reduced at the carry. Here however we have pairs of multiplications (a_i, b_i),
13    // the results of which are summed.
14    //
15    // The intuition for this algorithm is two-fold:
16    // - We can interleave the operand scanning for each pair, by processing the jth
17    //   limb of each `a_i` together. As these have the same offset within the overall
18    //   operand scanning flow, their results can be summed directly.
19    // - We can interleave the multiplication and reduction steps, resulting in a
20    //   single bitshift by the limb size after each iteration. This means we only
21    //   need to store a single extra limb overall, instead of keeping around all the
22    //   intermediate results and eventually having twice as many limbs.
23
24    if modulus_size >= 64 * num_limbs - 1 {
25        quote! {
26            a.iter().zip(b).map(|(a, b)| *a * b).sum()
27        }
28    } else {
29        let mut inner_loop_body = proc_macro2::TokenStream::new();
30        for k in 1..num_limbs {
31            inner_loop_body.extend(quote! {
32                result.0[#k] = fa::mac_with_carry(result.0[#k], a.0[j], b.0[#k], &mut carry2);
33            });
34        }
35        let mut mont_red_body = proc_macro2::TokenStream::new();
36        for (i, modulus_i) in modulus.iter().enumerate().take(num_limbs).skip(1) {
37            mont_red_body.extend(quote! {
38                result.0[#i - 1] = fa::mac_with_carry(result.0[#i], k, #modulus_i, &mut carry2);
39            });
40        }
41        let modulus_0 = modulus[0];
42        let chunk_size = 2 * (num_limbs * 64 - modulus_size) - 1;
43        body.extend(quote! {
44            if M <= #chunk_size {
45                // Algorithm 2, line 2
46                let result = (0..#num_limbs).fold(BigInt::zero(), |mut result, j| {
47                    // Algorithm 2, line 3
48                    let mut carry_a = 0;
49                    let mut carry_b = 0;
50                    for (a, b) in a.iter().zip(b) {
51                        let a = &a.0;
52                        let b = &b.0;
53                        let mut carry2 = 0;
54                        result.0[0] = fa::mac(result.0[0], a.0[j], b.0[0], &mut carry2);
55                        #inner_loop_body
56                        carry_b = fa::adc(&mut carry_a, carry_b, carry2);
57                    }
58
59                    let k = result.0[0].wrapping_mul(Self::INV);
60                    let mut carry2 = 0;
61                    fa::mac_discard(result.0[0], k, #modulus_0, &mut carry2);
62                    #mont_red_body
63                    result.0[#num_limbs - 1] = fa::adc_no_carry(carry_a, carry_b, &mut carry2);
64                    result
65                });
66                let mut result = F::new_unchecked(result);
67                __subtract_modulus(&mut result);
68                debug_assert_eq!(
69                    a.iter().zip(b).map(|(a, b)| *a * b).sum::<F>(),
70                    result
71                );
72                result
73            } else {
74                a.chunks(#chunk_size).zip(b.chunks(#chunk_size)).map(|(a, b)| {
75                    if a.len() == #chunk_size {
76                        Self::sum_of_products::<#chunk_size>(a.try_into().unwrap(), b.try_into().unwrap())
77                    } else {
78                        a.iter().zip(b).map(|(a, b)| *a * b).sum()
79                    }
80                }).sum()
81            }
82
83
84        });
85        body
86    }
87}