1 // Copyright 2015-2023 Brian Smith.
2 //
3 // Permission to use, copy, modify, and/or distribute this software for any
4 // purpose with or without fee is hereby granted, provided that the above
5 // copyright notice and this permission notice appear in all copies.
6 //
7 // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
8 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
10 // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12 // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13 // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14 
15 use super::{
16     super::{
17         montgomery::{Unencoded, R, RR},
18         n0::N0,
19     },
20     BoxedLimbs, Elem, Nonnegative, One, PublicModulus, SlightlySmallerModulus, SmallerModulus,
21     Width,
22 };
23 use crate::{
24     bits, cpu, error,
25     limb::{self, Limb, LimbMask, LIMB_BITS},
26     polyfill::LeadingZerosStripped,
27 };
28 use core::marker::PhantomData;
29 
30 /// The x86 implementation of `bn_mul_mont`, at least, requires at least 4
31 /// limbs. For a long time we have required 4 limbs for all targets, though
32 /// this may be unnecessary. TODO: Replace this with
33 /// `n.len() < 256 / LIMB_BITS` so that 32-bit and 64-bit platforms behave the
34 /// same.
35 pub const MODULUS_MIN_LIMBS: usize = 4;
36 
37 pub const MODULUS_MAX_LIMBS: usize = super::super::BIGINT_MODULUS_MAX_LIMBS;
38 
39 /// The modulus *m* for a ring ℤ/mℤ, along with the precomputed values needed
40 /// for efficient Montgomery multiplication modulo *m*. The value must be odd
41 /// and larger than 2. The larger-than-1 requirement is imposed, at least, by
42 /// the modular inversion code.
43 pub struct Modulus<M> {
44     limbs: BoxedLimbs<M>, // Also `value >= 3`.
45 
46     // n0 * N == -1 (mod r).
47     //
48     // r == 2**(N0::LIMBS_USED * LIMB_BITS) and LG_LITTLE_R == lg(r). This
49     // ensures that we can do integer division by |r| by simply ignoring
50     // `N0::LIMBS_USED` limbs. Similarly, we can calculate values modulo `r` by
51     // just looking at the lowest `N0::LIMBS_USED` limbs. This is what makes
52     // Montgomery multiplication efficient.
53     //
54     // As shown in Algorithm 1 of "Fast Prime Field Elliptic Curve Cryptography
55     // with 256 Bit Primes" by Shay Gueron and Vlad Krasnov, in the loop of a
56     // multi-limb Montgomery multiplication of a * b (mod n), given the
57     // unreduced product t == a * b, we repeatedly calculate:
58     //
59     //    t1 := t % r         |t1| is |t|'s lowest limb (see previous paragraph).
60     //    t2 := t1*n0*n
61     //    t3 := t + t2
62     //    t := t3 / r         copy all limbs of |t3| except the lowest to |t|.
63     //
64     // In the last step, it would only make sense to ignore the lowest limb of
65     // |t3| if it were zero. The middle steps ensure that this is the case:
66     //
67     //                            t3 ==  0 (mod r)
68     //                        t + t2 ==  0 (mod r)
69     //                   t + t1*n0*n ==  0 (mod r)
70     //                       t1*n0*n == -t (mod r)
71     //                        t*n0*n == -t (mod r)
72     //                          n0*n == -1 (mod r)
73     //                            n0 == -1/n (mod r)
74     //
75     // Thus, in each iteration of the loop, we multiply by the constant factor
76     // n0, the negative inverse of n (mod r).
77     //
78     // TODO(perf): Not all 32-bit platforms actually make use of n0[1]. For the
79     // ones that don't, we could use a shorter `R` value and use faster `Limb`
80     // calculations instead of double-precision `u64` calculations.
81     n0: N0,
82 
83     oneRR: One<M, RR>,
84 
85     cpu_features: cpu::Features,
86 }
87 
88 impl<M: PublicModulus> Clone for Modulus<M> {
clone(&self) -> Self89     fn clone(&self) -> Self {
90         Self {
91             limbs: self.limbs.clone(),
92             n0: self.n0.clone(),
93             oneRR: self.oneRR.clone(),
94             cpu_features: self.cpu_features,
95         }
96     }
97 }
98 
99 impl<M: PublicModulus> core::fmt::Debug for Modulus<M> {
fmt(&self, fmt: &mut ::core::fmt::Formatter) -> Result<(), ::core::fmt::Error>100     fn fmt(&self, fmt: &mut ::core::fmt::Formatter) -> Result<(), ::core::fmt::Error> {
101         fmt.debug_struct("Modulus")
102             // TODO: Print modulus value.
103             .finish()
104     }
105 }
106 
107 impl<M> Modulus<M> {
from_be_bytes_with_bit_length( input: untrusted::Input, cpu_features: cpu::Features, ) -> Result<(Self, bits::BitLength), error::KeyRejected>108     pub(crate) fn from_be_bytes_with_bit_length(
109         input: untrusted::Input,
110         cpu_features: cpu::Features,
111     ) -> Result<(Self, bits::BitLength), error::KeyRejected> {
112         let limbs = BoxedLimbs::positive_minimal_width_from_be_bytes(input)?;
113         Self::from_boxed_limbs(limbs, cpu_features)
114     }
115 
from_nonnegative_with_bit_length( n: Nonnegative, cpu_features: cpu::Features, ) -> Result<(Self, bits::BitLength), error::KeyRejected>116     pub(crate) fn from_nonnegative_with_bit_length(
117         n: Nonnegative,
118         cpu_features: cpu::Features,
119     ) -> Result<(Self, bits::BitLength), error::KeyRejected> {
120         let limbs = BoxedLimbs::new_unchecked(n.into_limbs());
121         Self::from_boxed_limbs(limbs, cpu_features)
122     }
123 
from_elem<L>( elem: Elem<L, Unencoded>, cpu_features: cpu::Features, ) -> Result<Self, error::KeyRejected> where M: SlightlySmallerModulus<L>,124     pub(crate) fn from_elem<L>(
125         elem: Elem<L, Unencoded>,
126         cpu_features: cpu::Features,
127     ) -> Result<Self, error::KeyRejected>
128     where
129         M: SlightlySmallerModulus<L>,
130     {
131         let (m, _bits) = Self::from_boxed_limbs(
132             BoxedLimbs::minimal_width_from_unpadded(&elem.limbs),
133             cpu_features,
134         )?;
135         Ok(m)
136     }
137 
from_boxed_limbs( n: BoxedLimbs<M>, cpu_features: cpu::Features, ) -> Result<(Self, bits::BitLength), error::KeyRejected>138     fn from_boxed_limbs(
139         n: BoxedLimbs<M>,
140         cpu_features: cpu::Features,
141     ) -> Result<(Self, bits::BitLength), error::KeyRejected> {
142         if n.len() > MODULUS_MAX_LIMBS {
143             return Err(error::KeyRejected::too_large());
144         }
145         if n.len() < MODULUS_MIN_LIMBS {
146             return Err(error::KeyRejected::unexpected_error());
147         }
148         if limb::limbs_are_even_constant_time(&n) != LimbMask::False {
149             return Err(error::KeyRejected::invalid_component());
150         }
151         if limb::limbs_less_than_limb_constant_time(&n, 3) != LimbMask::False {
152             return Err(error::KeyRejected::unexpected_error());
153         }
154 
155         // n_mod_r = n % r. As explained in the documentation for `n0`, this is
156         // done by taking the lowest `N0::LIMBS_USED` limbs of `n`.
157         #[allow(clippy::useless_conversion)]
158         let n0 = {
159             prefixed_extern! {
160                 fn bn_neg_inv_mod_r_u64(n: u64) -> u64;
161             }
162 
163             // XXX: u64::from isn't guaranteed to be constant time.
164             let mut n_mod_r: u64 = u64::from(n[0]);
165 
166             if N0::LIMBS_USED == 2 {
167                 // XXX: If we use `<< LIMB_BITS` here then 64-bit builds
168                 // fail to compile because of `deny(exceeding_bitshifts)`.
169                 debug_assert_eq!(LIMB_BITS, 32);
170                 n_mod_r |= u64::from(n[1]) << 32;
171             }
172             N0::from(unsafe { bn_neg_inv_mod_r_u64(n_mod_r) })
173         };
174 
175         let bits = limb::limbs_minimal_bits(&n);
176         let oneRR = {
177             let partial = PartialModulus {
178                 limbs: &n,
179                 n0: n0.clone(),
180                 m: PhantomData,
181                 cpu_features,
182             };
183 
184             One::newRR(&partial, bits)
185         };
186 
187         Ok((
188             Self {
189                 limbs: n,
190                 n0,
191                 oneRR,
192                 cpu_features,
193             },
194             bits,
195         ))
196     }
197 
198     #[inline]
cpu_features(&self) -> cpu::Features199     pub(super) fn cpu_features(&self) -> cpu::Features {
200         self.cpu_features
201     }
202 
203     #[inline]
limbs(&self) -> &[Limb]204     pub(super) fn limbs(&self) -> &[Limb] {
205         &self.limbs
206     }
207 
208     #[inline]
n0(&self) -> &N0209     pub(super) fn n0(&self) -> &N0 {
210         &self.n0
211     }
212 
213     #[inline]
width(&self) -> Width<M>214     pub(super) fn width(&self) -> Width<M> {
215         self.limbs.width()
216     }
217 
zero<E>(&self) -> Elem<M, E>218     pub(super) fn zero<E>(&self) -> Elem<M, E> {
219         Elem {
220             limbs: BoxedLimbs::zero(self.width()),
221             encoding: PhantomData,
222         }
223     }
224 
225     // TODO: Get rid of this
one(&self) -> Elem<M, Unencoded>226     pub(super) fn one(&self) -> Elem<M, Unencoded> {
227         let mut r = self.zero();
228         r.limbs[0] = 1;
229         r
230     }
231 
oneRR(&self) -> &One<M, RR>232     pub fn oneRR(&self) -> &One<M, RR> {
233         &self.oneRR
234     }
235 
to_elem<L>(&self, l: &Modulus<L>) -> Elem<L, Unencoded> where M: SmallerModulus<L>,236     pub fn to_elem<L>(&self, l: &Modulus<L>) -> Elem<L, Unencoded>
237     where
238         M: SmallerModulus<L>,
239     {
240         // TODO: Encode this assertion into the `where` above.
241         assert_eq!(self.width().num_limbs, l.width().num_limbs);
242         Elem {
243             limbs: BoxedLimbs::new_unchecked(self.limbs.clone().into_limbs()),
244             encoding: PhantomData,
245         }
246     }
247 
as_partial(&self) -> PartialModulus<M>248     pub(crate) fn as_partial(&self) -> PartialModulus<M> {
249         PartialModulus {
250             limbs: &self.limbs,
251             n0: self.n0.clone(),
252             m: PhantomData,
253             cpu_features: self.cpu_features,
254         }
255     }
256 }
257 
258 impl<M: PublicModulus> Modulus<M> {
be_bytes(&self) -> LeadingZerosStripped<impl ExactSizeIterator<Item = u8> + Clone + '_>259     pub fn be_bytes(&self) -> LeadingZerosStripped<impl ExactSizeIterator<Item = u8> + Clone + '_> {
260         LeadingZerosStripped::new(limb::unstripped_be_bytes(&self.limbs))
261     }
262 }
263 
264 pub(crate) struct PartialModulus<'a, M> {
265     limbs: &'a [Limb],
266     n0: N0,
267     m: PhantomData<M>,
268     cpu_features: cpu::Features,
269 }
270 
271 impl<M> PartialModulus<'_, M> {
272     // TODO: XXX Avoid duplication with `Modulus`.
zero(&self) -> Elem<M, R>273     pub(super) fn zero(&self) -> Elem<M, R> {
274         let width = Width {
275             num_limbs: self.limbs.len(),
276             m: PhantomData,
277         };
278         Elem {
279             limbs: BoxedLimbs::zero(width),
280             encoding: PhantomData,
281         }
282     }
283 
284     #[inline]
limbs(&self) -> &[Limb]285     pub(super) fn limbs(&self) -> &[Limb] {
286         self.limbs
287     }
288 
289     #[inline]
n0(&self) -> &N0290     pub(super) fn n0(&self) -> &N0 {
291         &self.n0
292     }
293 
294     #[inline]
cpu_features(&self) -> cpu::Features295     pub fn cpu_features(&self) -> cpu::Features {
296         self.cpu_features
297     }
298 }
299