1*8fb009dcSAndroid Build Coastguard Worker /* Copyright (c) 2018, Google Inc.
2*8fb009dcSAndroid Build Coastguard Worker *
3*8fb009dcSAndroid Build Coastguard Worker * Permission to use, copy, modify, and/or distribute this software for any
4*8fb009dcSAndroid Build Coastguard Worker * purpose with or without fee is hereby granted, provided that the above
5*8fb009dcSAndroid Build Coastguard Worker * copyright notice and this permission notice appear in all copies.
6*8fb009dcSAndroid Build Coastguard Worker *
7*8fb009dcSAndroid Build Coastguard Worker * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8*8fb009dcSAndroid Build Coastguard Worker * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9*8fb009dcSAndroid Build Coastguard Worker * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10*8fb009dcSAndroid Build Coastguard Worker * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11*8fb009dcSAndroid Build Coastguard Worker * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12*8fb009dcSAndroid Build Coastguard Worker * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13*8fb009dcSAndroid Build Coastguard Worker * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14*8fb009dcSAndroid Build Coastguard Worker
15*8fb009dcSAndroid Build Coastguard Worker #include <openssl/hrss.h>
16*8fb009dcSAndroid Build Coastguard Worker
17*8fb009dcSAndroid Build Coastguard Worker #include <assert.h>
18*8fb009dcSAndroid Build Coastguard Worker #include <stdio.h>
19*8fb009dcSAndroid Build Coastguard Worker #include <stdlib.h>
20*8fb009dcSAndroid Build Coastguard Worker
21*8fb009dcSAndroid Build Coastguard Worker #include <openssl/bn.h>
22*8fb009dcSAndroid Build Coastguard Worker #include <openssl/hmac.h>
23*8fb009dcSAndroid Build Coastguard Worker #include <openssl/mem.h>
24*8fb009dcSAndroid Build Coastguard Worker #include <openssl/rand.h>
25*8fb009dcSAndroid Build Coastguard Worker #include <openssl/sha.h>
26*8fb009dcSAndroid Build Coastguard Worker
27*8fb009dcSAndroid Build Coastguard Worker #if defined(_MSC_VER)
28*8fb009dcSAndroid Build Coastguard Worker #define RESTRICT
29*8fb009dcSAndroid Build Coastguard Worker #else
30*8fb009dcSAndroid Build Coastguard Worker #define RESTRICT restrict
31*8fb009dcSAndroid Build Coastguard Worker #endif
32*8fb009dcSAndroid Build Coastguard Worker
33*8fb009dcSAndroid Build Coastguard Worker #include "../internal.h"
34*8fb009dcSAndroid Build Coastguard Worker #include "internal.h"
35*8fb009dcSAndroid Build Coastguard Worker
36*8fb009dcSAndroid Build Coastguard Worker #if defined(OPENSSL_SSE2)
37*8fb009dcSAndroid Build Coastguard Worker #include <emmintrin.h>
38*8fb009dcSAndroid Build Coastguard Worker #endif
39*8fb009dcSAndroid Build Coastguard Worker
40*8fb009dcSAndroid Build Coastguard Worker #if (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) && defined(__ARM_NEON)
41*8fb009dcSAndroid Build Coastguard Worker #include <arm_neon.h>
42*8fb009dcSAndroid Build Coastguard Worker #endif
43*8fb009dcSAndroid Build Coastguard Worker
44*8fb009dcSAndroid Build Coastguard Worker // This is an implementation of [HRSS], but with a KEM transformation based on
45*8fb009dcSAndroid Build Coastguard Worker // [SXY]. The primary references are:
46*8fb009dcSAndroid Build Coastguard Worker
47*8fb009dcSAndroid Build Coastguard Worker // HRSS: https://eprint.iacr.org/2017/667.pdf
48*8fb009dcSAndroid Build Coastguard Worker // HRSSNIST:
49*8fb009dcSAndroid Build Coastguard Worker // https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-1/submissions/NTRU_HRSS_KEM.zip
50*8fb009dcSAndroid Build Coastguard Worker // SXY: https://eprint.iacr.org/2017/1005.pdf
51*8fb009dcSAndroid Build Coastguard Worker // NTRUTN14:
52*8fb009dcSAndroid Build Coastguard Worker // https://assets.onboardsecurity.com/static/downloads/NTRU/resources/NTRUTech014.pdf
53*8fb009dcSAndroid Build Coastguard Worker // NTRUCOMP: https://eprint.iacr.org/2018/1174
54*8fb009dcSAndroid Build Coastguard Worker // SAFEGCD: https://gcd.cr.yp.to/papers.html#safegcd
55*8fb009dcSAndroid Build Coastguard Worker
56*8fb009dcSAndroid Build Coastguard Worker
57*8fb009dcSAndroid Build Coastguard Worker // Vector operations.
58*8fb009dcSAndroid Build Coastguard Worker //
59*8fb009dcSAndroid Build Coastguard Worker // A couple of functions in this file can use vector operations to meaningful
60*8fb009dcSAndroid Build Coastguard Worker // effect. If we're building for a target that has a supported vector unit,
61*8fb009dcSAndroid Build Coastguard Worker // |HRSS_HAVE_VECTOR_UNIT| will be defined and |vec_t| will be typedefed to a
62*8fb009dcSAndroid Build Coastguard Worker // 128-bit vector. The following functions abstract over the differences between
63*8fb009dcSAndroid Build Coastguard Worker // NEON and SSE2 for implementing some vector operations.
64*8fb009dcSAndroid Build Coastguard Worker
65*8fb009dcSAndroid Build Coastguard Worker // TODO: MSVC can likely also be made to work with vector operations, but ^ must
66*8fb009dcSAndroid Build Coastguard Worker // be replaced with _mm_xor_si128, etc.
67*8fb009dcSAndroid Build Coastguard Worker #if defined(OPENSSL_SSE2) && (defined(__clang__) || !defined(_MSC_VER))
68*8fb009dcSAndroid Build Coastguard Worker
69*8fb009dcSAndroid Build Coastguard Worker #define HRSS_HAVE_VECTOR_UNIT
70*8fb009dcSAndroid Build Coastguard Worker typedef __m128i vec_t;
71*8fb009dcSAndroid Build Coastguard Worker
// vec_capable reports whether the vector code paths may be used. This branch
// is only compiled when |OPENSSL_SSE2| is defined, so the answer is always one.
static int vec_capable(void) {
  return 1;
}
74*8fb009dcSAndroid Build Coastguard Worker
75*8fb009dcSAndroid Build Coastguard Worker // vec_add performs a pair-wise addition of four uint16s from |a| and |b|.
vec_add(vec_t a,vec_t b)76*8fb009dcSAndroid Build Coastguard Worker static inline vec_t vec_add(vec_t a, vec_t b) { return _mm_add_epi16(a, b); }
77*8fb009dcSAndroid Build Coastguard Worker
78*8fb009dcSAndroid Build Coastguard Worker // vec_sub performs a pair-wise subtraction of four uint16s from |a| and |b|.
vec_sub(vec_t a,vec_t b)79*8fb009dcSAndroid Build Coastguard Worker static inline vec_t vec_sub(vec_t a, vec_t b) { return _mm_sub_epi16(a, b); }
80*8fb009dcSAndroid Build Coastguard Worker
81*8fb009dcSAndroid Build Coastguard Worker // vec_mul multiplies each uint16_t in |a| by |b| and returns the resulting
82*8fb009dcSAndroid Build Coastguard Worker // vector.
vec_mul(vec_t a,uint16_t b)83*8fb009dcSAndroid Build Coastguard Worker static inline vec_t vec_mul(vec_t a, uint16_t b) {
84*8fb009dcSAndroid Build Coastguard Worker return _mm_mullo_epi16(a, _mm_set1_epi16(b));
85*8fb009dcSAndroid Build Coastguard Worker }
86*8fb009dcSAndroid Build Coastguard Worker
87*8fb009dcSAndroid Build Coastguard Worker // vec_fma multiplies each uint16_t in |b| by |c|, adds the result to |a|, and
88*8fb009dcSAndroid Build Coastguard Worker // returns the resulting vector.
vec_fma(vec_t a,vec_t b,uint16_t c)89*8fb009dcSAndroid Build Coastguard Worker static inline vec_t vec_fma(vec_t a, vec_t b, uint16_t c) {
90*8fb009dcSAndroid Build Coastguard Worker return _mm_add_epi16(a, _mm_mullo_epi16(b, _mm_set1_epi16(c)));
91*8fb009dcSAndroid Build Coastguard Worker }
92*8fb009dcSAndroid Build Coastguard Worker
93*8fb009dcSAndroid Build Coastguard Worker // vec3_rshift_word right-shifts the 24 uint16_t's in |v| by one uint16.
vec3_rshift_word(vec_t v[3])94*8fb009dcSAndroid Build Coastguard Worker static inline void vec3_rshift_word(vec_t v[3]) {
95*8fb009dcSAndroid Build Coastguard Worker // Intel's left and right shifting is backwards compared to the order in
96*8fb009dcSAndroid Build Coastguard Worker // memory because they're based on little-endian order of words (and not just
97*8fb009dcSAndroid Build Coastguard Worker // bytes). So the shifts in this function will be backwards from what one
98*8fb009dcSAndroid Build Coastguard Worker // might expect.
99*8fb009dcSAndroid Build Coastguard Worker const __m128i carry0 = _mm_srli_si128(v[0], 14);
100*8fb009dcSAndroid Build Coastguard Worker v[0] = _mm_slli_si128(v[0], 2);
101*8fb009dcSAndroid Build Coastguard Worker
102*8fb009dcSAndroid Build Coastguard Worker const __m128i carry1 = _mm_srli_si128(v[1], 14);
103*8fb009dcSAndroid Build Coastguard Worker v[1] = _mm_slli_si128(v[1], 2);
104*8fb009dcSAndroid Build Coastguard Worker v[1] |= carry0;
105*8fb009dcSAndroid Build Coastguard Worker
106*8fb009dcSAndroid Build Coastguard Worker v[2] = _mm_slli_si128(v[2], 2);
107*8fb009dcSAndroid Build Coastguard Worker v[2] |= carry1;
108*8fb009dcSAndroid Build Coastguard Worker }
109*8fb009dcSAndroid Build Coastguard Worker
110*8fb009dcSAndroid Build Coastguard Worker // vec4_rshift_word right-shifts the 32 uint16_t's in |v| by one uint16.
vec4_rshift_word(vec_t v[4])111*8fb009dcSAndroid Build Coastguard Worker static inline void vec4_rshift_word(vec_t v[4]) {
112*8fb009dcSAndroid Build Coastguard Worker // Intel's left and right shifting is backwards compared to the order in
113*8fb009dcSAndroid Build Coastguard Worker // memory because they're based on little-endian order of words (and not just
114*8fb009dcSAndroid Build Coastguard Worker // bytes). So the shifts in this function will be backwards from what one
115*8fb009dcSAndroid Build Coastguard Worker // might expect.
116*8fb009dcSAndroid Build Coastguard Worker const __m128i carry0 = _mm_srli_si128(v[0], 14);
117*8fb009dcSAndroid Build Coastguard Worker v[0] = _mm_slli_si128(v[0], 2);
118*8fb009dcSAndroid Build Coastguard Worker
119*8fb009dcSAndroid Build Coastguard Worker const __m128i carry1 = _mm_srli_si128(v[1], 14);
120*8fb009dcSAndroid Build Coastguard Worker v[1] = _mm_slli_si128(v[1], 2);
121*8fb009dcSAndroid Build Coastguard Worker v[1] |= carry0;
122*8fb009dcSAndroid Build Coastguard Worker
123*8fb009dcSAndroid Build Coastguard Worker const __m128i carry2 = _mm_srli_si128(v[2], 14);
124*8fb009dcSAndroid Build Coastguard Worker v[2] = _mm_slli_si128(v[2], 2);
125*8fb009dcSAndroid Build Coastguard Worker v[2] |= carry1;
126*8fb009dcSAndroid Build Coastguard Worker
127*8fb009dcSAndroid Build Coastguard Worker v[3] = _mm_slli_si128(v[3], 2);
128*8fb009dcSAndroid Build Coastguard Worker v[3] |= carry2;
129*8fb009dcSAndroid Build Coastguard Worker }
130*8fb009dcSAndroid Build Coastguard Worker
131*8fb009dcSAndroid Build Coastguard Worker // vec_merge_3_5 takes the final three uint16_t's from |left|, appends the first
132*8fb009dcSAndroid Build Coastguard Worker // five from |right|, and returns the resulting vector.
vec_merge_3_5(vec_t left,vec_t right)133*8fb009dcSAndroid Build Coastguard Worker static inline vec_t vec_merge_3_5(vec_t left, vec_t right) {
134*8fb009dcSAndroid Build Coastguard Worker return _mm_srli_si128(left, 10) | _mm_slli_si128(right, 6);
135*8fb009dcSAndroid Build Coastguard Worker }
136*8fb009dcSAndroid Build Coastguard Worker
137*8fb009dcSAndroid Build Coastguard Worker // poly3_vec_lshift1 left-shifts the 768 bits in |a_s|, and in |a_a|, by one
138*8fb009dcSAndroid Build Coastguard Worker // bit.
poly3_vec_lshift1(vec_t a_s[6],vec_t a_a[6])139*8fb009dcSAndroid Build Coastguard Worker static inline void poly3_vec_lshift1(vec_t a_s[6], vec_t a_a[6]) {
140*8fb009dcSAndroid Build Coastguard Worker vec_t carry_s = {0};
141*8fb009dcSAndroid Build Coastguard Worker vec_t carry_a = {0};
142*8fb009dcSAndroid Build Coastguard Worker
143*8fb009dcSAndroid Build Coastguard Worker for (int i = 0; i < 6; i++) {
144*8fb009dcSAndroid Build Coastguard Worker vec_t next_carry_s = _mm_srli_epi64(a_s[i], 63);
145*8fb009dcSAndroid Build Coastguard Worker a_s[i] = _mm_slli_epi64(a_s[i], 1);
146*8fb009dcSAndroid Build Coastguard Worker a_s[i] |= _mm_slli_si128(next_carry_s, 8);
147*8fb009dcSAndroid Build Coastguard Worker a_s[i] |= carry_s;
148*8fb009dcSAndroid Build Coastguard Worker carry_s = _mm_srli_si128(next_carry_s, 8);
149*8fb009dcSAndroid Build Coastguard Worker
150*8fb009dcSAndroid Build Coastguard Worker vec_t next_carry_a = _mm_srli_epi64(a_a[i], 63);
151*8fb009dcSAndroid Build Coastguard Worker a_a[i] = _mm_slli_epi64(a_a[i], 1);
152*8fb009dcSAndroid Build Coastguard Worker a_a[i] |= _mm_slli_si128(next_carry_a, 8);
153*8fb009dcSAndroid Build Coastguard Worker a_a[i] |= carry_a;
154*8fb009dcSAndroid Build Coastguard Worker carry_a = _mm_srli_si128(next_carry_a, 8);
155*8fb009dcSAndroid Build Coastguard Worker }
156*8fb009dcSAndroid Build Coastguard Worker }
157*8fb009dcSAndroid Build Coastguard Worker
158*8fb009dcSAndroid Build Coastguard Worker // poly3_vec_rshift1 right-shifts the 768 bits in |a_s|, and in |a_a|, by one
159*8fb009dcSAndroid Build Coastguard Worker // bit.
poly3_vec_rshift1(vec_t a_s[6],vec_t a_a[6])160*8fb009dcSAndroid Build Coastguard Worker static inline void poly3_vec_rshift1(vec_t a_s[6], vec_t a_a[6]) {
161*8fb009dcSAndroid Build Coastguard Worker vec_t carry_s = {0};
162*8fb009dcSAndroid Build Coastguard Worker vec_t carry_a = {0};
163*8fb009dcSAndroid Build Coastguard Worker
164*8fb009dcSAndroid Build Coastguard Worker for (int i = 5; i >= 0; i--) {
165*8fb009dcSAndroid Build Coastguard Worker const vec_t next_carry_s = _mm_slli_epi64(a_s[i], 63);
166*8fb009dcSAndroid Build Coastguard Worker a_s[i] = _mm_srli_epi64(a_s[i], 1);
167*8fb009dcSAndroid Build Coastguard Worker a_s[i] |= _mm_srli_si128(next_carry_s, 8);
168*8fb009dcSAndroid Build Coastguard Worker a_s[i] |= carry_s;
169*8fb009dcSAndroid Build Coastguard Worker carry_s = _mm_slli_si128(next_carry_s, 8);
170*8fb009dcSAndroid Build Coastguard Worker
171*8fb009dcSAndroid Build Coastguard Worker const vec_t next_carry_a = _mm_slli_epi64(a_a[i], 63);
172*8fb009dcSAndroid Build Coastguard Worker a_a[i] = _mm_srli_epi64(a_a[i], 1);
173*8fb009dcSAndroid Build Coastguard Worker a_a[i] |= _mm_srli_si128(next_carry_a, 8);
174*8fb009dcSAndroid Build Coastguard Worker a_a[i] |= carry_a;
175*8fb009dcSAndroid Build Coastguard Worker carry_a = _mm_slli_si128(next_carry_a, 8);
176*8fb009dcSAndroid Build Coastguard Worker }
177*8fb009dcSAndroid Build Coastguard Worker }
178*8fb009dcSAndroid Build Coastguard Worker
179*8fb009dcSAndroid Build Coastguard Worker // vec_broadcast_bit duplicates the least-significant bit in |a| to all bits in
180*8fb009dcSAndroid Build Coastguard Worker // a vector and returns the result.
vec_broadcast_bit(vec_t a)181*8fb009dcSAndroid Build Coastguard Worker static inline vec_t vec_broadcast_bit(vec_t a) {
182*8fb009dcSAndroid Build Coastguard Worker return _mm_shuffle_epi32(_mm_srai_epi32(_mm_slli_epi64(a, 63), 31),
183*8fb009dcSAndroid Build Coastguard Worker 0b01010101);
184*8fb009dcSAndroid Build Coastguard Worker }
185*8fb009dcSAndroid Build Coastguard Worker
// vec_get_word evaluates to the |i|th uint16_t in |v|. It is a macro rather
// than a function because the compiler requires |i| to be a compile-time
// constant.
#define vec_get_word(v, i) _mm_extract_epi16((v), (i))
189*8fb009dcSAndroid Build Coastguard Worker
190*8fb009dcSAndroid Build Coastguard Worker #elif (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) && defined(__ARM_NEON)
191*8fb009dcSAndroid Build Coastguard Worker
192*8fb009dcSAndroid Build Coastguard Worker #define HRSS_HAVE_VECTOR_UNIT
193*8fb009dcSAndroid Build Coastguard Worker typedef uint16x8_t vec_t;
194*8fb009dcSAndroid Build Coastguard Worker
// These functions perform the same actions as the SSE2 functions of the same
// names, above.
197*8fb009dcSAndroid Build Coastguard Worker
// vec_capable returns the value of |CRYPTO_is_NEON_capable|, i.e. whether the
// NEON code paths may be used.
static int vec_capable(void) {
  const int have_neon = CRYPTO_is_NEON_capable();
  return have_neon;
}
199*8fb009dcSAndroid Build Coastguard Worker
// vec_add returns the lane-wise sum of |a| and |b| over their eight uint16_t
// lanes.
static inline vec_t vec_add(vec_t a, vec_t b) {
  return vaddq_u16(a, b);
}
201*8fb009dcSAndroid Build Coastguard Worker
// vec_sub returns the lane-wise difference |a| - |b| over their eight uint16_t
// lanes.
static inline vec_t vec_sub(vec_t a, vec_t b) {
  return vsubq_u16(a, b);
}
203*8fb009dcSAndroid Build Coastguard Worker
// vec_mul returns |a| with each of its uint16_t lanes multiplied by the scalar
// |b|.
static inline vec_t vec_mul(vec_t a, uint16_t b) {
  const vec_t product = vmulq_n_u16(a, b);
  return product;
}
205*8fb009dcSAndroid Build Coastguard Worker
// vec_fma returns |a| + |b| * |c|, computed independently in each uint16_t
// lane, where the scalar |c| is applied to every lane of |b|.
static inline vec_t vec_fma(vec_t a, vec_t b, uint16_t c) {
  const vec_t accumulated = vmlaq_n_u16(a, b, c);
  return accumulated;
}
209*8fb009dcSAndroid Build Coastguard Worker
// vec3_rshift_word moves each of the 24 uint16_t's in |v| one lane later. The
// first lane becomes zero and the final lane's value is discarded.
static inline void vec3_rshift_word(vec_t v[3]) {
  const uint16x8_t zero = {0};
  // |vextq_u16(x, y, 7)| yields the last lane of |x| followed by the first
  // seven lanes of |y|. Walking from the last vector to the first ensures each
  // predecessor is read before it is overwritten.
  for (int i = 2; i > 0; i--) {
    v[i] = vextq_u16(v[i - 1], v[i], 7);
  }
  v[0] = vextq_u16(zero, v[0], 7);
}
216*8fb009dcSAndroid Build Coastguard Worker
// vec4_rshift_word moves each of the 32 uint16_t's in |v| one lane later. The
// first lane becomes zero and the final lane's value is discarded.
static inline void vec4_rshift_word(vec_t v[4]) {
  const uint16x8_t zero = {0};
  // |vextq_u16(x, y, 7)| yields the last lane of |x| followed by the first
  // seven lanes of |y|. Walking from the last vector to the first ensures each
  // predecessor is read before it is overwritten.
  for (int i = 3; i > 0; i--) {
    v[i] = vextq_u16(v[i - 1], v[i], 7);
  }
  v[0] = vextq_u16(zero, v[0], 7);
}
224*8fb009dcSAndroid Build Coastguard Worker
// vec_merge_3_5 builds a vector whose first three uint16_t's are the last
// three of |left| and whose remaining five are the first five of |right|.
static inline vec_t vec_merge_3_5(vec_t left, vec_t right) {
  // Extract eight lanes starting at lane five of the |left|:|right| pair.
  const vec_t merged = vextq_u16(left, right, 5);
  return merged;
}
228*8fb009dcSAndroid Build Coastguard Worker
// vec_get_word returns the |i|th uint16_t lane of |v|.
static inline uint16_t vec_get_word(vec_t v, unsigned i) {
  const uint16_t lane = v[i];
  return lane;
}
232*8fb009dcSAndroid Build Coastguard Worker
233*8fb009dcSAndroid Build Coastguard Worker #if !defined(OPENSSL_AARCH64)
234*8fb009dcSAndroid Build Coastguard Worker
// vec_broadcast_bit returns a vector in which every bit equals the
// least-significant bit of |a|.
static inline vec_t vec_broadcast_bit(vec_t a) {
  // Move bit zero of each 16-bit lane into that lane's sign position...
  const int16x8_t lsb_in_sign = ((int16x8_t)a) << 15;
  // ...then smear it across the lane with an arithmetic right shift.
  const vec_t lane_masks = (vec_t)vshrq_n_s16(lsb_in_sign, 15);
  // Replicate lane zero, which reflects bit zero of |a|, into all eight lanes.
  return vdupq_lane_u16(vget_low_u16(lane_masks), 0);
}
239*8fb009dcSAndroid Build Coastguard Worker
// poly3_vec_lshift1 shifts the 768-bit value held in |a_s|, and separately the
// one held in |a_a|, left by a single bit. The lowest bit becomes zero and the
// bit shifted out of the top of the final vector is discarded.
static inline void poly3_vec_lshift1(vec_t a_s[6], vec_t a_a[6]) {
  const vec_t zero = {0};
  // Bit carried from the previous vector into lane zero of the current one.
  vec_t carry_in_s = {0};
  vec_t carry_in_a = {0};

  for (int i = 0; i < 6; i++) {
    // |top_*| holds, in bit zero of each 16-bit lane, the bit that the
    // lane-wise shift below pushes out of that lane. Rotating it one lane
    // later (with a zero entering lane zero) feeds each lane's bit into its
    // successor; the last lane's bit is kept in lane zero for the next vector.
    const vec_t top_s = a_s[i] >> 15;
    a_s[i] = (a_s[i] << 1) | vextq_u16(zero, top_s, 7) | carry_in_s;
    carry_in_s = vextq_u16(top_s, zero, 7);

    const vec_t top_a = a_a[i] >> 15;
    a_a[i] = (a_a[i] << 1) | vextq_u16(zero, top_a, 7) | carry_in_a;
    carry_in_a = vextq_u16(top_a, zero, 7);
  }
}
259*8fb009dcSAndroid Build Coastguard Worker
// poly3_vec_rshift1 shifts the 768-bit value held in |a_s|, and separately the
// one held in |a_a|, right by a single bit. The highest bit becomes zero and
// the lowest bit of the first vector is discarded.
static inline void poly3_vec_rshift1(vec_t a_s[6], vec_t a_a[6]) {
  const vec_t zero = {0};
  // Bit carried from the following vector into the last lane of the current
  // one.
  vec_t carry_in_s = {0};
  vec_t carry_in_a = {0};

  for (int i = 5; i >= 0; i--) {
    // |low_*| holds, in bit 15 of each 16-bit lane, the bit that the lane-wise
    // shift below pushes out of that lane. Rotating it one lane earlier (with
    // a zero entering the last lane) feeds each lane's bit into its
    // predecessor; lane zero's bit is kept in the last lane for the preceding
    // vector.
    const vec_t low_s = a_s[i] << 15;
    a_s[i] = (a_s[i] >> 1) | vextq_u16(low_s, zero, 1) | carry_in_s;
    carry_in_s = vextq_u16(zero, low_s, 1);

    const vec_t low_a = a_a[i] << 15;
    a_a[i] = (a_a[i] >> 1) | vextq_u16(low_a, zero, 1) | carry_in_a;
    carry_in_a = vextq_u16(zero, low_a, 1);
  }
}
279*8fb009dcSAndroid Build Coastguard Worker
280*8fb009dcSAndroid Build Coastguard Worker #endif // !OPENSSL_AARCH64
281*8fb009dcSAndroid Build Coastguard Worker
282*8fb009dcSAndroid Build Coastguard Worker #endif // (ARM || AARCH64) && NEON
283*8fb009dcSAndroid Build Coastguard Worker
284*8fb009dcSAndroid Build Coastguard Worker // Polynomials in this scheme have N terms.
285*8fb009dcSAndroid Build Coastguard Worker // #define N 701
286*8fb009dcSAndroid Build Coastguard Worker
287*8fb009dcSAndroid Build Coastguard Worker // Underlying data types and arithmetic operations.
288*8fb009dcSAndroid Build Coastguard Worker // ------------------------------------------------
289*8fb009dcSAndroid Build Coastguard Worker
290*8fb009dcSAndroid Build Coastguard Worker // Binary polynomials.
291*8fb009dcSAndroid Build Coastguard Worker
// poly2 represents a polynomial over GF(2) with N coefficients, i.e. of degree
// less than N. The words are in little-endian order, i.e. the coefficient of
// x^0 is the LSB of the first word. The final word is only partially used since
// N is not a multiple of the word size.
295*8fb009dcSAndroid Build Coastguard Worker
296*8fb009dcSAndroid Build Coastguard Worker // Defined in internal.h:
297*8fb009dcSAndroid Build Coastguard Worker // struct poly2 {
298*8fb009dcSAndroid Build Coastguard Worker // crypto_word_t v[WORDS_PER_POLY];
299*8fb009dcSAndroid Build Coastguard Worker // };
300*8fb009dcSAndroid Build Coastguard Worker
hexdump(const void * void_in,size_t len)301*8fb009dcSAndroid Build Coastguard Worker OPENSSL_UNUSED static void hexdump(const void *void_in, size_t len) {
302*8fb009dcSAndroid Build Coastguard Worker const uint8_t *in = (const uint8_t *)void_in;
303*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < len; i++) {
304*8fb009dcSAndroid Build Coastguard Worker printf("%02x", in[i]);
305*8fb009dcSAndroid Build Coastguard Worker }
306*8fb009dcSAndroid Build Coastguard Worker printf("\n");
307*8fb009dcSAndroid Build Coastguard Worker }
308*8fb009dcSAndroid Build Coastguard Worker
poly2_zero(struct poly2 * p)309*8fb009dcSAndroid Build Coastguard Worker static void poly2_zero(struct poly2 *p) {
310*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memset(&p->v[0], 0, sizeof(crypto_word_t) * WORDS_PER_POLY);
311*8fb009dcSAndroid Build Coastguard Worker }
312*8fb009dcSAndroid Build Coastguard Worker
313*8fb009dcSAndroid Build Coastguard Worker // word_reverse returns |in| with the bits in reverse order.
word_reverse(crypto_word_t in)314*8fb009dcSAndroid Build Coastguard Worker static crypto_word_t word_reverse(crypto_word_t in) {
315*8fb009dcSAndroid Build Coastguard Worker #if defined(OPENSSL_64_BIT)
316*8fb009dcSAndroid Build Coastguard Worker static const crypto_word_t kMasks[6] = {
317*8fb009dcSAndroid Build Coastguard Worker UINT64_C(0x5555555555555555),
318*8fb009dcSAndroid Build Coastguard Worker UINT64_C(0x3333333333333333),
319*8fb009dcSAndroid Build Coastguard Worker UINT64_C(0x0f0f0f0f0f0f0f0f),
320*8fb009dcSAndroid Build Coastguard Worker UINT64_C(0x00ff00ff00ff00ff),
321*8fb009dcSAndroid Build Coastguard Worker UINT64_C(0x0000ffff0000ffff),
322*8fb009dcSAndroid Build Coastguard Worker UINT64_C(0x00000000ffffffff),
323*8fb009dcSAndroid Build Coastguard Worker };
324*8fb009dcSAndroid Build Coastguard Worker #else
325*8fb009dcSAndroid Build Coastguard Worker static const crypto_word_t kMasks[5] = {
326*8fb009dcSAndroid Build Coastguard Worker 0x55555555,
327*8fb009dcSAndroid Build Coastguard Worker 0x33333333,
328*8fb009dcSAndroid Build Coastguard Worker 0x0f0f0f0f,
329*8fb009dcSAndroid Build Coastguard Worker 0x00ff00ff,
330*8fb009dcSAndroid Build Coastguard Worker 0x0000ffff,
331*8fb009dcSAndroid Build Coastguard Worker };
332*8fb009dcSAndroid Build Coastguard Worker #endif
333*8fb009dcSAndroid Build Coastguard Worker
334*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kMasks); i++) {
335*8fb009dcSAndroid Build Coastguard Worker in = ((in >> (1 << i)) & kMasks[i]) | ((in & kMasks[i]) << (1 << i));
336*8fb009dcSAndroid Build Coastguard Worker }
337*8fb009dcSAndroid Build Coastguard Worker
338*8fb009dcSAndroid Build Coastguard Worker return in;
339*8fb009dcSAndroid Build Coastguard Worker }
340*8fb009dcSAndroid Build Coastguard Worker
341*8fb009dcSAndroid Build Coastguard Worker // lsb_to_all replicates the least-significant bit of |v| to all bits of the
342*8fb009dcSAndroid Build Coastguard Worker // word. This is used in bit-slicing operations to make a vector from a fixed
343*8fb009dcSAndroid Build Coastguard Worker // value.
lsb_to_all(crypto_word_t v)344*8fb009dcSAndroid Build Coastguard Worker static crypto_word_t lsb_to_all(crypto_word_t v) { return 0u - (v & 1); }
345*8fb009dcSAndroid Build Coastguard Worker
346*8fb009dcSAndroid Build Coastguard Worker // poly2_mod_phiN reduces |p| by Φ(N).
poly2_mod_phiN(struct poly2 * p)347*8fb009dcSAndroid Build Coastguard Worker static void poly2_mod_phiN(struct poly2 *p) {
348*8fb009dcSAndroid Build Coastguard Worker // m is the term at x^700, replicated to every bit.
349*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t m =
350*8fb009dcSAndroid Build Coastguard Worker lsb_to_all(p->v[WORDS_PER_POLY - 1] >> (BITS_IN_LAST_WORD - 1));
351*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < WORDS_PER_POLY; i++) {
352*8fb009dcSAndroid Build Coastguard Worker p->v[i] ^= m;
353*8fb009dcSAndroid Build Coastguard Worker }
354*8fb009dcSAndroid Build Coastguard Worker p->v[WORDS_PER_POLY - 1] &= (UINT64_C(1) << (BITS_IN_LAST_WORD - 1)) - 1;
355*8fb009dcSAndroid Build Coastguard Worker }
356*8fb009dcSAndroid Build Coastguard Worker
357*8fb009dcSAndroid Build Coastguard Worker // poly2_reverse_700 reverses the order of the first 700 bits of |in| and writes
358*8fb009dcSAndroid Build Coastguard Worker // the result to |out|.
poly2_reverse_700(struct poly2 * out,const struct poly2 * in)359*8fb009dcSAndroid Build Coastguard Worker static void poly2_reverse_700(struct poly2 *out, const struct poly2 *in) {
360*8fb009dcSAndroid Build Coastguard Worker struct poly2 t;
361*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < WORDS_PER_POLY; i++) {
362*8fb009dcSAndroid Build Coastguard Worker t.v[i] = word_reverse(in->v[i]);
363*8fb009dcSAndroid Build Coastguard Worker }
364*8fb009dcSAndroid Build Coastguard Worker
365*8fb009dcSAndroid Build Coastguard Worker static const size_t shift = BITS_PER_WORD - ((N-1) % BITS_PER_WORD);
366*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < WORDS_PER_POLY-1; i++) {
367*8fb009dcSAndroid Build Coastguard Worker out->v[i] = t.v[WORDS_PER_POLY-1-i] >> shift;
368*8fb009dcSAndroid Build Coastguard Worker out->v[i] |= t.v[WORDS_PER_POLY-2-i] << (BITS_PER_WORD - shift);
369*8fb009dcSAndroid Build Coastguard Worker }
370*8fb009dcSAndroid Build Coastguard Worker out->v[WORDS_PER_POLY-1] = t.v[0] >> shift;
371*8fb009dcSAndroid Build Coastguard Worker }
372*8fb009dcSAndroid Build Coastguard Worker
373*8fb009dcSAndroid Build Coastguard Worker // poly2_cswap exchanges the values of |a| and |b| if |swap| is all ones.
poly2_cswap(struct poly2 * a,struct poly2 * b,crypto_word_t swap)374*8fb009dcSAndroid Build Coastguard Worker static void poly2_cswap(struct poly2 *a, struct poly2 *b, crypto_word_t swap) {
375*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < WORDS_PER_POLY; i++) {
376*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t sum = swap & (a->v[i] ^ b->v[i]);
377*8fb009dcSAndroid Build Coastguard Worker a->v[i] ^= sum;
378*8fb009dcSAndroid Build Coastguard Worker b->v[i] ^= sum;
379*8fb009dcSAndroid Build Coastguard Worker }
380*8fb009dcSAndroid Build Coastguard Worker }
381*8fb009dcSAndroid Build Coastguard Worker
382*8fb009dcSAndroid Build Coastguard Worker // poly2_fmadd sets |out| to |out| + |in| * m, where m is either
383*8fb009dcSAndroid Build Coastguard Worker // |CONSTTIME_TRUE_W| or |CONSTTIME_FALSE_W|.
poly2_fmadd(struct poly2 * out,const struct poly2 * in,crypto_word_t m)384*8fb009dcSAndroid Build Coastguard Worker static void poly2_fmadd(struct poly2 *out, const struct poly2 *in,
385*8fb009dcSAndroid Build Coastguard Worker crypto_word_t m) {
386*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < WORDS_PER_POLY; i++) {
387*8fb009dcSAndroid Build Coastguard Worker out->v[i] ^= in->v[i] & m;
388*8fb009dcSAndroid Build Coastguard Worker }
389*8fb009dcSAndroid Build Coastguard Worker }
390*8fb009dcSAndroid Build Coastguard Worker
391*8fb009dcSAndroid Build Coastguard Worker // poly2_lshift1 left-shifts |p| by one bit.
poly2_lshift1(struct poly2 * p)392*8fb009dcSAndroid Build Coastguard Worker static void poly2_lshift1(struct poly2 *p) {
393*8fb009dcSAndroid Build Coastguard Worker crypto_word_t carry = 0;
394*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < WORDS_PER_POLY; i++) {
395*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t next_carry = p->v[i] >> (BITS_PER_WORD - 1);
396*8fb009dcSAndroid Build Coastguard Worker p->v[i] <<= 1;
397*8fb009dcSAndroid Build Coastguard Worker p->v[i] |= carry;
398*8fb009dcSAndroid Build Coastguard Worker carry = next_carry;
399*8fb009dcSAndroid Build Coastguard Worker }
400*8fb009dcSAndroid Build Coastguard Worker }
401*8fb009dcSAndroid Build Coastguard Worker
402*8fb009dcSAndroid Build Coastguard Worker // poly2_rshift1 right-shifts |p| by one bit.
poly2_rshift1(struct poly2 * p)403*8fb009dcSAndroid Build Coastguard Worker static void poly2_rshift1(struct poly2 *p) {
404*8fb009dcSAndroid Build Coastguard Worker crypto_word_t carry = 0;
405*8fb009dcSAndroid Build Coastguard Worker for (size_t i = WORDS_PER_POLY - 1; i < WORDS_PER_POLY; i--) {
406*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t next_carry = p->v[i] & 1;
407*8fb009dcSAndroid Build Coastguard Worker p->v[i] >>= 1;
408*8fb009dcSAndroid Build Coastguard Worker p->v[i] |= carry << (BITS_PER_WORD - 1);
409*8fb009dcSAndroid Build Coastguard Worker carry = next_carry;
410*8fb009dcSAndroid Build Coastguard Worker }
411*8fb009dcSAndroid Build Coastguard Worker }
412*8fb009dcSAndroid Build Coastguard Worker
413*8fb009dcSAndroid Build Coastguard Worker // poly2_clear_top_bits clears the bits in the final word that are only for
414*8fb009dcSAndroid Build Coastguard Worker // alignment.
poly2_clear_top_bits(struct poly2 * p)415*8fb009dcSAndroid Build Coastguard Worker static void poly2_clear_top_bits(struct poly2 *p) {
416*8fb009dcSAndroid Build Coastguard Worker p->v[WORDS_PER_POLY - 1] &= (UINT64_C(1) << BITS_IN_LAST_WORD) - 1;
417*8fb009dcSAndroid Build Coastguard Worker }
418*8fb009dcSAndroid Build Coastguard Worker
419*8fb009dcSAndroid Build Coastguard Worker // poly2_top_bits_are_clear returns one iff the extra bits in the final words of
420*8fb009dcSAndroid Build Coastguard Worker // |p| are zero.
poly2_top_bits_are_clear(const struct poly2 * p)421*8fb009dcSAndroid Build Coastguard Worker static int poly2_top_bits_are_clear(const struct poly2 *p) {
422*8fb009dcSAndroid Build Coastguard Worker return (p->v[WORDS_PER_POLY - 1] &
423*8fb009dcSAndroid Build Coastguard Worker ~((UINT64_C(1) << BITS_IN_LAST_WORD) - 1)) == 0;
424*8fb009dcSAndroid Build Coastguard Worker }
425*8fb009dcSAndroid Build Coastguard Worker
426*8fb009dcSAndroid Build Coastguard Worker // Ternary polynomials.
427*8fb009dcSAndroid Build Coastguard Worker
428*8fb009dcSAndroid Build Coastguard Worker // poly3 represents a degree-N polynomial over GF(3). Each coefficient is
429*8fb009dcSAndroid Build Coastguard Worker // bitsliced across the |s| and |a| arrays, like this:
430*8fb009dcSAndroid Build Coastguard Worker //
431*8fb009dcSAndroid Build Coastguard Worker // s | a | value
432*8fb009dcSAndroid Build Coastguard Worker // -----------------
433*8fb009dcSAndroid Build Coastguard Worker // 0 | 0 | 0
434*8fb009dcSAndroid Build Coastguard Worker // 0 | 1 | 1
435*8fb009dcSAndroid Build Coastguard Worker // 1 | 1 | -1 (aka 2)
436*8fb009dcSAndroid Build Coastguard Worker // 1 | 0 | <invalid>
437*8fb009dcSAndroid Build Coastguard Worker //
438*8fb009dcSAndroid Build Coastguard Worker // ('s' is for sign, and 'a' is the absolute value.)
439*8fb009dcSAndroid Build Coastguard Worker //
440*8fb009dcSAndroid Build Coastguard Worker // Once bitsliced as such, the following circuits can be used to implement
441*8fb009dcSAndroid Build Coastguard Worker // addition and multiplication mod 3:
442*8fb009dcSAndroid Build Coastguard Worker //
443*8fb009dcSAndroid Build Coastguard Worker // (s3, a3) = (s1, a1) × (s2, a2)
444*8fb009dcSAndroid Build Coastguard Worker // a3 = a1 ∧ a2
445*8fb009dcSAndroid Build Coastguard Worker // s3 = (s1 ⊕ s2) ∧ a3
446*8fb009dcSAndroid Build Coastguard Worker //
447*8fb009dcSAndroid Build Coastguard Worker // (s3, a3) = (s1, a1) + (s2, a2)
448*8fb009dcSAndroid Build Coastguard Worker // t = s1 ⊕ a2
449*8fb009dcSAndroid Build Coastguard Worker // s3 = t ∧ (s2 ⊕ a1)
450*8fb009dcSAndroid Build Coastguard Worker // a3 = (a1 ⊕ a2) ∨ (t ⊕ s2)
451*8fb009dcSAndroid Build Coastguard Worker //
452*8fb009dcSAndroid Build Coastguard Worker // (s3, a3) = (s1, a1) - (s2, a2)
453*8fb009dcSAndroid Build Coastguard Worker // t = a1 ⊕ a2
454*8fb009dcSAndroid Build Coastguard Worker // s3 = (s1 ⊕ a2) ∧ (t ⊕ s2)
455*8fb009dcSAndroid Build Coastguard Worker // a3 = t ∨ (s1 ⊕ s2)
456*8fb009dcSAndroid Build Coastguard Worker //
457*8fb009dcSAndroid Build Coastguard Worker // Negating a value just involves XORing s by a.
458*8fb009dcSAndroid Build Coastguard Worker //
459*8fb009dcSAndroid Build Coastguard Worker // struct poly3 {
460*8fb009dcSAndroid Build Coastguard Worker // struct poly2 s, a;
461*8fb009dcSAndroid Build Coastguard Worker // };
462*8fb009dcSAndroid Build Coastguard Worker
poly3_print(const struct poly3 * in)463*8fb009dcSAndroid Build Coastguard Worker OPENSSL_UNUSED static void poly3_print(const struct poly3 *in) {
464*8fb009dcSAndroid Build Coastguard Worker struct poly3 p;
465*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memcpy(&p, in, sizeof(p));
466*8fb009dcSAndroid Build Coastguard Worker p.s.v[WORDS_PER_POLY - 1] &= ((crypto_word_t)1 << BITS_IN_LAST_WORD) - 1;
467*8fb009dcSAndroid Build Coastguard Worker p.a.v[WORDS_PER_POLY - 1] &= ((crypto_word_t)1 << BITS_IN_LAST_WORD) - 1;
468*8fb009dcSAndroid Build Coastguard Worker
469*8fb009dcSAndroid Build Coastguard Worker printf("{[");
470*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < WORDS_PER_POLY; i++) {
471*8fb009dcSAndroid Build Coastguard Worker if (i) {
472*8fb009dcSAndroid Build Coastguard Worker printf(" ");
473*8fb009dcSAndroid Build Coastguard Worker }
474*8fb009dcSAndroid Build Coastguard Worker printf(BN_HEX_FMT2, p.s.v[i]);
475*8fb009dcSAndroid Build Coastguard Worker }
476*8fb009dcSAndroid Build Coastguard Worker printf("] [");
477*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < WORDS_PER_POLY; i++) {
478*8fb009dcSAndroid Build Coastguard Worker if (i) {
479*8fb009dcSAndroid Build Coastguard Worker printf(" ");
480*8fb009dcSAndroid Build Coastguard Worker }
481*8fb009dcSAndroid Build Coastguard Worker printf(BN_HEX_FMT2, p.a.v[i]);
482*8fb009dcSAndroid Build Coastguard Worker }
483*8fb009dcSAndroid Build Coastguard Worker printf("]}\n");
484*8fb009dcSAndroid Build Coastguard Worker }
485*8fb009dcSAndroid Build Coastguard Worker
poly3_zero(struct poly3 * p)486*8fb009dcSAndroid Build Coastguard Worker static void poly3_zero(struct poly3 *p) {
487*8fb009dcSAndroid Build Coastguard Worker poly2_zero(&p->s);
488*8fb009dcSAndroid Build Coastguard Worker poly2_zero(&p->a);
489*8fb009dcSAndroid Build Coastguard Worker }
490*8fb009dcSAndroid Build Coastguard Worker
491*8fb009dcSAndroid Build Coastguard Worker // poly3_reverse_700 reverses the order of the first 700 terms of |in| and
492*8fb009dcSAndroid Build Coastguard Worker // writes them to |out|.
poly3_reverse_700(struct poly3 * out,const struct poly3 * in)493*8fb009dcSAndroid Build Coastguard Worker static void poly3_reverse_700(struct poly3 *out, const struct poly3 *in) {
494*8fb009dcSAndroid Build Coastguard Worker poly2_reverse_700(&out->a, &in->a);
495*8fb009dcSAndroid Build Coastguard Worker poly2_reverse_700(&out->s, &in->s);
496*8fb009dcSAndroid Build Coastguard Worker }
497*8fb009dcSAndroid Build Coastguard Worker
498*8fb009dcSAndroid Build Coastguard Worker // poly3_word_mul sets (|out_s|, |out_a|) to (|s1|, |a1|) × (|s2|, |a2|).
poly3_word_mul(crypto_word_t * out_s,crypto_word_t * out_a,const crypto_word_t s1,const crypto_word_t a1,const crypto_word_t s2,const crypto_word_t a2)499*8fb009dcSAndroid Build Coastguard Worker static void poly3_word_mul(crypto_word_t *out_s, crypto_word_t *out_a,
500*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t s1, const crypto_word_t a1,
501*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t s2, const crypto_word_t a2) {
502*8fb009dcSAndroid Build Coastguard Worker *out_a = a1 & a2;
503*8fb009dcSAndroid Build Coastguard Worker *out_s = (s1 ^ s2) & *out_a;
504*8fb009dcSAndroid Build Coastguard Worker }
505*8fb009dcSAndroid Build Coastguard Worker
506*8fb009dcSAndroid Build Coastguard Worker // poly3_word_add sets (|out_s|, |out_a|) to (|s1|, |a1|) + (|s2|, |a2|).
poly3_word_add(crypto_word_t * out_s,crypto_word_t * out_a,const crypto_word_t s1,const crypto_word_t a1,const crypto_word_t s2,const crypto_word_t a2)507*8fb009dcSAndroid Build Coastguard Worker static void poly3_word_add(crypto_word_t *out_s, crypto_word_t *out_a,
508*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t s1, const crypto_word_t a1,
509*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t s2, const crypto_word_t a2) {
510*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t t = s1 ^ a2;
511*8fb009dcSAndroid Build Coastguard Worker *out_s = t & (s2 ^ a1);
512*8fb009dcSAndroid Build Coastguard Worker *out_a = (a1 ^ a2) | (t ^ s2);
513*8fb009dcSAndroid Build Coastguard Worker }
514*8fb009dcSAndroid Build Coastguard Worker
515*8fb009dcSAndroid Build Coastguard Worker // poly3_word_sub sets (|out_s|, |out_a|) to (|s1|, |a1|) - (|s2|, |a2|).
poly3_word_sub(crypto_word_t * out_s,crypto_word_t * out_a,const crypto_word_t s1,const crypto_word_t a1,const crypto_word_t s2,const crypto_word_t a2)516*8fb009dcSAndroid Build Coastguard Worker static void poly3_word_sub(crypto_word_t *out_s, crypto_word_t *out_a,
517*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t s1, const crypto_word_t a1,
518*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t s2, const crypto_word_t a2) {
519*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t t = a1 ^ a2;
520*8fb009dcSAndroid Build Coastguard Worker *out_s = (s1 ^ a2) & (t ^ s2);
521*8fb009dcSAndroid Build Coastguard Worker *out_a = t | (s1 ^ s2);
522*8fb009dcSAndroid Build Coastguard Worker }
523*8fb009dcSAndroid Build Coastguard Worker
524*8fb009dcSAndroid Build Coastguard Worker // poly3_mul_const sets |p| to |p|×m, where m = (ms, ma).
poly3_mul_const(struct poly3 * p,crypto_word_t ms,crypto_word_t ma)525*8fb009dcSAndroid Build Coastguard Worker static void poly3_mul_const(struct poly3 *p, crypto_word_t ms,
526*8fb009dcSAndroid Build Coastguard Worker crypto_word_t ma) {
527*8fb009dcSAndroid Build Coastguard Worker ms = lsb_to_all(ms);
528*8fb009dcSAndroid Build Coastguard Worker ma = lsb_to_all(ma);
529*8fb009dcSAndroid Build Coastguard Worker
530*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < WORDS_PER_POLY; i++) {
531*8fb009dcSAndroid Build Coastguard Worker poly3_word_mul(&p->s.v[i], &p->a.v[i], p->s.v[i], p->a.v[i], ms, ma);
532*8fb009dcSAndroid Build Coastguard Worker }
533*8fb009dcSAndroid Build Coastguard Worker }
534*8fb009dcSAndroid Build Coastguard Worker
535*8fb009dcSAndroid Build Coastguard Worker // poly3_fmadd sets |out| to |out| - |in|×m, where m is (ms, ma).
poly3_fmsub(struct poly3 * RESTRICT out,const struct poly3 * RESTRICT in,crypto_word_t ms,crypto_word_t ma)536*8fb009dcSAndroid Build Coastguard Worker static void poly3_fmsub(struct poly3 *RESTRICT out,
537*8fb009dcSAndroid Build Coastguard Worker const struct poly3 *RESTRICT in, crypto_word_t ms,
538*8fb009dcSAndroid Build Coastguard Worker crypto_word_t ma) {
539*8fb009dcSAndroid Build Coastguard Worker crypto_word_t product_s, product_a;
540*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < WORDS_PER_POLY; i++) {
541*8fb009dcSAndroid Build Coastguard Worker poly3_word_mul(&product_s, &product_a, in->s.v[i], in->a.v[i], ms, ma);
542*8fb009dcSAndroid Build Coastguard Worker poly3_word_sub(&out->s.v[i], &out->a.v[i], out->s.v[i], out->a.v[i],
543*8fb009dcSAndroid Build Coastguard Worker product_s, product_a);
544*8fb009dcSAndroid Build Coastguard Worker }
545*8fb009dcSAndroid Build Coastguard Worker }
546*8fb009dcSAndroid Build Coastguard Worker
547*8fb009dcSAndroid Build Coastguard Worker // final_bit_to_all replicates the bit in the final position of the last word to
548*8fb009dcSAndroid Build Coastguard Worker // all the bits in the word.
final_bit_to_all(crypto_word_t v)549*8fb009dcSAndroid Build Coastguard Worker static crypto_word_t final_bit_to_all(crypto_word_t v) {
550*8fb009dcSAndroid Build Coastguard Worker return lsb_to_all(v >> (BITS_IN_LAST_WORD - 1));
551*8fb009dcSAndroid Build Coastguard Worker }
552*8fb009dcSAndroid Build Coastguard Worker
553*8fb009dcSAndroid Build Coastguard Worker // poly3_top_bits_are_clear returns one iff the extra bits in the final words of
554*8fb009dcSAndroid Build Coastguard Worker // |p| are zero.
poly3_top_bits_are_clear(const struct poly3 * p)555*8fb009dcSAndroid Build Coastguard Worker OPENSSL_UNUSED static int poly3_top_bits_are_clear(const struct poly3 *p) {
556*8fb009dcSAndroid Build Coastguard Worker return poly2_top_bits_are_clear(&p->s) && poly2_top_bits_are_clear(&p->a);
557*8fb009dcSAndroid Build Coastguard Worker }
558*8fb009dcSAndroid Build Coastguard Worker
559*8fb009dcSAndroid Build Coastguard Worker // poly3_mod_phiN reduces |p| by Φ(N).
poly3_mod_phiN(struct poly3 * p)560*8fb009dcSAndroid Build Coastguard Worker static void poly3_mod_phiN(struct poly3 *p) {
561*8fb009dcSAndroid Build Coastguard Worker // In order to reduce by Φ(N) we subtract by the value of the greatest
562*8fb009dcSAndroid Build Coastguard Worker // coefficient.
563*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t factor_s = final_bit_to_all(p->s.v[WORDS_PER_POLY - 1]);
564*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t factor_a = final_bit_to_all(p->a.v[WORDS_PER_POLY - 1]);
565*8fb009dcSAndroid Build Coastguard Worker
566*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < WORDS_PER_POLY; i++) {
567*8fb009dcSAndroid Build Coastguard Worker poly3_word_sub(&p->s.v[i], &p->a.v[i], p->s.v[i], p->a.v[i], factor_s,
568*8fb009dcSAndroid Build Coastguard Worker factor_a);
569*8fb009dcSAndroid Build Coastguard Worker }
570*8fb009dcSAndroid Build Coastguard Worker
571*8fb009dcSAndroid Build Coastguard Worker poly2_clear_top_bits(&p->s);
572*8fb009dcSAndroid Build Coastguard Worker poly2_clear_top_bits(&p->a);
573*8fb009dcSAndroid Build Coastguard Worker }
574*8fb009dcSAndroid Build Coastguard Worker
// poly3_cswap applies |poly2_cswap| with the same |swap| value to the sign
// halves and to the absolute-value halves of |a| and |b|.
static void poly3_cswap(struct poly3 *a, struct poly3 *b, crypto_word_t swap) {
  poly2_cswap(&a->a, &b->a, swap);
  poly2_cswap(&a->s, &b->s, swap);
}
579*8fb009dcSAndroid Build Coastguard Worker
poly3_lshift1(struct poly3 * p)580*8fb009dcSAndroid Build Coastguard Worker static void poly3_lshift1(struct poly3 *p) {
581*8fb009dcSAndroid Build Coastguard Worker poly2_lshift1(&p->s);
582*8fb009dcSAndroid Build Coastguard Worker poly2_lshift1(&p->a);
583*8fb009dcSAndroid Build Coastguard Worker }
584*8fb009dcSAndroid Build Coastguard Worker
poly3_rshift1(struct poly3 * p)585*8fb009dcSAndroid Build Coastguard Worker static void poly3_rshift1(struct poly3 *p) {
586*8fb009dcSAndroid Build Coastguard Worker poly2_rshift1(&p->s);
587*8fb009dcSAndroid Build Coastguard Worker poly2_rshift1(&p->a);
588*8fb009dcSAndroid Build Coastguard Worker }
589*8fb009dcSAndroid Build Coastguard Worker
590*8fb009dcSAndroid Build Coastguard Worker // poly3_span represents a pointer into a poly3.
591*8fb009dcSAndroid Build Coastguard Worker struct poly3_span {
592*8fb009dcSAndroid Build Coastguard Worker crypto_word_t *s;
593*8fb009dcSAndroid Build Coastguard Worker crypto_word_t *a;
594*8fb009dcSAndroid Build Coastguard Worker };
595*8fb009dcSAndroid Build Coastguard Worker
596*8fb009dcSAndroid Build Coastguard Worker // poly3_span_add adds |n| words of values from |a| and |b| and writes the
597*8fb009dcSAndroid Build Coastguard Worker // result to |out|.
poly3_span_add(const struct poly3_span * out,const struct poly3_span * a,const struct poly3_span * b,size_t n)598*8fb009dcSAndroid Build Coastguard Worker static void poly3_span_add(const struct poly3_span *out,
599*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span *a,
600*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span *b, size_t n) {
601*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < n; i++) {
602*8fb009dcSAndroid Build Coastguard Worker poly3_word_add(&out->s[i], &out->a[i], a->s[i], a->a[i], b->s[i], b->a[i]);
603*8fb009dcSAndroid Build Coastguard Worker }
604*8fb009dcSAndroid Build Coastguard Worker }
605*8fb009dcSAndroid Build Coastguard Worker
606*8fb009dcSAndroid Build Coastguard Worker // poly3_span_sub subtracts |n| words of |b| from |n| words of |a|.
poly3_span_sub(const struct poly3_span * a,const struct poly3_span * b,size_t n)607*8fb009dcSAndroid Build Coastguard Worker static void poly3_span_sub(const struct poly3_span *a,
608*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span *b, size_t n) {
609*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < n; i++) {
610*8fb009dcSAndroid Build Coastguard Worker poly3_word_sub(&a->s[i], &a->a[i], a->s[i], a->a[i], b->s[i], b->a[i]);
611*8fb009dcSAndroid Build Coastguard Worker }
612*8fb009dcSAndroid Build Coastguard Worker }
613*8fb009dcSAndroid Build Coastguard Worker
614*8fb009dcSAndroid Build Coastguard Worker // poly3_mul_aux is a recursive function that multiplies |n| words from |a| and
615*8fb009dcSAndroid Build Coastguard Worker // |b| and writes 2×|n| words to |out|. Each call uses 2*ceil(n/2) elements of
616*8fb009dcSAndroid Build Coastguard Worker // |scratch| and the function recurses, except if |n| == 1, when |scratch| isn't
617*8fb009dcSAndroid Build Coastguard Worker // used and the recursion stops. For |n| in {11, 22}, the transitive total
618*8fb009dcSAndroid Build Coastguard Worker // amount of |scratch| needed happens to be 2n+2.
poly3_mul_aux(const struct poly3_span * out,const struct poly3_span * scratch,const struct poly3_span * a,const struct poly3_span * b,size_t n)619*8fb009dcSAndroid Build Coastguard Worker static void poly3_mul_aux(const struct poly3_span *out,
620*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span *scratch,
621*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span *a,
622*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span *b, size_t n) {
623*8fb009dcSAndroid Build Coastguard Worker if (n == 1) {
624*8fb009dcSAndroid Build Coastguard Worker crypto_word_t r_s_low = 0, r_s_high = 0, r_a_low = 0, r_a_high = 0;
625*8fb009dcSAndroid Build Coastguard Worker crypto_word_t b_s = b->s[0], b_a = b->a[0];
626*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t a_s = a->s[0], a_a = a->a[0];
627*8fb009dcSAndroid Build Coastguard Worker
628*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < BITS_PER_WORD; i++) {
629*8fb009dcSAndroid Build Coastguard Worker // Multiply (s, a) by the next value from (b_s, b_a).
630*8fb009dcSAndroid Build Coastguard Worker crypto_word_t m_s, m_a;
631*8fb009dcSAndroid Build Coastguard Worker poly3_word_mul(&m_s, &m_a, a_s, a_a, lsb_to_all(b_s), lsb_to_all(b_a));
632*8fb009dcSAndroid Build Coastguard Worker b_s >>= 1;
633*8fb009dcSAndroid Build Coastguard Worker b_a >>= 1;
634*8fb009dcSAndroid Build Coastguard Worker
635*8fb009dcSAndroid Build Coastguard Worker if (i == 0) {
636*8fb009dcSAndroid Build Coastguard Worker // Special case otherwise the code tries to shift by BITS_PER_WORD
637*8fb009dcSAndroid Build Coastguard Worker // below, which is undefined.
638*8fb009dcSAndroid Build Coastguard Worker r_s_low = m_s;
639*8fb009dcSAndroid Build Coastguard Worker r_a_low = m_a;
640*8fb009dcSAndroid Build Coastguard Worker continue;
641*8fb009dcSAndroid Build Coastguard Worker }
642*8fb009dcSAndroid Build Coastguard Worker
643*8fb009dcSAndroid Build Coastguard Worker // Shift the multiplication result to the correct position.
644*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t m_s_low = m_s << i;
645*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t m_s_high = m_s >> (BITS_PER_WORD - i);
646*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t m_a_low = m_a << i;
647*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t m_a_high = m_a >> (BITS_PER_WORD - i);
648*8fb009dcSAndroid Build Coastguard Worker
649*8fb009dcSAndroid Build Coastguard Worker // Add into the result.
650*8fb009dcSAndroid Build Coastguard Worker poly3_word_add(&r_s_low, &r_a_low, r_s_low, r_a_low, m_s_low, m_a_low);
651*8fb009dcSAndroid Build Coastguard Worker poly3_word_add(&r_s_high, &r_a_high, r_s_high, r_a_high, m_s_high,
652*8fb009dcSAndroid Build Coastguard Worker m_a_high);
653*8fb009dcSAndroid Build Coastguard Worker }
654*8fb009dcSAndroid Build Coastguard Worker
655*8fb009dcSAndroid Build Coastguard Worker out->s[0] = r_s_low;
656*8fb009dcSAndroid Build Coastguard Worker out->s[1] = r_s_high;
657*8fb009dcSAndroid Build Coastguard Worker out->a[0] = r_a_low;
658*8fb009dcSAndroid Build Coastguard Worker out->a[1] = r_a_high;
659*8fb009dcSAndroid Build Coastguard Worker return;
660*8fb009dcSAndroid Build Coastguard Worker }
661*8fb009dcSAndroid Build Coastguard Worker
662*8fb009dcSAndroid Build Coastguard Worker // Karatsuba multiplication.
663*8fb009dcSAndroid Build Coastguard Worker // https://en.wikipedia.org/wiki/Karatsuba_algorithm
664*8fb009dcSAndroid Build Coastguard Worker
665*8fb009dcSAndroid Build Coastguard Worker // When |n| is odd, the two "halves" will have different lengths. The first
666*8fb009dcSAndroid Build Coastguard Worker // is always the smaller.
667*8fb009dcSAndroid Build Coastguard Worker const size_t low_len = n / 2;
668*8fb009dcSAndroid Build Coastguard Worker const size_t high_len = n - low_len;
669*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span a_high = {&a->s[low_len], &a->a[low_len]};
670*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span b_high = {&b->s[low_len], &b->a[low_len]};
671*8fb009dcSAndroid Build Coastguard Worker
672*8fb009dcSAndroid Build Coastguard Worker // Store a_1 + a_0 in the first half of |out| and b_1 + b_0 in the second
673*8fb009dcSAndroid Build Coastguard Worker // half.
674*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span a_cross_sum = *out;
675*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span b_cross_sum = {&out->s[high_len], &out->a[high_len]};
676*8fb009dcSAndroid Build Coastguard Worker poly3_span_add(&a_cross_sum, a, &a_high, low_len);
677*8fb009dcSAndroid Build Coastguard Worker poly3_span_add(&b_cross_sum, b, &b_high, low_len);
678*8fb009dcSAndroid Build Coastguard Worker if (high_len != low_len) {
679*8fb009dcSAndroid Build Coastguard Worker a_cross_sum.s[low_len] = a_high.s[low_len];
680*8fb009dcSAndroid Build Coastguard Worker a_cross_sum.a[low_len] = a_high.a[low_len];
681*8fb009dcSAndroid Build Coastguard Worker b_cross_sum.s[low_len] = b_high.s[low_len];
682*8fb009dcSAndroid Build Coastguard Worker b_cross_sum.a[low_len] = b_high.a[low_len];
683*8fb009dcSAndroid Build Coastguard Worker }
684*8fb009dcSAndroid Build Coastguard Worker
685*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span child_scratch = {&scratch->s[2 * high_len],
686*8fb009dcSAndroid Build Coastguard Worker &scratch->a[2 * high_len]};
687*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span out_mid = {&out->s[low_len], &out->a[low_len]};
688*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span out_high = {&out->s[2 * low_len],
689*8fb009dcSAndroid Build Coastguard Worker &out->a[2 * low_len]};
690*8fb009dcSAndroid Build Coastguard Worker
691*8fb009dcSAndroid Build Coastguard Worker // Calculate (a_1 + a_0) × (b_1 + b_0) and write to scratch buffer.
692*8fb009dcSAndroid Build Coastguard Worker poly3_mul_aux(scratch, &child_scratch, &a_cross_sum, &b_cross_sum, high_len);
693*8fb009dcSAndroid Build Coastguard Worker // Calculate a_1 × b_1.
694*8fb009dcSAndroid Build Coastguard Worker poly3_mul_aux(&out_high, &child_scratch, &a_high, &b_high, high_len);
695*8fb009dcSAndroid Build Coastguard Worker // Calculate a_0 × b_0.
696*8fb009dcSAndroid Build Coastguard Worker poly3_mul_aux(out, &child_scratch, a, b, low_len);
697*8fb009dcSAndroid Build Coastguard Worker
698*8fb009dcSAndroid Build Coastguard Worker // Subtract those last two products from the first.
699*8fb009dcSAndroid Build Coastguard Worker poly3_span_sub(scratch, out, low_len * 2);
700*8fb009dcSAndroid Build Coastguard Worker poly3_span_sub(scratch, &out_high, high_len * 2);
701*8fb009dcSAndroid Build Coastguard Worker
702*8fb009dcSAndroid Build Coastguard Worker // Add the middle product into the output.
703*8fb009dcSAndroid Build Coastguard Worker poly3_span_add(&out_mid, &out_mid, scratch, high_len * 2);
704*8fb009dcSAndroid Build Coastguard Worker }
705*8fb009dcSAndroid Build Coastguard Worker
706*8fb009dcSAndroid Build Coastguard Worker // HRSS_poly3_mul sets |*out| to |x|×|y| mod Φ(N).
HRSS_poly3_mul(struct poly3 * out,const struct poly3 * x,const struct poly3 * y)707*8fb009dcSAndroid Build Coastguard Worker void HRSS_poly3_mul(struct poly3 *out, const struct poly3 *x,
708*8fb009dcSAndroid Build Coastguard Worker const struct poly3 *y) {
709*8fb009dcSAndroid Build Coastguard Worker crypto_word_t prod_s[WORDS_PER_POLY * 2];
710*8fb009dcSAndroid Build Coastguard Worker crypto_word_t prod_a[WORDS_PER_POLY * 2];
711*8fb009dcSAndroid Build Coastguard Worker crypto_word_t scratch_s[WORDS_PER_POLY * 2 + 2];
712*8fb009dcSAndroid Build Coastguard Worker crypto_word_t scratch_a[WORDS_PER_POLY * 2 + 2];
713*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span prod_span = {prod_s, prod_a};
714*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span scratch_span = {scratch_s, scratch_a};
715*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span x_span = {(crypto_word_t *)x->s.v,
716*8fb009dcSAndroid Build Coastguard Worker (crypto_word_t *)x->a.v};
717*8fb009dcSAndroid Build Coastguard Worker const struct poly3_span y_span = {(crypto_word_t *)y->s.v,
718*8fb009dcSAndroid Build Coastguard Worker (crypto_word_t *)y->a.v};
719*8fb009dcSAndroid Build Coastguard Worker
720*8fb009dcSAndroid Build Coastguard Worker poly3_mul_aux(&prod_span, &scratch_span, &x_span, &y_span, WORDS_PER_POLY);
721*8fb009dcSAndroid Build Coastguard Worker
722*8fb009dcSAndroid Build Coastguard Worker // |prod| needs to be reduced mod (^n - 1), which just involves adding the
723*8fb009dcSAndroid Build Coastguard Worker // upper-half to the lower-half. However, N is 701, which isn't a multiple of
724*8fb009dcSAndroid Build Coastguard Worker // BITS_PER_WORD, so the upper-half vectors all have to be shifted before
725*8fb009dcSAndroid Build Coastguard Worker // being added to the lower-half.
726*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < WORDS_PER_POLY; i++) {
727*8fb009dcSAndroid Build Coastguard Worker crypto_word_t v_s = prod_s[WORDS_PER_POLY + i - 1] >> BITS_IN_LAST_WORD;
728*8fb009dcSAndroid Build Coastguard Worker v_s |= prod_s[WORDS_PER_POLY + i] << (BITS_PER_WORD - BITS_IN_LAST_WORD);
729*8fb009dcSAndroid Build Coastguard Worker crypto_word_t v_a = prod_a[WORDS_PER_POLY + i - 1] >> BITS_IN_LAST_WORD;
730*8fb009dcSAndroid Build Coastguard Worker v_a |= prod_a[WORDS_PER_POLY + i] << (BITS_PER_WORD - BITS_IN_LAST_WORD);
731*8fb009dcSAndroid Build Coastguard Worker
732*8fb009dcSAndroid Build Coastguard Worker poly3_word_add(&out->s.v[i], &out->a.v[i], prod_s[i], prod_a[i], v_s, v_a);
733*8fb009dcSAndroid Build Coastguard Worker }
734*8fb009dcSAndroid Build Coastguard Worker
735*8fb009dcSAndroid Build Coastguard Worker poly3_mod_phiN(out);
736*8fb009dcSAndroid Build Coastguard Worker }
737*8fb009dcSAndroid Build Coastguard Worker
738*8fb009dcSAndroid Build Coastguard Worker #if defined(HRSS_HAVE_VECTOR_UNIT) && !defined(OPENSSL_AARCH64)
739*8fb009dcSAndroid Build Coastguard Worker
740*8fb009dcSAndroid Build Coastguard Worker // poly3_vec_cswap swaps (|a_s|, |a_a|) and (|b_s|, |b_a|) if |swap| is
741*8fb009dcSAndroid Build Coastguard Worker // |0xff..ff|. Otherwise, |swap| must be zero.
poly3_vec_cswap(vec_t a_s[6],vec_t a_a[6],vec_t b_s[6],vec_t b_a[6],const vec_t swap)742*8fb009dcSAndroid Build Coastguard Worker static inline void poly3_vec_cswap(vec_t a_s[6], vec_t a_a[6], vec_t b_s[6],
743*8fb009dcSAndroid Build Coastguard Worker vec_t b_a[6], const vec_t swap) {
744*8fb009dcSAndroid Build Coastguard Worker for (int i = 0; i < 6; i++) {
745*8fb009dcSAndroid Build Coastguard Worker const vec_t sum_s = swap & (a_s[i] ^ b_s[i]);
746*8fb009dcSAndroid Build Coastguard Worker a_s[i] ^= sum_s;
747*8fb009dcSAndroid Build Coastguard Worker b_s[i] ^= sum_s;
748*8fb009dcSAndroid Build Coastguard Worker
749*8fb009dcSAndroid Build Coastguard Worker const vec_t sum_a = swap & (a_a[i] ^ b_a[i]);
750*8fb009dcSAndroid Build Coastguard Worker a_a[i] ^= sum_a;
751*8fb009dcSAndroid Build Coastguard Worker b_a[i] ^= sum_a;
752*8fb009dcSAndroid Build Coastguard Worker }
753*8fb009dcSAndroid Build Coastguard Worker }
754*8fb009dcSAndroid Build Coastguard Worker
755*8fb009dcSAndroid Build Coastguard Worker // poly3_vec_fmsub subtracts (|ms|, |ma|) × (|b_s|, |b_a|) from (|a_s|, |a_a|).
poly3_vec_fmsub(vec_t a_s[6],vec_t a_a[6],vec_t b_s[6],vec_t b_a[6],const vec_t ms,const vec_t ma)756*8fb009dcSAndroid Build Coastguard Worker static inline void poly3_vec_fmsub(vec_t a_s[6], vec_t a_a[6], vec_t b_s[6],
757*8fb009dcSAndroid Build Coastguard Worker vec_t b_a[6], const vec_t ms,
758*8fb009dcSAndroid Build Coastguard Worker const vec_t ma) {
759*8fb009dcSAndroid Build Coastguard Worker for (int i = 0; i < 6; i++) {
760*8fb009dcSAndroid Build Coastguard Worker // See the bitslice formula, above.
761*8fb009dcSAndroid Build Coastguard Worker const vec_t s = b_s[i];
762*8fb009dcSAndroid Build Coastguard Worker const vec_t a = b_a[i];
763*8fb009dcSAndroid Build Coastguard Worker const vec_t product_a = a & ma;
764*8fb009dcSAndroid Build Coastguard Worker const vec_t product_s = (s ^ ms) & product_a;
765*8fb009dcSAndroid Build Coastguard Worker
766*8fb009dcSAndroid Build Coastguard Worker const vec_t out_s = a_s[i];
767*8fb009dcSAndroid Build Coastguard Worker const vec_t out_a = a_a[i];
768*8fb009dcSAndroid Build Coastguard Worker const vec_t t = out_a ^ product_a;
769*8fb009dcSAndroid Build Coastguard Worker a_s[i] = (out_s ^ product_a) & (t ^ product_s);
770*8fb009dcSAndroid Build Coastguard Worker a_a[i] = t | (out_s ^ product_s);
771*8fb009dcSAndroid Build Coastguard Worker }
772*8fb009dcSAndroid Build Coastguard Worker }
773*8fb009dcSAndroid Build Coastguard Worker
774*8fb009dcSAndroid Build Coastguard Worker // poly3_invert_vec sets |*out| to |in|^-1, i.e. such that |out|×|in| == 1 mod
775*8fb009dcSAndroid Build Coastguard Worker // Φ(N).
poly3_invert_vec(struct poly3 * out,const struct poly3 * in)776*8fb009dcSAndroid Build Coastguard Worker static void poly3_invert_vec(struct poly3 *out, const struct poly3 *in) {
777*8fb009dcSAndroid Build Coastguard Worker // This algorithm is taken from section 7.1 of [SAFEGCD].
778*8fb009dcSAndroid Build Coastguard Worker const vec_t kZero = {0};
779*8fb009dcSAndroid Build Coastguard Worker const vec_t kOne = {1};
780*8fb009dcSAndroid Build Coastguard Worker static const uint8_t kBottomSixtyOne[sizeof(vec_t)] = {
781*8fb009dcSAndroid Build Coastguard Worker 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f};
782*8fb009dcSAndroid Build Coastguard Worker
783*8fb009dcSAndroid Build Coastguard Worker vec_t v_s[6], v_a[6], r_s[6], r_a[6], f_s[6], f_a[6], g_s[6], g_a[6];
784*8fb009dcSAndroid Build Coastguard Worker // v = 0
785*8fb009dcSAndroid Build Coastguard Worker memset(&v_s, 0, sizeof(v_s));
786*8fb009dcSAndroid Build Coastguard Worker memset(&v_a, 0, sizeof(v_a));
787*8fb009dcSAndroid Build Coastguard Worker // r = 1
788*8fb009dcSAndroid Build Coastguard Worker memset(&r_s, 0, sizeof(r_s));
789*8fb009dcSAndroid Build Coastguard Worker memset(&r_a, 0, sizeof(r_a));
790*8fb009dcSAndroid Build Coastguard Worker r_a[0] = kOne;
791*8fb009dcSAndroid Build Coastguard Worker // f = all ones.
792*8fb009dcSAndroid Build Coastguard Worker memset(f_s, 0, sizeof(f_s));
793*8fb009dcSAndroid Build Coastguard Worker memset(f_a, 0xff, 5 * sizeof(vec_t));
794*8fb009dcSAndroid Build Coastguard Worker memcpy(&f_a[5], kBottomSixtyOne, sizeof(kBottomSixtyOne));
795*8fb009dcSAndroid Build Coastguard Worker // g is the reversal of |in|.
796*8fb009dcSAndroid Build Coastguard Worker struct poly3 in_reversed;
797*8fb009dcSAndroid Build Coastguard Worker poly3_reverse_700(&in_reversed, in);
798*8fb009dcSAndroid Build Coastguard Worker g_s[5] = kZero;
799*8fb009dcSAndroid Build Coastguard Worker memcpy(&g_s, &in_reversed.s.v, WORDS_PER_POLY * sizeof(crypto_word_t));
800*8fb009dcSAndroid Build Coastguard Worker g_a[5] = kZero;
801*8fb009dcSAndroid Build Coastguard Worker memcpy(&g_a, &in_reversed.a.v, WORDS_PER_POLY * sizeof(crypto_word_t));
802*8fb009dcSAndroid Build Coastguard Worker
803*8fb009dcSAndroid Build Coastguard Worker int delta = 1;
804*8fb009dcSAndroid Build Coastguard Worker
805*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < (2*(N-1)) - 1; i++) {
806*8fb009dcSAndroid Build Coastguard Worker poly3_vec_lshift1(v_s, v_a);
807*8fb009dcSAndroid Build Coastguard Worker
808*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t delta_sign_bit = (delta >> (sizeof(delta) * 8 - 1)) & 1;
809*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t delta_is_non_negative = delta_sign_bit - 1;
810*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t delta_is_non_zero = ~constant_time_is_zero_w(delta);
811*8fb009dcSAndroid Build Coastguard Worker const vec_t g_has_constant_term = vec_broadcast_bit(g_a[0]);
812*8fb009dcSAndroid Build Coastguard Worker const vec_t mask_w =
813*8fb009dcSAndroid Build Coastguard Worker {delta_is_non_negative & delta_is_non_zero};
814*8fb009dcSAndroid Build Coastguard Worker const vec_t mask = vec_broadcast_bit(mask_w) & g_has_constant_term;
815*8fb009dcSAndroid Build Coastguard Worker
816*8fb009dcSAndroid Build Coastguard Worker const vec_t c_a = vec_broadcast_bit(f_a[0] & g_a[0]);
817*8fb009dcSAndroid Build Coastguard Worker const vec_t c_s = vec_broadcast_bit((f_s[0] ^ g_s[0]) & c_a);
818*8fb009dcSAndroid Build Coastguard Worker
819*8fb009dcSAndroid Build Coastguard Worker delta = constant_time_select_int(lsb_to_all(mask[0]), -delta, delta);
820*8fb009dcSAndroid Build Coastguard Worker delta++;
821*8fb009dcSAndroid Build Coastguard Worker
822*8fb009dcSAndroid Build Coastguard Worker poly3_vec_cswap(f_s, f_a, g_s, g_a, mask);
823*8fb009dcSAndroid Build Coastguard Worker poly3_vec_fmsub(g_s, g_a, f_s, f_a, c_s, c_a);
824*8fb009dcSAndroid Build Coastguard Worker poly3_vec_rshift1(g_s, g_a);
825*8fb009dcSAndroid Build Coastguard Worker
826*8fb009dcSAndroid Build Coastguard Worker poly3_vec_cswap(v_s, v_a, r_s, r_a, mask);
827*8fb009dcSAndroid Build Coastguard Worker poly3_vec_fmsub(r_s, r_a, v_s, v_a, c_s, c_a);
828*8fb009dcSAndroid Build Coastguard Worker }
829*8fb009dcSAndroid Build Coastguard Worker
830*8fb009dcSAndroid Build Coastguard Worker assert(delta == 0);
831*8fb009dcSAndroid Build Coastguard Worker memcpy(out->s.v, v_s, WORDS_PER_POLY * sizeof(crypto_word_t));
832*8fb009dcSAndroid Build Coastguard Worker memcpy(out->a.v, v_a, WORDS_PER_POLY * sizeof(crypto_word_t));
833*8fb009dcSAndroid Build Coastguard Worker poly3_mul_const(out, vec_get_word(f_s[0], 0), vec_get_word(f_a[0], 0));
834*8fb009dcSAndroid Build Coastguard Worker poly3_reverse_700(out, out);
835*8fb009dcSAndroid Build Coastguard Worker }
836*8fb009dcSAndroid Build Coastguard Worker
837*8fb009dcSAndroid Build Coastguard Worker #endif // HRSS_HAVE_VECTOR_UNIT
838*8fb009dcSAndroid Build Coastguard Worker
839*8fb009dcSAndroid Build Coastguard Worker // HRSS_poly3_invert sets |*out| to |in|^-1, i.e. such that |out|×|in| == 1 mod
840*8fb009dcSAndroid Build Coastguard Worker // Φ(N).
//
// In builds with HRSS_HAVE_VECTOR_UNIT defined and OPENSSL_AARCH64 undefined,
// the work is handed to |poly3_invert_vec| whenever |vec_capable| returns true.
// Otherwise the word-based loop below runs for a fixed number of iterations
// that does not depend on the value of |in|.
HRSS_poly3_invert(struct poly3 * out,const struct poly3 * in)841*8fb009dcSAndroid Build Coastguard Worker void HRSS_poly3_invert(struct poly3 *out, const struct poly3 *in) {
842*8fb009dcSAndroid Build Coastguard Worker // The vector version of this function seems slightly slower on AArch64, but
843*8fb009dcSAndroid Build Coastguard Worker // is useful on ARMv7 and x86-64.
844*8fb009dcSAndroid Build Coastguard Worker #if defined(HRSS_HAVE_VECTOR_UNIT) && !defined(OPENSSL_AARCH64)
845*8fb009dcSAndroid Build Coastguard Worker if (vec_capable()) {
846*8fb009dcSAndroid Build Coastguard Worker poly3_invert_vec(out, in);
847*8fb009dcSAndroid Build Coastguard Worker return;
848*8fb009dcSAndroid Build Coastguard Worker }
849*8fb009dcSAndroid Build Coastguard Worker #endif
850*8fb009dcSAndroid Build Coastguard Worker
851*8fb009dcSAndroid Build Coastguard Worker // This algorithm is taken from section 7.1 of [SAFEGCD].
852*8fb009dcSAndroid Build Coastguard Worker struct poly3 v, r, f, g;
853*8fb009dcSAndroid Build Coastguard Worker // v = 0
854*8fb009dcSAndroid Build Coastguard Worker poly3_zero(&v);
855*8fb009dcSAndroid Build Coastguard Worker // r = 1
856*8fb009dcSAndroid Build Coastguard Worker poly3_zero(&r);
857*8fb009dcSAndroid Build Coastguard Worker r.a.v[0] = 1;
858*8fb009dcSAndroid Build Coastguard Worker // f = all ones.
859*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memset(&f.s, 0, sizeof(struct poly2));
860*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memset(&f.a, 0xff, sizeof(struct poly2));
// Shift the final word right so that only its low BITS_IN_LAST_WORD bits remain
// set after the 0xff fill above.
861*8fb009dcSAndroid Build Coastguard Worker f.a.v[WORDS_PER_POLY - 1] >>= BITS_PER_WORD - BITS_IN_LAST_WORD;
862*8fb009dcSAndroid Build Coastguard Worker // g is the reversal of |in|.
863*8fb009dcSAndroid Build Coastguard Worker poly3_reverse_700(&g, in);
864*8fb009dcSAndroid Build Coastguard Worker int delta = 1;
865*8fb009dcSAndroid Build Coastguard Worker
// The iteration count is a compile-time constant, 2*(N-1) - 1.
866*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < (2*(N-1)) - 1; i++) {
867*8fb009dcSAndroid Build Coastguard Worker poly3_lshift1(&v);
868*8fb009dcSAndroid Build Coastguard Worker
// |delta_sign_bit| is 0 or 1, so subtracting one gives all-ones when |delta| is
// non-negative and zero when it is negative. |mask| combines the three
// conditions: g has a constant term, delta is non-negative and delta is
// non-zero (i.e. delta > 0).
869*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t delta_sign_bit = (delta >> (sizeof(delta) * 8 - 1)) & 1;
870*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t delta_is_non_negative = delta_sign_bit - 1;
871*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t delta_is_non_zero = ~constant_time_is_zero_w(delta);
872*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t g_has_constant_term = lsb_to_all(g.a.v[0]);
873*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t mask =
874*8fb009dcSAndroid Build Coastguard Worker g_has_constant_term & delta_is_non_negative & delta_is_non_zero;
875*8fb009dcSAndroid Build Coastguard Worker
// (c_s, c_a) is the result of |poly3_word_mul| on the lowest words of f and g,
// taken before the conditional swap below and then passed through
// |lsb_to_all| for use as the multiplier in both |poly3_fmsub| calls.
876*8fb009dcSAndroid Build Coastguard Worker crypto_word_t c_s, c_a;
877*8fb009dcSAndroid Build Coastguard Worker poly3_word_mul(&c_s, &c_a, f.s.v[0], f.a.v[0], g.s.v[0], g.a.v[0]);
878*8fb009dcSAndroid Build Coastguard Worker c_s = lsb_to_all(c_s);
879*8fb009dcSAndroid Build Coastguard Worker c_a = lsb_to_all(c_a);
880*8fb009dcSAndroid Build Coastguard Worker
// |delta| is negated when |mask| selects the swap, and is then always
// incremented.
881*8fb009dcSAndroid Build Coastguard Worker delta = constant_time_select_int(mask, -delta, delta);
882*8fb009dcSAndroid Build Coastguard Worker delta++;
883*8fb009dcSAndroid Build Coastguard Worker
// The same conditional swap and multiply-subtract are applied first to (f, g)
// and then to (v, r); only g is additionally shifted right.
884*8fb009dcSAndroid Build Coastguard Worker poly3_cswap(&f, &g, mask);
885*8fb009dcSAndroid Build Coastguard Worker poly3_fmsub(&g, &f, c_s, c_a);
886*8fb009dcSAndroid Build Coastguard Worker poly3_rshift1(&g);
887*8fb009dcSAndroid Build Coastguard Worker
888*8fb009dcSAndroid Build Coastguard Worker poly3_cswap(&v, &r, mask);
889*8fb009dcSAndroid Build Coastguard Worker poly3_fmsub(&r, &v, c_s, c_a);
890*8fb009dcSAndroid Build Coastguard Worker }
891*8fb009dcSAndroid Build Coastguard Worker
// |delta| is expected to have returned to zero after the fixed iteration count.
// The result is v, passed to |poly3_mul_const| with the lowest words of f and
// then reversed into |out|.
892*8fb009dcSAndroid Build Coastguard Worker assert(delta == 0);
893*8fb009dcSAndroid Build Coastguard Worker poly3_mul_const(&v, f.s.v[0], f.a.v[0]);
894*8fb009dcSAndroid Build Coastguard Worker poly3_reverse_700(out, &v);
895*8fb009dcSAndroid Build Coastguard Worker }
896*8fb009dcSAndroid Build Coastguard Worker
897*8fb009dcSAndroid Build Coastguard Worker // Polynomials in Q.
898*8fb009dcSAndroid Build Coastguard Worker
899*8fb009dcSAndroid Build Coastguard Worker // Coefficients are reduced mod Q. (Q is clearly not prime, therefore the
900*8fb009dcSAndroid Build Coastguard Worker // coefficients do not form a field.)
901*8fb009dcSAndroid Build Coastguard Worker #define Q 8192
902*8fb009dcSAndroid Build Coastguard Worker
903*8fb009dcSAndroid Build Coastguard Worker // COEFFICIENTS_PER_VEC is the number of 16-bit coefficients that fit in one
904*8fb009dcSAndroid Build Coastguard Worker // |vec_t|.
905*8fb009dcSAndroid Build Coastguard Worker #define COEFFICIENTS_PER_VEC (sizeof(vec_t) / sizeof(uint16_t))
// VECS_PER_POLY is the number of 128-bit vectors needed to represent a
// polynomial: N coefficients, rounded up to a whole number of vectors.
906*8fb009dcSAndroid Build Coastguard Worker #define VECS_PER_POLY ((N + COEFFICIENTS_PER_VEC - 1) / COEFFICIENTS_PER_VEC)
907*8fb009dcSAndroid Build Coastguard Worker
908*8fb009dcSAndroid Build Coastguard Worker // poly represents a polynomial with coefficients mod Q. Note that, while Q is a
909*8fb009dcSAndroid Build Coastguard Worker // power of two, this does not operate in GF(Q). That would be a binary field
910*8fb009dcSAndroid Build Coastguard Worker // but this is simply mod Q. Thus the coefficients are not a field.
911*8fb009dcSAndroid Build Coastguard Worker //
912*8fb009dcSAndroid Build Coastguard Worker // Coefficients are ordered little-endian, thus the coefficient of x^0 is the
913*8fb009dcSAndroid Build Coastguard Worker // first element of the array.
//
// The array has N + 3 elements: the first N are coefficients and the last three
// are padding.
914*8fb009dcSAndroid Build Coastguard Worker struct poly {
915*8fb009dcSAndroid Build Coastguard Worker #if defined(HRSS_HAVE_VECTOR_UNIT)
// With a vector unit, |v| and |vectors| are two views of the same storage.
916*8fb009dcSAndroid Build Coastguard Worker union {
917*8fb009dcSAndroid Build Coastguard Worker // N + 3 = 704, which is a multiple of 64 and thus aligns things, esp for
918*8fb009dcSAndroid Build Coastguard Worker // the vector code.
919*8fb009dcSAndroid Build Coastguard Worker uint16_t v[N + 3];
920*8fb009dcSAndroid Build Coastguard Worker vec_t vectors[VECS_PER_POLY];
921*8fb009dcSAndroid Build Coastguard Worker };
922*8fb009dcSAndroid Build Coastguard Worker #else
923*8fb009dcSAndroid Build Coastguard Worker // Even if !HRSS_HAVE_VECTOR_UNIT, external assembly may be called that
924*8fb009dcSAndroid Build Coastguard Worker // requires alignment.
925*8fb009dcSAndroid Build Coastguard Worker alignas(16) uint16_t v[N + 3];
926*8fb009dcSAndroid Build Coastguard Worker #endif
927*8fb009dcSAndroid Build Coastguard Worker };
928*8fb009dcSAndroid Build Coastguard Worker
929*8fb009dcSAndroid Build Coastguard Worker // poly_normalize zeros out the excess elements of |x| which are included only
930*8fb009dcSAndroid Build Coastguard Worker // for alignment.
poly_normalize(struct poly * x)931*8fb009dcSAndroid Build Coastguard Worker static void poly_normalize(struct poly *x) {
932*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memset(&x->v[N], 0, 3 * sizeof(uint16_t));
933*8fb009dcSAndroid Build Coastguard Worker }
934*8fb009dcSAndroid Build Coastguard Worker
935*8fb009dcSAndroid Build Coastguard Worker // poly_assert_normalized asserts that the excess elements of |x| are zeroed out
936*8fb009dcSAndroid Build Coastguard Worker // for the cases that care. (E.g. |poly_mul_vec|.)
//
// The checks use |assert|, so they compile to nothing when NDEBUG is defined.
poly_assert_normalized(const struct poly * x)937*8fb009dcSAndroid Build Coastguard Worker static void poly_assert_normalized(const struct poly *x) {
938*8fb009dcSAndroid Build Coastguard Worker assert(x->v[N] == 0);
939*8fb009dcSAndroid Build Coastguard Worker assert(x->v[N + 1] == 0);
940*8fb009dcSAndroid Build Coastguard Worker assert(x->v[N + 2] == 0);
941*8fb009dcSAndroid Build Coastguard Worker }
942*8fb009dcSAndroid Build Coastguard Worker
poly_print(const struct poly * p)943*8fb009dcSAndroid Build Coastguard Worker OPENSSL_UNUSED static void poly_print(const struct poly *p) {
944*8fb009dcSAndroid Build Coastguard Worker printf("[");
945*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i++) {
946*8fb009dcSAndroid Build Coastguard Worker if (i) {
947*8fb009dcSAndroid Build Coastguard Worker printf(" ");
948*8fb009dcSAndroid Build Coastguard Worker }
949*8fb009dcSAndroid Build Coastguard Worker printf("%d", p->v[i]);
950*8fb009dcSAndroid Build Coastguard Worker }
951*8fb009dcSAndroid Build Coastguard Worker printf("]\n");
952*8fb009dcSAndroid Build Coastguard Worker }
953*8fb009dcSAndroid Build Coastguard Worker
954*8fb009dcSAndroid Build Coastguard Worker // POLY_MUL_SCRATCH contains space for the working variables needed by
955*8fb009dcSAndroid Build Coastguard Worker // |poly_mul|. The contents afterwards may be discarded, but the object may also
956*8fb009dcSAndroid Build Coastguard Worker // be reused with future |poly_mul| calls to save heap allocations.
957*8fb009dcSAndroid Build Coastguard Worker //
958*8fb009dcSAndroid Build Coastguard Worker // This object must have 32-byte alignment.
//
// The members of |u| overlap: each multiplication implementation uses only its
// own member.
959*8fb009dcSAndroid Build Coastguard Worker struct POLY_MUL_SCRATCH {
960*8fb009dcSAndroid Build Coastguard Worker union {
961*8fb009dcSAndroid Build Coastguard Worker // This is used by |poly_mul_novec|.
962*8fb009dcSAndroid Build Coastguard Worker struct {
963*8fb009dcSAndroid Build Coastguard Worker uint16_t prod[2 * N];
964*8fb009dcSAndroid Build Coastguard Worker uint16_t scratch[1318];
965*8fb009dcSAndroid Build Coastguard Worker } novec;
966*8fb009dcSAndroid Build Coastguard Worker
967*8fb009dcSAndroid Build Coastguard Worker #if defined(HRSS_HAVE_VECTOR_UNIT)
968*8fb009dcSAndroid Build Coastguard Worker // This is used by |poly_mul_vec|.
// |prod| holds the double-length product and |scratch| has the 172 elements
// that |poly_mul_vec_aux| needs when called with |VECS_PER_POLY| vectors.
969*8fb009dcSAndroid Build Coastguard Worker struct {
970*8fb009dcSAndroid Build Coastguard Worker vec_t prod[VECS_PER_POLY * 2];
971*8fb009dcSAndroid Build Coastguard Worker vec_t scratch[172];
972*8fb009dcSAndroid Build Coastguard Worker } vec;
973*8fb009dcSAndroid Build Coastguard Worker #endif
974*8fb009dcSAndroid Build Coastguard Worker
975*8fb009dcSAndroid Build Coastguard Worker #if defined(POLY_RQ_MUL_ASM)
976*8fb009dcSAndroid Build Coastguard Worker // This is the space used by |poly_Rq_mul|.
977*8fb009dcSAndroid Build Coastguard Worker uint8_t rq[POLY_MUL_RQ_SCRATCH_SPACE];
978*8fb009dcSAndroid Build Coastguard Worker #endif
979*8fb009dcSAndroid Build Coastguard Worker } u;
980*8fb009dcSAndroid Build Coastguard Worker };
981*8fb009dcSAndroid Build Coastguard Worker
982*8fb009dcSAndroid Build Coastguard Worker #if defined(HRSS_HAVE_VECTOR_UNIT)
983*8fb009dcSAndroid Build Coastguard Worker
984*8fb009dcSAndroid Build Coastguard Worker // poly_mul_vec_aux is a recursive function that multiplies |n| words from |a|
985*8fb009dcSAndroid Build Coastguard Worker // and |b| and writes 2×|n| words to |out|. Each call uses 2*ceil(n/2) elements
986*8fb009dcSAndroid Build Coastguard Worker // of |scratch| and the function recurses, except if |n| is 2 or 3, when
987*8fb009dcSAndroid Build Coastguard Worker // |scratch| isn't used and the recursion stops. If |n| == |VECS_PER_POLY| then
988*8fb009dcSAndroid Build Coastguard Worker // |scratch| needs 172 elements.
//
// |n| must be at least two: smaller values never reach one of the base cases.
// All four pointers are restrict-qualified, so the buffers must not overlap.
poly_mul_vec_aux(vec_t * restrict out,vec_t * restrict scratch,const vec_t * restrict a,const vec_t * restrict b,const size_t n)989*8fb009dcSAndroid Build Coastguard Worker static void poly_mul_vec_aux(vec_t *restrict out, vec_t *restrict scratch,
990*8fb009dcSAndroid Build Coastguard Worker const vec_t *restrict a, const vec_t *restrict b,
991*8fb009dcSAndroid Build Coastguard Worker const size_t n) {
992*8fb009dcSAndroid Build Coastguard Worker // In [HRSS], the technique they used for polynomial multiplication is
993*8fb009dcSAndroid Build Coastguard Worker // described: they start with Toom-4 at the top level and then two layers of
994*8fb009dcSAndroid Build Coastguard Worker // Karatsuba. Karatsuba is a specific instance of the general Toom–Cook
995*8fb009dcSAndroid Build Coastguard Worker // decomposition, which splits an input n-ways and produces 2n-1
996*8fb009dcSAndroid Build Coastguard Worker // multiplications of those parts. So, starting with 704 coefficients (rounded
997*8fb009dcSAndroid Build Coastguard Worker // up from 701 to have more factors of two), Toom-4 gives seven
998*8fb009dcSAndroid Build Coastguard Worker // multiplications of degree-174 polynomials. Each round of Karatsuba (which
999*8fb009dcSAndroid Build Coastguard Worker // is Toom-2) increases the number of multiplications by a factor of three
1000*8fb009dcSAndroid Build Coastguard Worker // while halving the size of the values being multiplied. So two rounds gives
1001*8fb009dcSAndroid Build Coastguard Worker // 63 multiplications of degree-44 polynomials. Then they (I think) form
1002*8fb009dcSAndroid Build Coastguard Worker // vectors by gathering all 63 coefficients of each power together, for each
1003*8fb009dcSAndroid Build Coastguard Worker // input, and doing more rounds of Karatsuba on the vectors until they bottom-
1004*8fb009dcSAndroid Build Coastguard Worker // out somewhere with schoolbook multiplication.
1005*8fb009dcSAndroid Build Coastguard Worker //
1006*8fb009dcSAndroid Build Coastguard Worker // I tried something like that for NEON. NEON vectors are 128 bits so hold
1007*8fb009dcSAndroid Build Coastguard Worker // eight coefficients. I wrote a function that did Karatsuba on eight
1008*8fb009dcSAndroid Build Coastguard Worker // multiplications at the same time, using such vectors, and a Go script that
1009*8fb009dcSAndroid Build Coastguard Worker // decomposed from degree-704, with Karatsuba in non-transposed form, until it
1010*8fb009dcSAndroid Build Coastguard Worker // reached multiplications of degree-44. It batched up those 81
1011*8fb009dcSAndroid Build Coastguard Worker // multiplications into lots of eight with a single one left over (which was
1012*8fb009dcSAndroid Build Coastguard Worker // handled directly).
1013*8fb009dcSAndroid Build Coastguard Worker //
1014*8fb009dcSAndroid Build Coastguard Worker // It worked, but it was significantly slower than the dumb algorithm used
1015*8fb009dcSAndroid Build Coastguard Worker // below. Potentially that was because I misunderstood how [HRSS] did it, or
1016*8fb009dcSAndroid Build Coastguard Worker // because Clang is bad at generating good code from NEON intrinsics on ARMv7.
1017*8fb009dcSAndroid Build Coastguard Worker // (Which is true: the code generated by Clang for the below is pretty crap.)
1018*8fb009dcSAndroid Build Coastguard Worker //
1019*8fb009dcSAndroid Build Coastguard Worker // This algorithm is much simpler. It just does Karatsuba decomposition all
1020*8fb009dcSAndroid Build Coastguard Worker // the way down and never transposes. When it gets down to degree-16 or
1021*8fb009dcSAndroid Build Coastguard Worker // degree-24 values, they are multiplied using schoolbook multiplication and
1022*8fb009dcSAndroid Build Coastguard Worker // vector intrinsics. The vector operations form each of the eight phase-
1023*8fb009dcSAndroid Build Coastguard Worker // shifts of one of the inputs, point-wise multiply, and then add into the
1024*8fb009dcSAndroid Build Coastguard Worker // result at the correct place. This means that 33% (degree-16) or 25%
1025*8fb009dcSAndroid Build Coastguard Worker // (degree-24) of the multiplies and adds are wasted, but it does ok.

// Base case: two vectors per input. The four-vector product is accumulated in
// the local |result| array and copied to |out| at the end.
1026*8fb009dcSAndroid Build Coastguard Worker if (n == 2) {
1027*8fb009dcSAndroid Build Coastguard Worker vec_t result[4];
1028*8fb009dcSAndroid Build Coastguard Worker vec_t vec_a[3];
1029*8fb009dcSAndroid Build Coastguard Worker static const vec_t kZero = {0};
1030*8fb009dcSAndroid Build Coastguard Worker vec_a[0] = a[0];
1031*8fb009dcSAndroid Build Coastguard Worker vec_a[1] = a[1];
1032*8fb009dcSAndroid Build Coastguard Worker vec_a[2] = kZero;
1033*8fb009dcSAndroid Build Coastguard Worker
1034*8fb009dcSAndroid Build Coastguard Worker result[0] = vec_mul(vec_a[0], vec_get_word(b[0], 0));
1035*8fb009dcSAndroid Build Coastguard Worker result[1] = vec_mul(vec_a[1], vec_get_word(b[0], 0));
1036*8fb009dcSAndroid Build Coastguard Worker
1037*8fb009dcSAndroid Build Coastguard Worker result[1] = vec_fma(result[1], vec_a[0], vec_get_word(b[1], 0));
1038*8fb009dcSAndroid Build Coastguard Worker result[2] = vec_mul(vec_a[1], vec_get_word(b[1], 0));
1039*8fb009dcSAndroid Build Coastguard Worker result[3] = kZero;
1040*8fb009dcSAndroid Build Coastguard Worker
1041*8fb009dcSAndroid Build Coastguard Worker vec3_rshift_word(vec_a);
1042*8fb009dcSAndroid Build Coastguard Worker
// BLOCK(x, y) accumulates, via |vec_fma|, the current three vectors of |vec_a|
// multiplied by word |y| of |b| into result[x..x+2]. |y| is always a literal,
// so the vector index (y / 8) and lane index (y % 8) are constant expressions.
1043*8fb009dcSAndroid Build Coastguard Worker #define BLOCK(x, y) \
1044*8fb009dcSAndroid Build Coastguard Worker do { \
1045*8fb009dcSAndroid Build Coastguard Worker result[x + 0] = \
1046*8fb009dcSAndroid Build Coastguard Worker vec_fma(result[x + 0], vec_a[0], vec_get_word(b[y / 8], y % 8)); \
1047*8fb009dcSAndroid Build Coastguard Worker result[x + 1] = \
1048*8fb009dcSAndroid Build Coastguard Worker vec_fma(result[x + 1], vec_a[1], vec_get_word(b[y / 8], y % 8)); \
1049*8fb009dcSAndroid Build Coastguard Worker result[x + 2] = \
1050*8fb009dcSAndroid Build Coastguard Worker vec_fma(result[x + 2], vec_a[2], vec_get_word(b[y / 8], y % 8)); \
1051*8fb009dcSAndroid Build Coastguard Worker } while (0)
1052*8fb009dcSAndroid Build Coastguard Worker
1053*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 1);
1054*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 9);
1055*8fb009dcSAndroid Build Coastguard Worker
1056*8fb009dcSAndroid Build Coastguard Worker vec3_rshift_word(vec_a);
1057*8fb009dcSAndroid Build Coastguard Worker
1058*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 2);
1059*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 10);
1060*8fb009dcSAndroid Build Coastguard Worker
1061*8fb009dcSAndroid Build Coastguard Worker vec3_rshift_word(vec_a);
1062*8fb009dcSAndroid Build Coastguard Worker
1063*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 3);
1064*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 11);
1065*8fb009dcSAndroid Build Coastguard Worker
1066*8fb009dcSAndroid Build Coastguard Worker vec3_rshift_word(vec_a);
1067*8fb009dcSAndroid Build Coastguard Worker
1068*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 4);
1069*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 12);
1070*8fb009dcSAndroid Build Coastguard Worker
1071*8fb009dcSAndroid Build Coastguard Worker vec3_rshift_word(vec_a);
1072*8fb009dcSAndroid Build Coastguard Worker
1073*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 5);
1074*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 13);
1075*8fb009dcSAndroid Build Coastguard Worker
1076*8fb009dcSAndroid Build Coastguard Worker vec3_rshift_word(vec_a);
1077*8fb009dcSAndroid Build Coastguard Worker
1078*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 6);
1079*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 14);
1080*8fb009dcSAndroid Build Coastguard Worker
1081*8fb009dcSAndroid Build Coastguard Worker vec3_rshift_word(vec_a);
1082*8fb009dcSAndroid Build Coastguard Worker
1083*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 7);
1084*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 15);
1085*8fb009dcSAndroid Build Coastguard Worker
1086*8fb009dcSAndroid Build Coastguard Worker #undef BLOCK
1087*8fb009dcSAndroid Build Coastguard Worker
1088*8fb009dcSAndroid Build Coastguard Worker memcpy(out, result, sizeof(result));
1089*8fb009dcSAndroid Build Coastguard Worker return;
1090*8fb009dcSAndroid Build Coastguard Worker }
1091*8fb009dcSAndroid Build Coastguard Worker
// Base case: three vectors per input, producing six result vectors.
1092*8fb009dcSAndroid Build Coastguard Worker if (n == 3) {
1093*8fb009dcSAndroid Build Coastguard Worker vec_t result[6];
1094*8fb009dcSAndroid Build Coastguard Worker vec_t vec_a[4];
1095*8fb009dcSAndroid Build Coastguard Worker static const vec_t kZero = {0};
1096*8fb009dcSAndroid Build Coastguard Worker vec_a[0] = a[0];
1097*8fb009dcSAndroid Build Coastguard Worker vec_a[1] = a[1];
1098*8fb009dcSAndroid Build Coastguard Worker vec_a[2] = a[2];
1099*8fb009dcSAndroid Build Coastguard Worker vec_a[3] = kZero;
1100*8fb009dcSAndroid Build Coastguard Worker
1101*8fb009dcSAndroid Build Coastguard Worker result[0] = vec_mul(a[0], vec_get_word(b[0], 0));
1102*8fb009dcSAndroid Build Coastguard Worker result[1] = vec_mul(a[1], vec_get_word(b[0], 0));
1103*8fb009dcSAndroid Build Coastguard Worker result[2] = vec_mul(a[2], vec_get_word(b[0], 0));
1104*8fb009dcSAndroid Build Coastguard Worker
// BLOCK_PRE(x, y) accumulates into result[x] and result[x + 1] but assigns
// result[x + 2] with |vec_mul|, because that element has not been written yet
// at the point where BLOCK_PRE is used.
1105*8fb009dcSAndroid Build Coastguard Worker #define BLOCK_PRE(x, y) \
1106*8fb009dcSAndroid Build Coastguard Worker do { \
1107*8fb009dcSAndroid Build Coastguard Worker result[x + 0] = \
1108*8fb009dcSAndroid Build Coastguard Worker vec_fma(result[x + 0], vec_a[0], vec_get_word(b[y / 8], y % 8)); \
1109*8fb009dcSAndroid Build Coastguard Worker result[x + 1] = \
1110*8fb009dcSAndroid Build Coastguard Worker vec_fma(result[x + 1], vec_a[1], vec_get_word(b[y / 8], y % 8)); \
1111*8fb009dcSAndroid Build Coastguard Worker result[x + 2] = vec_mul(vec_a[2], vec_get_word(b[y / 8], y % 8)); \
1112*8fb009dcSAndroid Build Coastguard Worker } while (0)
1113*8fb009dcSAndroid Build Coastguard Worker
1114*8fb009dcSAndroid Build Coastguard Worker BLOCK_PRE(1, 8);
1115*8fb009dcSAndroid Build Coastguard Worker BLOCK_PRE(2, 16);
1116*8fb009dcSAndroid Build Coastguard Worker
// The top vector is only ever accumulated into below, so it starts at zero.
1117*8fb009dcSAndroid Build Coastguard Worker result[5] = kZero;
1118*8fb009dcSAndroid Build Coastguard Worker
1119*8fb009dcSAndroid Build Coastguard Worker vec4_rshift_word(vec_a);
1120*8fb009dcSAndroid Build Coastguard Worker
// BLOCK(x, y) accumulates, via |vec_fma|, the current four vectors of |vec_a|
// multiplied by word |y| of |b| into result[x..x+3].
1121*8fb009dcSAndroid Build Coastguard Worker #define BLOCK(x, y) \
1122*8fb009dcSAndroid Build Coastguard Worker do { \
1123*8fb009dcSAndroid Build Coastguard Worker result[x + 0] = \
1124*8fb009dcSAndroid Build Coastguard Worker vec_fma(result[x + 0], vec_a[0], vec_get_word(b[y / 8], y % 8)); \
1125*8fb009dcSAndroid Build Coastguard Worker result[x + 1] = \
1126*8fb009dcSAndroid Build Coastguard Worker vec_fma(result[x + 1], vec_a[1], vec_get_word(b[y / 8], y % 8)); \
1127*8fb009dcSAndroid Build Coastguard Worker result[x + 2] = \
1128*8fb009dcSAndroid Build Coastguard Worker vec_fma(result[x + 2], vec_a[2], vec_get_word(b[y / 8], y % 8)); \
1129*8fb009dcSAndroid Build Coastguard Worker result[x + 3] = \
1130*8fb009dcSAndroid Build Coastguard Worker vec_fma(result[x + 3], vec_a[3], vec_get_word(b[y / 8], y % 8)); \
1131*8fb009dcSAndroid Build Coastguard Worker } while (0)
1132*8fb009dcSAndroid Build Coastguard Worker
1133*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 1);
1134*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 9);
1135*8fb009dcSAndroid Build Coastguard Worker BLOCK(2, 17);
1136*8fb009dcSAndroid Build Coastguard Worker
1137*8fb009dcSAndroid Build Coastguard Worker vec4_rshift_word(vec_a);
1138*8fb009dcSAndroid Build Coastguard Worker
1139*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 2);
1140*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 10);
1141*8fb009dcSAndroid Build Coastguard Worker BLOCK(2, 18);
1142*8fb009dcSAndroid Build Coastguard Worker
1143*8fb009dcSAndroid Build Coastguard Worker vec4_rshift_word(vec_a);
1144*8fb009dcSAndroid Build Coastguard Worker
1145*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 3);
1146*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 11);
1147*8fb009dcSAndroid Build Coastguard Worker BLOCK(2, 19);
1148*8fb009dcSAndroid Build Coastguard Worker
1149*8fb009dcSAndroid Build Coastguard Worker vec4_rshift_word(vec_a);
1150*8fb009dcSAndroid Build Coastguard Worker
1151*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 4);
1152*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 12);
1153*8fb009dcSAndroid Build Coastguard Worker BLOCK(2, 20);
1154*8fb009dcSAndroid Build Coastguard Worker
1155*8fb009dcSAndroid Build Coastguard Worker vec4_rshift_word(vec_a);
1156*8fb009dcSAndroid Build Coastguard Worker
1157*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 5);
1158*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 13);
1159*8fb009dcSAndroid Build Coastguard Worker BLOCK(2, 21);
1160*8fb009dcSAndroid Build Coastguard Worker
1161*8fb009dcSAndroid Build Coastguard Worker vec4_rshift_word(vec_a);
1162*8fb009dcSAndroid Build Coastguard Worker
1163*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 6);
1164*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 14);
1165*8fb009dcSAndroid Build Coastguard Worker BLOCK(2, 22);
1166*8fb009dcSAndroid Build Coastguard Worker
1167*8fb009dcSAndroid Build Coastguard Worker vec4_rshift_word(vec_a);
1168*8fb009dcSAndroid Build Coastguard Worker
1169*8fb009dcSAndroid Build Coastguard Worker BLOCK(0, 7);
1170*8fb009dcSAndroid Build Coastguard Worker BLOCK(1, 15);
1171*8fb009dcSAndroid Build Coastguard Worker BLOCK(2, 23);
1172*8fb009dcSAndroid Build Coastguard Worker
1173*8fb009dcSAndroid Build Coastguard Worker #undef BLOCK
1174*8fb009dcSAndroid Build Coastguard Worker #undef BLOCK_PRE
1175*8fb009dcSAndroid Build Coastguard Worker
1176*8fb009dcSAndroid Build Coastguard Worker memcpy(out, result, sizeof(result));
1177*8fb009dcSAndroid Build Coastguard Worker
1178*8fb009dcSAndroid Build Coastguard Worker return;
1179*8fb009dcSAndroid Build Coastguard Worker }
1180*8fb009dcSAndroid Build Coastguard Worker
1181*8fb009dcSAndroid Build Coastguard Worker // Karatsuba multiplication.
1182*8fb009dcSAndroid Build Coastguard Worker // https://en.wikipedia.org/wiki/Karatsuba_algorithm
1183*8fb009dcSAndroid Build Coastguard Worker
1184*8fb009dcSAndroid Build Coastguard Worker // When |n| is odd, the two "halves" will have different lengths. The first is
1185*8fb009dcSAndroid Build Coastguard Worker // always the smaller.
1186*8fb009dcSAndroid Build Coastguard Worker const size_t low_len = n / 2;
1187*8fb009dcSAndroid Build Coastguard Worker const size_t high_len = n - low_len;
1188*8fb009dcSAndroid Build Coastguard Worker const vec_t *a_high = &a[low_len];
1189*8fb009dcSAndroid Build Coastguard Worker const vec_t *b_high = &b[low_len];
1190*8fb009dcSAndroid Build Coastguard Worker
1191*8fb009dcSAndroid Build Coastguard Worker // Store a_1 + a_0 in the first half of |out| and b_1 + b_0 in the second
1192*8fb009dcSAndroid Build Coastguard Worker // half.
1193*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < low_len; i++) {
1194*8fb009dcSAndroid Build Coastguard Worker out[i] = vec_add(a_high[i], a[i]);
1195*8fb009dcSAndroid Build Coastguard Worker out[high_len + i] = vec_add(b_high[i], b[i]);
1196*8fb009dcSAndroid Build Coastguard Worker }
// For odd |n| the high halves have one extra element with no low counterpart,
// so it is copied through unchanged.
1197*8fb009dcSAndroid Build Coastguard Worker if (high_len != low_len) {
1198*8fb009dcSAndroid Build Coastguard Worker out[low_len] = a_high[low_len];
1199*8fb009dcSAndroid Build Coastguard Worker out[high_len + low_len] = b_high[low_len];
1200*8fb009dcSAndroid Build Coastguard Worker }
1201*8fb009dcSAndroid Build Coastguard Worker
// The first 2*high_len elements of |scratch| receive the middle product; the
// remainder is handed to the recursive calls as their scratch space.
1202*8fb009dcSAndroid Build Coastguard Worker vec_t *const child_scratch = &scratch[2 * high_len];
1203*8fb009dcSAndroid Build Coastguard Worker // Calculate (a_1 + a_0) × (b_1 + b_0) and write to scratch buffer.
1204*8fb009dcSAndroid Build Coastguard Worker poly_mul_vec_aux(scratch, child_scratch, out, &out[high_len], high_len);
1205*8fb009dcSAndroid Build Coastguard Worker // Calculate a_1 × b_1.
1206*8fb009dcSAndroid Build Coastguard Worker poly_mul_vec_aux(&out[low_len * 2], child_scratch, a_high, b_high, high_len);
1207*8fb009dcSAndroid Build Coastguard Worker // Calculate a_0 × b_0.
1208*8fb009dcSAndroid Build Coastguard Worker poly_mul_vec_aux(out, child_scratch, a, b, low_len);
1209*8fb009dcSAndroid Build Coastguard Worker
1210*8fb009dcSAndroid Build Coastguard Worker // Subtract those last two products from the first.
1211*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < low_len * 2; i++) {
1212*8fb009dcSAndroid Build Coastguard Worker scratch[i] = vec_sub(scratch[i], vec_add(out[i], out[low_len * 2 + i]));
1213*8fb009dcSAndroid Build Coastguard Worker }
// For odd |n|, a_1 × b_1 is two elements longer than a_0 × b_0; subtract those
// two trailing elements as well.
1214*8fb009dcSAndroid Build Coastguard Worker if (low_len != high_len) {
1215*8fb009dcSAndroid Build Coastguard Worker scratch[low_len * 2] = vec_sub(scratch[low_len * 2], out[low_len * 4]);
1216*8fb009dcSAndroid Build Coastguard Worker scratch[low_len * 2 + 1] =
1217*8fb009dcSAndroid Build Coastguard Worker vec_sub(scratch[low_len * 2 + 1], out[low_len * 4 + 1]);
1218*8fb009dcSAndroid Build Coastguard Worker }
1219*8fb009dcSAndroid Build Coastguard Worker
1220*8fb009dcSAndroid Build Coastguard Worker // Add the middle product into the output.
1221*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < high_len * 2; i++) {
1222*8fb009dcSAndroid Build Coastguard Worker out[low_len + i] = vec_add(out[low_len + i], scratch[i]);
1223*8fb009dcSAndroid Build Coastguard Worker }
1224*8fb009dcSAndroid Build Coastguard Worker }
1225*8fb009dcSAndroid Build Coastguard Worker
1226*8fb009dcSAndroid Build Coastguard Worker // poly_mul_vec sets |*out| to |x|×|y| mod (X^n - 1).
poly_mul_vec(struct POLY_MUL_SCRATCH * scratch,struct poly * out,const struct poly * x,const struct poly * y)1227*8fb009dcSAndroid Build Coastguard Worker static void poly_mul_vec(struct POLY_MUL_SCRATCH *scratch, struct poly *out,
1228*8fb009dcSAndroid Build Coastguard Worker const struct poly *x, const struct poly *y) {
1229*8fb009dcSAndroid Build Coastguard Worker static_assert(sizeof(out->v) == sizeof(vec_t) * VECS_PER_POLY,
1230*8fb009dcSAndroid Build Coastguard Worker "struct poly is the wrong size");
1231*8fb009dcSAndroid Build Coastguard Worker static_assert(alignof(struct poly) == alignof(vec_t),
1232*8fb009dcSAndroid Build Coastguard Worker "struct poly has incorrect alignment");
1233*8fb009dcSAndroid Build Coastguard Worker poly_assert_normalized(x);
1234*8fb009dcSAndroid Build Coastguard Worker poly_assert_normalized(y);
1235*8fb009dcSAndroid Build Coastguard Worker
1236*8fb009dcSAndroid Build Coastguard Worker vec_t *const prod = scratch->u.vec.prod;
1237*8fb009dcSAndroid Build Coastguard Worker vec_t *const aux_scratch = scratch->u.vec.scratch;
1238*8fb009dcSAndroid Build Coastguard Worker poly_mul_vec_aux(prod, aux_scratch, x->vectors, y->vectors, VECS_PER_POLY);
1239*8fb009dcSAndroid Build Coastguard Worker
1240*8fb009dcSAndroid Build Coastguard Worker // |prod| needs to be reduced mod (^n - 1), which just involves adding the
1241*8fb009dcSAndroid Build Coastguard Worker // upper-half to the lower-half. However, N is 701, which isn't a multiple of
1242*8fb009dcSAndroid Build Coastguard Worker // the vector size, so the upper-half vectors all have to be shifted before
1243*8fb009dcSAndroid Build Coastguard Worker // being added to the lower-half.
1244*8fb009dcSAndroid Build Coastguard Worker vec_t *out_vecs = (vec_t *)out->v;
1245*8fb009dcSAndroid Build Coastguard Worker
1246*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < VECS_PER_POLY; i++) {
1247*8fb009dcSAndroid Build Coastguard Worker const vec_t prev = prod[VECS_PER_POLY - 1 + i];
1248*8fb009dcSAndroid Build Coastguard Worker const vec_t this = prod[VECS_PER_POLY + i];
1249*8fb009dcSAndroid Build Coastguard Worker out_vecs[i] = vec_add(prod[i], vec_merge_3_5(prev, this));
1250*8fb009dcSAndroid Build Coastguard Worker }
1251*8fb009dcSAndroid Build Coastguard Worker
1252*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memset(&out->v[N], 0, 3 * sizeof(uint16_t));
1253*8fb009dcSAndroid Build Coastguard Worker }
1254*8fb009dcSAndroid Build Coastguard Worker
1255*8fb009dcSAndroid Build Coastguard Worker #endif // HRSS_HAVE_VECTOR_UNIT
1256*8fb009dcSAndroid Build Coastguard Worker
// poly_mul_novec_aux writes the 2*|n|-element product of the |n|-element
// inputs |a| and |b| to |out|, using |scratch| as working space. Inputs shorter
// than 64 elements are multiplied with the schoolbook method, which doesn't
// touch |scratch| and ends the recursion; anything longer is split with
// Karatsuba. Each Karatsuba level uses 2*ceil(n/2) elements of |scratch| and
// passes the remainder down to the next level. If |n| == |N| then |scratch|
// needs 1318 elements.
static void poly_mul_novec_aux(uint16_t *out, uint16_t *scratch,
                               const uint16_t *a, const uint16_t *b, size_t n) {
  static const size_t kSchoolbookLimit = 64;
  if (n < kSchoolbookLimit) {
    // Schoolbook: accumulate every pairwise product into a zeroed output.
    OPENSSL_memset(out, 0, sizeof(uint16_t) * n * 2);
    for (size_t i = 0; i < n; i++) {
      uint16_t *const row = &out[i];
      for (size_t j = 0; j < n; j++) {
        row[j] += (unsigned)a[i] * b[j];
      }
    }

    return;
  }

  // Karatsuba multiplication.
  // https://en.wikipedia.org/wiki/Karatsuba_algorithm

  // When |n| is odd the two "halves" differ in length by one; the low half is
  // always the shorter of the two.
  const size_t lo = n / 2;
  const size_t hi = n - lo;
  const uint16_t *const a1 = a + lo;
  const uint16_t *const b1 = b + lo;

  // Build (a_0 + a_1) and (b_0 + b_1) side by side in |out|, which is free
  // until the products are written.
  uint16_t *const a_sum = out;
  uint16_t *const b_sum = out + hi;
  for (size_t i = 0; i < lo; i++) {
    a_sum[i] = a1[i] + a[i];
    b_sum[i] = b1[i] + b[i];
  }
  if (hi != lo) {
    // The high halves have one extra element with no low-half counterpart.
    a_sum[lo] = a1[lo];
    b_sum[lo] = b1[lo];
  }

  uint16_t *const next_scratch = scratch + 2 * hi;
  // scratch = (a_0 + a_1) × (b_0 + b_1).
  poly_mul_novec_aux(scratch, next_scratch, a_sum, b_sum, hi);
  // Upper part of |out| = a_1 × b_1.
  uint16_t *const high_prod = out + 2 * lo;
  poly_mul_novec_aux(high_prod, next_scratch, a1, b1, hi);
  // Lower part of |out| = a_0 × b_0.
  poly_mul_novec_aux(out, next_scratch, a, b, lo);

  // Subtract both outer products from the combined one, leaving the middle
  // term in |scratch|.
  for (size_t i = 0; i < 2 * lo; i++) {
    scratch[i] -= out[i] + high_prod[i];
  }
  if (lo != hi) {
    // The high product is two elements longer than the low one; the final
    // element is expected to be zero, so only one needs subtracting.
    scratch[2 * lo] -= high_prod[2 * lo];
    assert(high_prod[2 * lo + 1] == 0);
  }

  // Add the middle term into the output at an offset of |lo| elements.
  uint16_t *const middle = out + lo;
  for (size_t i = 0; i < 2 * hi; i++) {
    middle[i] += scratch[i];
  }
}
1313*8fb009dcSAndroid Build Coastguard Worker
1314*8fb009dcSAndroid Build Coastguard Worker // poly_mul_novec sets |*out| to |x|×|y| mod (^n - 1).
poly_mul_novec(struct POLY_MUL_SCRATCH * scratch,struct poly * out,const struct poly * x,const struct poly * y)1315*8fb009dcSAndroid Build Coastguard Worker static void poly_mul_novec(struct POLY_MUL_SCRATCH *scratch, struct poly *out,
1316*8fb009dcSAndroid Build Coastguard Worker const struct poly *x, const struct poly *y) {
1317*8fb009dcSAndroid Build Coastguard Worker uint16_t *const prod = scratch->u.novec.prod;
1318*8fb009dcSAndroid Build Coastguard Worker uint16_t *const aux_scratch = scratch->u.novec.scratch;
1319*8fb009dcSAndroid Build Coastguard Worker poly_mul_novec_aux(prod, aux_scratch, x->v, y->v, N);
1320*8fb009dcSAndroid Build Coastguard Worker
1321*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < N; i++) {
1322*8fb009dcSAndroid Build Coastguard Worker out->v[i] = prod[i] + prod[i + N];
1323*8fb009dcSAndroid Build Coastguard Worker }
1324*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memset(&out->v[N], 0, 3 * sizeof(uint16_t));
1325*8fb009dcSAndroid Build Coastguard Worker }
1326*8fb009dcSAndroid Build Coastguard Worker
// poly_mul sets |*r| to |a|×|b|, picking the fastest implementation that is
// both compiled in and supported by the running CPU: the AVX2 assembly, then
// the generic vector code, then the plain scalar fallback. |scratch| provides
// the working space for whichever implementation is chosen.
static void poly_mul(struct POLY_MUL_SCRATCH *scratch, struct poly *r,
                     const struct poly *a, const struct poly *b) {
#if defined(POLY_RQ_MUL_ASM)
  if (CRYPTO_is_AVX2_capable()) {
    poly_Rq_mul(r->v, a->v, b->v, scratch->u.rq);
    // The assembly output is normalized explicitly before being checked.
    poly_normalize(r);
    poly_assert_normalized(r);
    return;
  }
#endif

#if defined(HRSS_HAVE_VECTOR_UNIT)
  if (vec_capable()) {
    poly_mul_vec(scratch, r, a, b);
    poly_assert_normalized(r);
    return;
  }
#endif

  // Fallback, non-vector case.
  poly_mul_novec(scratch, r, a, b);
  poly_assert_normalized(r);
}
1349*8fb009dcSAndroid Build Coastguard Worker
1350*8fb009dcSAndroid Build Coastguard Worker // poly_mul_x_minus_1 sets |p| to |p|×( - 1) mod (^n - 1).
poly_mul_x_minus_1(struct poly * p)1351*8fb009dcSAndroid Build Coastguard Worker static void poly_mul_x_minus_1(struct poly *p) {
1352*8fb009dcSAndroid Build Coastguard Worker // Multiplying by ( - 1) means negating each coefficient and adding in
1353*8fb009dcSAndroid Build Coastguard Worker // the value of the previous one.
1354*8fb009dcSAndroid Build Coastguard Worker const uint16_t orig_final_coefficient = p->v[N - 1];
1355*8fb009dcSAndroid Build Coastguard Worker
1356*8fb009dcSAndroid Build Coastguard Worker for (size_t i = N - 1; i > 0; i--) {
1357*8fb009dcSAndroid Build Coastguard Worker p->v[i] = p->v[i - 1] - p->v[i];
1358*8fb009dcSAndroid Build Coastguard Worker }
1359*8fb009dcSAndroid Build Coastguard Worker p->v[0] = orig_final_coefficient - p->v[0];
1360*8fb009dcSAndroid Build Coastguard Worker }
1361*8fb009dcSAndroid Build Coastguard Worker
1362*8fb009dcSAndroid Build Coastguard Worker // poly_mod_phiN sets |p| to |p| mod Φ(N).
poly_mod_phiN(struct poly * p)1363*8fb009dcSAndroid Build Coastguard Worker static void poly_mod_phiN(struct poly *p) {
1364*8fb009dcSAndroid Build Coastguard Worker const uint16_t coeff700 = p->v[N - 1];
1365*8fb009dcSAndroid Build Coastguard Worker
1366*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i++) {
1367*8fb009dcSAndroid Build Coastguard Worker p->v[i] -= coeff700;
1368*8fb009dcSAndroid Build Coastguard Worker }
1369*8fb009dcSAndroid Build Coastguard Worker }
1370*8fb009dcSAndroid Build Coastguard Worker
1371*8fb009dcSAndroid Build Coastguard Worker // poly_clamp reduces each coefficient mod Q.
poly_clamp(struct poly * p)1372*8fb009dcSAndroid Build Coastguard Worker static void poly_clamp(struct poly *p) {
1373*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i++) {
1374*8fb009dcSAndroid Build Coastguard Worker p->v[i] &= Q - 1;
1375*8fb009dcSAndroid Build Coastguard Worker }
1376*8fb009dcSAndroid Build Coastguard Worker }
1377*8fb009dcSAndroid Build Coastguard Worker
1378*8fb009dcSAndroid Build Coastguard Worker
1379*8fb009dcSAndroid Build Coastguard Worker // Conversion functions
1380*8fb009dcSAndroid Build Coastguard Worker // --------------------
1381*8fb009dcSAndroid Build Coastguard Worker
1382*8fb009dcSAndroid Build Coastguard Worker // poly2_from_poly sets |*out| to |in| mod 2.
poly2_from_poly(struct poly2 * out,const struct poly * in)1383*8fb009dcSAndroid Build Coastguard Worker static void poly2_from_poly(struct poly2 *out, const struct poly *in) {
1384*8fb009dcSAndroid Build Coastguard Worker crypto_word_t *words = out->v;
1385*8fb009dcSAndroid Build Coastguard Worker unsigned shift = 0;
1386*8fb009dcSAndroid Build Coastguard Worker crypto_word_t word = 0;
1387*8fb009dcSAndroid Build Coastguard Worker
1388*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i++) {
1389*8fb009dcSAndroid Build Coastguard Worker word >>= 1;
1390*8fb009dcSAndroid Build Coastguard Worker word |= (crypto_word_t)(in->v[i] & 1) << (BITS_PER_WORD - 1);
1391*8fb009dcSAndroid Build Coastguard Worker shift++;
1392*8fb009dcSAndroid Build Coastguard Worker
1393*8fb009dcSAndroid Build Coastguard Worker if (shift == BITS_PER_WORD) {
1394*8fb009dcSAndroid Build Coastguard Worker *words = word;
1395*8fb009dcSAndroid Build Coastguard Worker words++;
1396*8fb009dcSAndroid Build Coastguard Worker word = 0;
1397*8fb009dcSAndroid Build Coastguard Worker shift = 0;
1398*8fb009dcSAndroid Build Coastguard Worker }
1399*8fb009dcSAndroid Build Coastguard Worker }
1400*8fb009dcSAndroid Build Coastguard Worker
1401*8fb009dcSAndroid Build Coastguard Worker word >>= BITS_PER_WORD - shift;
1402*8fb009dcSAndroid Build Coastguard Worker *words = word;
1403*8fb009dcSAndroid Build Coastguard Worker }
1404*8fb009dcSAndroid Build Coastguard Worker
// mod3 treats |a| as a signed number and returns |a| mod 3, as a value in
// {0, 1, 2}, without branching or dividing.
static uint16_t mod3(int16_t a) {
  // 21845 is floor(2^16 / 3), so multiplying by it and shifting right by 16
  // gives an estimate of |a| / 3.
  const int32_t scaled = (int32_t)a * 21845;
  const int16_t quotient = (int16_t)(scaled >> 16);
  const int16_t rem = (int16_t)(a - 3 * quotient);
  // At this point, |rem| is in {0, 1, 2, 3} and that needs to be mapped to
  // {0, 1, 2, 0}. |is_three| is one only when both low bits of |rem| are set,
  // in which case the mask below is zero; otherwise the mask is all ones.
  const int16_t is_three = rem & (rem >> 1);
  return rem & (is_three - 1);
}
1413*8fb009dcSAndroid Build Coastguard Worker
1414*8fb009dcSAndroid Build Coastguard Worker // poly3_from_poly sets |*out| to |in|.
poly3_from_poly(struct poly3 * out,const struct poly * in)1415*8fb009dcSAndroid Build Coastguard Worker static void poly3_from_poly(struct poly3 *out, const struct poly *in) {
1416*8fb009dcSAndroid Build Coastguard Worker crypto_word_t *words_s = out->s.v;
1417*8fb009dcSAndroid Build Coastguard Worker crypto_word_t *words_a = out->a.v;
1418*8fb009dcSAndroid Build Coastguard Worker crypto_word_t s = 0;
1419*8fb009dcSAndroid Build Coastguard Worker crypto_word_t a = 0;
1420*8fb009dcSAndroid Build Coastguard Worker unsigned shift = 0;
1421*8fb009dcSAndroid Build Coastguard Worker
1422*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i++) {
1423*8fb009dcSAndroid Build Coastguard Worker // This duplicates the 13th bit upwards to the top of the uint16,
1424*8fb009dcSAndroid Build Coastguard Worker // essentially treating it as a sign bit and converting into a signed int16.
1425*8fb009dcSAndroid Build Coastguard Worker // The signed value is reduced mod 3, yielding {0, 1, 2}.
1426*8fb009dcSAndroid Build Coastguard Worker const uint16_t v = mod3((int16_t)(in->v[i] << 3) >> 3);
1427*8fb009dcSAndroid Build Coastguard Worker s >>= 1;
1428*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t s_bit = (crypto_word_t)(v & 2) << (BITS_PER_WORD - 2);
1429*8fb009dcSAndroid Build Coastguard Worker s |= s_bit;
1430*8fb009dcSAndroid Build Coastguard Worker a >>= 1;
1431*8fb009dcSAndroid Build Coastguard Worker a |= s_bit | (crypto_word_t)(v & 1) << (BITS_PER_WORD - 1);
1432*8fb009dcSAndroid Build Coastguard Worker shift++;
1433*8fb009dcSAndroid Build Coastguard Worker
1434*8fb009dcSAndroid Build Coastguard Worker if (shift == BITS_PER_WORD) {
1435*8fb009dcSAndroid Build Coastguard Worker *words_s = s;
1436*8fb009dcSAndroid Build Coastguard Worker words_s++;
1437*8fb009dcSAndroid Build Coastguard Worker *words_a = a;
1438*8fb009dcSAndroid Build Coastguard Worker words_a++;
1439*8fb009dcSAndroid Build Coastguard Worker s = a = 0;
1440*8fb009dcSAndroid Build Coastguard Worker shift = 0;
1441*8fb009dcSAndroid Build Coastguard Worker }
1442*8fb009dcSAndroid Build Coastguard Worker }
1443*8fb009dcSAndroid Build Coastguard Worker
1444*8fb009dcSAndroid Build Coastguard Worker s >>= BITS_PER_WORD - shift;
1445*8fb009dcSAndroid Build Coastguard Worker a >>= BITS_PER_WORD - shift;
1446*8fb009dcSAndroid Build Coastguard Worker *words_s = s;
1447*8fb009dcSAndroid Build Coastguard Worker *words_a = a;
1448*8fb009dcSAndroid Build Coastguard Worker }
1449*8fb009dcSAndroid Build Coastguard Worker
1450*8fb009dcSAndroid Build Coastguard Worker // poly3_from_poly_checked sets |*out| to |in|, which has coefficients in {0, 1,
1451*8fb009dcSAndroid Build Coastguard Worker // Q-1}. It returns a mask indicating whether all coefficients were found to be
1452*8fb009dcSAndroid Build Coastguard Worker // in that set.
poly3_from_poly_checked(struct poly3 * out,const struct poly * in)1453*8fb009dcSAndroid Build Coastguard Worker static crypto_word_t poly3_from_poly_checked(struct poly3 *out,
1454*8fb009dcSAndroid Build Coastguard Worker const struct poly *in) {
1455*8fb009dcSAndroid Build Coastguard Worker crypto_word_t *words_s = out->s.v;
1456*8fb009dcSAndroid Build Coastguard Worker crypto_word_t *words_a = out->a.v;
1457*8fb009dcSAndroid Build Coastguard Worker crypto_word_t s = 0;
1458*8fb009dcSAndroid Build Coastguard Worker crypto_word_t a = 0;
1459*8fb009dcSAndroid Build Coastguard Worker unsigned shift = 0;
1460*8fb009dcSAndroid Build Coastguard Worker crypto_word_t ok = CONSTTIME_TRUE_W;
1461*8fb009dcSAndroid Build Coastguard Worker
1462*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i++) {
1463*8fb009dcSAndroid Build Coastguard Worker const uint16_t v = in->v[i];
1464*8fb009dcSAndroid Build Coastguard Worker // Maps {0, 1, Q-1} to {0, 1, 2}.
1465*8fb009dcSAndroid Build Coastguard Worker uint16_t mod3 = v & 3;
1466*8fb009dcSAndroid Build Coastguard Worker mod3 ^= mod3 >> 1;
1467*8fb009dcSAndroid Build Coastguard Worker const uint16_t expected = (uint16_t)((~((mod3 >> 1) - 1)) | mod3) % Q;
1468*8fb009dcSAndroid Build Coastguard Worker ok &= constant_time_eq_w(v, expected);
1469*8fb009dcSAndroid Build Coastguard Worker
1470*8fb009dcSAndroid Build Coastguard Worker s >>= 1;
1471*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t s_bit = (crypto_word_t)(mod3 & 2)
1472*8fb009dcSAndroid Build Coastguard Worker << (BITS_PER_WORD - 2);
1473*8fb009dcSAndroid Build Coastguard Worker s |= s_bit;
1474*8fb009dcSAndroid Build Coastguard Worker a >>= 1;
1475*8fb009dcSAndroid Build Coastguard Worker a |= s_bit | (crypto_word_t)(mod3 & 1) << (BITS_PER_WORD - 1);
1476*8fb009dcSAndroid Build Coastguard Worker shift++;
1477*8fb009dcSAndroid Build Coastguard Worker
1478*8fb009dcSAndroid Build Coastguard Worker if (shift == BITS_PER_WORD) {
1479*8fb009dcSAndroid Build Coastguard Worker *words_s = s;
1480*8fb009dcSAndroid Build Coastguard Worker words_s++;
1481*8fb009dcSAndroid Build Coastguard Worker *words_a = a;
1482*8fb009dcSAndroid Build Coastguard Worker words_a++;
1483*8fb009dcSAndroid Build Coastguard Worker s = a = 0;
1484*8fb009dcSAndroid Build Coastguard Worker shift = 0;
1485*8fb009dcSAndroid Build Coastguard Worker }
1486*8fb009dcSAndroid Build Coastguard Worker }
1487*8fb009dcSAndroid Build Coastguard Worker
1488*8fb009dcSAndroid Build Coastguard Worker s >>= BITS_PER_WORD - shift;
1489*8fb009dcSAndroid Build Coastguard Worker a >>= BITS_PER_WORD - shift;
1490*8fb009dcSAndroid Build Coastguard Worker *words_s = s;
1491*8fb009dcSAndroid Build Coastguard Worker *words_a = a;
1492*8fb009dcSAndroid Build Coastguard Worker
1493*8fb009dcSAndroid Build Coastguard Worker return ok;
1494*8fb009dcSAndroid Build Coastguard Worker }
1495*8fb009dcSAndroid Build Coastguard Worker
poly_from_poly2(struct poly * out,const struct poly2 * in)1496*8fb009dcSAndroid Build Coastguard Worker static void poly_from_poly2(struct poly *out, const struct poly2 *in) {
1497*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t *words = in->v;
1498*8fb009dcSAndroid Build Coastguard Worker unsigned shift = 0;
1499*8fb009dcSAndroid Build Coastguard Worker crypto_word_t word = *words;
1500*8fb009dcSAndroid Build Coastguard Worker
1501*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i++) {
1502*8fb009dcSAndroid Build Coastguard Worker out->v[i] = word & 1;
1503*8fb009dcSAndroid Build Coastguard Worker word >>= 1;
1504*8fb009dcSAndroid Build Coastguard Worker shift++;
1505*8fb009dcSAndroid Build Coastguard Worker
1506*8fb009dcSAndroid Build Coastguard Worker if (shift == BITS_PER_WORD) {
1507*8fb009dcSAndroid Build Coastguard Worker words++;
1508*8fb009dcSAndroid Build Coastguard Worker word = *words;
1509*8fb009dcSAndroid Build Coastguard Worker shift = 0;
1510*8fb009dcSAndroid Build Coastguard Worker }
1511*8fb009dcSAndroid Build Coastguard Worker }
1512*8fb009dcSAndroid Build Coastguard Worker
1513*8fb009dcSAndroid Build Coastguard Worker poly_normalize(out);
1514*8fb009dcSAndroid Build Coastguard Worker }
1515*8fb009dcSAndroid Build Coastguard Worker
poly_from_poly3(struct poly * out,const struct poly3 * in)1516*8fb009dcSAndroid Build Coastguard Worker static void poly_from_poly3(struct poly *out, const struct poly3 *in) {
1517*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t *words_s = in->s.v;
1518*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t *words_a = in->a.v;
1519*8fb009dcSAndroid Build Coastguard Worker crypto_word_t word_s = ~(*words_s);
1520*8fb009dcSAndroid Build Coastguard Worker crypto_word_t word_a = *words_a;
1521*8fb009dcSAndroid Build Coastguard Worker unsigned shift = 0;
1522*8fb009dcSAndroid Build Coastguard Worker
1523*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i++) {
1524*8fb009dcSAndroid Build Coastguard Worker out->v[i] = (uint16_t)(word_s & 1) - 1;
1525*8fb009dcSAndroid Build Coastguard Worker out->v[i] |= word_a & 1;
1526*8fb009dcSAndroid Build Coastguard Worker word_s >>= 1;
1527*8fb009dcSAndroid Build Coastguard Worker word_a >>= 1;
1528*8fb009dcSAndroid Build Coastguard Worker shift++;
1529*8fb009dcSAndroid Build Coastguard Worker
1530*8fb009dcSAndroid Build Coastguard Worker if (shift == BITS_PER_WORD) {
1531*8fb009dcSAndroid Build Coastguard Worker words_s++;
1532*8fb009dcSAndroid Build Coastguard Worker words_a++;
1533*8fb009dcSAndroid Build Coastguard Worker word_s = ~(*words_s);
1534*8fb009dcSAndroid Build Coastguard Worker word_a = *words_a;
1535*8fb009dcSAndroid Build Coastguard Worker shift = 0;
1536*8fb009dcSAndroid Build Coastguard Worker }
1537*8fb009dcSAndroid Build Coastguard Worker }
1538*8fb009dcSAndroid Build Coastguard Worker
1539*8fb009dcSAndroid Build Coastguard Worker poly_normalize(out);
1540*8fb009dcSAndroid Build Coastguard Worker }
1541*8fb009dcSAndroid Build Coastguard Worker
1542*8fb009dcSAndroid Build Coastguard Worker // Polynomial inversion
1543*8fb009dcSAndroid Build Coastguard Worker // --------------------
1544*8fb009dcSAndroid Build Coastguard Worker
1545*8fb009dcSAndroid Build Coastguard Worker // poly_invert_mod2 sets |*out| to |in^-1| (i.e. such that |*out|×|in| = 1 mod
1546*8fb009dcSAndroid Build Coastguard Worker // Φ(N)), all mod 2. This isn't useful in itself, but is part of doing inversion
1547*8fb009dcSAndroid Build Coastguard Worker // mod Q.
poly_invert_mod2(struct poly * out,const struct poly * in)1548*8fb009dcSAndroid Build Coastguard Worker static void poly_invert_mod2(struct poly *out, const struct poly *in) {
1549*8fb009dcSAndroid Build Coastguard Worker // This algorithm is taken from section 7.1 of [SAFEGCD].
1550*8fb009dcSAndroid Build Coastguard Worker struct poly2 v, r, f, g;
1551*8fb009dcSAndroid Build Coastguard Worker
1552*8fb009dcSAndroid Build Coastguard Worker // v = 0
1553*8fb009dcSAndroid Build Coastguard Worker poly2_zero(&v);
1554*8fb009dcSAndroid Build Coastguard Worker // r = 1
1555*8fb009dcSAndroid Build Coastguard Worker poly2_zero(&r);
1556*8fb009dcSAndroid Build Coastguard Worker r.v[0] = 1;
1557*8fb009dcSAndroid Build Coastguard Worker // f = all ones.
1558*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memset(&f, 0xff, sizeof(struct poly2));
1559*8fb009dcSAndroid Build Coastguard Worker f.v[WORDS_PER_POLY - 1] >>= BITS_PER_WORD - BITS_IN_LAST_WORD;
1560*8fb009dcSAndroid Build Coastguard Worker // g is the reversal of |in|.
1561*8fb009dcSAndroid Build Coastguard Worker poly2_from_poly(&g, in);
1562*8fb009dcSAndroid Build Coastguard Worker poly2_mod_phiN(&g);
1563*8fb009dcSAndroid Build Coastguard Worker poly2_reverse_700(&g, &g);
1564*8fb009dcSAndroid Build Coastguard Worker int delta = 1;
1565*8fb009dcSAndroid Build Coastguard Worker
1566*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < (2*(N-1)) - 1; i++) {
1567*8fb009dcSAndroid Build Coastguard Worker poly2_lshift1(&v);
1568*8fb009dcSAndroid Build Coastguard Worker
1569*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t delta_sign_bit = (delta >> (sizeof(delta) * 8 - 1)) & 1;
1570*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t delta_is_non_negative = delta_sign_bit - 1;
1571*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t delta_is_non_zero = ~constant_time_is_zero_w(delta);
1572*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t g_has_constant_term = lsb_to_all(g.v[0]);
1573*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t mask =
1574*8fb009dcSAndroid Build Coastguard Worker g_has_constant_term & delta_is_non_negative & delta_is_non_zero;
1575*8fb009dcSAndroid Build Coastguard Worker
1576*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t c = lsb_to_all(f.v[0] & g.v[0]);
1577*8fb009dcSAndroid Build Coastguard Worker
1578*8fb009dcSAndroid Build Coastguard Worker delta = constant_time_select_int(mask, -delta, delta);
1579*8fb009dcSAndroid Build Coastguard Worker delta++;
1580*8fb009dcSAndroid Build Coastguard Worker
1581*8fb009dcSAndroid Build Coastguard Worker poly2_cswap(&f, &g, mask);
1582*8fb009dcSAndroid Build Coastguard Worker poly2_fmadd(&g, &f, c);
1583*8fb009dcSAndroid Build Coastguard Worker poly2_rshift1(&g);
1584*8fb009dcSAndroid Build Coastguard Worker
1585*8fb009dcSAndroid Build Coastguard Worker poly2_cswap(&v, &r, mask);
1586*8fb009dcSAndroid Build Coastguard Worker poly2_fmadd(&r, &v, c);
1587*8fb009dcSAndroid Build Coastguard Worker }
1588*8fb009dcSAndroid Build Coastguard Worker
1589*8fb009dcSAndroid Build Coastguard Worker assert(delta == 0);
1590*8fb009dcSAndroid Build Coastguard Worker assert(f.v[0] & 1);
1591*8fb009dcSAndroid Build Coastguard Worker poly2_reverse_700(&v, &v);
1592*8fb009dcSAndroid Build Coastguard Worker poly_from_poly2(out, &v);
1593*8fb009dcSAndroid Build Coastguard Worker poly_assert_normalized(out);
1594*8fb009dcSAndroid Build Coastguard Worker }
1595*8fb009dcSAndroid Build Coastguard Worker
1596*8fb009dcSAndroid Build Coastguard Worker // poly_invert sets |*out| to |in^-1| (i.e. such that |*out|×|in| = 1 mod Φ(N)).
poly_invert(struct POLY_MUL_SCRATCH * scratch,struct poly * out,const struct poly * in)1597*8fb009dcSAndroid Build Coastguard Worker static void poly_invert(struct POLY_MUL_SCRATCH *scratch, struct poly *out,
1598*8fb009dcSAndroid Build Coastguard Worker const struct poly *in) {
1599*8fb009dcSAndroid Build Coastguard Worker // Inversion mod Q, which is done based on the result of inverting mod
1600*8fb009dcSAndroid Build Coastguard Worker // 2. See [NTRUTN14] paper, bottom of page two.
1601*8fb009dcSAndroid Build Coastguard Worker struct poly a, *b, tmp;
1602*8fb009dcSAndroid Build Coastguard Worker
1603*8fb009dcSAndroid Build Coastguard Worker // a = -in.
1604*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i++) {
1605*8fb009dcSAndroid Build Coastguard Worker a.v[i] = -in->v[i];
1606*8fb009dcSAndroid Build Coastguard Worker }
1607*8fb009dcSAndroid Build Coastguard Worker poly_normalize(&a);
1608*8fb009dcSAndroid Build Coastguard Worker
1609*8fb009dcSAndroid Build Coastguard Worker // b = in^-1 mod 2.
1610*8fb009dcSAndroid Build Coastguard Worker b = out;
1611*8fb009dcSAndroid Build Coastguard Worker poly_invert_mod2(b, in);
1612*8fb009dcSAndroid Build Coastguard Worker
1613*8fb009dcSAndroid Build Coastguard Worker // We are working mod Q=2**13 and we need to iterate ceil(log_2(13))
1614*8fb009dcSAndroid Build Coastguard Worker // times, which is four.
1615*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < 4; i++) {
1616*8fb009dcSAndroid Build Coastguard Worker poly_mul(scratch, &tmp, &a, b);
1617*8fb009dcSAndroid Build Coastguard Worker tmp.v[0] += 2;
1618*8fb009dcSAndroid Build Coastguard Worker poly_mul(scratch, b, b, &tmp);
1619*8fb009dcSAndroid Build Coastguard Worker }
1620*8fb009dcSAndroid Build Coastguard Worker
1621*8fb009dcSAndroid Build Coastguard Worker poly_assert_normalized(out);
1622*8fb009dcSAndroid Build Coastguard Worker }
1623*8fb009dcSAndroid Build Coastguard Worker
1624*8fb009dcSAndroid Build Coastguard Worker // Marshal and unmarshal functions for various basic types.
1625*8fb009dcSAndroid Build Coastguard Worker // --------------------------------------------------------
1626*8fb009dcSAndroid Build Coastguard Worker
1627*8fb009dcSAndroid Build Coastguard Worker #define POLY_BYTES 1138
1628*8fb009dcSAndroid Build Coastguard Worker
1629*8fb009dcSAndroid Build Coastguard Worker // poly_marshal serialises all but the final coefficient of |in| to |out|.
poly_marshal(uint8_t out[POLY_BYTES],const struct poly * in)1630*8fb009dcSAndroid Build Coastguard Worker static void poly_marshal(uint8_t out[POLY_BYTES], const struct poly *in) {
1631*8fb009dcSAndroid Build Coastguard Worker const uint16_t *p = in->v;
1632*8fb009dcSAndroid Build Coastguard Worker
1633*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < N / 8; i++) {
1634*8fb009dcSAndroid Build Coastguard Worker out[0] = p[0];
1635*8fb009dcSAndroid Build Coastguard Worker out[1] = (0x1f & (p[0] >> 8)) | ((p[1] & 0x07) << 5);
1636*8fb009dcSAndroid Build Coastguard Worker out[2] = p[1] >> 3;
1637*8fb009dcSAndroid Build Coastguard Worker out[3] = (3 & (p[1] >> 11)) | ((p[2] & 0x3f) << 2);
1638*8fb009dcSAndroid Build Coastguard Worker out[4] = (0x7f & (p[2] >> 6)) | ((p[3] & 0x01) << 7);
1639*8fb009dcSAndroid Build Coastguard Worker out[5] = p[3] >> 1;
1640*8fb009dcSAndroid Build Coastguard Worker out[6] = (0xf & (p[3] >> 9)) | ((p[4] & 0x0f) << 4);
1641*8fb009dcSAndroid Build Coastguard Worker out[7] = p[4] >> 4;
1642*8fb009dcSAndroid Build Coastguard Worker out[8] = (1 & (p[4] >> 12)) | ((p[5] & 0x7f) << 1);
1643*8fb009dcSAndroid Build Coastguard Worker out[9] = (0x3f & (p[5] >> 7)) | ((p[6] & 0x03) << 6);
1644*8fb009dcSAndroid Build Coastguard Worker out[10] = p[6] >> 2;
1645*8fb009dcSAndroid Build Coastguard Worker out[11] = (7 & (p[6] >> 10)) | ((p[7] & 0x1f) << 3);
1646*8fb009dcSAndroid Build Coastguard Worker out[12] = p[7] >> 5;
1647*8fb009dcSAndroid Build Coastguard Worker
1648*8fb009dcSAndroid Build Coastguard Worker p += 8;
1649*8fb009dcSAndroid Build Coastguard Worker out += 13;
1650*8fb009dcSAndroid Build Coastguard Worker }
1651*8fb009dcSAndroid Build Coastguard Worker
1652*8fb009dcSAndroid Build Coastguard Worker // There are four remaining values.
1653*8fb009dcSAndroid Build Coastguard Worker out[0] = p[0];
1654*8fb009dcSAndroid Build Coastguard Worker out[1] = (0x1f & (p[0] >> 8)) | ((p[1] & 0x07) << 5);
1655*8fb009dcSAndroid Build Coastguard Worker out[2] = p[1] >> 3;
1656*8fb009dcSAndroid Build Coastguard Worker out[3] = (3 & (p[1] >> 11)) | ((p[2] & 0x3f) << 2);
1657*8fb009dcSAndroid Build Coastguard Worker out[4] = (0x7f & (p[2] >> 6)) | ((p[3] & 0x01) << 7);
1658*8fb009dcSAndroid Build Coastguard Worker out[5] = p[3] >> 1;
1659*8fb009dcSAndroid Build Coastguard Worker out[6] = 0xf & (p[3] >> 9);
1660*8fb009dcSAndroid Build Coastguard Worker }
1661*8fb009dcSAndroid Build Coastguard Worker
1662*8fb009dcSAndroid Build Coastguard Worker // poly_unmarshal parses the output of |poly_marshal| and sets |out| such that
1663*8fb009dcSAndroid Build Coastguard Worker // all but the final coefficients match, and the final coefficient is calculated
1664*8fb009dcSAndroid Build Coastguard Worker // such that evaluating |out| at one results in zero. It returns one on success
1665*8fb009dcSAndroid Build Coastguard Worker // or zero if |in| is an invalid encoding.
poly_unmarshal(struct poly * out,const uint8_t in[POLY_BYTES])1666*8fb009dcSAndroid Build Coastguard Worker static int poly_unmarshal(struct poly *out, const uint8_t in[POLY_BYTES]) {
1667*8fb009dcSAndroid Build Coastguard Worker uint16_t *p = out->v;
1668*8fb009dcSAndroid Build Coastguard Worker
1669*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < N / 8; i++) {
1670*8fb009dcSAndroid Build Coastguard Worker p[0] = (uint16_t)(in[0]) | (uint16_t)(in[1] & 0x1f) << 8;
1671*8fb009dcSAndroid Build Coastguard Worker p[1] = (uint16_t)(in[1] >> 5) | (uint16_t)(in[2]) << 3 |
1672*8fb009dcSAndroid Build Coastguard Worker (uint16_t)(in[3] & 3) << 11;
1673*8fb009dcSAndroid Build Coastguard Worker p[2] = (uint16_t)(in[3] >> 2) | (uint16_t)(in[4] & 0x7f) << 6;
1674*8fb009dcSAndroid Build Coastguard Worker p[3] = (uint16_t)(in[4] >> 7) | (uint16_t)(in[5]) << 1 |
1675*8fb009dcSAndroid Build Coastguard Worker (uint16_t)(in[6] & 0xf) << 9;
1676*8fb009dcSAndroid Build Coastguard Worker p[4] = (uint16_t)(in[6] >> 4) | (uint16_t)(in[7]) << 4 |
1677*8fb009dcSAndroid Build Coastguard Worker (uint16_t)(in[8] & 1) << 12;
1678*8fb009dcSAndroid Build Coastguard Worker p[5] = (uint16_t)(in[8] >> 1) | (uint16_t)(in[9] & 0x3f) << 7;
1679*8fb009dcSAndroid Build Coastguard Worker p[6] = (uint16_t)(in[9] >> 6) | (uint16_t)(in[10]) << 2 |
1680*8fb009dcSAndroid Build Coastguard Worker (uint16_t)(in[11] & 7) << 10;
1681*8fb009dcSAndroid Build Coastguard Worker p[7] = (uint16_t)(in[11] >> 3) | (uint16_t)(in[12]) << 5;
1682*8fb009dcSAndroid Build Coastguard Worker
1683*8fb009dcSAndroid Build Coastguard Worker p += 8;
1684*8fb009dcSAndroid Build Coastguard Worker in += 13;
1685*8fb009dcSAndroid Build Coastguard Worker }
1686*8fb009dcSAndroid Build Coastguard Worker
1687*8fb009dcSAndroid Build Coastguard Worker // There are four coefficients remaining.
1688*8fb009dcSAndroid Build Coastguard Worker p[0] = (uint16_t)(in[0]) | (uint16_t)(in[1] & 0x1f) << 8;
1689*8fb009dcSAndroid Build Coastguard Worker p[1] = (uint16_t)(in[1] >> 5) | (uint16_t)(in[2]) << 3 |
1690*8fb009dcSAndroid Build Coastguard Worker (uint16_t)(in[3] & 3) << 11;
1691*8fb009dcSAndroid Build Coastguard Worker p[2] = (uint16_t)(in[3] >> 2) | (uint16_t)(in[4] & 0x7f) << 6;
1692*8fb009dcSAndroid Build Coastguard Worker p[3] = (uint16_t)(in[4] >> 7) | (uint16_t)(in[5]) << 1 |
1693*8fb009dcSAndroid Build Coastguard Worker (uint16_t)(in[6] & 0xf) << 9;
1694*8fb009dcSAndroid Build Coastguard Worker
1695*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N - 1; i++) {
1696*8fb009dcSAndroid Build Coastguard Worker out->v[i] = (int16_t)(out->v[i] << 3) >> 3;
1697*8fb009dcSAndroid Build Coastguard Worker }
1698*8fb009dcSAndroid Build Coastguard Worker
1699*8fb009dcSAndroid Build Coastguard Worker // There are four unused bits in the last byte. We require them to be zero.
1700*8fb009dcSAndroid Build Coastguard Worker if ((in[6] & 0xf0) != 0) {
1701*8fb009dcSAndroid Build Coastguard Worker return 0;
1702*8fb009dcSAndroid Build Coastguard Worker }
1703*8fb009dcSAndroid Build Coastguard Worker
1704*8fb009dcSAndroid Build Coastguard Worker // Set the final coefficient as specifed in [HRSSNIST] 1.9.2 step 6.
1705*8fb009dcSAndroid Build Coastguard Worker uint32_t sum = 0;
1706*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < N - 1; i++) {
1707*8fb009dcSAndroid Build Coastguard Worker sum += out->v[i];
1708*8fb009dcSAndroid Build Coastguard Worker }
1709*8fb009dcSAndroid Build Coastguard Worker
1710*8fb009dcSAndroid Build Coastguard Worker out->v[N - 1] = (uint16_t)(0u - sum);
1711*8fb009dcSAndroid Build Coastguard Worker poly_normalize(out);
1712*8fb009dcSAndroid Build Coastguard Worker
1713*8fb009dcSAndroid Build Coastguard Worker return 1;
1714*8fb009dcSAndroid Build Coastguard Worker }
1715*8fb009dcSAndroid Build Coastguard Worker
// mod3_from_modQ maps {0, 1, Q-1, 65535} -> {0, 1, 2, 2} by looking only at
// the two least-significant bits of |v|. Note that |v| may have an invalid
// value when processing attacker-controlled inputs; no check is made for that
// here.
static uint16_t mod3_from_modQ(uint16_t v) {
  const uint16_t low_bits = v & 3;
  return low_bits ^ (low_bits >> 1);
}
1722*8fb009dcSAndroid Build Coastguard Worker
1723*8fb009dcSAndroid Build Coastguard Worker // poly_marshal_mod3 marshals |in| to |out| where the coefficients of |in| are
1724*8fb009dcSAndroid Build Coastguard Worker // all in {0, 1, Q-1, 65535} and |in| is mod Φ(N). (Note that coefficients may
1725*8fb009dcSAndroid Build Coastguard Worker // have invalid values when processing attacker-controlled inputs.)
poly_marshal_mod3(uint8_t out[HRSS_POLY3_BYTES],const struct poly * in)1726*8fb009dcSAndroid Build Coastguard Worker static void poly_marshal_mod3(uint8_t out[HRSS_POLY3_BYTES],
1727*8fb009dcSAndroid Build Coastguard Worker const struct poly *in) {
1728*8fb009dcSAndroid Build Coastguard Worker const uint16_t *coeffs = in->v;
1729*8fb009dcSAndroid Build Coastguard Worker
1730*8fb009dcSAndroid Build Coastguard Worker // Only 700 coefficients are marshaled because in[700] must be zero.
1731*8fb009dcSAndroid Build Coastguard Worker assert(coeffs[N-1] == 0);
1732*8fb009dcSAndroid Build Coastguard Worker
1733*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < HRSS_POLY3_BYTES; i++) {
1734*8fb009dcSAndroid Build Coastguard Worker const uint16_t coeffs0 = mod3_from_modQ(coeffs[0]);
1735*8fb009dcSAndroid Build Coastguard Worker const uint16_t coeffs1 = mod3_from_modQ(coeffs[1]);
1736*8fb009dcSAndroid Build Coastguard Worker const uint16_t coeffs2 = mod3_from_modQ(coeffs[2]);
1737*8fb009dcSAndroid Build Coastguard Worker const uint16_t coeffs3 = mod3_from_modQ(coeffs[3]);
1738*8fb009dcSAndroid Build Coastguard Worker const uint16_t coeffs4 = mod3_from_modQ(coeffs[4]);
1739*8fb009dcSAndroid Build Coastguard Worker out[i] = coeffs0 + coeffs1 * 3 + coeffs2 * 9 + coeffs3 * 27 + coeffs4 * 81;
1740*8fb009dcSAndroid Build Coastguard Worker coeffs += 5;
1741*8fb009dcSAndroid Build Coastguard Worker }
1742*8fb009dcSAndroid Build Coastguard Worker }
1743*8fb009dcSAndroid Build Coastguard Worker
// HRSS-specific functions
// -----------------------
1746*8fb009dcSAndroid Build Coastguard Worker
1747*8fb009dcSAndroid Build Coastguard Worker // poly_short_sample samples a vector of values in {0xffff (i.e. -1), 0, 1}.
1748*8fb009dcSAndroid Build Coastguard Worker // This is the same action as the algorithm in [HRSSNIST] section 1.8.1, but
1749*8fb009dcSAndroid Build Coastguard Worker // with HRSS-SXY the sampling algorithm is now a private detail of the
1750*8fb009dcSAndroid Build Coastguard Worker // implementation (previously it had to match between two parties). This
1751*8fb009dcSAndroid Build Coastguard Worker // function uses that freedom to implement a flatter distribution of values.
poly_short_sample(struct poly * out,const uint8_t in[HRSS_SAMPLE_BYTES])1752*8fb009dcSAndroid Build Coastguard Worker static void poly_short_sample(struct poly *out,
1753*8fb009dcSAndroid Build Coastguard Worker const uint8_t in[HRSS_SAMPLE_BYTES]) {
1754*8fb009dcSAndroid Build Coastguard Worker static_assert(HRSS_SAMPLE_BYTES == N - 1, "HRSS_SAMPLE_BYTES incorrect");
1755*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < N - 1; i++) {
1756*8fb009dcSAndroid Build Coastguard Worker uint16_t v = mod3(in[i]);
1757*8fb009dcSAndroid Build Coastguard Worker // Map {0, 1, 2} -> {0, 1, 0xffff}
1758*8fb009dcSAndroid Build Coastguard Worker v |= ((v >> 1) ^ 1) - 1;
1759*8fb009dcSAndroid Build Coastguard Worker out->v[i] = v;
1760*8fb009dcSAndroid Build Coastguard Worker }
1761*8fb009dcSAndroid Build Coastguard Worker out->v[N - 1] = 0;
1762*8fb009dcSAndroid Build Coastguard Worker poly_normalize(out);
1763*8fb009dcSAndroid Build Coastguard Worker }
1764*8fb009dcSAndroid Build Coastguard Worker
1765*8fb009dcSAndroid Build Coastguard Worker // poly_short_sample_plus performs the T+ sample as defined in [HRSSNIST],
1766*8fb009dcSAndroid Build Coastguard Worker // section 1.8.2.
poly_short_sample_plus(struct poly * out,const uint8_t in[HRSS_SAMPLE_BYTES])1767*8fb009dcSAndroid Build Coastguard Worker static void poly_short_sample_plus(struct poly *out,
1768*8fb009dcSAndroid Build Coastguard Worker const uint8_t in[HRSS_SAMPLE_BYTES]) {
1769*8fb009dcSAndroid Build Coastguard Worker poly_short_sample(out, in);
1770*8fb009dcSAndroid Build Coastguard Worker
1771*8fb009dcSAndroid Build Coastguard Worker // sum (and the product in the for loop) will overflow. But that's fine
1772*8fb009dcSAndroid Build Coastguard Worker // because |sum| is bound by +/- (N-2), and N < 2^15 so it works out.
1773*8fb009dcSAndroid Build Coastguard Worker uint16_t sum = 0;
1774*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N - 2; i++) {
1775*8fb009dcSAndroid Build Coastguard Worker sum += (unsigned) out->v[i] * out->v[i + 1];
1776*8fb009dcSAndroid Build Coastguard Worker }
1777*8fb009dcSAndroid Build Coastguard Worker
1778*8fb009dcSAndroid Build Coastguard Worker // If the sum is negative, flip the sign of even-positioned coefficients. (See
1779*8fb009dcSAndroid Build Coastguard Worker // page 8 of [HRSS].)
1780*8fb009dcSAndroid Build Coastguard Worker sum = ((int16_t) sum) >> 15;
1781*8fb009dcSAndroid Build Coastguard Worker const uint16_t scale = sum | (~sum & 1);
1782*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i += 2) {
1783*8fb009dcSAndroid Build Coastguard Worker out->v[i] = (unsigned) out->v[i] * scale;
1784*8fb009dcSAndroid Build Coastguard Worker }
1785*8fb009dcSAndroid Build Coastguard Worker poly_assert_normalized(out);
1786*8fb009dcSAndroid Build Coastguard Worker }
1787*8fb009dcSAndroid Build Coastguard Worker
1788*8fb009dcSAndroid Build Coastguard Worker // poly_lift computes the function discussed in [HRSS], appendix B.
poly_lift(struct poly * out,const struct poly * a)1789*8fb009dcSAndroid Build Coastguard Worker static void poly_lift(struct poly *out, const struct poly *a) {
1790*8fb009dcSAndroid Build Coastguard Worker // We wish to calculate a/(-1) mod Φ(N) over GF(3), where Φ(N) is the
1791*8fb009dcSAndroid Build Coastguard Worker // Nth cyclotomic polynomial, i.e. 1 + + … + ^700 (since N is prime).
1792*8fb009dcSAndroid Build Coastguard Worker
1793*8fb009dcSAndroid Build Coastguard Worker // 1/(-1) has a fairly basic structure that we can exploit to speed this up:
1794*8fb009dcSAndroid Build Coastguard Worker //
1795*8fb009dcSAndroid Build Coastguard Worker // R.<x> = PolynomialRing(GF(3)…)
1796*8fb009dcSAndroid Build Coastguard Worker // inv = R.cyclotomic_polynomial(1).inverse_mod(R.cyclotomic_polynomial(n))
1797*8fb009dcSAndroid Build Coastguard Worker // list(inv)[:15]
1798*8fb009dcSAndroid Build Coastguard Worker // [1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2]
1799*8fb009dcSAndroid Build Coastguard Worker //
1800*8fb009dcSAndroid Build Coastguard Worker // This three-element pattern of coefficients repeats for the whole
1801*8fb009dcSAndroid Build Coastguard Worker // polynomial.
1802*8fb009dcSAndroid Build Coastguard Worker //
1803*8fb009dcSAndroid Build Coastguard Worker // Next define the overbar operator such that z̅ = z[0] +
1804*8fb009dcSAndroid Build Coastguard Worker // reverse(z[1:]). (Index zero of a polynomial here is the coefficient
1805*8fb009dcSAndroid Build Coastguard Worker // of the constant term. So index one is the coefficient of and so
1806*8fb009dcSAndroid Build Coastguard Worker // on.)
1807*8fb009dcSAndroid Build Coastguard Worker //
1808*8fb009dcSAndroid Build Coastguard Worker // A less odd way to define this is to see that z̅ negates the indexes,
1809*8fb009dcSAndroid Build Coastguard Worker // so z̅[0] = z[-0], z̅[1] = z[-1] and so on.
1810*8fb009dcSAndroid Build Coastguard Worker //
1811*8fb009dcSAndroid Build Coastguard Worker // The use of z̅ is that, when working mod (^701 - 1), vz[0] = <v,
1812*8fb009dcSAndroid Build Coastguard Worker // z̅>, vz[1] = <v, z̅>, …. (Where <a, b> is the inner product: the sum
1813*8fb009dcSAndroid Build Coastguard Worker // of the point-wise products.) Although we calculated the inverse mod
1814*8fb009dcSAndroid Build Coastguard Worker // Φ(N), we can work mod (^N - 1) and reduce mod Φ(N) at the end.
1815*8fb009dcSAndroid Build Coastguard Worker // (That's because (^N - 1) is a multiple of Φ(N).)
1816*8fb009dcSAndroid Build Coastguard Worker //
1817*8fb009dcSAndroid Build Coastguard Worker // When working mod (^N - 1), multiplication by is a right-rotation
1818*8fb009dcSAndroid Build Coastguard Worker // of the list of coefficients.
1819*8fb009dcSAndroid Build Coastguard Worker //
1820*8fb009dcSAndroid Build Coastguard Worker // Thus we can consider what the pattern of z̅, z̅, ^2z̅, … looks like:
1821*8fb009dcSAndroid Build Coastguard Worker //
1822*8fb009dcSAndroid Build Coastguard Worker // def reverse(xs):
1823*8fb009dcSAndroid Build Coastguard Worker // suffix = list(xs[1:])
1824*8fb009dcSAndroid Build Coastguard Worker // suffix.reverse()
1825*8fb009dcSAndroid Build Coastguard Worker // return [xs[0]] + suffix
1826*8fb009dcSAndroid Build Coastguard Worker //
1827*8fb009dcSAndroid Build Coastguard Worker // def rotate(xs):
1828*8fb009dcSAndroid Build Coastguard Worker // return [xs[-1]] + xs[:-1]
1829*8fb009dcSAndroid Build Coastguard Worker //
1830*8fb009dcSAndroid Build Coastguard Worker // zoverbar = reverse(list(inv) + [0])
1831*8fb009dcSAndroid Build Coastguard Worker // xzoverbar = rotate(reverse(list(inv) + [0]))
1832*8fb009dcSAndroid Build Coastguard Worker // x2zoverbar = rotate(rotate(reverse(list(inv) + [0])))
1833*8fb009dcSAndroid Build Coastguard Worker //
1834*8fb009dcSAndroid Build Coastguard Worker // zoverbar[:15]
1835*8fb009dcSAndroid Build Coastguard Worker // [1, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1]
1836*8fb009dcSAndroid Build Coastguard Worker // xzoverbar[:15]
1837*8fb009dcSAndroid Build Coastguard Worker // [0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0]
1838*8fb009dcSAndroid Build Coastguard Worker // x2zoverbar[:15]
1839*8fb009dcSAndroid Build Coastguard Worker // [2, 0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]
1840*8fb009dcSAndroid Build Coastguard Worker //
1841*8fb009dcSAndroid Build Coastguard Worker // (For a formula for z̅, see lemma two of appendix B.)
1842*8fb009dcSAndroid Build Coastguard Worker //
1843*8fb009dcSAndroid Build Coastguard Worker // After the first three elements have been taken care of, all then have
1844*8fb009dcSAndroid Build Coastguard Worker // a repeating three-element cycle. The next value (^3z̅) involves
1845*8fb009dcSAndroid Build Coastguard Worker // three rotations of the first pattern, thus the three-element cycle
1846*8fb009dcSAndroid Build Coastguard Worker // lines up. However, the discontinuity in the first three elements
1847*8fb009dcSAndroid Build Coastguard Worker // obviously moves to a different position. Consider the difference
1848*8fb009dcSAndroid Build Coastguard Worker // between ^3z̅ and z̅:
1849*8fb009dcSAndroid Build Coastguard Worker //
1850*8fb009dcSAndroid Build Coastguard Worker // [x-y for (x,y) in zip(zoverbar, x3zoverbar)][:15]
1851*8fb009dcSAndroid Build Coastguard Worker // [0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
1852*8fb009dcSAndroid Build Coastguard Worker //
1853*8fb009dcSAndroid Build Coastguard Worker // This pattern of differences is the same for all elements, although it
1854*8fb009dcSAndroid Build Coastguard Worker // obviously moves right with the rotations.
1855*8fb009dcSAndroid Build Coastguard Worker //
1856*8fb009dcSAndroid Build Coastguard Worker // From this, we reach algorithm eight of appendix B.
1857*8fb009dcSAndroid Build Coastguard Worker
1858*8fb009dcSAndroid Build Coastguard Worker // Handle the first three elements of the inner products.
1859*8fb009dcSAndroid Build Coastguard Worker out->v[0] = a->v[0] + a->v[2];
1860*8fb009dcSAndroid Build Coastguard Worker out->v[1] = a->v[1];
1861*8fb009dcSAndroid Build Coastguard Worker out->v[2] = -a->v[0] + a->v[2];
1862*8fb009dcSAndroid Build Coastguard Worker
1863*8fb009dcSAndroid Build Coastguard Worker // s0, s1, s2 are added into out->v[0], out->v[1], and out->v[2],
1864*8fb009dcSAndroid Build Coastguard Worker // respectively. We do not compute s1 because it's just -(s0 + s1).
1865*8fb009dcSAndroid Build Coastguard Worker uint16_t s0 = 0, s2 = 0;
1866*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 3; i < 699; i += 3) {
1867*8fb009dcSAndroid Build Coastguard Worker s0 += -a->v[i] + a->v[i + 2];
1868*8fb009dcSAndroid Build Coastguard Worker // s1 += a->v[i] - a->v[i + 1];
1869*8fb009dcSAndroid Build Coastguard Worker s2 += a->v[i + 1] - a->v[i + 2];
1870*8fb009dcSAndroid Build Coastguard Worker }
1871*8fb009dcSAndroid Build Coastguard Worker
1872*8fb009dcSAndroid Build Coastguard Worker // Handle the fact that the three-element pattern doesn't fill the
1873*8fb009dcSAndroid Build Coastguard Worker // polynomial exactly (since 701 isn't a multiple of three).
1874*8fb009dcSAndroid Build Coastguard Worker s0 -= a->v[699];
1875*8fb009dcSAndroid Build Coastguard Worker // s1 += a->v[699] - a->v[700];
1876*8fb009dcSAndroid Build Coastguard Worker s2 += a->v[700];
1877*8fb009dcSAndroid Build Coastguard Worker
1878*8fb009dcSAndroid Build Coastguard Worker // Note that s0 + s1 + s2 = 0.
1879*8fb009dcSAndroid Build Coastguard Worker out->v[0] += s0;
1880*8fb009dcSAndroid Build Coastguard Worker out->v[1] -= (s0 + s2); // = s1
1881*8fb009dcSAndroid Build Coastguard Worker out->v[2] += s2;
1882*8fb009dcSAndroid Build Coastguard Worker
1883*8fb009dcSAndroid Build Coastguard Worker // Calculate the remaining inner products by taking advantage of the
1884*8fb009dcSAndroid Build Coastguard Worker // fact that the pattern repeats every three cycles and the pattern of
1885*8fb009dcSAndroid Build Coastguard Worker // differences moves with the rotation.
1886*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 3; i < N; i++) {
1887*8fb009dcSAndroid Build Coastguard Worker out->v[i] = (out->v[i - 3] - (a->v[i - 2] + a->v[i - 1] + a->v[i]));
1888*8fb009dcSAndroid Build Coastguard Worker }
1889*8fb009dcSAndroid Build Coastguard Worker
1890*8fb009dcSAndroid Build Coastguard Worker // Reduce mod Φ(N) by subtracting a multiple of out[700] from every
1891*8fb009dcSAndroid Build Coastguard Worker // element and convert to mod Q. (See above about adding twice as
1892*8fb009dcSAndroid Build Coastguard Worker // subtraction.)
1893*8fb009dcSAndroid Build Coastguard Worker const crypto_word_t v = out->v[700];
1894*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i++) {
1895*8fb009dcSAndroid Build Coastguard Worker const uint16_t vi_mod3 = mod3(out->v[i] - v);
1896*8fb009dcSAndroid Build Coastguard Worker // Map {0, 1, 2} to {0, 1, 0xffff}.
1897*8fb009dcSAndroid Build Coastguard Worker out->v[i] = (~((vi_mod3 >> 1) - 1)) | vi_mod3;
1898*8fb009dcSAndroid Build Coastguard Worker }
1899*8fb009dcSAndroid Build Coastguard Worker
1900*8fb009dcSAndroid Build Coastguard Worker poly_mul_x_minus_1(out);
1901*8fb009dcSAndroid Build Coastguard Worker poly_normalize(out);
1902*8fb009dcSAndroid Build Coastguard Worker }
1903*8fb009dcSAndroid Build Coastguard Worker
1904*8fb009dcSAndroid Build Coastguard Worker struct public_key {
1905*8fb009dcSAndroid Build Coastguard Worker struct poly ph;
1906*8fb009dcSAndroid Build Coastguard Worker };
1907*8fb009dcSAndroid Build Coastguard Worker
1908*8fb009dcSAndroid Build Coastguard Worker struct private_key {
1909*8fb009dcSAndroid Build Coastguard Worker struct poly3 f, f_inverse;
1910*8fb009dcSAndroid Build Coastguard Worker struct poly ph_inverse;
1911*8fb009dcSAndroid Build Coastguard Worker uint8_t hmac_key[32];
1912*8fb009dcSAndroid Build Coastguard Worker };
1913*8fb009dcSAndroid Build Coastguard Worker
1914*8fb009dcSAndroid Build Coastguard Worker // public_key_from_external converts an external public key pointer into an
1915*8fb009dcSAndroid Build Coastguard Worker // internal one. Externally the alignment is only specified to be eight bytes
1916*8fb009dcSAndroid Build Coastguard Worker // but we need 16-byte alignment. We could annotate the external struct with
1917*8fb009dcSAndroid Build Coastguard Worker // that alignment but we can only assume that malloced pointers are 8-byte
1918*8fb009dcSAndroid Build Coastguard Worker // aligned in any case. (Even if the underlying malloc returns values with
1919*8fb009dcSAndroid Build Coastguard Worker // 16-byte alignment, |OPENSSL_malloc| will store an 8-byte size prefix and mess
1920*8fb009dcSAndroid Build Coastguard Worker // that up.)
public_key_from_external(struct HRSS_public_key * ext)1921*8fb009dcSAndroid Build Coastguard Worker static struct public_key *public_key_from_external(
1922*8fb009dcSAndroid Build Coastguard Worker struct HRSS_public_key *ext) {
1923*8fb009dcSAndroid Build Coastguard Worker static_assert(
1924*8fb009dcSAndroid Build Coastguard Worker sizeof(struct HRSS_public_key) >= sizeof(struct public_key) + 15,
1925*8fb009dcSAndroid Build Coastguard Worker "HRSS public key too small");
1926*8fb009dcSAndroid Build Coastguard Worker
1927*8fb009dcSAndroid Build Coastguard Worker return align_pointer(ext->opaque, 16);
1928*8fb009dcSAndroid Build Coastguard Worker }
1929*8fb009dcSAndroid Build Coastguard Worker
1930*8fb009dcSAndroid Build Coastguard Worker // private_key_from_external does the same thing as |public_key_from_external|,
1931*8fb009dcSAndroid Build Coastguard Worker // but for private keys. See the comment on that function about alignment
1932*8fb009dcSAndroid Build Coastguard Worker // issues.
private_key_from_external(struct HRSS_private_key * ext)1933*8fb009dcSAndroid Build Coastguard Worker static struct private_key *private_key_from_external(
1934*8fb009dcSAndroid Build Coastguard Worker struct HRSS_private_key *ext) {
1935*8fb009dcSAndroid Build Coastguard Worker static_assert(
1936*8fb009dcSAndroid Build Coastguard Worker sizeof(struct HRSS_private_key) >= sizeof(struct private_key) + 15,
1937*8fb009dcSAndroid Build Coastguard Worker "HRSS private key too small");
1938*8fb009dcSAndroid Build Coastguard Worker
1939*8fb009dcSAndroid Build Coastguard Worker return align_pointer(ext->opaque, 16);
1940*8fb009dcSAndroid Build Coastguard Worker }
1941*8fb009dcSAndroid Build Coastguard Worker
// malloc_align32 allocates |size| + 31 bytes with |OPENSSL_malloc| and returns
// a 32-byte-aligned pointer into that allocation. |*out_ptr| is set to the
// value that must be passed to |OPENSSL_free| to release it. On allocation
// failure it returns NULL and sets |*out_ptr| to NULL.
static void *malloc_align32(void **out_ptr, size_t size) {
  // Over-allocate so that an aligned address always exists within the buffer.
  void *const raw = OPENSSL_malloc(size + 31);
  *out_ptr = raw;
  if (raw == NULL) {
    return NULL;
  }

  return align_pointer(raw, 32);
}
1955*8fb009dcSAndroid Build Coastguard Worker
HRSS_generate_key(struct HRSS_public_key * out_pub,struct HRSS_private_key * out_priv,const uint8_t in[HRSS_SAMPLE_BYTES+HRSS_SAMPLE_BYTES+32])1956*8fb009dcSAndroid Build Coastguard Worker int HRSS_generate_key(
1957*8fb009dcSAndroid Build Coastguard Worker struct HRSS_public_key *out_pub, struct HRSS_private_key *out_priv,
1958*8fb009dcSAndroid Build Coastguard Worker const uint8_t in[HRSS_SAMPLE_BYTES + HRSS_SAMPLE_BYTES + 32]) {
1959*8fb009dcSAndroid Build Coastguard Worker struct public_key *pub = public_key_from_external(out_pub);
1960*8fb009dcSAndroid Build Coastguard Worker struct private_key *priv = private_key_from_external(out_priv);
1961*8fb009dcSAndroid Build Coastguard Worker
1962*8fb009dcSAndroid Build Coastguard Worker struct vars {
1963*8fb009dcSAndroid Build Coastguard Worker struct POLY_MUL_SCRATCH scratch;
1964*8fb009dcSAndroid Build Coastguard Worker struct poly f;
1965*8fb009dcSAndroid Build Coastguard Worker struct poly pg_phi1;
1966*8fb009dcSAndroid Build Coastguard Worker struct poly pfg_phi1;
1967*8fb009dcSAndroid Build Coastguard Worker struct poly pfg_phi1_inverse;
1968*8fb009dcSAndroid Build Coastguard Worker };
1969*8fb009dcSAndroid Build Coastguard Worker
1970*8fb009dcSAndroid Build Coastguard Worker void *malloc_ptr;
1971*8fb009dcSAndroid Build Coastguard Worker struct vars *const vars = malloc_align32(&malloc_ptr, sizeof(struct vars));
1972*8fb009dcSAndroid Build Coastguard Worker if (!vars) {
1973*8fb009dcSAndroid Build Coastguard Worker // If the caller ignores the return value the output will still be safe.
1974*8fb009dcSAndroid Build Coastguard Worker // The private key output is randomised in case it's later passed to
1975*8fb009dcSAndroid Build Coastguard Worker // |HRSS_encap|.
1976*8fb009dcSAndroid Build Coastguard Worker memset(out_pub, 0, sizeof(struct HRSS_public_key));
1977*8fb009dcSAndroid Build Coastguard Worker RAND_bytes((uint8_t*) out_priv, sizeof(struct HRSS_private_key));
1978*8fb009dcSAndroid Build Coastguard Worker return 0;
1979*8fb009dcSAndroid Build Coastguard Worker }
1980*8fb009dcSAndroid Build Coastguard Worker
1981*8fb009dcSAndroid Build Coastguard Worker #if !defined(NDEBUG)
1982*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memset(vars, 0xff, sizeof(struct vars));
1983*8fb009dcSAndroid Build Coastguard Worker #endif
1984*8fb009dcSAndroid Build Coastguard Worker
1985*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memcpy(priv->hmac_key, in + 2 * HRSS_SAMPLE_BYTES,
1986*8fb009dcSAndroid Build Coastguard Worker sizeof(priv->hmac_key));
1987*8fb009dcSAndroid Build Coastguard Worker
1988*8fb009dcSAndroid Build Coastguard Worker poly_short_sample_plus(&vars->f, in);
1989*8fb009dcSAndroid Build Coastguard Worker poly3_from_poly(&priv->f, &vars->f);
1990*8fb009dcSAndroid Build Coastguard Worker HRSS_poly3_invert(&priv->f_inverse, &priv->f);
1991*8fb009dcSAndroid Build Coastguard Worker
1992*8fb009dcSAndroid Build Coastguard Worker // pg_phi1 is p (i.e. 3) × g × Φ(1) (i.e. -1).
1993*8fb009dcSAndroid Build Coastguard Worker poly_short_sample_plus(&vars->pg_phi1, in + HRSS_SAMPLE_BYTES);
1994*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i++) {
1995*8fb009dcSAndroid Build Coastguard Worker vars->pg_phi1.v[i] *= 3;
1996*8fb009dcSAndroid Build Coastguard Worker }
1997*8fb009dcSAndroid Build Coastguard Worker poly_mul_x_minus_1(&vars->pg_phi1);
1998*8fb009dcSAndroid Build Coastguard Worker
1999*8fb009dcSAndroid Build Coastguard Worker poly_mul(&vars->scratch, &vars->pfg_phi1, &vars->f, &vars->pg_phi1);
2000*8fb009dcSAndroid Build Coastguard Worker
2001*8fb009dcSAndroid Build Coastguard Worker poly_invert(&vars->scratch, &vars->pfg_phi1_inverse, &vars->pfg_phi1);
2002*8fb009dcSAndroid Build Coastguard Worker
2003*8fb009dcSAndroid Build Coastguard Worker poly_mul(&vars->scratch, &pub->ph, &vars->pfg_phi1_inverse, &vars->pg_phi1);
2004*8fb009dcSAndroid Build Coastguard Worker poly_mul(&vars->scratch, &pub->ph, &pub->ph, &vars->pg_phi1);
2005*8fb009dcSAndroid Build Coastguard Worker poly_clamp(&pub->ph);
2006*8fb009dcSAndroid Build Coastguard Worker
2007*8fb009dcSAndroid Build Coastguard Worker poly_mul(&vars->scratch, &priv->ph_inverse, &vars->pfg_phi1_inverse,
2008*8fb009dcSAndroid Build Coastguard Worker &vars->f);
2009*8fb009dcSAndroid Build Coastguard Worker poly_mul(&vars->scratch, &priv->ph_inverse, &priv->ph_inverse, &vars->f);
2010*8fb009dcSAndroid Build Coastguard Worker poly_clamp(&priv->ph_inverse);
2011*8fb009dcSAndroid Build Coastguard Worker
2012*8fb009dcSAndroid Build Coastguard Worker OPENSSL_free(malloc_ptr);
2013*8fb009dcSAndroid Build Coastguard Worker return 1;
2014*8fb009dcSAndroid Build Coastguard Worker }
2015*8fb009dcSAndroid Build Coastguard Worker
2016*8fb009dcSAndroid Build Coastguard Worker static const char kSharedKey[] = "shared key";
2017*8fb009dcSAndroid Build Coastguard Worker
HRSS_encap(uint8_t out_ciphertext[POLY_BYTES],uint8_t out_shared_key[32],const struct HRSS_public_key * in_pub,const uint8_t in[HRSS_SAMPLE_BYTES+HRSS_SAMPLE_BYTES])2018*8fb009dcSAndroid Build Coastguard Worker int HRSS_encap(uint8_t out_ciphertext[POLY_BYTES], uint8_t out_shared_key[32],
2019*8fb009dcSAndroid Build Coastguard Worker const struct HRSS_public_key *in_pub,
2020*8fb009dcSAndroid Build Coastguard Worker const uint8_t in[HRSS_SAMPLE_BYTES + HRSS_SAMPLE_BYTES]) {
2021*8fb009dcSAndroid Build Coastguard Worker const struct public_key *pub =
2022*8fb009dcSAndroid Build Coastguard Worker public_key_from_external((struct HRSS_public_key *)in_pub);
2023*8fb009dcSAndroid Build Coastguard Worker
2024*8fb009dcSAndroid Build Coastguard Worker struct vars {
2025*8fb009dcSAndroid Build Coastguard Worker struct POLY_MUL_SCRATCH scratch;
2026*8fb009dcSAndroid Build Coastguard Worker struct poly m, r, m_lifted;
2027*8fb009dcSAndroid Build Coastguard Worker struct poly prh_plus_m;
2028*8fb009dcSAndroid Build Coastguard Worker SHA256_CTX hash_ctx;
2029*8fb009dcSAndroid Build Coastguard Worker uint8_t m_bytes[HRSS_POLY3_BYTES];
2030*8fb009dcSAndroid Build Coastguard Worker uint8_t r_bytes[HRSS_POLY3_BYTES];
2031*8fb009dcSAndroid Build Coastguard Worker };
2032*8fb009dcSAndroid Build Coastguard Worker
2033*8fb009dcSAndroid Build Coastguard Worker void *malloc_ptr;
2034*8fb009dcSAndroid Build Coastguard Worker struct vars *const vars = malloc_align32(&malloc_ptr, sizeof(struct vars));
2035*8fb009dcSAndroid Build Coastguard Worker if (!vars) {
2036*8fb009dcSAndroid Build Coastguard Worker // If the caller ignores the return value the output will still be safe.
2037*8fb009dcSAndroid Build Coastguard Worker // The private key output is randomised in case it's used to encrypt and
2038*8fb009dcSAndroid Build Coastguard Worker // transmit something.
2039*8fb009dcSAndroid Build Coastguard Worker memset(out_ciphertext, 0, POLY_BYTES);
2040*8fb009dcSAndroid Build Coastguard Worker RAND_bytes(out_shared_key, 32);
2041*8fb009dcSAndroid Build Coastguard Worker return 0;
2042*8fb009dcSAndroid Build Coastguard Worker }
2043*8fb009dcSAndroid Build Coastguard Worker
2044*8fb009dcSAndroid Build Coastguard Worker #if !defined(NDEBUG)
2045*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memset(vars, 0xff, sizeof(struct vars));
2046*8fb009dcSAndroid Build Coastguard Worker #endif
2047*8fb009dcSAndroid Build Coastguard Worker
2048*8fb009dcSAndroid Build Coastguard Worker poly_short_sample(&vars->m, in);
2049*8fb009dcSAndroid Build Coastguard Worker poly_short_sample(&vars->r, in + HRSS_SAMPLE_BYTES);
2050*8fb009dcSAndroid Build Coastguard Worker poly_lift(&vars->m_lifted, &vars->m);
2051*8fb009dcSAndroid Build Coastguard Worker
2052*8fb009dcSAndroid Build Coastguard Worker poly_mul(&vars->scratch, &vars->prh_plus_m, &vars->r, &pub->ph);
2053*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i++) {
2054*8fb009dcSAndroid Build Coastguard Worker vars->prh_plus_m.v[i] += vars->m_lifted.v[i];
2055*8fb009dcSAndroid Build Coastguard Worker }
2056*8fb009dcSAndroid Build Coastguard Worker
2057*8fb009dcSAndroid Build Coastguard Worker poly_marshal(out_ciphertext, &vars->prh_plus_m);
2058*8fb009dcSAndroid Build Coastguard Worker
2059*8fb009dcSAndroid Build Coastguard Worker poly_marshal_mod3(vars->m_bytes, &vars->m);
2060*8fb009dcSAndroid Build Coastguard Worker poly_marshal_mod3(vars->r_bytes, &vars->r);
2061*8fb009dcSAndroid Build Coastguard Worker
2062*8fb009dcSAndroid Build Coastguard Worker SHA256_Init(&vars->hash_ctx);
2063*8fb009dcSAndroid Build Coastguard Worker SHA256_Update(&vars->hash_ctx, kSharedKey, sizeof(kSharedKey));
2064*8fb009dcSAndroid Build Coastguard Worker SHA256_Update(&vars->hash_ctx, vars->m_bytes, sizeof(vars->m_bytes));
2065*8fb009dcSAndroid Build Coastguard Worker SHA256_Update(&vars->hash_ctx, vars->r_bytes, sizeof(vars->r_bytes));
2066*8fb009dcSAndroid Build Coastguard Worker SHA256_Update(&vars->hash_ctx, out_ciphertext, POLY_BYTES);
2067*8fb009dcSAndroid Build Coastguard Worker SHA256_Final(out_shared_key, &vars->hash_ctx);
2068*8fb009dcSAndroid Build Coastguard Worker
2069*8fb009dcSAndroid Build Coastguard Worker OPENSSL_free(malloc_ptr);
2070*8fb009dcSAndroid Build Coastguard Worker return 1;
2071*8fb009dcSAndroid Build Coastguard Worker }
2072*8fb009dcSAndroid Build Coastguard Worker
HRSS_decap(uint8_t out_shared_key[HRSS_KEY_BYTES],const struct HRSS_private_key * in_priv,const uint8_t * ciphertext,size_t ciphertext_len)2073*8fb009dcSAndroid Build Coastguard Worker int HRSS_decap(uint8_t out_shared_key[HRSS_KEY_BYTES],
2074*8fb009dcSAndroid Build Coastguard Worker const struct HRSS_private_key *in_priv,
2075*8fb009dcSAndroid Build Coastguard Worker const uint8_t *ciphertext, size_t ciphertext_len) {
2076*8fb009dcSAndroid Build Coastguard Worker const struct private_key *priv =
2077*8fb009dcSAndroid Build Coastguard Worker private_key_from_external((struct HRSS_private_key *)in_priv);
2078*8fb009dcSAndroid Build Coastguard Worker
2079*8fb009dcSAndroid Build Coastguard Worker struct vars {
2080*8fb009dcSAndroid Build Coastguard Worker struct POLY_MUL_SCRATCH scratch;
2081*8fb009dcSAndroid Build Coastguard Worker uint8_t masked_key[SHA256_CBLOCK];
2082*8fb009dcSAndroid Build Coastguard Worker SHA256_CTX hash_ctx;
2083*8fb009dcSAndroid Build Coastguard Worker struct poly c;
2084*8fb009dcSAndroid Build Coastguard Worker struct poly f, cf;
2085*8fb009dcSAndroid Build Coastguard Worker struct poly3 cf3, m3;
2086*8fb009dcSAndroid Build Coastguard Worker struct poly m, m_lifted;
2087*8fb009dcSAndroid Build Coastguard Worker struct poly r;
2088*8fb009dcSAndroid Build Coastguard Worker struct poly3 r3;
2089*8fb009dcSAndroid Build Coastguard Worker uint8_t expected_ciphertext[HRSS_CIPHERTEXT_BYTES];
2090*8fb009dcSAndroid Build Coastguard Worker uint8_t m_bytes[HRSS_POLY3_BYTES];
2091*8fb009dcSAndroid Build Coastguard Worker uint8_t r_bytes[HRSS_POLY3_BYTES];
2092*8fb009dcSAndroid Build Coastguard Worker uint8_t shared_key[32];
2093*8fb009dcSAndroid Build Coastguard Worker };
2094*8fb009dcSAndroid Build Coastguard Worker
2095*8fb009dcSAndroid Build Coastguard Worker void *malloc_ptr;
2096*8fb009dcSAndroid Build Coastguard Worker struct vars *const vars = malloc_align32(&malloc_ptr, sizeof(struct vars));
2097*8fb009dcSAndroid Build Coastguard Worker if (!vars) {
2098*8fb009dcSAndroid Build Coastguard Worker // If the caller ignores the return value the output will still be safe.
2099*8fb009dcSAndroid Build Coastguard Worker // The private key output is randomised in case it's used to encrypt and
2100*8fb009dcSAndroid Build Coastguard Worker // transmit something.
2101*8fb009dcSAndroid Build Coastguard Worker RAND_bytes(out_shared_key, HRSS_KEY_BYTES);
2102*8fb009dcSAndroid Build Coastguard Worker return 0;
2103*8fb009dcSAndroid Build Coastguard Worker }
2104*8fb009dcSAndroid Build Coastguard Worker
2105*8fb009dcSAndroid Build Coastguard Worker #if !defined(NDEBUG)
2106*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memset(vars, 0xff, sizeof(struct vars));
2107*8fb009dcSAndroid Build Coastguard Worker #endif
2108*8fb009dcSAndroid Build Coastguard Worker
2109*8fb009dcSAndroid Build Coastguard Worker // This is HMAC, expanded inline rather than using the |HMAC| function so that
2110*8fb009dcSAndroid Build Coastguard Worker // we can avoid dealing with possible allocation failures and so keep this
2111*8fb009dcSAndroid Build Coastguard Worker // function infallible.
2112*8fb009dcSAndroid Build Coastguard Worker static_assert(sizeof(priv->hmac_key) <= sizeof(vars->masked_key),
2113*8fb009dcSAndroid Build Coastguard Worker "HRSS HMAC key larger than SHA-256 block size");
2114*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < sizeof(priv->hmac_key); i++) {
2115*8fb009dcSAndroid Build Coastguard Worker vars->masked_key[i] = priv->hmac_key[i] ^ 0x36;
2116*8fb009dcSAndroid Build Coastguard Worker }
2117*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memset(vars->masked_key + sizeof(priv->hmac_key), 0x36,
2118*8fb009dcSAndroid Build Coastguard Worker sizeof(vars->masked_key) - sizeof(priv->hmac_key));
2119*8fb009dcSAndroid Build Coastguard Worker
2120*8fb009dcSAndroid Build Coastguard Worker SHA256_Init(&vars->hash_ctx);
2121*8fb009dcSAndroid Build Coastguard Worker SHA256_Update(&vars->hash_ctx, vars->masked_key, sizeof(vars->masked_key));
2122*8fb009dcSAndroid Build Coastguard Worker SHA256_Update(&vars->hash_ctx, ciphertext, ciphertext_len);
2123*8fb009dcSAndroid Build Coastguard Worker uint8_t inner_digest[SHA256_DIGEST_LENGTH];
2124*8fb009dcSAndroid Build Coastguard Worker SHA256_Final(inner_digest, &vars->hash_ctx);
2125*8fb009dcSAndroid Build Coastguard Worker
2126*8fb009dcSAndroid Build Coastguard Worker for (size_t i = 0; i < sizeof(priv->hmac_key); i++) {
2127*8fb009dcSAndroid Build Coastguard Worker vars->masked_key[i] ^= (0x5c ^ 0x36);
2128*8fb009dcSAndroid Build Coastguard Worker }
2129*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memset(vars->masked_key + sizeof(priv->hmac_key), 0x5c,
2130*8fb009dcSAndroid Build Coastguard Worker sizeof(vars->masked_key) - sizeof(priv->hmac_key));
2131*8fb009dcSAndroid Build Coastguard Worker
2132*8fb009dcSAndroid Build Coastguard Worker SHA256_Init(&vars->hash_ctx);
2133*8fb009dcSAndroid Build Coastguard Worker SHA256_Update(&vars->hash_ctx, vars->masked_key, sizeof(vars->masked_key));
2134*8fb009dcSAndroid Build Coastguard Worker SHA256_Update(&vars->hash_ctx, inner_digest, sizeof(inner_digest));
2135*8fb009dcSAndroid Build Coastguard Worker static_assert(HRSS_KEY_BYTES == SHA256_DIGEST_LENGTH,
2136*8fb009dcSAndroid Build Coastguard Worker "HRSS shared key length incorrect");
2137*8fb009dcSAndroid Build Coastguard Worker SHA256_Final(out_shared_key, &vars->hash_ctx);
2138*8fb009dcSAndroid Build Coastguard Worker
2139*8fb009dcSAndroid Build Coastguard Worker // If the ciphertext is publicly invalid then a random shared key is still
2140*8fb009dcSAndroid Build Coastguard Worker // returned to simply the logic of the caller, but this path is not constant
2141*8fb009dcSAndroid Build Coastguard Worker // time.
2142*8fb009dcSAndroid Build Coastguard Worker if (ciphertext_len != HRSS_CIPHERTEXT_BYTES ||
2143*8fb009dcSAndroid Build Coastguard Worker !poly_unmarshal(&vars->c, ciphertext)) {
2144*8fb009dcSAndroid Build Coastguard Worker goto out;
2145*8fb009dcSAndroid Build Coastguard Worker }
2146*8fb009dcSAndroid Build Coastguard Worker
2147*8fb009dcSAndroid Build Coastguard Worker poly_from_poly3(&vars->f, &priv->f);
2148*8fb009dcSAndroid Build Coastguard Worker poly_mul(&vars->scratch, &vars->cf, &vars->c, &vars->f);
2149*8fb009dcSAndroid Build Coastguard Worker poly3_from_poly(&vars->cf3, &vars->cf);
2150*8fb009dcSAndroid Build Coastguard Worker // Note that cf3 is not reduced mod Φ(N). That reduction is deferred.
2151*8fb009dcSAndroid Build Coastguard Worker HRSS_poly3_mul(&vars->m3, &vars->cf3, &priv->f_inverse);
2152*8fb009dcSAndroid Build Coastguard Worker
2153*8fb009dcSAndroid Build Coastguard Worker poly_from_poly3(&vars->m, &vars->m3);
2154*8fb009dcSAndroid Build Coastguard Worker poly_lift(&vars->m_lifted, &vars->m);
2155*8fb009dcSAndroid Build Coastguard Worker
2156*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < N; i++) {
2157*8fb009dcSAndroid Build Coastguard Worker vars->r.v[i] = vars->c.v[i] - vars->m_lifted.v[i];
2158*8fb009dcSAndroid Build Coastguard Worker }
2159*8fb009dcSAndroid Build Coastguard Worker poly_normalize(&vars->r);
2160*8fb009dcSAndroid Build Coastguard Worker poly_mul(&vars->scratch, &vars->r, &vars->r, &priv->ph_inverse);
2161*8fb009dcSAndroid Build Coastguard Worker poly_mod_phiN(&vars->r);
2162*8fb009dcSAndroid Build Coastguard Worker poly_clamp(&vars->r);
2163*8fb009dcSAndroid Build Coastguard Worker
2164*8fb009dcSAndroid Build Coastguard Worker crypto_word_t ok = poly3_from_poly_checked(&vars->r3, &vars->r);
2165*8fb009dcSAndroid Build Coastguard Worker
2166*8fb009dcSAndroid Build Coastguard Worker // [NTRUCOMP] section 5.1 includes ReEnc2 and a proof that it's valid. Rather
2167*8fb009dcSAndroid Build Coastguard Worker // than do an expensive |poly_mul|, it rebuilds |c'| from |c - lift(m)|
2168*8fb009dcSAndroid Build Coastguard Worker // (called |b|) with:
2169*8fb009dcSAndroid Build Coastguard Worker // t = (−b(1)/N) mod Q
2170*8fb009dcSAndroid Build Coastguard Worker // c' = b + tΦ(N) + lift(m) mod Q
2171*8fb009dcSAndroid Build Coastguard Worker //
2172*8fb009dcSAndroid Build Coastguard Worker // When polynomials are transmitted, the final coefficient is omitted and
2173*8fb009dcSAndroid Build Coastguard Worker // |poly_unmarshal| sets it such that f(1) == 0. Thus c(1) == 0. Also,
2174*8fb009dcSAndroid Build Coastguard Worker // |poly_lift| multiplies the result by (x-1) and therefore evaluating a
2175*8fb009dcSAndroid Build Coastguard Worker // lifted polynomial at 1 is also zero. Thus lift(m)(1) == 0 and so
2176*8fb009dcSAndroid Build Coastguard Worker // (c - lift(m))(1) == 0.
2177*8fb009dcSAndroid Build Coastguard Worker //
2178*8fb009dcSAndroid Build Coastguard Worker // Although we defer the reduction above, |b| is conceptually reduced mod
2179*8fb009dcSAndroid Build Coastguard Worker // Φ(N). In order to do that reduction one subtracts |c[N-1]| from every
2180*8fb009dcSAndroid Build Coastguard Worker // coefficient. Therefore b(1) = -c[N-1]×N. The value of |t|, above, then is
2181*8fb009dcSAndroid Build Coastguard Worker // just recovering |c[N-1]|, and adding tΦ(N) is simply undoing the reduction.
2182*8fb009dcSAndroid Build Coastguard Worker // Therefore b + tΦ(N) + lift(m) = c by construction and we don't need to
2183*8fb009dcSAndroid Build Coastguard Worker // recover |c| at all so long as we do the checks in
2184*8fb009dcSAndroid Build Coastguard Worker // |poly3_from_poly_checked|.
2185*8fb009dcSAndroid Build Coastguard Worker //
2186*8fb009dcSAndroid Build Coastguard Worker // The |poly_marshal| here then is just confirming that |poly_unmarshal| is
2187*8fb009dcSAndroid Build Coastguard Worker // strict and could be omitted.
2188*8fb009dcSAndroid Build Coastguard Worker
2189*8fb009dcSAndroid Build Coastguard Worker static_assert(HRSS_CIPHERTEXT_BYTES == POLY_BYTES,
2190*8fb009dcSAndroid Build Coastguard Worker "ciphertext is the wrong size");
2191*8fb009dcSAndroid Build Coastguard Worker assert(ciphertext_len == sizeof(vars->expected_ciphertext));
2192*8fb009dcSAndroid Build Coastguard Worker poly_marshal(vars->expected_ciphertext, &vars->c);
2193*8fb009dcSAndroid Build Coastguard Worker
2194*8fb009dcSAndroid Build Coastguard Worker poly_marshal_mod3(vars->m_bytes, &vars->m);
2195*8fb009dcSAndroid Build Coastguard Worker poly_marshal_mod3(vars->r_bytes, &vars->r);
2196*8fb009dcSAndroid Build Coastguard Worker
2197*8fb009dcSAndroid Build Coastguard Worker ok &= constant_time_is_zero_w(
2198*8fb009dcSAndroid Build Coastguard Worker CRYPTO_memcmp(ciphertext, vars->expected_ciphertext,
2199*8fb009dcSAndroid Build Coastguard Worker sizeof(vars->expected_ciphertext)));
2200*8fb009dcSAndroid Build Coastguard Worker
2201*8fb009dcSAndroid Build Coastguard Worker SHA256_Init(&vars->hash_ctx);
2202*8fb009dcSAndroid Build Coastguard Worker SHA256_Update(&vars->hash_ctx, kSharedKey, sizeof(kSharedKey));
2203*8fb009dcSAndroid Build Coastguard Worker SHA256_Update(&vars->hash_ctx, vars->m_bytes, sizeof(vars->m_bytes));
2204*8fb009dcSAndroid Build Coastguard Worker SHA256_Update(&vars->hash_ctx, vars->r_bytes, sizeof(vars->r_bytes));
2205*8fb009dcSAndroid Build Coastguard Worker SHA256_Update(&vars->hash_ctx, vars->expected_ciphertext,
2206*8fb009dcSAndroid Build Coastguard Worker sizeof(vars->expected_ciphertext));
2207*8fb009dcSAndroid Build Coastguard Worker SHA256_Final(vars->shared_key, &vars->hash_ctx);
2208*8fb009dcSAndroid Build Coastguard Worker
2209*8fb009dcSAndroid Build Coastguard Worker for (unsigned i = 0; i < sizeof(vars->shared_key); i++) {
2210*8fb009dcSAndroid Build Coastguard Worker out_shared_key[i] =
2211*8fb009dcSAndroid Build Coastguard Worker constant_time_select_8(ok, vars->shared_key[i], out_shared_key[i]);
2212*8fb009dcSAndroid Build Coastguard Worker }
2213*8fb009dcSAndroid Build Coastguard Worker
2214*8fb009dcSAndroid Build Coastguard Worker out:
2215*8fb009dcSAndroid Build Coastguard Worker OPENSSL_free(malloc_ptr);
2216*8fb009dcSAndroid Build Coastguard Worker return 1;
2217*8fb009dcSAndroid Build Coastguard Worker }
2218*8fb009dcSAndroid Build Coastguard Worker
HRSS_marshal_public_key(uint8_t out[HRSS_PUBLIC_KEY_BYTES],const struct HRSS_public_key * in_pub)2219*8fb009dcSAndroid Build Coastguard Worker void HRSS_marshal_public_key(uint8_t out[HRSS_PUBLIC_KEY_BYTES],
2220*8fb009dcSAndroid Build Coastguard Worker const struct HRSS_public_key *in_pub) {
2221*8fb009dcSAndroid Build Coastguard Worker const struct public_key *pub =
2222*8fb009dcSAndroid Build Coastguard Worker public_key_from_external((struct HRSS_public_key *)in_pub);
2223*8fb009dcSAndroid Build Coastguard Worker poly_marshal(out, &pub->ph);
2224*8fb009dcSAndroid Build Coastguard Worker }
2225*8fb009dcSAndroid Build Coastguard Worker
HRSS_parse_public_key(struct HRSS_public_key * out,const uint8_t in[HRSS_PUBLIC_KEY_BYTES])2226*8fb009dcSAndroid Build Coastguard Worker int HRSS_parse_public_key(struct HRSS_public_key *out,
2227*8fb009dcSAndroid Build Coastguard Worker const uint8_t in[HRSS_PUBLIC_KEY_BYTES]) {
2228*8fb009dcSAndroid Build Coastguard Worker struct public_key *pub = public_key_from_external(out);
2229*8fb009dcSAndroid Build Coastguard Worker if (!poly_unmarshal(&pub->ph, in)) {
2230*8fb009dcSAndroid Build Coastguard Worker return 0;
2231*8fb009dcSAndroid Build Coastguard Worker }
2232*8fb009dcSAndroid Build Coastguard Worker OPENSSL_memset(&pub->ph.v[N], 0, 3 * sizeof(uint16_t));
2233*8fb009dcSAndroid Build Coastguard Worker return 1;
2234*8fb009dcSAndroid Build Coastguard Worker }
2235