/* xref: /aosp_15_r20/external/boringssl/src/third_party/fiat/p256_64.h (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b) */
1 #include <openssl/base.h>
2 #include "../../crypto/internal.h"
#if !defined(OPENSSL_NO_ASM) && defined(__GNUC__) && defined(__x86_64__)
/*
 * Out-of-line multiply and square routines. They are only declared here; the
 * definitions live outside this header. fiat_p256_mul and fiat_p256_square
 * call them when CRYPTO_is_BMI1_capable(), CRYPTO_is_BMI2_capable() and
 * CRYPTO_is_ADX_capable() all report true. Parameter names mirror the
 * arguments passed at those call sites.
 */
void fiat_p256_adx_mul(uint64_t* out1, const uint64_t* arg1, const uint64_t* arg2);
void fiat_p256_adx_sqr(uint64_t* out1, const uint64_t* arg1);
#endif
7 
/* Autogenerated: 'src/ExtractionOCaml/word_by_word_montgomery' --inline --static --use-value-barrier p256 64 '2^256 - 2^224 + 2^192 + 2^96 - 1' mul square add sub opp from_montgomery to_montgomery nonzero selectznz to_bytes from_bytes one msat divstep divstep_precomp */
/* curve description: p256 */
/* machine_wordsize = 64 (from "64") */
/* requested operations: mul, square, add, sub, opp, from_montgomery, to_montgomery, nonzero, selectznz, to_bytes, from_bytes, one, msat, divstep, divstep_precomp */
/* m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1") */
/*                                                                    */
/* NOTE: In addition to the bounds specified above each function, all */
/*   functions synthesized for this Montgomery arithmetic require the */
/*   input to be strictly less than the prime modulus (m), and also   */
/*   require the input to be in the unique saturated representation.  */
/*   All functions also ensure that these two properties are true of  */
/*   return values.                                                   */
/*  */
/* Computed values: */
/*   eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) */
/*   bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) */
/*   twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) in */
/*                            if x1 & (2^256-1) < 2^255 then x1 & (2^256-1) else (x1 & (2^256-1)) - 2^256 */
26 
#include <stdint.h>

/* Single-bit carry/borrow flag type, and its signed counterpart. */
typedef unsigned char fiat_p256_uint1;
typedef signed char fiat_p256_int1;

/*
 * Compiler-specific spellings. On GCC/Clang, FIAT_P256_FIAT_EXTENSION expands
 * to __extension__ and FIAT_P256_FIAT_INLINE to __inline__; on any other
 * compiler both expand to nothing.
 */
#if defined(__GNUC__) || defined(__clang__)
#  define FIAT_P256_FIAT_EXTENSION __extension__
#  define FIAT_P256_FIAT_INLINE __inline__
#else
#  define FIAT_P256_FIAT_EXTENSION
#  define FIAT_P256_FIAT_INLINE
#endif

/* 128-bit integers used for double-width intermediate results. */
FIAT_P256_FIAT_EXTENSION typedef signed __int128 fiat_p256_int128;
FIAT_P256_FIAT_EXTENSION typedef unsigned __int128 fiat_p256_uint128;

/* The type fiat_p256_montgomery_domain_field_element is a field element in the Montgomery domain. */
/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */
typedef uint64_t fiat_p256_montgomery_domain_field_element[4];

/* The type fiat_p256_non_montgomery_domain_field_element is a field element NOT in the Montgomery domain. */
/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */
typedef uint64_t fiat_p256_non_montgomery_domain_field_element[4];

/* Refuse to build unless the preprocessor evaluates -1 as all-ones in its low bits. */
#if (-1 & 3) != 3
#error "This code only works on a two's complement system"
#endif
52 
/*
 * fiat_p256_value_barrier_u64 returns its argument unchanged.
 *
 * On GCC/Clang, unless FIAT_P256_NO_ASM is defined, the value is routed
 * through an empty inline-asm statement as a read-write register operand.
 * NOTE(review): presumably this keeps the optimizer from reasoning about the
 * value (the generator was run with --use-value-barrier) -- confirm against
 * the generator documentation. On other compilers it is an identity macro.
 */
#if !defined(FIAT_P256_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
static __inline__ uint64_t fiat_p256_value_barrier_u64(uint64_t a) {
  __asm__("" : "+r"(a) : /* no inputs */);
  return a;
}
#else
#  define fiat_p256_value_barrier_u64(x) (x)
#endif
61 
62 
63 /*
64  * The function fiat_p256_addcarryx_u64 is an addition with carry.
65  *
66  * Postconditions:
67  *   out1 = (arg1 + arg2 + arg3) mod 2^64
68  *   out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋
69  *
70  * Input Bounds:
71  *   arg1: [0x0 ~> 0x1]
72  *   arg2: [0x0 ~> 0xffffffffffffffff]
73  *   arg3: [0x0 ~> 0xffffffffffffffff]
74  * Output Bounds:
75  *   out1: [0x0 ~> 0xffffffffffffffff]
76  *   out2: [0x0 ~> 0x1]
77  */
fiat_p256_addcarryx_u64(uint64_t * out1,fiat_p256_uint1 * out2,fiat_p256_uint1 arg1,uint64_t arg2,uint64_t arg3)78 static FIAT_P256_FIAT_INLINE void fiat_p256_addcarryx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
79   fiat_p256_uint128 x1;
80   uint64_t x2;
81   fiat_p256_uint1 x3;
82   x1 = ((arg1 + (fiat_p256_uint128)arg2) + arg3);
83   x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
84   x3 = (fiat_p256_uint1)(x1 >> 64);
85   *out1 = x2;
86   *out2 = x3;
87 }
88 
89 /*
90  * The function fiat_p256_subborrowx_u64 is a subtraction with borrow.
91  *
92  * Postconditions:
93  *   out1 = (-arg1 + arg2 + -arg3) mod 2^64
94  *   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋
95  *
96  * Input Bounds:
97  *   arg1: [0x0 ~> 0x1]
98  *   arg2: [0x0 ~> 0xffffffffffffffff]
99  *   arg3: [0x0 ~> 0xffffffffffffffff]
100  * Output Bounds:
101  *   out1: [0x0 ~> 0xffffffffffffffff]
102  *   out2: [0x0 ~> 0x1]
103  */
fiat_p256_subborrowx_u64(uint64_t * out1,fiat_p256_uint1 * out2,fiat_p256_uint1 arg1,uint64_t arg2,uint64_t arg3)104 static FIAT_P256_FIAT_INLINE void fiat_p256_subborrowx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
105   fiat_p256_int128 x1;
106   fiat_p256_int1 x2;
107   uint64_t x3;
108   x1 = ((arg2 - (fiat_p256_int128)arg1) - arg3);
109   x2 = (fiat_p256_int1)(x1 >> 64);
110   x3 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
111   *out1 = x3;
112   *out2 = (fiat_p256_uint1)(0x0 - x2);
113 }
114 
115 /*
116  * The function fiat_p256_mulx_u64 is a multiplication, returning the full double-width result.
117  *
118  * Postconditions:
119  *   out1 = (arg1 * arg2) mod 2^64
120  *   out2 = ⌊arg1 * arg2 / 2^64⌋
121  *
122  * Input Bounds:
123  *   arg1: [0x0 ~> 0xffffffffffffffff]
124  *   arg2: [0x0 ~> 0xffffffffffffffff]
125  * Output Bounds:
126  *   out1: [0x0 ~> 0xffffffffffffffff]
127  *   out2: [0x0 ~> 0xffffffffffffffff]
128  */
fiat_p256_mulx_u64(uint64_t * out1,uint64_t * out2,uint64_t arg1,uint64_t arg2)129 static FIAT_P256_FIAT_INLINE void fiat_p256_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, uint64_t arg2) {
130   fiat_p256_uint128 x1;
131   uint64_t x2;
132   uint64_t x3;
133   x1 = ((fiat_p256_uint128)arg1 * arg2);
134   x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
135   x3 = (uint64_t)(x1 >> 64);
136   *out1 = x2;
137   *out2 = x3;
138 }
139 
140 /*
141  * The function fiat_p256_cmovznz_u64 is a single-word conditional move.
142  *
143  * Postconditions:
144  *   out1 = (if arg1 = 0 then arg2 else arg3)
145  *
146  * Input Bounds:
147  *   arg1: [0x0 ~> 0x1]
148  *   arg2: [0x0 ~> 0xffffffffffffffff]
149  *   arg3: [0x0 ~> 0xffffffffffffffff]
150  * Output Bounds:
151  *   out1: [0x0 ~> 0xffffffffffffffff]
152  */
fiat_p256_cmovznz_u64(uint64_t * out1,fiat_p256_uint1 arg1,uint64_t arg2,uint64_t arg3)153 static FIAT_P256_FIAT_INLINE void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
154   fiat_p256_uint1 x1;
155   uint64_t x2;
156   uint64_t x3;
157   x1 = (!(!arg1));
158   x2 = ((fiat_p256_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
159   x3 = ((fiat_p256_value_barrier_u64(x2) & arg3) | (fiat_p256_value_barrier_u64((~x2)) & arg2));
160   *out1 = x3;
161 }
162 
163 /*
164  * The function fiat_p256_mul multiplies two field elements in the Montgomery domain.
165  *
166  * Preconditions:
167  *   0 ≤ eval arg1 < m
168  *   0 ≤ eval arg2 < m
169  * Postconditions:
170  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
171  *   0 ≤ eval out1 < m
172  *
173  */
fiat_p256_mul(fiat_p256_montgomery_domain_field_element out1,const fiat_p256_montgomery_domain_field_element arg1,const fiat_p256_montgomery_domain_field_element arg2)174 static FIAT_P256_FIAT_INLINE void fiat_p256_mul(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) {
175 #if !defined(OPENSSL_NO_ASM) && defined(__GNUC__) && defined(__x86_64__)
176   if (CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable() &&
177     CRYPTO_is_ADX_capable()) {
178       fiat_p256_adx_mul(out1, arg1, arg2);
179       return;
180   }
181 #endif
182   uint64_t x1;
183   uint64_t x2;
184   uint64_t x3;
185   uint64_t x4;
186   uint64_t x5;
187   uint64_t x6;
188   uint64_t x7;
189   uint64_t x8;
190   uint64_t x9;
191   uint64_t x10;
192   uint64_t x11;
193   uint64_t x12;
194   uint64_t x13;
195   fiat_p256_uint1 x14;
196   uint64_t x15;
197   fiat_p256_uint1 x16;
198   uint64_t x17;
199   fiat_p256_uint1 x18;
200   uint64_t x19;
201   uint64_t x20;
202   uint64_t x21;
203   uint64_t x22;
204   uint64_t x23;
205   uint64_t x24;
206   uint64_t x25;
207   uint64_t x26;
208   fiat_p256_uint1 x27;
209   uint64_t x28;
210   uint64_t x29;
211   fiat_p256_uint1 x30;
212   uint64_t x31;
213   fiat_p256_uint1 x32;
214   uint64_t x33;
215   fiat_p256_uint1 x34;
216   uint64_t x35;
217   fiat_p256_uint1 x36;
218   uint64_t x37;
219   fiat_p256_uint1 x38;
220   uint64_t x39;
221   uint64_t x40;
222   uint64_t x41;
223   uint64_t x42;
224   uint64_t x43;
225   uint64_t x44;
226   uint64_t x45;
227   uint64_t x46;
228   uint64_t x47;
229   fiat_p256_uint1 x48;
230   uint64_t x49;
231   fiat_p256_uint1 x50;
232   uint64_t x51;
233   fiat_p256_uint1 x52;
234   uint64_t x53;
235   uint64_t x54;
236   fiat_p256_uint1 x55;
237   uint64_t x56;
238   fiat_p256_uint1 x57;
239   uint64_t x58;
240   fiat_p256_uint1 x59;
241   uint64_t x60;
242   fiat_p256_uint1 x61;
243   uint64_t x62;
244   fiat_p256_uint1 x63;
245   uint64_t x64;
246   uint64_t x65;
247   uint64_t x66;
248   uint64_t x67;
249   uint64_t x68;
250   uint64_t x69;
251   uint64_t x70;
252   fiat_p256_uint1 x71;
253   uint64_t x72;
254   uint64_t x73;
255   fiat_p256_uint1 x74;
256   uint64_t x75;
257   fiat_p256_uint1 x76;
258   uint64_t x77;
259   fiat_p256_uint1 x78;
260   uint64_t x79;
261   fiat_p256_uint1 x80;
262   uint64_t x81;
263   fiat_p256_uint1 x82;
264   uint64_t x83;
265   uint64_t x84;
266   uint64_t x85;
267   uint64_t x86;
268   uint64_t x87;
269   uint64_t x88;
270   uint64_t x89;
271   uint64_t x90;
272   uint64_t x91;
273   uint64_t x92;
274   fiat_p256_uint1 x93;
275   uint64_t x94;
276   fiat_p256_uint1 x95;
277   uint64_t x96;
278   fiat_p256_uint1 x97;
279   uint64_t x98;
280   uint64_t x99;
281   fiat_p256_uint1 x100;
282   uint64_t x101;
283   fiat_p256_uint1 x102;
284   uint64_t x103;
285   fiat_p256_uint1 x104;
286   uint64_t x105;
287   fiat_p256_uint1 x106;
288   uint64_t x107;
289   fiat_p256_uint1 x108;
290   uint64_t x109;
291   uint64_t x110;
292   uint64_t x111;
293   uint64_t x112;
294   uint64_t x113;
295   uint64_t x114;
296   uint64_t x115;
297   fiat_p256_uint1 x116;
298   uint64_t x117;
299   uint64_t x118;
300   fiat_p256_uint1 x119;
301   uint64_t x120;
302   fiat_p256_uint1 x121;
303   uint64_t x122;
304   fiat_p256_uint1 x123;
305   uint64_t x124;
306   fiat_p256_uint1 x125;
307   uint64_t x126;
308   fiat_p256_uint1 x127;
309   uint64_t x128;
310   uint64_t x129;
311   uint64_t x130;
312   uint64_t x131;
313   uint64_t x132;
314   uint64_t x133;
315   uint64_t x134;
316   uint64_t x135;
317   uint64_t x136;
318   uint64_t x137;
319   fiat_p256_uint1 x138;
320   uint64_t x139;
321   fiat_p256_uint1 x140;
322   uint64_t x141;
323   fiat_p256_uint1 x142;
324   uint64_t x143;
325   uint64_t x144;
326   fiat_p256_uint1 x145;
327   uint64_t x146;
328   fiat_p256_uint1 x147;
329   uint64_t x148;
330   fiat_p256_uint1 x149;
331   uint64_t x150;
332   fiat_p256_uint1 x151;
333   uint64_t x152;
334   fiat_p256_uint1 x153;
335   uint64_t x154;
336   uint64_t x155;
337   uint64_t x156;
338   uint64_t x157;
339   uint64_t x158;
340   uint64_t x159;
341   uint64_t x160;
342   fiat_p256_uint1 x161;
343   uint64_t x162;
344   uint64_t x163;
345   fiat_p256_uint1 x164;
346   uint64_t x165;
347   fiat_p256_uint1 x166;
348   uint64_t x167;
349   fiat_p256_uint1 x168;
350   uint64_t x169;
351   fiat_p256_uint1 x170;
352   uint64_t x171;
353   fiat_p256_uint1 x172;
354   uint64_t x173;
355   uint64_t x174;
356   fiat_p256_uint1 x175;
357   uint64_t x176;
358   fiat_p256_uint1 x177;
359   uint64_t x178;
360   fiat_p256_uint1 x179;
361   uint64_t x180;
362   fiat_p256_uint1 x181;
363   uint64_t x182;
364   fiat_p256_uint1 x183;
365   uint64_t x184;
366   uint64_t x185;
367   uint64_t x186;
368   uint64_t x187;
369   x1 = (arg1[1]);
370   x2 = (arg1[2]);
371   x3 = (arg1[3]);
372   x4 = (arg1[0]);
373   fiat_p256_mulx_u64(&x5, &x6, x4, (arg2[3]));
374   fiat_p256_mulx_u64(&x7, &x8, x4, (arg2[2]));
375   fiat_p256_mulx_u64(&x9, &x10, x4, (arg2[1]));
376   fiat_p256_mulx_u64(&x11, &x12, x4, (arg2[0]));
377   fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
378   fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
379   fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
380   x19 = (x18 + x6);
381   fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
382   fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
383   fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
384   fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
385   x28 = (x27 + x23);
386   fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
387   fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
388   fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
389   fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
390   fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
391   fiat_p256_mulx_u64(&x39, &x40, x1, (arg2[3]));
392   fiat_p256_mulx_u64(&x41, &x42, x1, (arg2[2]));
393   fiat_p256_mulx_u64(&x43, &x44, x1, (arg2[1]));
394   fiat_p256_mulx_u64(&x45, &x46, x1, (arg2[0]));
395   fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
396   fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
397   fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
398   x53 = (x52 + x40);
399   fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
400   fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
401   fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
402   fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
403   fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
404   fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
405   fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
406   fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
407   fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
408   x72 = (x71 + x67);
409   fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
410   fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
411   fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
412   fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
413   fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
414   x83 = ((uint64_t)x82 + x63);
415   fiat_p256_mulx_u64(&x84, &x85, x2, (arg2[3]));
416   fiat_p256_mulx_u64(&x86, &x87, x2, (arg2[2]));
417   fiat_p256_mulx_u64(&x88, &x89, x2, (arg2[1]));
418   fiat_p256_mulx_u64(&x90, &x91, x2, (arg2[0]));
419   fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
420   fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
421   fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
422   x98 = (x97 + x85);
423   fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
424   fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
425   fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
426   fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
427   fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
428   fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
429   fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
430   fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
431   fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
432   x117 = (x116 + x112);
433   fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
434   fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
435   fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
436   fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
437   fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
438   x128 = ((uint64_t)x127 + x108);
439   fiat_p256_mulx_u64(&x129, &x130, x3, (arg2[3]));
440   fiat_p256_mulx_u64(&x131, &x132, x3, (arg2[2]));
441   fiat_p256_mulx_u64(&x133, &x134, x3, (arg2[1]));
442   fiat_p256_mulx_u64(&x135, &x136, x3, (arg2[0]));
443   fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
444   fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
445   fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
446   x143 = (x142 + x130);
447   fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
448   fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
449   fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
450   fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
451   fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
452   fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
453   fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
454   fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
455   fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
456   x162 = (x161 + x157);
457   fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
458   fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
459   fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
460   fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
461   fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
462   x173 = ((uint64_t)x172 + x153);
463   fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
464   fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
465   fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
466   fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
467   fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
468   fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
469   fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
470   fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
471   fiat_p256_cmovznz_u64(&x187, x183, x180, x171);
472   out1[0] = x184;
473   out1[1] = x185;
474   out1[2] = x186;
475   out1[3] = x187;
476 }
477 
478 /*
479  * The function fiat_p256_square squares a field element in the Montgomery domain.
480  *
481  * Preconditions:
482  *   0 ≤ eval arg1 < m
483  * Postconditions:
484  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
485  *   0 ≤ eval out1 < m
486  *
487  */
fiat_p256_square(fiat_p256_montgomery_domain_field_element out1,const fiat_p256_montgomery_domain_field_element arg1)488 static FIAT_P256_FIAT_INLINE void fiat_p256_square(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) {
489 #if !defined(OPENSSL_NO_ASM) && defined(__GNUC__) && defined(__x86_64__)
490   if (CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable() &&
491     CRYPTO_is_ADX_capable()) {
492       fiat_p256_adx_sqr(out1, arg1);
493       return;
494   }
495 #endif
496   uint64_t x1;
497   uint64_t x2;
498   uint64_t x3;
499   uint64_t x4;
500   uint64_t x5;
501   uint64_t x6;
502   uint64_t x7;
503   uint64_t x8;
504   uint64_t x9;
505   uint64_t x10;
506   uint64_t x11;
507   uint64_t x12;
508   uint64_t x13;
509   fiat_p256_uint1 x14;
510   uint64_t x15;
511   fiat_p256_uint1 x16;
512   uint64_t x17;
513   fiat_p256_uint1 x18;
514   uint64_t x19;
515   uint64_t x20;
516   uint64_t x21;
517   uint64_t x22;
518   uint64_t x23;
519   uint64_t x24;
520   uint64_t x25;
521   uint64_t x26;
522   fiat_p256_uint1 x27;
523   uint64_t x28;
524   uint64_t x29;
525   fiat_p256_uint1 x30;
526   uint64_t x31;
527   fiat_p256_uint1 x32;
528   uint64_t x33;
529   fiat_p256_uint1 x34;
530   uint64_t x35;
531   fiat_p256_uint1 x36;
532   uint64_t x37;
533   fiat_p256_uint1 x38;
534   uint64_t x39;
535   uint64_t x40;
536   uint64_t x41;
537   uint64_t x42;
538   uint64_t x43;
539   uint64_t x44;
540   uint64_t x45;
541   uint64_t x46;
542   uint64_t x47;
543   fiat_p256_uint1 x48;
544   uint64_t x49;
545   fiat_p256_uint1 x50;
546   uint64_t x51;
547   fiat_p256_uint1 x52;
548   uint64_t x53;
549   uint64_t x54;
550   fiat_p256_uint1 x55;
551   uint64_t x56;
552   fiat_p256_uint1 x57;
553   uint64_t x58;
554   fiat_p256_uint1 x59;
555   uint64_t x60;
556   fiat_p256_uint1 x61;
557   uint64_t x62;
558   fiat_p256_uint1 x63;
559   uint64_t x64;
560   uint64_t x65;
561   uint64_t x66;
562   uint64_t x67;
563   uint64_t x68;
564   uint64_t x69;
565   uint64_t x70;
566   fiat_p256_uint1 x71;
567   uint64_t x72;
568   uint64_t x73;
569   fiat_p256_uint1 x74;
570   uint64_t x75;
571   fiat_p256_uint1 x76;
572   uint64_t x77;
573   fiat_p256_uint1 x78;
574   uint64_t x79;
575   fiat_p256_uint1 x80;
576   uint64_t x81;
577   fiat_p256_uint1 x82;
578   uint64_t x83;
579   uint64_t x84;
580   uint64_t x85;
581   uint64_t x86;
582   uint64_t x87;
583   uint64_t x88;
584   uint64_t x89;
585   uint64_t x90;
586   uint64_t x91;
587   uint64_t x92;
588   fiat_p256_uint1 x93;
589   uint64_t x94;
590   fiat_p256_uint1 x95;
591   uint64_t x96;
592   fiat_p256_uint1 x97;
593   uint64_t x98;
594   uint64_t x99;
595   fiat_p256_uint1 x100;
596   uint64_t x101;
597   fiat_p256_uint1 x102;
598   uint64_t x103;
599   fiat_p256_uint1 x104;
600   uint64_t x105;
601   fiat_p256_uint1 x106;
602   uint64_t x107;
603   fiat_p256_uint1 x108;
604   uint64_t x109;
605   uint64_t x110;
606   uint64_t x111;
607   uint64_t x112;
608   uint64_t x113;
609   uint64_t x114;
610   uint64_t x115;
611   fiat_p256_uint1 x116;
612   uint64_t x117;
613   uint64_t x118;
614   fiat_p256_uint1 x119;
615   uint64_t x120;
616   fiat_p256_uint1 x121;
617   uint64_t x122;
618   fiat_p256_uint1 x123;
619   uint64_t x124;
620   fiat_p256_uint1 x125;
621   uint64_t x126;
622   fiat_p256_uint1 x127;
623   uint64_t x128;
624   uint64_t x129;
625   uint64_t x130;
626   uint64_t x131;
627   uint64_t x132;
628   uint64_t x133;
629   uint64_t x134;
630   uint64_t x135;
631   uint64_t x136;
632   uint64_t x137;
633   fiat_p256_uint1 x138;
634   uint64_t x139;
635   fiat_p256_uint1 x140;
636   uint64_t x141;
637   fiat_p256_uint1 x142;
638   uint64_t x143;
639   uint64_t x144;
640   fiat_p256_uint1 x145;
641   uint64_t x146;
642   fiat_p256_uint1 x147;
643   uint64_t x148;
644   fiat_p256_uint1 x149;
645   uint64_t x150;
646   fiat_p256_uint1 x151;
647   uint64_t x152;
648   fiat_p256_uint1 x153;
649   uint64_t x154;
650   uint64_t x155;
651   uint64_t x156;
652   uint64_t x157;
653   uint64_t x158;
654   uint64_t x159;
655   uint64_t x160;
656   fiat_p256_uint1 x161;
657   uint64_t x162;
658   uint64_t x163;
659   fiat_p256_uint1 x164;
660   uint64_t x165;
661   fiat_p256_uint1 x166;
662   uint64_t x167;
663   fiat_p256_uint1 x168;
664   uint64_t x169;
665   fiat_p256_uint1 x170;
666   uint64_t x171;
667   fiat_p256_uint1 x172;
668   uint64_t x173;
669   uint64_t x174;
670   fiat_p256_uint1 x175;
671   uint64_t x176;
672   fiat_p256_uint1 x177;
673   uint64_t x178;
674   fiat_p256_uint1 x179;
675   uint64_t x180;
676   fiat_p256_uint1 x181;
677   uint64_t x182;
678   fiat_p256_uint1 x183;
679   uint64_t x184;
680   uint64_t x185;
681   uint64_t x186;
682   uint64_t x187;
683   x1 = (arg1[1]);
684   x2 = (arg1[2]);
685   x3 = (arg1[3]);
686   x4 = (arg1[0]);
687   fiat_p256_mulx_u64(&x5, &x6, x4, (arg1[3]));
688   fiat_p256_mulx_u64(&x7, &x8, x4, (arg1[2]));
689   fiat_p256_mulx_u64(&x9, &x10, x4, (arg1[1]));
690   fiat_p256_mulx_u64(&x11, &x12, x4, (arg1[0]));
691   fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
692   fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
693   fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
694   x19 = (x18 + x6);
695   fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
696   fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
697   fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
698   fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
699   x28 = (x27 + x23);
700   fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
701   fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
702   fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
703   fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
704   fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
705   fiat_p256_mulx_u64(&x39, &x40, x1, (arg1[3]));
706   fiat_p256_mulx_u64(&x41, &x42, x1, (arg1[2]));
707   fiat_p256_mulx_u64(&x43, &x44, x1, (arg1[1]));
708   fiat_p256_mulx_u64(&x45, &x46, x1, (arg1[0]));
709   fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
710   fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
711   fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
712   x53 = (x52 + x40);
713   fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
714   fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
715   fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
716   fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
717   fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
718   fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
719   fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
720   fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
721   fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
722   x72 = (x71 + x67);
723   fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
724   fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
725   fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
726   fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
727   fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
728   x83 = ((uint64_t)x82 + x63);
729   fiat_p256_mulx_u64(&x84, &x85, x2, (arg1[3]));
730   fiat_p256_mulx_u64(&x86, &x87, x2, (arg1[2]));
731   fiat_p256_mulx_u64(&x88, &x89, x2, (arg1[1]));
732   fiat_p256_mulx_u64(&x90, &x91, x2, (arg1[0]));
733   fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
734   fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
735   fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
736   x98 = (x97 + x85);
737   fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
738   fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
739   fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
740   fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
741   fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
742   fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
743   fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
744   fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
745   fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
746   x117 = (x116 + x112);
747   fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
748   fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
749   fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
750   fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
751   fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
752   x128 = ((uint64_t)x127 + x108);
753   fiat_p256_mulx_u64(&x129, &x130, x3, (arg1[3]));
754   fiat_p256_mulx_u64(&x131, &x132, x3, (arg1[2]));
755   fiat_p256_mulx_u64(&x133, &x134, x3, (arg1[1]));
756   fiat_p256_mulx_u64(&x135, &x136, x3, (arg1[0]));
757   fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
758   fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
759   fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
760   x143 = (x142 + x130);
761   fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
762   fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
763   fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
764   fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
765   fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
766   fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
767   fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
768   fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
769   fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
770   x162 = (x161 + x157);
771   fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
772   fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
773   fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
774   fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
775   fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
776   x173 = ((uint64_t)x172 + x153);
777   fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
778   fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
779   fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
780   fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
781   fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
782   fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
783   fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
784   fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
785   fiat_p256_cmovznz_u64(&x187, x183, x180, x171);
786   out1[0] = x184;
787   out1[1] = x185;
788   out1[2] = x186;
789   out1[3] = x187;
790 }
791 
792 /*
793  * The function fiat_p256_add adds two field elements in the Montgomery domain.
794  *
795  * Preconditions:
796  *   0 ≤ eval arg1 < m
797  *   0 ≤ eval arg2 < m
798  * Postconditions:
799  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
800  *   0 ≤ eval out1 < m
801  *
802  */
fiat_p256_add(fiat_p256_montgomery_domain_field_element out1,const fiat_p256_montgomery_domain_field_element arg1,const fiat_p256_montgomery_domain_field_element arg2)803 static FIAT_P256_FIAT_INLINE void fiat_p256_add(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) {
804   uint64_t x1;
805   fiat_p256_uint1 x2;
806   uint64_t x3;
807   fiat_p256_uint1 x4;
808   uint64_t x5;
809   fiat_p256_uint1 x6;
810   uint64_t x7;
811   fiat_p256_uint1 x8;
812   uint64_t x9;
813   fiat_p256_uint1 x10;
814   uint64_t x11;
815   fiat_p256_uint1 x12;
816   uint64_t x13;
817   fiat_p256_uint1 x14;
818   uint64_t x15;
819   fiat_p256_uint1 x16;
820   uint64_t x17;
821   fiat_p256_uint1 x18;
822   uint64_t x19;
823   uint64_t x20;
824   uint64_t x21;
825   uint64_t x22;
826   fiat_p256_addcarryx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
827   fiat_p256_addcarryx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
828   fiat_p256_addcarryx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
829   fiat_p256_addcarryx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
830   fiat_p256_subborrowx_u64(&x9, &x10, 0x0, x1, UINT64_C(0xffffffffffffffff));
831   fiat_p256_subborrowx_u64(&x11, &x12, x10, x3, UINT32_C(0xffffffff));
832   fiat_p256_subborrowx_u64(&x13, &x14, x12, x5, 0x0);
833   fiat_p256_subborrowx_u64(&x15, &x16, x14, x7, UINT64_C(0xffffffff00000001));
834   fiat_p256_subborrowx_u64(&x17, &x18, x16, x8, 0x0);
835   fiat_p256_cmovznz_u64(&x19, x18, x9, x1);
836   fiat_p256_cmovznz_u64(&x20, x18, x11, x3);
837   fiat_p256_cmovznz_u64(&x21, x18, x13, x5);
838   fiat_p256_cmovznz_u64(&x22, x18, x15, x7);
839   out1[0] = x19;
840   out1[1] = x20;
841   out1[2] = x21;
842   out1[3] = x22;
843 }
844 
845 /*
846  * The function fiat_p256_sub subtracts two field elements in the Montgomery domain.
847  *
848  * Preconditions:
849  *   0 ≤ eval arg1 < m
850  *   0 ≤ eval arg2 < m
851  * Postconditions:
852  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
853  *   0 ≤ eval out1 < m
854  *
855  */
fiat_p256_sub(fiat_p256_montgomery_domain_field_element out1,const fiat_p256_montgomery_domain_field_element arg1,const fiat_p256_montgomery_domain_field_element arg2)856 static FIAT_P256_FIAT_INLINE void fiat_p256_sub(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) {
857   uint64_t x1;
858   fiat_p256_uint1 x2;
859   uint64_t x3;
860   fiat_p256_uint1 x4;
861   uint64_t x5;
862   fiat_p256_uint1 x6;
863   uint64_t x7;
864   fiat_p256_uint1 x8;
865   uint64_t x9;
866   uint64_t x10;
867   fiat_p256_uint1 x11;
868   uint64_t x12;
869   fiat_p256_uint1 x13;
870   uint64_t x14;
871   fiat_p256_uint1 x15;
872   uint64_t x16;
873   fiat_p256_uint1 x17;
874   fiat_p256_subborrowx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
875   fiat_p256_subborrowx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
876   fiat_p256_subborrowx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
877   fiat_p256_subborrowx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
878   fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
879   fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x9);
880   fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
881   fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
882   fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001)));
883   out1[0] = x10;
884   out1[1] = x12;
885   out1[2] = x14;
886   out1[3] = x16;
887 }
888 
889 /*
890  * The function fiat_p256_opp negates a field element in the Montgomery domain.
891  *
892  * Preconditions:
893  *   0 ≤ eval arg1 < m
894  * Postconditions:
895  *   eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
896  *   0 ≤ eval out1 < m
897  *
898  */
fiat_p256_opp(fiat_p256_montgomery_domain_field_element out1,const fiat_p256_montgomery_domain_field_element arg1)899 static FIAT_P256_FIAT_INLINE void fiat_p256_opp(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) {
900   uint64_t x1;
901   fiat_p256_uint1 x2;
902   uint64_t x3;
903   fiat_p256_uint1 x4;
904   uint64_t x5;
905   fiat_p256_uint1 x6;
906   uint64_t x7;
907   fiat_p256_uint1 x8;
908   uint64_t x9;
909   uint64_t x10;
910   fiat_p256_uint1 x11;
911   uint64_t x12;
912   fiat_p256_uint1 x13;
913   uint64_t x14;
914   fiat_p256_uint1 x15;
915   uint64_t x16;
916   fiat_p256_uint1 x17;
917   fiat_p256_subborrowx_u64(&x1, &x2, 0x0, 0x0, (arg1[0]));
918   fiat_p256_subborrowx_u64(&x3, &x4, x2, 0x0, (arg1[1]));
919   fiat_p256_subborrowx_u64(&x5, &x6, x4, 0x0, (arg1[2]));
920   fiat_p256_subborrowx_u64(&x7, &x8, x6, 0x0, (arg1[3]));
921   fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
922   fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x9);
923   fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
924   fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
925   fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001)));
926   out1[0] = x10;
927   out1[1] = x12;
928   out1[2] = x14;
929   out1[3] = x16;
930 }
931 
932 /*
933  * The function fiat_p256_from_montgomery translates a field element out of the Montgomery domain.
934  *
935  * Preconditions:
936  *   0 ≤ eval arg1 < m
937  * Postconditions:
938  *   eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^4) mod m
939  *   0 ≤ eval out1 < m
940  *
941  */
fiat_p256_from_montgomery(fiat_p256_non_montgomery_domain_field_element out1,const fiat_p256_montgomery_domain_field_element arg1)942 static FIAT_P256_FIAT_INLINE void fiat_p256_from_montgomery(fiat_p256_non_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) {
943   uint64_t x1;
944   uint64_t x2;
945   uint64_t x3;
946   uint64_t x4;
947   uint64_t x5;
948   uint64_t x6;
949   uint64_t x7;
950   uint64_t x8;
951   fiat_p256_uint1 x9;
952   uint64_t x10;
953   fiat_p256_uint1 x11;
954   uint64_t x12;
955   fiat_p256_uint1 x13;
956   uint64_t x14;
957   fiat_p256_uint1 x15;
958   uint64_t x16;
959   uint64_t x17;
960   uint64_t x18;
961   uint64_t x19;
962   uint64_t x20;
963   uint64_t x21;
964   uint64_t x22;
965   fiat_p256_uint1 x23;
966   uint64_t x24;
967   fiat_p256_uint1 x25;
968   uint64_t x26;
969   fiat_p256_uint1 x27;
970   uint64_t x28;
971   fiat_p256_uint1 x29;
972   uint64_t x30;
973   fiat_p256_uint1 x31;
974   uint64_t x32;
975   fiat_p256_uint1 x33;
976   uint64_t x34;
977   fiat_p256_uint1 x35;
978   uint64_t x36;
979   fiat_p256_uint1 x37;
980   uint64_t x38;
981   uint64_t x39;
982   uint64_t x40;
983   uint64_t x41;
984   uint64_t x42;
985   uint64_t x43;
986   uint64_t x44;
987   fiat_p256_uint1 x45;
988   uint64_t x46;
989   fiat_p256_uint1 x47;
990   uint64_t x48;
991   fiat_p256_uint1 x49;
992   uint64_t x50;
993   fiat_p256_uint1 x51;
994   uint64_t x52;
995   fiat_p256_uint1 x53;
996   uint64_t x54;
997   fiat_p256_uint1 x55;
998   uint64_t x56;
999   fiat_p256_uint1 x57;
1000   uint64_t x58;
1001   fiat_p256_uint1 x59;
1002   uint64_t x60;
1003   uint64_t x61;
1004   uint64_t x62;
1005   uint64_t x63;
1006   uint64_t x64;
1007   uint64_t x65;
1008   uint64_t x66;
1009   fiat_p256_uint1 x67;
1010   uint64_t x68;
1011   fiat_p256_uint1 x69;
1012   uint64_t x70;
1013   fiat_p256_uint1 x71;
1014   uint64_t x72;
1015   fiat_p256_uint1 x73;
1016   uint64_t x74;
1017   fiat_p256_uint1 x75;
1018   uint64_t x76;
1019   uint64_t x77;
1020   fiat_p256_uint1 x78;
1021   uint64_t x79;
1022   fiat_p256_uint1 x80;
1023   uint64_t x81;
1024   fiat_p256_uint1 x82;
1025   uint64_t x83;
1026   fiat_p256_uint1 x84;
1027   uint64_t x85;
1028   fiat_p256_uint1 x86;
1029   uint64_t x87;
1030   uint64_t x88;
1031   uint64_t x89;
1032   uint64_t x90;
1033   x1 = (arg1[0]);
1034   fiat_p256_mulx_u64(&x2, &x3, x1, UINT64_C(0xffffffff00000001));
1035   fiat_p256_mulx_u64(&x4, &x5, x1, UINT32_C(0xffffffff));
1036   fiat_p256_mulx_u64(&x6, &x7, x1, UINT64_C(0xffffffffffffffff));
1037   fiat_p256_addcarryx_u64(&x8, &x9, 0x0, x7, x4);
1038   fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x6);
1039   fiat_p256_addcarryx_u64(&x12, &x13, x11, 0x0, x8);
1040   fiat_p256_addcarryx_u64(&x14, &x15, 0x0, x12, (arg1[1]));
1041   fiat_p256_mulx_u64(&x16, &x17, x14, UINT64_C(0xffffffff00000001));
1042   fiat_p256_mulx_u64(&x18, &x19, x14, UINT32_C(0xffffffff));
1043   fiat_p256_mulx_u64(&x20, &x21, x14, UINT64_C(0xffffffffffffffff));
1044   fiat_p256_addcarryx_u64(&x22, &x23, 0x0, x21, x18);
1045   fiat_p256_addcarryx_u64(&x24, &x25, 0x0, x14, x20);
1046   fiat_p256_addcarryx_u64(&x26, &x27, x25, (x15 + (x13 + (x9 + x5))), x22);
1047   fiat_p256_addcarryx_u64(&x28, &x29, x27, x2, (x23 + x19));
1048   fiat_p256_addcarryx_u64(&x30, &x31, x29, x3, x16);
1049   fiat_p256_addcarryx_u64(&x32, &x33, 0x0, x26, (arg1[2]));
1050   fiat_p256_addcarryx_u64(&x34, &x35, x33, x28, 0x0);
1051   fiat_p256_addcarryx_u64(&x36, &x37, x35, x30, 0x0);
1052   fiat_p256_mulx_u64(&x38, &x39, x32, UINT64_C(0xffffffff00000001));
1053   fiat_p256_mulx_u64(&x40, &x41, x32, UINT32_C(0xffffffff));
1054   fiat_p256_mulx_u64(&x42, &x43, x32, UINT64_C(0xffffffffffffffff));
1055   fiat_p256_addcarryx_u64(&x44, &x45, 0x0, x43, x40);
1056   fiat_p256_addcarryx_u64(&x46, &x47, 0x0, x32, x42);
1057   fiat_p256_addcarryx_u64(&x48, &x49, x47, x34, x44);
1058   fiat_p256_addcarryx_u64(&x50, &x51, x49, x36, (x45 + x41));
1059   fiat_p256_addcarryx_u64(&x52, &x53, x51, (x37 + (x31 + x17)), x38);
1060   fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x48, (arg1[3]));
1061   fiat_p256_addcarryx_u64(&x56, &x57, x55, x50, 0x0);
1062   fiat_p256_addcarryx_u64(&x58, &x59, x57, x52, 0x0);
1063   fiat_p256_mulx_u64(&x60, &x61, x54, UINT64_C(0xffffffff00000001));
1064   fiat_p256_mulx_u64(&x62, &x63, x54, UINT32_C(0xffffffff));
1065   fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffffffffffff));
1066   fiat_p256_addcarryx_u64(&x66, &x67, 0x0, x65, x62);
1067   fiat_p256_addcarryx_u64(&x68, &x69, 0x0, x54, x64);
1068   fiat_p256_addcarryx_u64(&x70, &x71, x69, x56, x66);
1069   fiat_p256_addcarryx_u64(&x72, &x73, x71, x58, (x67 + x63));
1070   fiat_p256_addcarryx_u64(&x74, &x75, x73, (x59 + (x53 + x39)), x60);
1071   x76 = (x75 + x61);
1072   fiat_p256_subborrowx_u64(&x77, &x78, 0x0, x70, UINT64_C(0xffffffffffffffff));
1073   fiat_p256_subborrowx_u64(&x79, &x80, x78, x72, UINT32_C(0xffffffff));
1074   fiat_p256_subborrowx_u64(&x81, &x82, x80, x74, 0x0);
1075   fiat_p256_subborrowx_u64(&x83, &x84, x82, x76, UINT64_C(0xffffffff00000001));
1076   fiat_p256_subborrowx_u64(&x85, &x86, x84, 0x0, 0x0);
1077   fiat_p256_cmovznz_u64(&x87, x86, x77, x70);
1078   fiat_p256_cmovznz_u64(&x88, x86, x79, x72);
1079   fiat_p256_cmovznz_u64(&x89, x86, x81, x74);
1080   fiat_p256_cmovznz_u64(&x90, x86, x83, x76);
1081   out1[0] = x87;
1082   out1[1] = x88;
1083   out1[2] = x89;
1084   out1[3] = x90;
1085 }
1086 
1087 /*
1088  * The function fiat_p256_to_montgomery translates a field element into the Montgomery domain.
1089  *
1090  * Preconditions:
1091  *   0 ≤ eval arg1 < m
1092  * Postconditions:
1093  *   eval (from_montgomery out1) mod m = eval arg1 mod m
1094  *   0 ≤ eval out1 < m
1095  *
1096  */
fiat_p256_to_montgomery(fiat_p256_montgomery_domain_field_element out1,const fiat_p256_non_montgomery_domain_field_element arg1)1097 static FIAT_P256_FIAT_INLINE void fiat_p256_to_montgomery(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_non_montgomery_domain_field_element arg1) {
1098   uint64_t x1;
1099   uint64_t x2;
1100   uint64_t x3;
1101   uint64_t x4;
1102   uint64_t x5;
1103   uint64_t x6;
1104   uint64_t x7;
1105   uint64_t x8;
1106   uint64_t x9;
1107   uint64_t x10;
1108   uint64_t x11;
1109   uint64_t x12;
1110   uint64_t x13;
1111   fiat_p256_uint1 x14;
1112   uint64_t x15;
1113   fiat_p256_uint1 x16;
1114   uint64_t x17;
1115   fiat_p256_uint1 x18;
1116   uint64_t x19;
1117   uint64_t x20;
1118   uint64_t x21;
1119   uint64_t x22;
1120   uint64_t x23;
1121   uint64_t x24;
1122   uint64_t x25;
1123   fiat_p256_uint1 x26;
1124   uint64_t x27;
1125   fiat_p256_uint1 x28;
1126   uint64_t x29;
1127   fiat_p256_uint1 x30;
1128   uint64_t x31;
1129   fiat_p256_uint1 x32;
1130   uint64_t x33;
1131   fiat_p256_uint1 x34;
1132   uint64_t x35;
1133   fiat_p256_uint1 x36;
1134   uint64_t x37;
1135   uint64_t x38;
1136   uint64_t x39;
1137   uint64_t x40;
1138   uint64_t x41;
1139   uint64_t x42;
1140   uint64_t x43;
1141   uint64_t x44;
1142   uint64_t x45;
1143   fiat_p256_uint1 x46;
1144   uint64_t x47;
1145   fiat_p256_uint1 x48;
1146   uint64_t x49;
1147   fiat_p256_uint1 x50;
1148   uint64_t x51;
1149   fiat_p256_uint1 x52;
1150   uint64_t x53;
1151   fiat_p256_uint1 x54;
1152   uint64_t x55;
1153   fiat_p256_uint1 x56;
1154   uint64_t x57;
1155   fiat_p256_uint1 x58;
1156   uint64_t x59;
1157   uint64_t x60;
1158   uint64_t x61;
1159   uint64_t x62;
1160   uint64_t x63;
1161   uint64_t x64;
1162   uint64_t x65;
1163   fiat_p256_uint1 x66;
1164   uint64_t x67;
1165   fiat_p256_uint1 x68;
1166   uint64_t x69;
1167   fiat_p256_uint1 x70;
1168   uint64_t x71;
1169   fiat_p256_uint1 x72;
1170   uint64_t x73;
1171   fiat_p256_uint1 x74;
1172   uint64_t x75;
1173   fiat_p256_uint1 x76;
1174   uint64_t x77;
1175   uint64_t x78;
1176   uint64_t x79;
1177   uint64_t x80;
1178   uint64_t x81;
1179   uint64_t x82;
1180   uint64_t x83;
1181   uint64_t x84;
1182   uint64_t x85;
1183   fiat_p256_uint1 x86;
1184   uint64_t x87;
1185   fiat_p256_uint1 x88;
1186   uint64_t x89;
1187   fiat_p256_uint1 x90;
1188   uint64_t x91;
1189   fiat_p256_uint1 x92;
1190   uint64_t x93;
1191   fiat_p256_uint1 x94;
1192   uint64_t x95;
1193   fiat_p256_uint1 x96;
1194   uint64_t x97;
1195   fiat_p256_uint1 x98;
1196   uint64_t x99;
1197   uint64_t x100;
1198   uint64_t x101;
1199   uint64_t x102;
1200   uint64_t x103;
1201   uint64_t x104;
1202   uint64_t x105;
1203   fiat_p256_uint1 x106;
1204   uint64_t x107;
1205   fiat_p256_uint1 x108;
1206   uint64_t x109;
1207   fiat_p256_uint1 x110;
1208   uint64_t x111;
1209   fiat_p256_uint1 x112;
1210   uint64_t x113;
1211   fiat_p256_uint1 x114;
1212   uint64_t x115;
1213   fiat_p256_uint1 x116;
1214   uint64_t x117;
1215   uint64_t x118;
1216   uint64_t x119;
1217   uint64_t x120;
1218   uint64_t x121;
1219   uint64_t x122;
1220   uint64_t x123;
1221   uint64_t x124;
1222   uint64_t x125;
1223   fiat_p256_uint1 x126;
1224   uint64_t x127;
1225   fiat_p256_uint1 x128;
1226   uint64_t x129;
1227   fiat_p256_uint1 x130;
1228   uint64_t x131;
1229   fiat_p256_uint1 x132;
1230   uint64_t x133;
1231   fiat_p256_uint1 x134;
1232   uint64_t x135;
1233   fiat_p256_uint1 x136;
1234   uint64_t x137;
1235   fiat_p256_uint1 x138;
1236   uint64_t x139;
1237   uint64_t x140;
1238   uint64_t x141;
1239   uint64_t x142;
1240   uint64_t x143;
1241   uint64_t x144;
1242   uint64_t x145;
1243   fiat_p256_uint1 x146;
1244   uint64_t x147;
1245   fiat_p256_uint1 x148;
1246   uint64_t x149;
1247   fiat_p256_uint1 x150;
1248   uint64_t x151;
1249   fiat_p256_uint1 x152;
1250   uint64_t x153;
1251   fiat_p256_uint1 x154;
1252   uint64_t x155;
1253   fiat_p256_uint1 x156;
1254   uint64_t x157;
1255   fiat_p256_uint1 x158;
1256   uint64_t x159;
1257   fiat_p256_uint1 x160;
1258   uint64_t x161;
1259   fiat_p256_uint1 x162;
1260   uint64_t x163;
1261   fiat_p256_uint1 x164;
1262   uint64_t x165;
1263   fiat_p256_uint1 x166;
1264   uint64_t x167;
1265   uint64_t x168;
1266   uint64_t x169;
1267   uint64_t x170;
1268   x1 = (arg1[1]);
1269   x2 = (arg1[2]);
1270   x3 = (arg1[3]);
1271   x4 = (arg1[0]);
1272   fiat_p256_mulx_u64(&x5, &x6, x4, UINT64_C(0x4fffffffd));
1273   fiat_p256_mulx_u64(&x7, &x8, x4, UINT64_C(0xfffffffffffffffe));
1274   fiat_p256_mulx_u64(&x9, &x10, x4, UINT64_C(0xfffffffbffffffff));
1275   fiat_p256_mulx_u64(&x11, &x12, x4, 0x3);
1276   fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
1277   fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
1278   fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
1279   fiat_p256_mulx_u64(&x19, &x20, x11, UINT64_C(0xffffffff00000001));
1280   fiat_p256_mulx_u64(&x21, &x22, x11, UINT32_C(0xffffffff));
1281   fiat_p256_mulx_u64(&x23, &x24, x11, UINT64_C(0xffffffffffffffff));
1282   fiat_p256_addcarryx_u64(&x25, &x26, 0x0, x24, x21);
1283   fiat_p256_addcarryx_u64(&x27, &x28, 0x0, x11, x23);
1284   fiat_p256_addcarryx_u64(&x29, &x30, x28, x13, x25);
1285   fiat_p256_addcarryx_u64(&x31, &x32, x30, x15, (x26 + x22));
1286   fiat_p256_addcarryx_u64(&x33, &x34, x32, x17, x19);
1287   fiat_p256_addcarryx_u64(&x35, &x36, x34, (x18 + x6), x20);
1288   fiat_p256_mulx_u64(&x37, &x38, x1, UINT64_C(0x4fffffffd));
1289   fiat_p256_mulx_u64(&x39, &x40, x1, UINT64_C(0xfffffffffffffffe));
1290   fiat_p256_mulx_u64(&x41, &x42, x1, UINT64_C(0xfffffffbffffffff));
1291   fiat_p256_mulx_u64(&x43, &x44, x1, 0x3);
1292   fiat_p256_addcarryx_u64(&x45, &x46, 0x0, x44, x41);
1293   fiat_p256_addcarryx_u64(&x47, &x48, x46, x42, x39);
1294   fiat_p256_addcarryx_u64(&x49, &x50, x48, x40, x37);
1295   fiat_p256_addcarryx_u64(&x51, &x52, 0x0, x29, x43);
1296   fiat_p256_addcarryx_u64(&x53, &x54, x52, x31, x45);
1297   fiat_p256_addcarryx_u64(&x55, &x56, x54, x33, x47);
1298   fiat_p256_addcarryx_u64(&x57, &x58, x56, x35, x49);
1299   fiat_p256_mulx_u64(&x59, &x60, x51, UINT64_C(0xffffffff00000001));
1300   fiat_p256_mulx_u64(&x61, &x62, x51, UINT32_C(0xffffffff));
1301   fiat_p256_mulx_u64(&x63, &x64, x51, UINT64_C(0xffffffffffffffff));
1302   fiat_p256_addcarryx_u64(&x65, &x66, 0x0, x64, x61);
1303   fiat_p256_addcarryx_u64(&x67, &x68, 0x0, x51, x63);
1304   fiat_p256_addcarryx_u64(&x69, &x70, x68, x53, x65);
1305   fiat_p256_addcarryx_u64(&x71, &x72, x70, x55, (x66 + x62));
1306   fiat_p256_addcarryx_u64(&x73, &x74, x72, x57, x59);
1307   fiat_p256_addcarryx_u64(&x75, &x76, x74, (((uint64_t)x58 + x36) + (x50 + x38)), x60);
1308   fiat_p256_mulx_u64(&x77, &x78, x2, UINT64_C(0x4fffffffd));
1309   fiat_p256_mulx_u64(&x79, &x80, x2, UINT64_C(0xfffffffffffffffe));
1310   fiat_p256_mulx_u64(&x81, &x82, x2, UINT64_C(0xfffffffbffffffff));
1311   fiat_p256_mulx_u64(&x83, &x84, x2, 0x3);
1312   fiat_p256_addcarryx_u64(&x85, &x86, 0x0, x84, x81);
1313   fiat_p256_addcarryx_u64(&x87, &x88, x86, x82, x79);
1314   fiat_p256_addcarryx_u64(&x89, &x90, x88, x80, x77);
1315   fiat_p256_addcarryx_u64(&x91, &x92, 0x0, x69, x83);
1316   fiat_p256_addcarryx_u64(&x93, &x94, x92, x71, x85);
1317   fiat_p256_addcarryx_u64(&x95, &x96, x94, x73, x87);
1318   fiat_p256_addcarryx_u64(&x97, &x98, x96, x75, x89);
1319   fiat_p256_mulx_u64(&x99, &x100, x91, UINT64_C(0xffffffff00000001));
1320   fiat_p256_mulx_u64(&x101, &x102, x91, UINT32_C(0xffffffff));
1321   fiat_p256_mulx_u64(&x103, &x104, x91, UINT64_C(0xffffffffffffffff));
1322   fiat_p256_addcarryx_u64(&x105, &x106, 0x0, x104, x101);
1323   fiat_p256_addcarryx_u64(&x107, &x108, 0x0, x91, x103);
1324   fiat_p256_addcarryx_u64(&x109, &x110, x108, x93, x105);
1325   fiat_p256_addcarryx_u64(&x111, &x112, x110, x95, (x106 + x102));
1326   fiat_p256_addcarryx_u64(&x113, &x114, x112, x97, x99);
1327   fiat_p256_addcarryx_u64(&x115, &x116, x114, (((uint64_t)x98 + x76) + (x90 + x78)), x100);
1328   fiat_p256_mulx_u64(&x117, &x118, x3, UINT64_C(0x4fffffffd));
1329   fiat_p256_mulx_u64(&x119, &x120, x3, UINT64_C(0xfffffffffffffffe));
1330   fiat_p256_mulx_u64(&x121, &x122, x3, UINT64_C(0xfffffffbffffffff));
1331   fiat_p256_mulx_u64(&x123, &x124, x3, 0x3);
1332   fiat_p256_addcarryx_u64(&x125, &x126, 0x0, x124, x121);
1333   fiat_p256_addcarryx_u64(&x127, &x128, x126, x122, x119);
1334   fiat_p256_addcarryx_u64(&x129, &x130, x128, x120, x117);
1335   fiat_p256_addcarryx_u64(&x131, &x132, 0x0, x109, x123);
1336   fiat_p256_addcarryx_u64(&x133, &x134, x132, x111, x125);
1337   fiat_p256_addcarryx_u64(&x135, &x136, x134, x113, x127);
1338   fiat_p256_addcarryx_u64(&x137, &x138, x136, x115, x129);
1339   fiat_p256_mulx_u64(&x139, &x140, x131, UINT64_C(0xffffffff00000001));
1340   fiat_p256_mulx_u64(&x141, &x142, x131, UINT32_C(0xffffffff));
1341   fiat_p256_mulx_u64(&x143, &x144, x131, UINT64_C(0xffffffffffffffff));
1342   fiat_p256_addcarryx_u64(&x145, &x146, 0x0, x144, x141);
1343   fiat_p256_addcarryx_u64(&x147, &x148, 0x0, x131, x143);
1344   fiat_p256_addcarryx_u64(&x149, &x150, x148, x133, x145);
1345   fiat_p256_addcarryx_u64(&x151, &x152, x150, x135, (x146 + x142));
1346   fiat_p256_addcarryx_u64(&x153, &x154, x152, x137, x139);
1347   fiat_p256_addcarryx_u64(&x155, &x156, x154, (((uint64_t)x138 + x116) + (x130 + x118)), x140);
1348   fiat_p256_subborrowx_u64(&x157, &x158, 0x0, x149, UINT64_C(0xffffffffffffffff));
1349   fiat_p256_subborrowx_u64(&x159, &x160, x158, x151, UINT32_C(0xffffffff));
1350   fiat_p256_subborrowx_u64(&x161, &x162, x160, x153, 0x0);
1351   fiat_p256_subborrowx_u64(&x163, &x164, x162, x155, UINT64_C(0xffffffff00000001));
1352   fiat_p256_subborrowx_u64(&x165, &x166, x164, x156, 0x0);
1353   fiat_p256_cmovznz_u64(&x167, x166, x157, x149);
1354   fiat_p256_cmovznz_u64(&x168, x166, x159, x151);
1355   fiat_p256_cmovznz_u64(&x169, x166, x161, x153);
1356   fiat_p256_cmovznz_u64(&x170, x166, x163, x155);
1357   out1[0] = x167;
1358   out1[1] = x168;
1359   out1[2] = x169;
1360   out1[3] = x170;
1361 }
1362 
1363 /*
1364  * The function fiat_p256_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
1365  *
1366  * Preconditions:
1367  *   0 ≤ eval arg1 < m
1368  * Postconditions:
1369  *   out1 = 0 ↔ eval (from_montgomery arg1) mod m = 0
1370  *
1371  * Input Bounds:
1372  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1373  * Output Bounds:
1374  *   out1: [0x0 ~> 0xffffffffffffffff]
1375  */
fiat_p256_nonzero(uint64_t * out1,const uint64_t arg1[4])1376 static FIAT_P256_FIAT_INLINE void fiat_p256_nonzero(uint64_t* out1, const uint64_t arg1[4]) {
1377   uint64_t x1;
1378   x1 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | (arg1[3]))));
1379   *out1 = x1;
1380 }
1381 
1382 /*
1383  * The function fiat_p256_selectznz is a multi-limb conditional select.
1384  *
1385  * Postconditions:
1386  *   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
1387  *
1388  * Input Bounds:
1389  *   arg1: [0x0 ~> 0x1]
1390  *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1391  *   arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1392  * Output Bounds:
1393  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1394  */
fiat_p256_selectznz(uint64_t out1[4],fiat_p256_uint1 arg1,const uint64_t arg2[4],const uint64_t arg3[4])1395 static FIAT_P256_FIAT_INLINE void fiat_p256_selectznz(uint64_t out1[4], fiat_p256_uint1 arg1, const uint64_t arg2[4], const uint64_t arg3[4]) {
1396   uint64_t x1;
1397   uint64_t x2;
1398   uint64_t x3;
1399   uint64_t x4;
1400   fiat_p256_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0]));
1401   fiat_p256_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1]));
1402   fiat_p256_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2]));
1403   fiat_p256_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3]));
1404   out1[0] = x1;
1405   out1[1] = x2;
1406   out1[2] = x3;
1407   out1[3] = x4;
1408 }
1409 
1410 /*
1411  * The function fiat_p256_to_bytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order.
1412  *
1413  * Preconditions:
1414  *   0 ≤ eval arg1 < m
1415  * Postconditions:
1416  *   out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31]
1417  *
1418  * Input Bounds:
1419  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1420  * Output Bounds:
1421  *   out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
1422  */
fiat_p256_to_bytes(uint8_t out1[32],const uint64_t arg1[4])1423 static FIAT_P256_FIAT_INLINE void fiat_p256_to_bytes(uint8_t out1[32], const uint64_t arg1[4]) {
1424   uint64_t x1;
1425   uint64_t x2;
1426   uint64_t x3;
1427   uint64_t x4;
1428   uint8_t x5;
1429   uint64_t x6;
1430   uint8_t x7;
1431   uint64_t x8;
1432   uint8_t x9;
1433   uint64_t x10;
1434   uint8_t x11;
1435   uint64_t x12;
1436   uint8_t x13;
1437   uint64_t x14;
1438   uint8_t x15;
1439   uint64_t x16;
1440   uint8_t x17;
1441   uint8_t x18;
1442   uint8_t x19;
1443   uint64_t x20;
1444   uint8_t x21;
1445   uint64_t x22;
1446   uint8_t x23;
1447   uint64_t x24;
1448   uint8_t x25;
1449   uint64_t x26;
1450   uint8_t x27;
1451   uint64_t x28;
1452   uint8_t x29;
1453   uint64_t x30;
1454   uint8_t x31;
1455   uint8_t x32;
1456   uint8_t x33;
1457   uint64_t x34;
1458   uint8_t x35;
1459   uint64_t x36;
1460   uint8_t x37;
1461   uint64_t x38;
1462   uint8_t x39;
1463   uint64_t x40;
1464   uint8_t x41;
1465   uint64_t x42;
1466   uint8_t x43;
1467   uint64_t x44;
1468   uint8_t x45;
1469   uint8_t x46;
1470   uint8_t x47;
1471   uint64_t x48;
1472   uint8_t x49;
1473   uint64_t x50;
1474   uint8_t x51;
1475   uint64_t x52;
1476   uint8_t x53;
1477   uint64_t x54;
1478   uint8_t x55;
1479   uint64_t x56;
1480   uint8_t x57;
1481   uint64_t x58;
1482   uint8_t x59;
1483   uint8_t x60;
1484   x1 = (arg1[3]);
1485   x2 = (arg1[2]);
1486   x3 = (arg1[1]);
1487   x4 = (arg1[0]);
1488   x5 = (uint8_t)(x4 & UINT8_C(0xff));
1489   x6 = (x4 >> 8);
1490   x7 = (uint8_t)(x6 & UINT8_C(0xff));
1491   x8 = (x6 >> 8);
1492   x9 = (uint8_t)(x8 & UINT8_C(0xff));
1493   x10 = (x8 >> 8);
1494   x11 = (uint8_t)(x10 & UINT8_C(0xff));
1495   x12 = (x10 >> 8);
1496   x13 = (uint8_t)(x12 & UINT8_C(0xff));
1497   x14 = (x12 >> 8);
1498   x15 = (uint8_t)(x14 & UINT8_C(0xff));
1499   x16 = (x14 >> 8);
1500   x17 = (uint8_t)(x16 & UINT8_C(0xff));
1501   x18 = (uint8_t)(x16 >> 8);
1502   x19 = (uint8_t)(x3 & UINT8_C(0xff));
1503   x20 = (x3 >> 8);
1504   x21 = (uint8_t)(x20 & UINT8_C(0xff));
1505   x22 = (x20 >> 8);
1506   x23 = (uint8_t)(x22 & UINT8_C(0xff));
1507   x24 = (x22 >> 8);
1508   x25 = (uint8_t)(x24 & UINT8_C(0xff));
1509   x26 = (x24 >> 8);
1510   x27 = (uint8_t)(x26 & UINT8_C(0xff));
1511   x28 = (x26 >> 8);
1512   x29 = (uint8_t)(x28 & UINT8_C(0xff));
1513   x30 = (x28 >> 8);
1514   x31 = (uint8_t)(x30 & UINT8_C(0xff));
1515   x32 = (uint8_t)(x30 >> 8);
1516   x33 = (uint8_t)(x2 & UINT8_C(0xff));
1517   x34 = (x2 >> 8);
1518   x35 = (uint8_t)(x34 & UINT8_C(0xff));
1519   x36 = (x34 >> 8);
1520   x37 = (uint8_t)(x36 & UINT8_C(0xff));
1521   x38 = (x36 >> 8);
1522   x39 = (uint8_t)(x38 & UINT8_C(0xff));
1523   x40 = (x38 >> 8);
1524   x41 = (uint8_t)(x40 & UINT8_C(0xff));
1525   x42 = (x40 >> 8);
1526   x43 = (uint8_t)(x42 & UINT8_C(0xff));
1527   x44 = (x42 >> 8);
1528   x45 = (uint8_t)(x44 & UINT8_C(0xff));
1529   x46 = (uint8_t)(x44 >> 8);
1530   x47 = (uint8_t)(x1 & UINT8_C(0xff));
1531   x48 = (x1 >> 8);
1532   x49 = (uint8_t)(x48 & UINT8_C(0xff));
1533   x50 = (x48 >> 8);
1534   x51 = (uint8_t)(x50 & UINT8_C(0xff));
1535   x52 = (x50 >> 8);
1536   x53 = (uint8_t)(x52 & UINT8_C(0xff));
1537   x54 = (x52 >> 8);
1538   x55 = (uint8_t)(x54 & UINT8_C(0xff));
1539   x56 = (x54 >> 8);
1540   x57 = (uint8_t)(x56 & UINT8_C(0xff));
1541   x58 = (x56 >> 8);
1542   x59 = (uint8_t)(x58 & UINT8_C(0xff));
1543   x60 = (uint8_t)(x58 >> 8);
1544   out1[0] = x5;
1545   out1[1] = x7;
1546   out1[2] = x9;
1547   out1[3] = x11;
1548   out1[4] = x13;
1549   out1[5] = x15;
1550   out1[6] = x17;
1551   out1[7] = x18;
1552   out1[8] = x19;
1553   out1[9] = x21;
1554   out1[10] = x23;
1555   out1[11] = x25;
1556   out1[12] = x27;
1557   out1[13] = x29;
1558   out1[14] = x31;
1559   out1[15] = x32;
1560   out1[16] = x33;
1561   out1[17] = x35;
1562   out1[18] = x37;
1563   out1[19] = x39;
1564   out1[20] = x41;
1565   out1[21] = x43;
1566   out1[22] = x45;
1567   out1[23] = x46;
1568   out1[24] = x47;
1569   out1[25] = x49;
1570   out1[26] = x51;
1571   out1[27] = x53;
1572   out1[28] = x55;
1573   out1[29] = x57;
1574   out1[30] = x59;
1575   out1[31] = x60;
1576 }
1577 
1578 /*
1579  * The function fiat_p256_from_bytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order.
1580  *
1581  * Preconditions:
1582  *   0 ≤ bytes_eval arg1 < m
1583  * Postconditions:
1584  *   eval out1 mod m = bytes_eval arg1 mod m
1585  *   0 ≤ eval out1 < m
1586  *
1587  * Input Bounds:
1588  *   arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
1589  * Output Bounds:
1590  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1591  */
fiat_p256_from_bytes(uint64_t out1[4],const uint8_t arg1[32])1592 static FIAT_P256_FIAT_INLINE void fiat_p256_from_bytes(uint64_t out1[4], const uint8_t arg1[32]) {
1593   uint64_t x1;
1594   uint64_t x2;
1595   uint64_t x3;
1596   uint64_t x4;
1597   uint64_t x5;
1598   uint64_t x6;
1599   uint64_t x7;
1600   uint8_t x8;
1601   uint64_t x9;
1602   uint64_t x10;
1603   uint64_t x11;
1604   uint64_t x12;
1605   uint64_t x13;
1606   uint64_t x14;
1607   uint64_t x15;
1608   uint8_t x16;
1609   uint64_t x17;
1610   uint64_t x18;
1611   uint64_t x19;
1612   uint64_t x20;
1613   uint64_t x21;
1614   uint64_t x22;
1615   uint64_t x23;
1616   uint8_t x24;
1617   uint64_t x25;
1618   uint64_t x26;
1619   uint64_t x27;
1620   uint64_t x28;
1621   uint64_t x29;
1622   uint64_t x30;
1623   uint64_t x31;
1624   uint8_t x32;
1625   uint64_t x33;
1626   uint64_t x34;
1627   uint64_t x35;
1628   uint64_t x36;
1629   uint64_t x37;
1630   uint64_t x38;
1631   uint64_t x39;
1632   uint64_t x40;
1633   uint64_t x41;
1634   uint64_t x42;
1635   uint64_t x43;
1636   uint64_t x44;
1637   uint64_t x45;
1638   uint64_t x46;
1639   uint64_t x47;
1640   uint64_t x48;
1641   uint64_t x49;
1642   uint64_t x50;
1643   uint64_t x51;
1644   uint64_t x52;
1645   uint64_t x53;
1646   uint64_t x54;
1647   uint64_t x55;
1648   uint64_t x56;
1649   uint64_t x57;
1650   uint64_t x58;
1651   uint64_t x59;
1652   uint64_t x60;
1653   x1 = ((uint64_t)(arg1[31]) << 56);
1654   x2 = ((uint64_t)(arg1[30]) << 48);
1655   x3 = ((uint64_t)(arg1[29]) << 40);
1656   x4 = ((uint64_t)(arg1[28]) << 32);
1657   x5 = ((uint64_t)(arg1[27]) << 24);
1658   x6 = ((uint64_t)(arg1[26]) << 16);
1659   x7 = ((uint64_t)(arg1[25]) << 8);
1660   x8 = (arg1[24]);
1661   x9 = ((uint64_t)(arg1[23]) << 56);
1662   x10 = ((uint64_t)(arg1[22]) << 48);
1663   x11 = ((uint64_t)(arg1[21]) << 40);
1664   x12 = ((uint64_t)(arg1[20]) << 32);
1665   x13 = ((uint64_t)(arg1[19]) << 24);
1666   x14 = ((uint64_t)(arg1[18]) << 16);
1667   x15 = ((uint64_t)(arg1[17]) << 8);
1668   x16 = (arg1[16]);
1669   x17 = ((uint64_t)(arg1[15]) << 56);
1670   x18 = ((uint64_t)(arg1[14]) << 48);
1671   x19 = ((uint64_t)(arg1[13]) << 40);
1672   x20 = ((uint64_t)(arg1[12]) << 32);
1673   x21 = ((uint64_t)(arg1[11]) << 24);
1674   x22 = ((uint64_t)(arg1[10]) << 16);
1675   x23 = ((uint64_t)(arg1[9]) << 8);
1676   x24 = (arg1[8]);
1677   x25 = ((uint64_t)(arg1[7]) << 56);
1678   x26 = ((uint64_t)(arg1[6]) << 48);
1679   x27 = ((uint64_t)(arg1[5]) << 40);
1680   x28 = ((uint64_t)(arg1[4]) << 32);
1681   x29 = ((uint64_t)(arg1[3]) << 24);
1682   x30 = ((uint64_t)(arg1[2]) << 16);
1683   x31 = ((uint64_t)(arg1[1]) << 8);
1684   x32 = (arg1[0]);
1685   x33 = (x31 + (uint64_t)x32);
1686   x34 = (x30 + x33);
1687   x35 = (x29 + x34);
1688   x36 = (x28 + x35);
1689   x37 = (x27 + x36);
1690   x38 = (x26 + x37);
1691   x39 = (x25 + x38);
1692   x40 = (x23 + (uint64_t)x24);
1693   x41 = (x22 + x40);
1694   x42 = (x21 + x41);
1695   x43 = (x20 + x42);
1696   x44 = (x19 + x43);
1697   x45 = (x18 + x44);
1698   x46 = (x17 + x45);
1699   x47 = (x15 + (uint64_t)x16);
1700   x48 = (x14 + x47);
1701   x49 = (x13 + x48);
1702   x50 = (x12 + x49);
1703   x51 = (x11 + x50);
1704   x52 = (x10 + x51);
1705   x53 = (x9 + x52);
1706   x54 = (x7 + (uint64_t)x8);
1707   x55 = (x6 + x54);
1708   x56 = (x5 + x55);
1709   x57 = (x4 + x56);
1710   x58 = (x3 + x57);
1711   x59 = (x2 + x58);
1712   x60 = (x1 + x59);
1713   out1[0] = x39;
1714   out1[1] = x46;
1715   out1[2] = x53;
1716   out1[3] = x60;
1717 }
1718 
1719 /*
1720  * The function fiat_p256_set_one returns the field element one in the Montgomery domain.
1721  *
1722  * Postconditions:
1723  *   eval (from_montgomery out1) mod m = 1 mod m
1724  *   0 ≤ eval out1 < m
1725  *
1726  */
fiat_p256_set_one(fiat_p256_montgomery_domain_field_element out1)1727 static FIAT_P256_FIAT_INLINE void fiat_p256_set_one(fiat_p256_montgomery_domain_field_element out1) {
1728   out1[0] = 0x1;
1729   out1[1] = UINT64_C(0xffffffff00000000);
1730   out1[2] = UINT64_C(0xffffffffffffffff);
1731   out1[3] = UINT32_C(0xfffffffe);
1732 }
1733 
1734 /*
1735  * The function fiat_p256_msat returns the saturated representation of the prime modulus.
1736  *
1737  * Postconditions:
1738  *   twos_complement_eval out1 = m
1739  *   0 ≤ eval out1 < m
1740  *
1741  * Output Bounds:
1742  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1743  */
fiat_p256_msat(uint64_t out1[5])1744 static FIAT_P256_FIAT_INLINE void fiat_p256_msat(uint64_t out1[5]) {
1745   out1[0] = UINT64_C(0xffffffffffffffff);
1746   out1[1] = UINT32_C(0xffffffff);
1747   out1[2] = 0x0;
1748   out1[3] = UINT64_C(0xffffffff00000001);
1749   out1[4] = 0x0;
1750 }
1751 
1752 /*
1753  * The function fiat_p256_divstep computes a divstep.
1754  *
1755  * Preconditions:
1756  *   0 ≤ eval arg4 < m
1757  *   0 ≤ eval arg5 < m
1758  * Postconditions:
1759  *   out1 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then 1 - arg1 else 1 + arg1)
1760  *   twos_complement_eval out2 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then twos_complement_eval arg3 else twos_complement_eval arg2)
1761  *   twos_complement_eval out3 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then ⌊(twos_complement_eval arg3 - twos_complement_eval arg2) / 2⌋ else ⌊(twos_complement_eval arg3 + (twos_complement_eval arg3 mod 2) * twos_complement_eval arg2) / 2⌋)
1762  *   eval (from_montgomery out4) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (2 * eval (from_montgomery arg5)) mod m else (2 * eval (from_montgomery arg4)) mod m)
1763  *   eval (from_montgomery out5) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (eval (from_montgomery arg4) - eval (from_montgomery arg4)) mod m else (eval (from_montgomery arg5) + (twos_complement_eval arg3 mod 2) * eval (from_montgomery arg4)) mod m)
1764  *   0 ≤ eval out5 < m
1765  *   0 ≤ eval out5 < m
1766  *   0 ≤ eval out2 < m
1767  *   0 ≤ eval out3 < m
1768  *
1769  * Input Bounds:
1770  *   arg1: [0x0 ~> 0xffffffffffffffff]
1771  *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1772  *   arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1773  *   arg4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1774  *   arg5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1775  * Output Bounds:
1776  *   out1: [0x0 ~> 0xffffffffffffffff]
1777  *   out2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1778  *   out3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1779  *   out4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1780  *   out5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1781  */
fiat_p256_divstep(uint64_t * out1,uint64_t out2[5],uint64_t out3[5],uint64_t out4[4],uint64_t out5[4],uint64_t arg1,const uint64_t arg2[5],const uint64_t arg3[5],const uint64_t arg4[4],const uint64_t arg5[4])1782 static FIAT_P256_FIAT_INLINE void fiat_p256_divstep(uint64_t* out1, uint64_t out2[5], uint64_t out3[5], uint64_t out4[4], uint64_t out5[4], uint64_t arg1, const uint64_t arg2[5], const uint64_t arg3[5], const uint64_t arg4[4], const uint64_t arg5[4]) {
1783   uint64_t x1;
1784   fiat_p256_uint1 x2;
1785   fiat_p256_uint1 x3;
1786   uint64_t x4;
1787   fiat_p256_uint1 x5;
1788   uint64_t x6;
1789   uint64_t x7;
1790   uint64_t x8;
1791   uint64_t x9;
1792   uint64_t x10;
1793   uint64_t x11;
1794   uint64_t x12;
1795   fiat_p256_uint1 x13;
1796   uint64_t x14;
1797   fiat_p256_uint1 x15;
1798   uint64_t x16;
1799   fiat_p256_uint1 x17;
1800   uint64_t x18;
1801   fiat_p256_uint1 x19;
1802   uint64_t x20;
1803   fiat_p256_uint1 x21;
1804   uint64_t x22;
1805   uint64_t x23;
1806   uint64_t x24;
1807   uint64_t x25;
1808   uint64_t x26;
1809   uint64_t x27;
1810   uint64_t x28;
1811   uint64_t x29;
1812   uint64_t x30;
1813   uint64_t x31;
1814   fiat_p256_uint1 x32;
1815   uint64_t x33;
1816   fiat_p256_uint1 x34;
1817   uint64_t x35;
1818   fiat_p256_uint1 x36;
1819   uint64_t x37;
1820   fiat_p256_uint1 x38;
1821   uint64_t x39;
1822   fiat_p256_uint1 x40;
1823   uint64_t x41;
1824   fiat_p256_uint1 x42;
1825   uint64_t x43;
1826   fiat_p256_uint1 x44;
1827   uint64_t x45;
1828   fiat_p256_uint1 x46;
1829   uint64_t x47;
1830   fiat_p256_uint1 x48;
1831   uint64_t x49;
1832   uint64_t x50;
1833   uint64_t x51;
1834   uint64_t x52;
1835   uint64_t x53;
1836   fiat_p256_uint1 x54;
1837   uint64_t x55;
1838   fiat_p256_uint1 x56;
1839   uint64_t x57;
1840   fiat_p256_uint1 x58;
1841   uint64_t x59;
1842   fiat_p256_uint1 x60;
1843   uint64_t x61;
1844   uint64_t x62;
1845   fiat_p256_uint1 x63;
1846   uint64_t x64;
1847   fiat_p256_uint1 x65;
1848   uint64_t x66;
1849   fiat_p256_uint1 x67;
1850   uint64_t x68;
1851   fiat_p256_uint1 x69;
1852   uint64_t x70;
1853   uint64_t x71;
1854   uint64_t x72;
1855   uint64_t x73;
1856   fiat_p256_uint1 x74;
1857   uint64_t x75;
1858   uint64_t x76;
1859   uint64_t x77;
1860   uint64_t x78;
1861   uint64_t x79;
1862   uint64_t x80;
1863   fiat_p256_uint1 x81;
1864   uint64_t x82;
1865   fiat_p256_uint1 x83;
1866   uint64_t x84;
1867   fiat_p256_uint1 x85;
1868   uint64_t x86;
1869   fiat_p256_uint1 x87;
1870   uint64_t x88;
1871   fiat_p256_uint1 x89;
1872   uint64_t x90;
1873   uint64_t x91;
1874   uint64_t x92;
1875   uint64_t x93;
1876   uint64_t x94;
1877   fiat_p256_uint1 x95;
1878   uint64_t x96;
1879   fiat_p256_uint1 x97;
1880   uint64_t x98;
1881   fiat_p256_uint1 x99;
1882   uint64_t x100;
1883   fiat_p256_uint1 x101;
1884   uint64_t x102;
1885   fiat_p256_uint1 x103;
1886   uint64_t x104;
1887   fiat_p256_uint1 x105;
1888   uint64_t x106;
1889   fiat_p256_uint1 x107;
1890   uint64_t x108;
1891   fiat_p256_uint1 x109;
1892   uint64_t x110;
1893   fiat_p256_uint1 x111;
1894   uint64_t x112;
1895   fiat_p256_uint1 x113;
1896   uint64_t x114;
1897   uint64_t x115;
1898   uint64_t x116;
1899   uint64_t x117;
1900   uint64_t x118;
1901   uint64_t x119;
1902   uint64_t x120;
1903   uint64_t x121;
1904   uint64_t x122;
1905   uint64_t x123;
1906   uint64_t x124;
1907   uint64_t x125;
1908   uint64_t x126;
1909   fiat_p256_addcarryx_u64(&x1, &x2, 0x0, (~arg1), 0x1);
1910   x3 = (fiat_p256_uint1)((fiat_p256_uint1)(x1 >> 63) & (fiat_p256_uint1)((arg3[0]) & 0x1));
1911   fiat_p256_addcarryx_u64(&x4, &x5, 0x0, (~arg1), 0x1);
1912   fiat_p256_cmovznz_u64(&x6, x3, arg1, x4);
1913   fiat_p256_cmovznz_u64(&x7, x3, (arg2[0]), (arg3[0]));
1914   fiat_p256_cmovznz_u64(&x8, x3, (arg2[1]), (arg3[1]));
1915   fiat_p256_cmovznz_u64(&x9, x3, (arg2[2]), (arg3[2]));
1916   fiat_p256_cmovznz_u64(&x10, x3, (arg2[3]), (arg3[3]));
1917   fiat_p256_cmovznz_u64(&x11, x3, (arg2[4]), (arg3[4]));
1918   fiat_p256_addcarryx_u64(&x12, &x13, 0x0, 0x1, (~(arg2[0])));
1919   fiat_p256_addcarryx_u64(&x14, &x15, x13, 0x0, (~(arg2[1])));
1920   fiat_p256_addcarryx_u64(&x16, &x17, x15, 0x0, (~(arg2[2])));
1921   fiat_p256_addcarryx_u64(&x18, &x19, x17, 0x0, (~(arg2[3])));
1922   fiat_p256_addcarryx_u64(&x20, &x21, x19, 0x0, (~(arg2[4])));
1923   fiat_p256_cmovznz_u64(&x22, x3, (arg3[0]), x12);
1924   fiat_p256_cmovznz_u64(&x23, x3, (arg3[1]), x14);
1925   fiat_p256_cmovznz_u64(&x24, x3, (arg3[2]), x16);
1926   fiat_p256_cmovznz_u64(&x25, x3, (arg3[3]), x18);
1927   fiat_p256_cmovznz_u64(&x26, x3, (arg3[4]), x20);
1928   fiat_p256_cmovznz_u64(&x27, x3, (arg4[0]), (arg5[0]));
1929   fiat_p256_cmovznz_u64(&x28, x3, (arg4[1]), (arg5[1]));
1930   fiat_p256_cmovznz_u64(&x29, x3, (arg4[2]), (arg5[2]));
1931   fiat_p256_cmovznz_u64(&x30, x3, (arg4[3]), (arg5[3]));
1932   fiat_p256_addcarryx_u64(&x31, &x32, 0x0, x27, x27);
1933   fiat_p256_addcarryx_u64(&x33, &x34, x32, x28, x28);
1934   fiat_p256_addcarryx_u64(&x35, &x36, x34, x29, x29);
1935   fiat_p256_addcarryx_u64(&x37, &x38, x36, x30, x30);
1936   fiat_p256_subborrowx_u64(&x39, &x40, 0x0, x31, UINT64_C(0xffffffffffffffff));
1937   fiat_p256_subborrowx_u64(&x41, &x42, x40, x33, UINT32_C(0xffffffff));
1938   fiat_p256_subborrowx_u64(&x43, &x44, x42, x35, 0x0);
1939   fiat_p256_subborrowx_u64(&x45, &x46, x44, x37, UINT64_C(0xffffffff00000001));
1940   fiat_p256_subborrowx_u64(&x47, &x48, x46, x38, 0x0);
1941   x49 = (arg4[3]);
1942   x50 = (arg4[2]);
1943   x51 = (arg4[1]);
1944   x52 = (arg4[0]);
1945   fiat_p256_subborrowx_u64(&x53, &x54, 0x0, 0x0, x52);
1946   fiat_p256_subborrowx_u64(&x55, &x56, x54, 0x0, x51);
1947   fiat_p256_subborrowx_u64(&x57, &x58, x56, 0x0, x50);
1948   fiat_p256_subborrowx_u64(&x59, &x60, x58, 0x0, x49);
1949   fiat_p256_cmovznz_u64(&x61, x60, 0x0, UINT64_C(0xffffffffffffffff));
1950   fiat_p256_addcarryx_u64(&x62, &x63, 0x0, x53, x61);
1951   fiat_p256_addcarryx_u64(&x64, &x65, x63, x55, (x61 & UINT32_C(0xffffffff)));
1952   fiat_p256_addcarryx_u64(&x66, &x67, x65, x57, 0x0);
1953   fiat_p256_addcarryx_u64(&x68, &x69, x67, x59, (x61 & UINT64_C(0xffffffff00000001)));
1954   fiat_p256_cmovznz_u64(&x70, x3, (arg5[0]), x62);
1955   fiat_p256_cmovznz_u64(&x71, x3, (arg5[1]), x64);
1956   fiat_p256_cmovznz_u64(&x72, x3, (arg5[2]), x66);
1957   fiat_p256_cmovznz_u64(&x73, x3, (arg5[3]), x68);
1958   x74 = (fiat_p256_uint1)(x22 & 0x1);
1959   fiat_p256_cmovznz_u64(&x75, x74, 0x0, x7);
1960   fiat_p256_cmovznz_u64(&x76, x74, 0x0, x8);
1961   fiat_p256_cmovznz_u64(&x77, x74, 0x0, x9);
1962   fiat_p256_cmovznz_u64(&x78, x74, 0x0, x10);
1963   fiat_p256_cmovznz_u64(&x79, x74, 0x0, x11);
1964   fiat_p256_addcarryx_u64(&x80, &x81, 0x0, x22, x75);
1965   fiat_p256_addcarryx_u64(&x82, &x83, x81, x23, x76);
1966   fiat_p256_addcarryx_u64(&x84, &x85, x83, x24, x77);
1967   fiat_p256_addcarryx_u64(&x86, &x87, x85, x25, x78);
1968   fiat_p256_addcarryx_u64(&x88, &x89, x87, x26, x79);
1969   fiat_p256_cmovznz_u64(&x90, x74, 0x0, x27);
1970   fiat_p256_cmovznz_u64(&x91, x74, 0x0, x28);
1971   fiat_p256_cmovznz_u64(&x92, x74, 0x0, x29);
1972   fiat_p256_cmovznz_u64(&x93, x74, 0x0, x30);
1973   fiat_p256_addcarryx_u64(&x94, &x95, 0x0, x70, x90);
1974   fiat_p256_addcarryx_u64(&x96, &x97, x95, x71, x91);
1975   fiat_p256_addcarryx_u64(&x98, &x99, x97, x72, x92);
1976   fiat_p256_addcarryx_u64(&x100, &x101, x99, x73, x93);
1977   fiat_p256_subborrowx_u64(&x102, &x103, 0x0, x94, UINT64_C(0xffffffffffffffff));
1978   fiat_p256_subborrowx_u64(&x104, &x105, x103, x96, UINT32_C(0xffffffff));
1979   fiat_p256_subborrowx_u64(&x106, &x107, x105, x98, 0x0);
1980   fiat_p256_subborrowx_u64(&x108, &x109, x107, x100, UINT64_C(0xffffffff00000001));
1981   fiat_p256_subborrowx_u64(&x110, &x111, x109, x101, 0x0);
1982   fiat_p256_addcarryx_u64(&x112, &x113, 0x0, x6, 0x1);
1983   x114 = ((x80 >> 1) | ((x82 << 63) & UINT64_C(0xffffffffffffffff)));
1984   x115 = ((x82 >> 1) | ((x84 << 63) & UINT64_C(0xffffffffffffffff)));
1985   x116 = ((x84 >> 1) | ((x86 << 63) & UINT64_C(0xffffffffffffffff)));
1986   x117 = ((x86 >> 1) | ((x88 << 63) & UINT64_C(0xffffffffffffffff)));
1987   x118 = ((x88 & UINT64_C(0x8000000000000000)) | (x88 >> 1));
1988   fiat_p256_cmovznz_u64(&x119, x48, x39, x31);
1989   fiat_p256_cmovznz_u64(&x120, x48, x41, x33);
1990   fiat_p256_cmovznz_u64(&x121, x48, x43, x35);
1991   fiat_p256_cmovznz_u64(&x122, x48, x45, x37);
1992   fiat_p256_cmovznz_u64(&x123, x111, x102, x94);
1993   fiat_p256_cmovznz_u64(&x124, x111, x104, x96);
1994   fiat_p256_cmovznz_u64(&x125, x111, x106, x98);
1995   fiat_p256_cmovznz_u64(&x126, x111, x108, x100);
1996   *out1 = x112;
1997   out2[0] = x7;
1998   out2[1] = x8;
1999   out2[2] = x9;
2000   out2[3] = x10;
2001   out2[4] = x11;
2002   out3[0] = x114;
2003   out3[1] = x115;
2004   out3[2] = x116;
2005   out3[3] = x117;
2006   out3[4] = x118;
2007   out4[0] = x119;
2008   out4[1] = x120;
2009   out4[2] = x121;
2010   out4[3] = x122;
2011   out5[0] = x123;
2012   out5[1] = x124;
2013   out5[2] = x125;
2014   out5[3] = x126;
2015 }
2016 
2017 /*
2018  * The function fiat_p256_divstep_precomp returns the precomputed value for Bernstein-Yang-inversion (in montgomery form).
2019  *
2020  * Postconditions:
2021  *   eval (from_montgomery out1) = ⌊(m - 1) / 2⌋^(if ⌊log2 m⌋ + 1 < 46 then ⌊(49 * (⌊log2 m⌋ + 1) + 80) / 17⌋ else ⌊(49 * (⌊log2 m⌋ + 1) + 57) / 17⌋)
2022  *   0 ≤ eval out1 < m
2023  *
2024  * Output Bounds:
2025  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
2026  */
fiat_p256_divstep_precomp(uint64_t out1[4])2027 static FIAT_P256_FIAT_INLINE void fiat_p256_divstep_precomp(uint64_t out1[4]) {
2028   out1[0] = UINT64_C(0x67ffffffb8000000);
2029   out1[1] = UINT64_C(0xc000000038000000);
2030   out1[2] = UINT64_C(0xd80000007fffffff);
2031   out1[3] = UINT64_C(0x2fffffffffffffff);
2032 }
2033