1 /* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
2
3 #include "uECC.h"
4
5 // NULL
6 #include "stddef.h"
7
8 // suppress MSVC C4244: conversion from uECC_word_t to int
9 #ifdef _MSC_VER
10 #pragma warning( disable : 4244 )
11 #endif
12
13 #ifndef uECC_PLATFORM
14 #if __AVR__
15 #define uECC_PLATFORM uECC_avr
16 #elif defined(__thumb2__) || defined(_M_ARMT) /* I think MSVC only supports Thumb-2 targets */
17 #define uECC_PLATFORM uECC_arm_thumb2
18 #elif defined(__thumb__)
19 #define uECC_PLATFORM uECC_arm_thumb
20 #elif defined(__arm__) || defined(_M_ARM)
21 #define uECC_PLATFORM uECC_arm
22 #elif defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__I86__)
23 #define uECC_PLATFORM uECC_x86
24 #elif defined(__amd64__) || defined(_M_X64)
25 #define uECC_PLATFORM uECC_x86_64
26 #else
27 #define uECC_PLATFORM uECC_arch_other
28 #endif
29 #endif
30
31 #ifndef uECC_WORD_SIZE
32 #if uECC_PLATFORM == uECC_avr
33 #define uECC_WORD_SIZE 1
34 #elif (uECC_PLATFORM == uECC_x86_64)
35 #define uECC_WORD_SIZE 8
36 #else
37 #define uECC_WORD_SIZE 4
38 #endif
39 #endif
40
41 #if (uECC_CURVE == uECC_secp160r1 || uECC_CURVE == uECC_secp224r1) && (uECC_WORD_SIZE == 8)
42 #undef uECC_WORD_SIZE
43 #define uECC_WORD_SIZE 4
44 #if (uECC_PLATFORM == uECC_x86_64)
45 #undef uECC_PLATFORM
46 #define uECC_PLATFORM uECC_x86
47 #endif
48 #endif
49
50 #if (uECC_WORD_SIZE != 1) && (uECC_WORD_SIZE != 4) && (uECC_WORD_SIZE != 8)
51 #error "Unsupported value for uECC_WORD_SIZE"
52 #endif
53
54 #if (uECC_ASM && (uECC_PLATFORM == uECC_avr) && (uECC_WORD_SIZE != 1))
55 #pragma message ("uECC_WORD_SIZE must be 1 when using AVR asm")
56 #undef uECC_WORD_SIZE
57 #define uECC_WORD_SIZE 1
58 #endif
59
60 #if (uECC_ASM && \
61 (uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb) && \
62 (uECC_WORD_SIZE != 4))
63 #pragma message ("uECC_WORD_SIZE must be 4 when using ARM asm")
64 #undef uECC_WORD_SIZE
65 #define uECC_WORD_SIZE 4
66 #endif
67
68 #if __STDC_VERSION__ >= 199901L
69 #define RESTRICT restrict
70 #else
71 #define RESTRICT
72 #endif
73
74 #if defined(__SIZEOF_INT128__) || ((__clang_major__ * 100 + __clang_minor__) >= 302)
75 #define SUPPORTS_INT128 1
76 #else
77 #define SUPPORTS_INT128 0
78 #endif
79
80 #define MAX_TRIES 64
81
82 #if (uECC_WORD_SIZE == 1)
83
84 typedef uint8_t uECC_word_t;
85 typedef uint16_t uECC_dword_t;
86 typedef uint8_t wordcount_t;
87 typedef int8_t swordcount_t;
88 typedef int16_t bitcount_t;
89 typedef int8_t cmpresult_t;
90
91 #define HIGH_BIT_SET 0x80
92 #define uECC_WORD_BITS 8
93 #define uECC_WORD_BITS_SHIFT 3
94 #define uECC_WORD_BITS_MASK 0x07
95
96 #define uECC_WORDS_1 20
97 #define uECC_WORDS_2 24
98 #define uECC_WORDS_3 32
99 #define uECC_WORDS_4 32
100 #define uECC_WORDS_5 28
101
102 #define uECC_N_WORDS_1 21
103 #define uECC_N_WORDS_2 24
104 #define uECC_N_WORDS_3 32
105 #define uECC_N_WORDS_4 32
106 #define uECC_N_WORDS_5 28
107
108 #define Curve_P_1 {0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, \
109 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, \
110 0xFF, 0xFF, 0xFF, 0xFF}
111 #define Curve_P_2 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, \
112 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, \
113 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}
114 #define Curve_P_3 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, \
115 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, \
116 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
117 0x01, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF}
118 #define Curve_P_4 {0x2F, 0xFC, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, \
119 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, \
120 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, \
121 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}
122 #define Curve_P_5 {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
123 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, \
124 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, \
125 0xFF, 0xFF, 0xFF, 0xFF}
126
127 #define Curve_B_1 {0x45, 0xFA, 0x65, 0xC5, 0xAD, 0xD4, 0xD4, 0x81, \
128 0x9F, 0xF8, 0xAC, 0x65, 0x8B, 0x7A, 0xBD, 0x54, \
129 0xFC, 0xBE, 0x97, 0x1C}
130 #define Curve_B_2 {0xB1, 0xB9, 0x46, 0xC1, 0xEC, 0xDE, 0xB8, 0xFE, \
131 0x49, 0x30, 0x24, 0x72, 0xAB, 0xE9, 0xA7, 0x0F, \
132 0xE7, 0x80, 0x9C, 0xE5, 0x19, 0x05, 0x21, 0x64}
133 #define Curve_B_3 {0x4B, 0x60, 0xD2, 0x27, 0x3E, 0x3C, 0xCE, 0x3B, \
134 0xF6, 0xB0, 0x53, 0xCC, 0xB0, 0x06, 0x1D, 0x65, \
135 0xBC, 0x86, 0x98, 0x76, 0x55, 0xBD, 0xEB, 0xB3, \
136 0xE7, 0x93, 0x3A, 0xAA, 0xD8, 0x35, 0xC6, 0x5A}
137 #define Curve_B_4 {0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
138 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
139 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
140 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
141 #define Curve_B_5 {0xB4, 0xFF, 0x55, 0x23, 0x43, 0x39, 0x0B, 0x27, \
142 0xBA, 0xD8, 0xBF, 0xD7, 0xB7, 0xB0, 0x44, 0x50, \
143 0x56, 0x32, 0x41, 0xF5, 0xAB, 0xB3, 0x04, 0x0C, \
144 0x85, 0x0A, 0x05, 0xB4}
145
146 #define Curve_G_1 { \
147 {0x82, 0xFC, 0xCB, 0x13, 0xB9, 0x8B, 0xC3, 0x68, \
148 0x89, 0x69, 0x64, 0x46, 0x28, 0x73, 0xF5, 0x8E, \
149 0x68, 0xB5, 0x96, 0x4A}, \
150 {0x32, 0xFB, 0xC5, 0x7A, 0x37, 0x51, 0x23, 0x04, \
151 0x12, 0xC9, 0xDC, 0x59, 0x7D, 0x94, 0x68, 0x31, \
152 0x55, 0x28, 0xA6, 0x23}}
153
154 #define Curve_G_2 { \
155 {0x12, 0x10, 0xFF, 0x82, 0xFD, 0x0A, 0xFF, 0xF4, \
156 0x00, 0x88, 0xA1, 0x43, 0xEB, 0x20, 0xBF, 0x7C, \
157 0xF6, 0x90, 0x30, 0xB0, 0x0E, 0xA8, 0x8D, 0x18}, \
158 {0x11, 0x48, 0x79, 0x1E, 0xA1, 0x77, 0xF9, 0x73, \
159 0xD5, 0xCD, 0x24, 0x6B, 0xED, 0x11, 0x10, 0x63, \
160 0x78, 0xDA, 0xC8, 0xFF, 0x95, 0x2B, 0x19, 0x07}}
161
162 #define Curve_G_3 { \
163 {0x96, 0xC2, 0x98, 0xD8, 0x45, 0x39, 0xA1, 0xF4, \
164 0xA0, 0x33, 0xEB, 0x2D, 0x81, 0x7D, 0x03, 0x77, \
165 0xF2, 0x40, 0xA4, 0x63, 0xE5, 0xE6, 0xBC, 0xF8, \
166 0x47, 0x42, 0x2C, 0xE1, 0xF2, 0xD1, 0x17, 0x6B}, \
167 {0xF5, 0x51, 0xBF, 0x37, 0x68, 0x40, 0xB6, 0xCB, \
168 0xCE, 0x5E, 0x31, 0x6B, 0x57, 0x33, 0xCE, 0x2B, \
169 0x16, 0x9E, 0x0F, 0x7C, 0x4A, 0xEB, 0xE7, 0x8E, \
170 0x9B, 0x7F, 0x1A, 0xFE, 0xE2, 0x42, 0xE3, 0x4F}}
171
172 #define Curve_G_4 { \
173 {0x98, 0x17, 0xF8, 0x16, 0x5B, 0x81, 0xF2, 0x59, \
174 0xD9, 0x28, 0xCE, 0x2D, 0xDB, 0xFC, 0x9B, 0x02, \
175 0x07, 0x0B, 0x87, 0xCE, 0x95, 0x62, 0xA0, 0x55, \
176 0xAC, 0xBB, 0xDC, 0xF9, 0x7E, 0x66, 0xBE, 0x79}, \
177 {0xB8, 0xD4, 0x10, 0xFB, 0x8F, 0xD0, 0x47, 0x9C, \
178 0x19, 0x54, 0x85, 0xA6, 0x48, 0xB4, 0x17, 0xFD, \
179 0xA8, 0x08, 0x11, 0x0E, 0xFC, 0xFB, 0xA4, 0x5D, \
180 0x65, 0xC4, 0xA3, 0x26, 0x77, 0xDA, 0x3A, 0x48}}
181
182 #define Curve_G_5 { \
183 {0x21, 0x1D, 0x5C, 0x11, 0xD6, 0x80, 0x32, 0x34, \
184 0x22, 0x11, 0xC2, 0x56, 0xD3, 0xC1, 0x03, 0x4A, \
185 0xB9, 0x90, 0x13, 0x32, 0x7F, 0xBF, 0xB4, 0x6B, \
186 0xBD, 0x0C, 0x0E, 0xB7}, \
187 {0x34, 0x7E, 0x00, 0x85, 0x99, 0x81, 0xD5, 0x44, \
188 0x64, 0x47, 0x07, 0x5A, 0xA0, 0x75, 0x43, 0xCD, \
189 0xE6, 0xDF, 0x22, 0x4C, 0xFB, 0x23, 0xF7, 0xB5, \
190 0x88, 0x63, 0x37, 0xBD}}
191
192 #define Curve_N_1 {0x57, 0x22, 0x75, 0xCA, 0xD3, 0xAE, 0x27, 0xF9, \
193 0xC8, 0xF4, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, \
194 0x00, 0x00, 0x00, 0x00, 0x01}
195 #define Curve_N_2 {0x31, 0x28, 0xD2, 0xB4, 0xB1, 0xC9, 0x6B, 0x14, \
196 0x36, 0xF8, 0xDE, 0x99, 0xFF, 0xFF, 0xFF, 0xFF, \
197 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}
198 #define Curve_N_3 {0x51, 0x25, 0x63, 0xFC, 0xC2, 0xCA, 0xB9, 0xF3, \
199 0x84, 0x9E, 0x17, 0xA7, 0xAD, 0xFA, 0xE6, 0xBC, \
200 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, \
201 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF}
202 #define Curve_N_4 {0x41, 0x41, 0x36, 0xD0, 0x8C, 0x5E, 0xD2, 0xBF, \
203 0x3B, 0xA0, 0x48, 0xAF, 0xE6, 0xDC, 0xAE, 0xBA, \
204 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, \
205 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}
206 #define Curve_N_5 {0x3D, 0x2A, 0x5C, 0x5C, 0x45, 0x29, 0xDD, 0x13, \
207 0x3E, 0xF0, 0xB8, 0xE0, 0xA2, 0x16, 0xFF, 0xFF, \
208 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, \
209 0xFF, 0xFF, 0xFF, 0xFF}
210
211 #elif (uECC_WORD_SIZE == 4)
212
213 typedef uint32_t uECC_word_t;
214 typedef uint64_t uECC_dword_t;
215 typedef unsigned wordcount_t;
216 typedef int swordcount_t;
217 typedef int bitcount_t;
218 typedef int cmpresult_t;
219
220 #define HIGH_BIT_SET 0x80000000
221 #define uECC_WORD_BITS 32
222 #define uECC_WORD_BITS_SHIFT 5
223 #define uECC_WORD_BITS_MASK 0x01F
224
225 #define uECC_WORDS_1 5
226 #define uECC_WORDS_2 6
227 #define uECC_WORDS_3 8
228 #define uECC_WORDS_4 8
229 #define uECC_WORDS_5 7
230
231 #define uECC_N_WORDS_1 6
232 #define uECC_N_WORDS_2 6
233 #define uECC_N_WORDS_3 8
234 #define uECC_N_WORDS_4 8
235 #define uECC_N_WORDS_5 7
236
237 #define Curve_P_1 {0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
238 #define Curve_P_2 {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
239 #define Curve_P_3 {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, \
240 0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF}
241 #define Curve_P_4 {0xFFFFFC2F, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, \
242 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
243 #define Curve_P_5 {0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFF, \
244 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
245
246 #define Curve_B_1 {0xC565FA45, 0x81D4D4AD, 0x65ACF89F, 0x54BD7A8B, 0x1C97BEFC}
247 #define Curve_B_2 {0xC146B9B1, 0xFEB8DEEC, 0x72243049, 0x0FA7E9AB, 0xE59C80E7, 0x64210519}
248 #define Curve_B_3 {0x27D2604B, 0x3BCE3C3E, 0xCC53B0F6, 0x651D06B0, \
249 0x769886BC, 0xB3EBBD55, 0xAA3A93E7, 0x5AC635D8}
250 #define Curve_B_4 {0x00000007, 0x00000000, 0x00000000, 0x00000000, \
251 0x00000000, 0x00000000, 0x00000000, 0x00000000}
252 #define Curve_B_5 {0x2355FFB4, 0x270B3943, 0xD7BFD8BA, 0x5044B0B7, \
253 0xF5413256, 0x0C04B3AB, 0xB4050A85}
254
255 #define Curve_G_1 { \
256 {0x13CBFC82, 0x68C38BB9, 0x46646989, 0x8EF57328, 0x4A96B568}, \
257 {0x7AC5FB32, 0x04235137, 0x59DCC912, 0x3168947D, 0x23A62855}}
258
259 #define Curve_G_2 { \
260 {0x82FF1012, 0xF4FF0AFD, 0x43A18800, 0x7CBF20EB, 0xB03090F6, 0x188DA80E}, \
261 {0x1E794811, 0x73F977A1, 0x6B24CDD5, 0x631011ED, 0xFFC8DA78, 0x07192B95}}
262
263 #define Curve_G_3 { \
264 {0xD898C296, 0xF4A13945, 0x2DEB33A0, 0x77037D81, \
265 0x63A440F2, 0xF8BCE6E5, 0xE12C4247, 0x6B17D1F2}, \
266 {0x37BF51F5, 0xCBB64068, 0x6B315ECE, 0x2BCE3357, \
267 0x7C0F9E16, 0x8EE7EB4A, 0xFE1A7F9B, 0x4FE342E2}}
268
269 #define Curve_G_4 { \
270 {0x16F81798, 0x59F2815B, 0x2DCE28D9, 0x029BFCDB, \
271 0xCE870B07, 0x55A06295, 0xF9DCBBAC, 0x79BE667E}, \
272 {0xFB10D4B8, 0x9C47D08F, 0xA6855419, 0xFD17B448, \
273 0x0E1108A8, 0x5DA4FBFC, 0x26A3C465, 0x483ADA77}}
274
275 #define Curve_G_5 { \
276 {0x115C1D21, 0x343280D6, 0x56C21122, 0x4A03C1D3, \
277 0x321390B9, 0x6BB4BF7F, 0xB70E0CBD}, \
278 {0x85007E34, 0x44D58199, 0x5A074764, 0xCD4375A0, \
279 0x4C22DFE6, 0xB5F723FB, 0xBD376388}}
280
281 #define Curve_N_1 {0xCA752257, 0xF927AED3, 0x0001F4C8, 0x00000000, 0x00000000, 0x00000001}
282 #define Curve_N_2 {0xB4D22831, 0x146BC9B1, 0x99DEF836, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
283 #define Curve_N_3 {0xFC632551, 0xF3B9CAC2, 0xA7179E84, 0xBCE6FAAD, \
284 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF}
285 #define Curve_N_4 {0xD0364141, 0xBFD25E8C, 0xAF48A03B, 0xBAAEDCE6, \
286 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
287 #define Curve_N_5 {0x5C5C2A3D, 0x13DD2945, 0xE0B8F03E, 0xFFFF16A2, \
288 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
289
290 #elif (uECC_WORD_SIZE == 8)
291
292 typedef uint64_t uECC_word_t;
293 #if SUPPORTS_INT128
294 typedef unsigned __int128 uECC_dword_t;
295 #endif
296 typedef unsigned wordcount_t;
297 typedef int swordcount_t;
298 typedef int bitcount_t;
299 typedef int cmpresult_t;
300
301 #define HIGH_BIT_SET 0x8000000000000000ull
302 #define uECC_WORD_BITS 64
303 #define uECC_WORD_BITS_SHIFT 6
304 #define uECC_WORD_BITS_MASK 0x03F
305
306 #define uECC_WORDS_1 3
307 #define uECC_WORDS_2 3
308 #define uECC_WORDS_3 4
309 #define uECC_WORDS_4 4
310 #define uECC_WORDS_5 4
311
312 #define uECC_N_WORDS_1 3
313 #define uECC_N_WORDS_2 3
314 #define uECC_N_WORDS_3 4
315 #define uECC_N_WORDS_4 4
316 #define uECC_N_WORDS_5 4
317
318 #define Curve_P_1 {0xFFFFFFFF7FFFFFFFull, 0xFFFFFFFFFFFFFFFFull, 0x00000000FFFFFFFFull}
319 #define Curve_P_2 {0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull}
320 #define Curve_P_3 {0xFFFFFFFFFFFFFFFFull, 0x00000000FFFFFFFFull, \
321 0x0000000000000000ull, 0xFFFFFFFF00000001ull}
322 #define Curve_P_4 {0xFFFFFFFEFFFFFC2Full, 0xFFFFFFFFFFFFFFFFull, \
323 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull}
324 #define Curve_P_5 {0x0000000000000001ull, 0xFFFFFFFF00000000ull, \
325 0xFFFFFFFFFFFFFFFFull, 0x00000000FFFFFFFFull}
326
327 #define Curve_B_1 {0x81D4D4ADC565FA45ull, 0x54BD7A8B65ACF89Full, 0x000000001C97BEFCull}
328 #define Curve_B_2 {0xFEB8DEECC146B9B1ull, 0x0FA7E9AB72243049ull, 0x64210519E59C80E7ull}
329 #define Curve_B_3 {0x3BCE3C3E27D2604Bull, 0x651D06B0CC53B0F6ull, \
330 0xB3EBBD55769886BCull, 0x5AC635D8AA3A93E7ull}
331 #define Curve_B_4 {0x0000000000000007ull, 0x0000000000000000ull, \
332 0x0000000000000000ull, 0x0000000000000000ull}
333 #define Curve_B_5 {0x270B39432355FFB4ull, 0x5044B0B7D7BFD8BAull, \
334 0x0C04B3ABF5413256ull, 0x00000000B4050A85ull}
335
336 #define Curve_G_1 { \
337 {0x68C38BB913CBFC82ull, 0x8EF5732846646989ull, 0x000000004A96B568ull}, \
338 {0x042351377AC5FB32ull, 0x3168947D59DCC912ull, 0x0000000023A62855ull}}
339
340 #define Curve_G_2 { \
341 {0xF4FF0AFD82FF1012ull, 0x7CBF20EB43A18800ull, 0x188DA80EB03090F6ull}, \
342 {0x73F977A11E794811ull, 0x631011ED6B24CDD5ull, 0x07192B95FFC8DA78ull}}
343
344 #define Curve_G_3 { \
345 {0xF4A13945D898C296ull, 0x77037D812DEB33A0ull, 0xF8BCE6E563A440F2ull, 0x6B17D1F2E12C4247ull}, \
346 {0xCBB6406837BF51F5ull, 0x2BCE33576B315ECEull, 0x8EE7EB4A7C0F9E16ull, 0x4FE342E2FE1A7F9Bull}}
347
348 #define Curve_G_4 { \
349 {0x59F2815B16F81798ull, 0x029BFCDB2DCE28D9ull, 0x55A06295CE870B07ull, 0x79BE667EF9DCBBACull}, \
350 {0x9C47D08FFB10D4B8ull, 0xFD17B448A6855419ull, 0x5DA4FBFC0E1108A8ull, 0x483ADA7726A3C465ull}}
351
352 #define Curve_G_5 { \
353 {0x343280D6115C1D21ull, 0x4A03C1D356C21122ull, 0x6BB4BF7F321390B9ull, 0x00000000B70E0CBDull}, \
354 {0x44D5819985007E34ull, 0xCD4375A05A074764ull, 0xB5F723FB4C22DFE6ull, 0x00000000BD376388ull}}
355
356 #define Curve_N_1 {0xF927AED3CA752257ull, 0x000000000001F4C8ull, 0x0000000100000000ull}
357 #define Curve_N_2 {0x146BC9B1B4D22831ull, 0xFFFFFFFF99DEF836ull, 0xFFFFFFFFFFFFFFFFull}
358 #define Curve_N_3 {0xF3B9CAC2FC632551ull, 0xBCE6FAADA7179E84ull, \
359 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF00000000ull}
360 #define Curve_N_4 {0xBFD25E8CD0364141ull, 0xBAAEDCE6AF48A03Bull, \
361 0xFFFFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFFFFFull}
362 #define Curve_N_5 {0x13DD29455C5C2A3Dull, 0xFFFF16A2E0B8F03Eull, \
363 0xFFFFFFFFFFFFFFFFull, 0x00000000FFFFFFFFull}
364
365 #endif /* (uECC_WORD_SIZE == 8) */
366
367 #define uECC_WORDS uECC_CONCAT(uECC_WORDS_, uECC_CURVE)
368 #define uECC_N_WORDS uECC_CONCAT(uECC_N_WORDS_, uECC_CURVE)
369
370 typedef struct EccPoint {
371 uECC_word_t x[uECC_WORDS];
372 uECC_word_t y[uECC_WORDS];
373 } EccPoint;
374
375 static const uECC_word_t curve_p[uECC_WORDS] = uECC_CONCAT(Curve_P_, uECC_CURVE);
376 // Global object `curve_b' is only referenced from function `curve_x_side', it should be defined within that functions block scope
377 static const EccPoint curve_G = uECC_CONCAT(Curve_G_, uECC_CURVE);
378 static const uECC_word_t curve_n[uECC_N_WORDS] = uECC_CONCAT(Curve_N_, uECC_CURVE);
379
380 static void vli_clear(uECC_word_t *vli);
381 static uECC_word_t vli_isZero(const uECC_word_t *vli);
382 static uECC_word_t vli_testBit(const uECC_word_t *vli, bitcount_t bit);
383 #ifdef ENABLE_MICRO_ECC_ECDSA
384 static bitcount_t vli_numBits(const uECC_word_t *vli, wordcount_t max_words);
385 #endif
386 static void vli_set(uECC_word_t *dest, const uECC_word_t *src);
387 static cmpresult_t vli_cmp(const uECC_word_t *left, const uECC_word_t *right);
388 #ifdef ENABLE_MICRO_ECC_ECDSA
389 static cmpresult_t vli_equal(const uECC_word_t *left, const uECC_word_t *right);
390 #endif
391 static void vli_rshift1(uECC_word_t *vli);
392 static uECC_word_t vli_add(uECC_word_t *result,
393 const uECC_word_t *left,
394 const uECC_word_t *right);
395 static uECC_word_t vli_sub(uECC_word_t *result,
396 const uECC_word_t *left,
397 const uECC_word_t *right);
398 static void vli_mult(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right);
399 static void vli_modAdd(uECC_word_t *result,
400 const uECC_word_t *left,
401 const uECC_word_t *right,
402 const uECC_word_t *mod);
403 static void vli_modSub(uECC_word_t *result,
404 const uECC_word_t *left,
405 const uECC_word_t *right,
406 const uECC_word_t *mod);
407 static void vli_mmod_fast(uECC_word_t *RESTRICT result, uECC_word_t *RESTRICT product);
408 static void vli_modMult_fast(uECC_word_t *result,
409 const uECC_word_t *left,
410 const uECC_word_t *right);
411 static void vli_modInv(uECC_word_t *result, const uECC_word_t *input, const uECC_word_t *mod);
412 #if uECC_SQUARE_FUNC
413 static void vli_square(uECC_word_t *result, const uECC_word_t *left);
414 static void vli_modSquare_fast(uECC_word_t *result, const uECC_word_t *left);
415 #endif
416
417 #if ((defined(_WIN32) || defined(_WIN64)) && !defined(uECC_NO_DEFAULT_RNG))
418 /* Windows */
419
420 #define WIN32_LEAN_AND_MEAN
421 #include <windows.h>
422 #include <wincrypt.h>
423
default_RNG(uint8_t * dest,unsigned size)424 static int default_RNG(uint8_t *dest, unsigned size) {
425 HCRYPTPROV prov;
426 if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {
427 return 0;
428 }
429
430 CryptGenRandom(prov, size, (BYTE *)dest);
431 CryptReleaseContext(prov, 0);
432 return 1;
433 }
434
435 #elif (defined(unix) || defined(__linux__) || defined(__unix__) || defined(__unix) || \
436 (defined(__APPLE__) && defined(__MACH__)) || defined(uECC_POSIX)) && !defined(uECC_NO_DEFAULT_RNG)
437
438 /* Some POSIX-like system with /dev/urandom or /dev/random. */
439 #include <sys/types.h>
440 #include <fcntl.h>
441 #include <unistd.h>
442
443 #ifndef O_CLOEXEC
444 #define O_CLOEXEC 0
445 #endif
446
default_RNG(uint8_t * dest,unsigned size)447 static int default_RNG(uint8_t *dest, unsigned size) {
448 int fd = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
449 if (fd == -1) {
450 fd = open("/dev/random", O_RDONLY | O_CLOEXEC);
451 if (fd == -1) {
452 return 0;
453 }
454 }
455
456 char *ptr = (char *)dest;
457 size_t left = size;
458 while (left > 0) {
459 ssize_t bytes_read = read(fd, ptr, left);
460 if (bytes_read <= 0) { // read failed
461 close(fd);
462 return 0;
463 }
464 left -= bytes_read;
465 ptr += bytes_read;
466 }
467
468 close(fd);
469 return 1;
470 }
471
472 #else /* Some other platform */
473
default_RNG(uint8_t * dest,unsigned size)474 static int default_RNG(uint8_t *dest, unsigned size) {
475 (void) dest;
476 (void) size;
477 return 0;
478 }
479
480 #endif
481
482 static uECC_RNG_Function g_rng_function = &default_RNG;
483
uECC_set_rng(uECC_RNG_Function rng_function)484 void uECC_set_rng(uECC_RNG_Function rng_function) {
485 g_rng_function = rng_function;
486 }
487
488 #ifdef __GNUC__ /* Only support GCC inline asm for now */
489 #if (uECC_ASM && (uECC_PLATFORM == uECC_avr))
490 #include "asm_avr.inc"
491 #endif
492
493 #if (uECC_ASM && (uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb || \
494 uECC_PLATFORM == uECC_arm_thumb2))
495 #include "asm_arm.inc"
496 #endif
497 #endif
498
499 #if !asm_clear
vli_clear(uECC_word_t * vli)500 static void vli_clear(uECC_word_t *vli) {
501 wordcount_t i;
502 for (i = 0; i < uECC_WORDS; ++i) {
503 vli[i] = 0;
504 }
505 }
506 #endif
507
508 /* Returns 1 if vli == 0, 0 otherwise. */
509 #if !asm_isZero
vli_isZero(const uECC_word_t * vli)510 static uECC_word_t vli_isZero(const uECC_word_t *vli) {
511 wordcount_t i;
512 for (i = 0; i < uECC_WORDS; ++i) {
513 if (vli[i]) {
514 return 0;
515 }
516 }
517 return 1;
518 }
519 #endif
520
521 /* Returns nonzero if bit 'bit' of vli is set. */
522 #if !asm_testBit
vli_testBit(const uECC_word_t * vli,bitcount_t bit)523 static uECC_word_t vli_testBit(const uECC_word_t *vli, bitcount_t bit) {
524 return (vli[bit >> uECC_WORD_BITS_SHIFT] & ((uECC_word_t)1 << (bit & uECC_WORD_BITS_MASK)));
525 }
526 #endif
527
528 #ifdef ENABLE_MICO_ECC_ECDSA
529
530 /* Counts the number of words in vli. */
531 #if !asm_numBits
vli_numDigits(const uECC_word_t * vli,wordcount_t max_words)532 static wordcount_t vli_numDigits(const uECC_word_t *vli, wordcount_t max_words) {
533 swordcount_t i;
534 /* Search from the end until we find a non-zero digit.
535 We do it in reverse because we expect that most digits will be nonzero. */
536 for (i = max_words - 1; i >= 0 && vli[i] == 0; --i) {
537 }
538
539 return (i + 1);
540 }
541
542 /* Counts the number of bits required to represent vli. */
vli_numBits(const uECC_word_t * vli,wordcount_t max_words)543 static bitcount_t vli_numBits(const uECC_word_t *vli, wordcount_t max_words) {
544 uECC_word_t i;
545 uECC_word_t digit;
546
547 wordcount_t num_digits = vli_numDigits(vli, max_words);
548 if (num_digits == 0) {
549 return 0;
550 }
551
552 digit = vli[num_digits - 1];
553 for (i = 0; digit; ++i) {
554 digit >>= 1;
555 }
556
557 return (((bitcount_t)(num_digits - 1) << uECC_WORD_BITS_SHIFT) + i);
558 }
559
560 #endif /* ENABLE_MICO_ECC_ECDSA */
561
562 #endif /* !asm_numBits */
563
564 /* Sets dest = src. */
565 #if !asm_set
vli_set(uECC_word_t * dest,const uECC_word_t * src)566 static void vli_set(uECC_word_t *dest, const uECC_word_t *src) {
567 wordcount_t i;
568 for (i = 0; i < uECC_WORDS; ++i) {
569 dest[i] = src[i];
570 }
571 }
572 #endif
573
574 /* Returns sign of left - right. */
575 #if !asm_cmp
vli_cmp(const uECC_word_t * left,const uECC_word_t * right)576 static cmpresult_t vli_cmp(const uECC_word_t *left, const uECC_word_t *right) {
577 swordcount_t i;
578 for (i = uECC_WORDS - 1; i >= 0; --i) {
579 if (left[i] > right[i]) {
580 return 1;
581 } else if (left[i] < right[i]) {
582 return -1;
583 }
584 }
585 return 0;
586 }
587 #endif
588
589 #ifdef ENABLE_MICRO_ECC_ECDSA
590
vli_equal(const uECC_word_t * left,const uECC_word_t * right)591 static cmpresult_t vli_equal(const uECC_word_t *left, const uECC_word_t *right) {
592 uECC_word_t result = 0;
593 swordcount_t i;
594 for (i = uECC_WORDS - 1; i >= 0; --i) {
595 result |= (left[i] ^ right[i]);
596 }
597 return (result == 0);
598 }
599
600 #endif
601
602 /* Computes vli = vli >> 1. */
603 #if !asm_rshift1
vli_rshift1(uECC_word_t * vli)604 static void vli_rshift1(uECC_word_t *vli) {
605 uECC_word_t *end = vli;
606 uECC_word_t carry = 0;
607 uECC_word_t *vli_ = vli;
608
609 vli_ += uECC_WORDS;
610 while (vli_-- > end) {
611 uECC_word_t temp = *vli_;
612 *vli_ = (temp >> 1) | carry;
613 carry = temp << (uECC_WORD_BITS - 1);
614 }
615 }
616 #endif
617
618 /* Computes result = left + right, returning carry. Can modify in place. */
619 #if !asm_add
vli_add(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right)620 static uECC_word_t vli_add(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right) {
621 uECC_word_t carry = 0;
622 wordcount_t i;
623 for (i = 0; i < uECC_WORDS; ++i) {
624 uECC_word_t sum = left[i] + right[i] + carry;
625 if (sum != left[i]) {
626 carry = (sum < left[i]);
627 }
628 result[i] = sum;
629 }
630 return carry;
631 }
632 #endif
633
634 /* Computes result = left - right, returning borrow. Can modify in place. */
635 #if !asm_sub
vli_sub(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right)636 static uECC_word_t vli_sub(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right) {
637 uECC_word_t borrow = 0;
638 wordcount_t i;
639 for (i = 0; i < uECC_WORDS; ++i) {
640 uECC_word_t diff = left[i] - right[i] - borrow;
641 if (diff != left[i]) {
642 borrow = (diff > left[i]);
643 }
644 result[i] = diff;
645 }
646 return borrow;
647 }
648 #endif
649
650 #if (!asm_mult || (uECC_SQUARE_FUNC && !asm_square) || uECC_CURVE == uECC_secp256k1)
muladd(uECC_word_t a,uECC_word_t b,uECC_word_t * r0,uECC_word_t * r1,uECC_word_t * r2)651 static void muladd(uECC_word_t a,
652 uECC_word_t b,
653 uECC_word_t *r0,
654 uECC_word_t *r1,
655 uECC_word_t *r2) {
656 #if uECC_WORD_SIZE == 8 && !SUPPORTS_INT128
657 uint64_t a0 = a & 0xffffffffull;
658 uint64_t a1 = a >> 32;
659 uint64_t b0 = b & 0xffffffffull;
660 uint64_t b1 = b >> 32;
661
662 uint64_t i0 = a0 * b0;
663 uint64_t i1 = a0 * b1;
664 uint64_t i2 = a1 * b0;
665 uint64_t i3 = a1 * b1;
666
667 uint64_t p0, p1;
668
669 i2 += (i0 >> 32);
670 i2 += i1;
671 if (i2 < i1) { // overflow
672 i3 += 0x100000000ull;
673 }
674
675 p0 = (i0 & 0xffffffffull) | (i2 << 32);
676 p1 = i3 + (i2 >> 32);
677
678 *r0 += p0;
679 *r1 += (p1 + (*r0 < p0));
680 *r2 += ((*r1 < p1) || (*r1 == p1 && *r0 < p0));
681 #else
682 uECC_dword_t p = (uECC_dword_t)a * b;
683 uECC_dword_t r01 = ((uECC_dword_t)(*r1) << uECC_WORD_BITS) | *r0;
684 r01 += p;
685 *r2 += (r01 < p);
686 *r1 = r01 >> uECC_WORD_BITS;
687 *r0 = (uECC_word_t)r01;
688 #endif
689 }
690 #define muladd_exists 1
691 #endif
692
693 #if !asm_mult
vli_mult(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right)694 static void vli_mult(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right) {
695 uECC_word_t r0 = 0;
696 uECC_word_t r1 = 0;
697 uECC_word_t r2 = 0;
698 wordcount_t i, k;
699
700 /* Compute each digit of result in sequence, maintaining the carries. */
701 for (k = 0; k < uECC_WORDS; ++k) {
702 for (i = 0; i <= k; ++i) {
703 muladd(left[i], right[k - i], &r0, &r1, &r2);
704 }
705 result[k] = r0;
706 r0 = r1;
707 r1 = r2;
708 r2 = 0;
709 }
710 for (k = uECC_WORDS; k < uECC_WORDS * 2 - 1; ++k) {
711 for (i = (k + 1) - uECC_WORDS; i < uECC_WORDS; ++i) {
712 muladd(left[i], right[k - i], &r0, &r1, &r2);
713 }
714 result[k] = r0;
715 r0 = r1;
716 r1 = r2;
717 r2 = 0;
718 }
719 result[uECC_WORDS * 2 - 1] = r0;
720 }
721 #endif
722
723 #if uECC_SQUARE_FUNC
724
725 #if !asm_square
mul2add(uECC_word_t a,uECC_word_t b,uECC_word_t * r0,uECC_word_t * r1,uECC_word_t * r2)726 static void mul2add(uECC_word_t a,
727 uECC_word_t b,
728 uECC_word_t *r0,
729 uECC_word_t *r1,
730 uECC_word_t *r2) {
731 #if uECC_WORD_SIZE == 8 && !SUPPORTS_INT128
732 uint64_t a0 = a & 0xffffffffull;
733 uint64_t a1 = a >> 32;
734 uint64_t b0 = b & 0xffffffffull;
735 uint64_t b1 = b >> 32;
736
737 uint64_t i0 = a0 * b0;
738 uint64_t i1 = a0 * b1;
739 uint64_t i2 = a1 * b0;
740 uint64_t i3 = a1 * b1;
741
742 uint64_t p0, p1;
743
744 i2 += (i0 >> 32);
745 i2 += i1;
746 if (i2 < i1)
747 { // overflow
748 i3 += 0x100000000ull;
749 }
750
751 p0 = (i0 & 0xffffffffull) | (i2 << 32);
752 p1 = i3 + (i2 >> 32);
753
754 *r2 += (p1 >> 63);
755 p1 = (p1 << 1) | (p0 >> 63);
756 p0 <<= 1;
757
758 *r0 += p0;
759 *r1 += (p1 + (*r0 < p0));
760 *r2 += ((*r1 < p1) || (*r1 == p1 && *r0 < p0));
761 #else
762 uECC_dword_t p = (uECC_dword_t)a * b;
763 uECC_dword_t r01 = ((uECC_dword_t)(*r1) << uECC_WORD_BITS) | *r0;
764 *r2 += (p >> (uECC_WORD_BITS * 2 - 1));
765 p *= 2;
766 r01 += p;
767 *r2 += (r01 < p);
768 *r1 = r01 >> uECC_WORD_BITS;
769 *r0 = (uECC_word_t)r01;
770 #endif
771 }
772
vli_square(uECC_word_t * result,const uECC_word_t * left)773 static void vli_square(uECC_word_t *result, const uECC_word_t *left) {
774 uECC_word_t r0 = 0;
775 uECC_word_t r1 = 0;
776 uECC_word_t r2 = 0;
777
778 wordcount_t i, k;
779
780 for (k = 0; k < uECC_WORDS * 2 - 1; ++k) {
781 uECC_word_t min = (k < uECC_WORDS ? 0 : (k + 1) - uECC_WORDS);
782 for (i = min; i <= k && i <= k - i; ++i) {
783 if (i < k-i) {
784 mul2add(left[i], left[k - i], &r0, &r1, &r2);
785 } else {
786 muladd(left[i], left[k - i], &r0, &r1, &r2);
787 }
788 }
789 result[k] = r0;
790 r0 = r1;
791 r1 = r2;
792 r2 = 0;
793 }
794
795 result[uECC_WORDS * 2 - 1] = r0;
796 }
797 #endif
798
799 #else /* uECC_SQUARE_FUNC */
800
801 #define vli_square(result, left, size) vli_mult((result), (left), (left), (size))
802
803 #endif /* uECC_SQUARE_FUNC */
804
805
806 /* Computes result = (left + right) % mod.
807 Assumes that left < mod and right < mod, and that result does not overlap mod. */
808 #if !asm_modAdd
vli_modAdd(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right,const uECC_word_t * mod)809 static void vli_modAdd(uECC_word_t *result,
810 const uECC_word_t *left,
811 const uECC_word_t *right,
812 const uECC_word_t *mod) {
813 uECC_word_t carry = vli_add(result, left, right);
814 if (carry || vli_cmp(result, mod) >= 0) {
815 /* result > mod (result = mod + remainder), so subtract mod to get remainder. */
816 vli_sub(result, result, mod);
817 }
818 }
819 #endif
820
821 /* Computes result = (left - right) % mod.
822 Assumes that left < mod and right < mod, and that result does not overlap mod. */
823 #if !asm_modSub
vli_modSub(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right,const uECC_word_t * mod)824 static void vli_modSub(uECC_word_t *result,
825 const uECC_word_t *left,
826 const uECC_word_t *right,
827 const uECC_word_t *mod) {
828 uECC_word_t l_borrow = vli_sub(result, left, right);
829 if (l_borrow) {
830 /* In this case, result == -diff == (max int) - diff. Since -x % d == d - x,
831 we can get the correct result from result + mod (with overflow). */
832 vli_add(result, result, mod);
833 }
834 }
835 #endif
836
837 #if !asm_modSub_fast
838 #define vli_modSub_fast(result, left, right) vli_modSub((result), (left), (right), curve_p)
839 #endif
840
841 #if !asm_mmod_fast
842
843 #if (uECC_CURVE == uECC_secp160r1 || uECC_CURVE == uECC_secp256k1)
844 /* omega_mult() is defined farther below for the different curves / word sizes */
845 static void omega_mult(uECC_word_t * RESTRICT result, const uECC_word_t * RESTRICT right);
846
847 /* Computes result = product % curve_p
848 see http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf page 354
849
850 Note that this only works if log2(omega) < log2(p) / 2 */
vli_mmod_fast(uECC_word_t * RESTRICT result,uECC_word_t * RESTRICT product)851 static void vli_mmod_fast(uECC_word_t *RESTRICT result, uECC_word_t *RESTRICT product) {
852 uECC_word_t tmp[2 * uECC_WORDS];
853 uECC_word_t carry;
854
855 vli_clear(tmp);
856 vli_clear(tmp + uECC_WORDS);
857
858 omega_mult(tmp, product + uECC_WORDS); /* (Rq, q) = q * c */
859
860 carry = vli_add(result, product, tmp); /* (C, r) = r + q */
861 vli_clear(product);
862 omega_mult(product, tmp + uECC_WORDS); /* Rq*c */
863 carry += vli_add(result, result, product); /* (C1, r) = r + Rq*c */
864
865 while (carry > 0) {
866 --carry;
867 vli_sub(result, result, curve_p);
868 }
869 if (vli_cmp(result, curve_p) > 0) {
870 vli_sub(result, result, curve_p);
871 }
872 }
873
874 #endif
875
876 #if uECC_CURVE == uECC_secp160r1
877
878 #if uECC_WORD_SIZE == 1
omega_mult(uint8_t * RESTRICT result,const uint8_t * RESTRICT right)879 static void omega_mult(uint8_t * RESTRICT result, const uint8_t * RESTRICT right) {
880 uint8_t carry;
881 uint8_t i;
882
883 /* Multiply by (2^31 + 1). */
884 vli_set(result + 4, right); /* 2^32 */
885 vli_rshift1(result + 4); /* 2^31 */
886 result[3] = right[0] << 7; /* get last bit from shift */
887
888 carry = vli_add(result, result, right); /* 2^31 + 1 */
889 for (i = uECC_WORDS; carry; ++i) {
890 uint16_t sum = (uint16_t)result[i] + carry;
891 result[i] = (uint8_t)sum;
892 carry = sum >> 8;
893 }
894 }
895 #elif uECC_WORD_SIZE == 4
omega_mult(uint32_t * RESTRICT result,const uint32_t * RESTRICT right)896 static void omega_mult(uint32_t * RESTRICT result, const uint32_t * RESTRICT right) {
897 uint32_t carry;
898 unsigned i;
899
900 /* Multiply by (2^31 + 1). */
901 vli_set(result + 1, right); /* 2^32 */
902 vli_rshift1(result + 1); /* 2^31 */
903 result[0] = right[0] << 31; /* get last bit from shift */
904
905 carry = vli_add(result, result, right); /* 2^31 + 1 */
906 for (i = uECC_WORDS; carry; ++i) {
907 uint64_t sum = (uint64_t)result[i] + carry;
908 result[i] = (uint32_t)sum;
909 carry = sum >> 32;
910 }
911 }
912 #endif /* uECC_WORD_SIZE */
913
914 #elif uECC_CURVE == uECC_secp192r1
915
916 /* Computes result = product % curve_p.
917 See algorithm 5 and 6 from http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf */
918 #if uECC_WORD_SIZE == 1
vli_mmod_fast(uint8_t * RESTRICT result,uint8_t * RESTRICT product)919 static void vli_mmod_fast(uint8_t *RESTRICT result, uint8_t *RESTRICT product) {
920 uint8_t tmp[uECC_WORDS];
921 uint8_t carry;
922
923 vli_set(result, product);
924
925 vli_set(tmp, &product[24]);
926 carry = vli_add(result, result, tmp);
927
928 tmp[0] = tmp[1] = tmp[2] = tmp[3] = tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0;
929 tmp[8] = product[24]; tmp[9] = product[25]; tmp[10] = product[26]; tmp[11] = product[27];
930 tmp[12] = product[28]; tmp[13] = product[29]; tmp[14] = product[30]; tmp[15] = product[31];
931 tmp[16] = product[32]; tmp[17] = product[33]; tmp[18] = product[34]; tmp[19] = product[35];
932 tmp[20] = product[36]; tmp[21] = product[37]; tmp[22] = product[38]; tmp[23] = product[39];
933 carry += vli_add(result, result, tmp);
934
935 tmp[0] = tmp[8] = product[40];
936 tmp[1] = tmp[9] = product[41];
937 tmp[2] = tmp[10] = product[42];
938 tmp[3] = tmp[11] = product[43];
939 tmp[4] = tmp[12] = product[44];
940 tmp[5] = tmp[13] = product[45];
941 tmp[6] = tmp[14] = product[46];
942 tmp[7] = tmp[15] = product[47];
943 tmp[16] = tmp[17] = tmp[18] = tmp[19] = tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
944 carry += vli_add(result, result, tmp);
945
946 while (carry || vli_cmp(curve_p, result) != 1) {
947 carry -= vli_sub(result, result, curve_p);
948 }
949 }
950 #elif uECC_WORD_SIZE == 4
vli_mmod_fast(uint32_t * RESTRICT result,uint32_t * RESTRICT product)951 static void vli_mmod_fast(uint32_t *RESTRICT result, uint32_t *RESTRICT product) {
952 uint32_t tmp[uECC_WORDS];
953 int carry;
954
955 vli_set(result, product);
956
957 vli_set(tmp, &product[6]);
958 carry = vli_add(result, result, tmp);
959
960 tmp[0] = tmp[1] = 0;
961 tmp[2] = product[6];
962 tmp[3] = product[7];
963 tmp[4] = product[8];
964 tmp[5] = product[9];
965 carry += vli_add(result, result, tmp);
966
967 tmp[0] = tmp[2] = product[10];
968 tmp[1] = tmp[3] = product[11];
969 tmp[4] = tmp[5] = 0;
970 carry += vli_add(result, result, tmp);
971
972 while (carry || vli_cmp(curve_p, result) != 1) {
973 carry -= vli_sub(result, result, curve_p);
974 }
975 }
976 #else
vli_mmod_fast(uint64_t * RESTRICT result,uint64_t * RESTRICT product)977 static void vli_mmod_fast(uint64_t *RESTRICT result, uint64_t *RESTRICT product) {
978 uint64_t tmp[uECC_WORDS];
979 int carry;
980
981 vli_set(result, product);
982
983 vli_set(tmp, &product[3]);
984 carry = vli_add(result, result, tmp);
985
986 tmp[0] = 0;
987 tmp[1] = product[3];
988 tmp[2] = product[4];
989 carry += vli_add(result, result, tmp);
990
991 tmp[0] = tmp[1] = product[5];
992 tmp[2] = 0;
993 carry += vli_add(result, result, tmp);
994
995 while (carry || vli_cmp(curve_p, result) != 1) {
996 carry -= vli_sub(result, result, curve_p);
997 }
998 }
999 #endif /* uECC_WORD_SIZE */
1000
1001 #elif uECC_CURVE == uECC_secp256r1
1002
1003 /* Computes result = product % curve_p
1004 from www.nsa.gov/ia/_files/nist-routines.pdf */
1005 #if uECC_WORD_SIZE == 1
vli_mmod_fast(uint8_t * RESTRICT result,uint8_t * RESTRICT product)1006 static void vli_mmod_fast(uint8_t *RESTRICT result, uint8_t *RESTRICT product) {
1007 uint8_t tmp[uECC_BYTES];
1008 int8_t carry;
1009
1010 /* t */
1011 vli_set(result, product);
1012
1013 /* s1 */
1014 tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0;
1015 tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0;
1016 tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0;
1017 tmp[12] = product[44]; tmp[13] = product[45]; tmp[14] = product[46]; tmp[15] = product[47];
1018 tmp[16] = product[48]; tmp[17] = product[49]; tmp[18] = product[50]; tmp[19] = product[51];
1019 tmp[20] = product[52]; tmp[21] = product[53]; tmp[22] = product[54]; tmp[23] = product[55];
1020 tmp[24] = product[56]; tmp[25] = product[57]; tmp[26] = product[58]; tmp[27] = product[59];
1021 tmp[28] = product[60]; tmp[29] = product[61]; tmp[30] = product[62]; tmp[31] = product[63];
1022 carry = vli_add(tmp, tmp, tmp);
1023 carry += vli_add(result, result, tmp);
1024
1025 /* s2 */
1026 tmp[12] = product[48]; tmp[13] = product[49]; tmp[14] = product[50]; tmp[15] = product[51];
1027 tmp[16] = product[52]; tmp[17] = product[53]; tmp[18] = product[54]; tmp[19] = product[55];
1028 tmp[20] = product[56]; tmp[21] = product[57]; tmp[22] = product[58]; tmp[23] = product[59];
1029 tmp[24] = product[60]; tmp[25] = product[61]; tmp[26] = product[62]; tmp[27] = product[63];
1030 tmp[28] = tmp[29] = tmp[30] = tmp[31] = 0;
1031 carry += vli_add(tmp, tmp, tmp);
1032 carry += vli_add(result, result, tmp);
1033
1034 /* s3 */
1035 tmp[0] = product[32]; tmp[1] = product[33]; tmp[2] = product[34]; tmp[3] = product[35];
1036 tmp[4] = product[36]; tmp[5] = product[37]; tmp[6] = product[38]; tmp[7] = product[39];
1037 tmp[8] = product[40]; tmp[9] = product[41]; tmp[10] = product[42]; tmp[11] = product[43];
1038 tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0;
1039 tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
1040 tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
1041 tmp[24] = product[56]; tmp[25] = product[57]; tmp[26] = product[58]; tmp[27] = product[59];
1042 tmp[28] = product[60]; tmp[29] = product[61]; tmp[30] = product[62]; tmp[31] = product[63];
1043 carry += vli_add(result, result, tmp);
1044
1045 /* s4 */
1046 tmp[0] = product[36]; tmp[1] = product[37]; tmp[2] = product[38]; tmp[3] = product[39];
1047 tmp[4] = product[40]; tmp[5] = product[41]; tmp[6] = product[42]; tmp[7] = product[43];
1048 tmp[8] = product[44]; tmp[9] = product[45]; tmp[10] = product[46]; tmp[11] = product[47];
1049 tmp[12] = product[52]; tmp[13] = product[53]; tmp[14] = product[54]; tmp[15] = product[55];
1050 tmp[16] = product[56]; tmp[17] = product[57]; tmp[18] = product[58]; tmp[19] = product[59];
1051 tmp[20] = product[60]; tmp[21] = product[61]; tmp[22] = product[62]; tmp[23] = product[63];
1052 tmp[24] = product[52]; tmp[25] = product[53]; tmp[26] = product[54]; tmp[27] = product[55];
1053 tmp[28] = product[32]; tmp[29] = product[33]; tmp[30] = product[34]; tmp[31] = product[35];
1054 carry += vli_add(result, result, tmp);
1055
1056 /* d1 */
1057 tmp[0] = product[44]; tmp[1] = product[45]; tmp[2] = product[46]; tmp[3] = product[47];
1058 tmp[4] = product[48]; tmp[5] = product[49]; tmp[6] = product[50]; tmp[7] = product[51];
1059 tmp[8] = product[52]; tmp[9] = product[53]; tmp[10] = product[54]; tmp[11] = product[55];
1060 tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0;
1061 tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
1062 tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
1063 tmp[24] = product[32]; tmp[25] = product[33]; tmp[26] = product[34]; tmp[27] = product[35];
1064 tmp[28] = product[40]; tmp[29] = product[41]; tmp[30] = product[42]; tmp[31] = product[43];
1065 carry -= vli_sub(result, result, tmp);
1066
1067 /* d2 */
1068 tmp[0] = product[48]; tmp[1] = product[49]; tmp[2] = product[50]; tmp[3] = product[51];
1069 tmp[4] = product[52]; tmp[5] = product[53]; tmp[6] = product[54]; tmp[7] = product[55];
1070 tmp[8] = product[56]; tmp[9] = product[57]; tmp[10] = product[58]; tmp[11] = product[59];
1071 tmp[12] = product[60]; tmp[13] = product[61]; tmp[14] = product[62]; tmp[15] = product[63];
1072 tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
1073 tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
1074 tmp[24] = product[36]; tmp[25] = product[37]; tmp[26] = product[38]; tmp[27] = product[39];
1075 tmp[28] = product[44]; tmp[29] = product[45]; tmp[30] = product[46]; tmp[31] = product[47];
1076 carry -= vli_sub(result, result, tmp);
1077
1078 /* d3 */
1079 tmp[0] = product[52]; tmp[1] = product[53]; tmp[2] = product[54]; tmp[3] = product[55];
1080 tmp[4] = product[56]; tmp[5] = product[57]; tmp[6] = product[58]; tmp[7] = product[59];
1081 tmp[8] = product[60]; tmp[9] = product[61]; tmp[10] = product[62]; tmp[11] = product[63];
1082 tmp[12] = product[32]; tmp[13] = product[33]; tmp[14] = product[34]; tmp[15] = product[35];
1083 tmp[16] = product[36]; tmp[17] = product[37]; tmp[18] = product[38]; tmp[19] = product[39];
1084 tmp[20] = product[40]; tmp[21] = product[41]; tmp[22] = product[42]; tmp[23] = product[43];
1085 tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
1086 tmp[28] = product[48]; tmp[29] = product[49]; tmp[30] = product[50]; tmp[31] = product[51];
1087 carry -= vli_sub(result, result, tmp);
1088
1089 /* d4 */
1090 tmp[0] = product[56]; tmp[1] = product[57]; tmp[2] = product[58]; tmp[3] = product[59];
1091 tmp[4] = product[60]; tmp[5] = product[61]; tmp[6] = product[62]; tmp[7] = product[63];
1092 tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0;
1093 tmp[12] = product[36]; tmp[13] = product[37]; tmp[14] = product[38]; tmp[15] = product[39];
1094 tmp[16] = product[40]; tmp[17] = product[41]; tmp[18] = product[42]; tmp[19] = product[43];
1095 tmp[20] = product[44]; tmp[21] = product[45]; tmp[22] = product[46]; tmp[23] = product[47];
1096 tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
1097 tmp[28] = product[52]; tmp[29] = product[53]; tmp[30] = product[54]; tmp[31] = product[55];
1098 carry -= vli_sub(result, result, tmp);
1099
1100 if (carry < 0) {
1101 do {
1102 carry += vli_add(result, result, curve_p);
1103 } while (carry < 0);
1104 } else {
1105 while (carry || vli_cmp(curve_p, result) != 1) {
1106 carry -= vli_sub(result, result, curve_p);
1107 }
1108 }
1109 }
1110 #elif uECC_WORD_SIZE == 4
vli_mmod_fast(uint32_t * RESTRICT result,uint32_t * RESTRICT product)1111 static void vli_mmod_fast(uint32_t *RESTRICT result, uint32_t *RESTRICT product) {
1112 uint32_t tmp[uECC_WORDS];
1113 int carry;
1114
1115 /* t */
1116 vli_set(result, product);
1117
1118 /* s1 */
1119 tmp[0] = tmp[1] = tmp[2] = 0;
1120 tmp[3] = product[11];
1121 tmp[4] = product[12];
1122 tmp[5] = product[13];
1123 tmp[6] = product[14];
1124 tmp[7] = product[15];
1125 carry = vli_add(tmp, tmp, tmp);
1126 carry += vli_add(result, result, tmp);
1127
1128 /* s2 */
1129 tmp[3] = product[12];
1130 tmp[4] = product[13];
1131 tmp[5] = product[14];
1132 tmp[6] = product[15];
1133 tmp[7] = 0;
1134 carry += vli_add(tmp, tmp, tmp);
1135 carry += vli_add(result, result, tmp);
1136
1137 /* s3 */
1138 tmp[0] = product[8];
1139 tmp[1] = product[9];
1140 tmp[2] = product[10];
1141 tmp[3] = tmp[4] = tmp[5] = 0;
1142 tmp[6] = product[14];
1143 tmp[7] = product[15];
1144 carry += vli_add(result, result, tmp);
1145
1146 /* s4 */
1147 tmp[0] = product[9];
1148 tmp[1] = product[10];
1149 tmp[2] = product[11];
1150 tmp[3] = product[13];
1151 tmp[4] = product[14];
1152 tmp[5] = product[15];
1153 tmp[6] = product[13];
1154 tmp[7] = product[8];
1155 carry += vli_add(result, result, tmp);
1156
1157 /* d1 */
1158 tmp[0] = product[11];
1159 tmp[1] = product[12];
1160 tmp[2] = product[13];
1161 tmp[3] = tmp[4] = tmp[5] = 0;
1162 tmp[6] = product[8];
1163 tmp[7] = product[10];
1164 carry -= vli_sub(result, result, tmp);
1165
1166 /* d2 */
1167 tmp[0] = product[12];
1168 tmp[1] = product[13];
1169 tmp[2] = product[14];
1170 tmp[3] = product[15];
1171 tmp[4] = tmp[5] = 0;
1172 tmp[6] = product[9];
1173 tmp[7] = product[11];
1174 carry -= vli_sub(result, result, tmp);
1175
1176 /* d3 */
1177 tmp[0] = product[13];
1178 tmp[1] = product[14];
1179 tmp[2] = product[15];
1180 tmp[3] = product[8];
1181 tmp[4] = product[9];
1182 tmp[5] = product[10];
1183 tmp[6] = 0;
1184 tmp[7] = product[12];
1185 carry -= vli_sub(result, result, tmp);
1186
1187 /* d4 */
1188 tmp[0] = product[14];
1189 tmp[1] = product[15];
1190 tmp[2] = 0;
1191 tmp[3] = product[9];
1192 tmp[4] = product[10];
1193 tmp[5] = product[11];
1194 tmp[6] = 0;
1195 tmp[7] = product[13];
1196 carry -= vli_sub(result, result, tmp);
1197
1198 if (carry < 0) {
1199 do {
1200 carry += vli_add(result, result, curve_p);
1201 } while (carry < 0);
1202 } else {
1203 while (carry || vli_cmp(curve_p, result) != 1) {
1204 carry -= vli_sub(result, result, curve_p);
1205 }
1206 }
1207 }
1208 #else
vli_mmod_fast(uint64_t * RESTRICT result,uint64_t * RESTRICT product)1209 static void vli_mmod_fast(uint64_t *RESTRICT result, uint64_t *RESTRICT product) {
1210 uint64_t tmp[uECC_WORDS];
1211 int carry;
1212
1213 /* t */
1214 vli_set(result, product);
1215
1216 /* s1 */
1217 tmp[0] = 0;
1218 tmp[1] = product[5] & 0xffffffff00000000ull;
1219 tmp[2] = product[6];
1220 tmp[3] = product[7];
1221 carry = vli_add(tmp, tmp, tmp);
1222 carry += vli_add(result, result, tmp);
1223
1224 /* s2 */
1225 tmp[1] = product[6] << 32;
1226 tmp[2] = (product[6] >> 32) | (product[7] << 32);
1227 tmp[3] = product[7] >> 32;
1228 carry += vli_add(tmp, tmp, tmp);
1229 carry += vli_add(result, result, tmp);
1230
1231 /* s3 */
1232 tmp[0] = product[4];
1233 tmp[1] = product[5] & 0xffffffff;
1234 tmp[2] = 0;
1235 tmp[3] = product[7];
1236 carry += vli_add(result, result, tmp);
1237
1238 /* s4 */
1239 tmp[0] = (product[4] >> 32) | (product[5] << 32);
1240 tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull);
1241 tmp[2] = product[7];
1242 tmp[3] = (product[6] >> 32) | (product[4] << 32);
1243 carry += vli_add(result, result, tmp);
1244
1245 /* d1 */
1246 tmp[0] = (product[5] >> 32) | (product[6] << 32);
1247 tmp[1] = (product[6] >> 32);
1248 tmp[2] = 0;
1249 tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32);
1250 carry -= vli_sub(result, result, tmp);
1251
1252 /* d2 */
1253 tmp[0] = product[6];
1254 tmp[1] = product[7];
1255 tmp[2] = 0;
1256 tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull);
1257 carry -= vli_sub(result, result, tmp);
1258
1259 /* d3 */
1260 tmp[0] = (product[6] >> 32) | (product[7] << 32);
1261 tmp[1] = (product[7] >> 32) | (product[4] << 32);
1262 tmp[2] = (product[4] >> 32) | (product[5] << 32);
1263 tmp[3] = (product[6] << 32);
1264 carry -= vli_sub(result, result, tmp);
1265
1266 /* d4 */
1267 tmp[0] = product[7];
1268 tmp[1] = product[4] & 0xffffffff00000000ull;
1269 tmp[2] = product[5];
1270 tmp[3] = product[6] & 0xffffffff00000000ull;
1271 carry -= vli_sub(result, result, tmp);
1272
1273 if (carry < 0) {
1274 do {
1275 carry += vli_add(result, result, curve_p);
1276 } while (carry < 0);
1277 } else {
1278 while (carry || vli_cmp(curve_p, result) != 1) {
1279 carry -= vli_sub(result, result, curve_p);
1280 }
1281 }
1282 }
1283 #endif /* uECC_WORD_SIZE */
1284
1285 #elif uECC_CURVE == uECC_secp256k1
1286
1287 #if uECC_WORD_SIZE == 1
omega_mult(uint8_t * RESTRICT result,const uint8_t * RESTRICT right)1288 static void omega_mult(uint8_t * RESTRICT result, const uint8_t * RESTRICT right) {
1289 /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */
1290 uECC_word_t r0 = 0;
1291 uECC_word_t r1 = 0;
1292 uECC_word_t r2 = 0;
1293 wordcount_t k;
1294
1295 /* Multiply by (2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */
1296 muladd(0xD1, right[0], &r0, &r1, &r2);
1297 result[0] = r0;
1298 r0 = r1;
1299 r1 = r2;
1300 /* r2 is still 0 */
1301
1302 for (k = 1; k < uECC_WORDS; ++k) {
1303 muladd(0x03, right[k - 1], &r0, &r1, &r2);
1304 muladd(0xD1, right[k], &r0, &r1, &r2);
1305 result[k] = r0;
1306 r0 = r1;
1307 r1 = r2;
1308 r2 = 0;
1309 }
1310 muladd(0x03, right[uECC_WORDS - 1], &r0, &r1, &r2);
1311 result[uECC_WORDS] = r0;
1312 result[uECC_WORDS + 1] = r1;
1313
1314 result[4 + uECC_WORDS] = vli_add(result + 4, result + 4, right); /* add the 2^32 multiple */
1315 }
1316 #elif uECC_WORD_SIZE == 4
omega_mult(uint32_t * RESTRICT result,const uint32_t * RESTRICT right)1317 static void omega_mult(uint32_t * RESTRICT result, const uint32_t * RESTRICT right) {
1318 /* Multiply by (2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */
1319 uint32_t carry = 0;
1320 wordcount_t k;
1321
1322 for (k = 0; k < uECC_WORDS; ++k) {
1323 uint64_t p = (uint64_t)0x3D1 * right[k] + carry;
1324 result[k] = (p & 0xffffffff);
1325 carry = p >> 32;
1326 }
1327 result[uECC_WORDS] = carry;
1328
1329 result[1 + uECC_WORDS] = vli_add(result + 1, result + 1, right); /* add the 2^32 multiple */
1330 }
1331 #else
omega_mult(uint64_t * RESTRICT result,const uint64_t * RESTRICT right)1332 static void omega_mult(uint64_t * RESTRICT result, const uint64_t * RESTRICT right) {
1333 uECC_word_t r0 = 0;
1334 uECC_word_t r1 = 0;
1335 uECC_word_t r2 = 0;
1336 wordcount_t k;
1337
1338 /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */
1339 for (k = 0; k < uECC_WORDS; ++k) {
1340 muladd(0x1000003D1ull, right[k], &r0, &r1, &r2);
1341 result[k] = r0;
1342 r0 = r1;
1343 r1 = r2;
1344 r2 = 0;
1345 }
1346 result[uECC_WORDS] = r0;
1347 }
1348 #endif /* uECC_WORD_SIZE */
1349
1350 #elif uECC_CURVE == uECC_secp224r1
1351
1352 /* Computes result = product % curve_p
1353 from http://www.nsa.gov/ia/_files/nist-routines.pdf */
1354 #if uECC_WORD_SIZE == 1
1355 // TODO it may be faster to use the omega_mult method when fully asm optimized.
vli_mmod_fast(uint8_t * RESTRICT result,uint8_t * RESTRICT product)1356 void vli_mmod_fast(uint8_t *RESTRICT result, uint8_t *RESTRICT product) {
1357 uint8_t tmp[uECC_WORDS];
1358 int8_t carry;
1359
1360 /* t */
1361 vli_set(result, product);
1362
1363 /* s1 */
1364 tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0;
1365 tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0;
1366 tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0;
1367 tmp[12] = product[28]; tmp[13] = product[29]; tmp[14] = product[30]; tmp[15] = product[31];
1368 tmp[16] = product[32]; tmp[17] = product[33]; tmp[18] = product[34]; tmp[19] = product[35];
1369 tmp[20] = product[36]; tmp[21] = product[37]; tmp[22] = product[38]; tmp[23] = product[39];
1370 tmp[24] = product[40]; tmp[25] = product[41]; tmp[26] = product[42]; tmp[27] = product[43];
1371 carry = vli_add(result, result, tmp);
1372
1373 /* s2 */
1374 tmp[12] = product[44]; tmp[13] = product[45]; tmp[14] = product[46]; tmp[15] = product[47];
1375 tmp[16] = product[48]; tmp[17] = product[49]; tmp[18] = product[50]; tmp[19] = product[51];
1376 tmp[20] = product[52]; tmp[21] = product[53]; tmp[22] = product[54]; tmp[23] = product[55];
1377 tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
1378 carry += vli_add(result, result, tmp);
1379
1380 /* d1 */
1381 tmp[0] = product[28]; tmp[1] = product[29]; tmp[2] = product[30]; tmp[3] = product[31];
1382 tmp[4] = product[32]; tmp[5] = product[33]; tmp[6] = product[34]; tmp[7] = product[35];
1383 tmp[8] = product[36]; tmp[9] = product[37]; tmp[10] = product[38]; tmp[11] = product[39];
1384 tmp[12] = product[40]; tmp[13] = product[41]; tmp[14] = product[42]; tmp[15] = product[43];
1385 tmp[16] = product[44]; tmp[17] = product[45]; tmp[18] = product[46]; tmp[19] = product[47];
1386 tmp[20] = product[48]; tmp[21] = product[49]; tmp[22] = product[50]; tmp[23] = product[51];
1387 tmp[24] = product[52]; tmp[25] = product[53]; tmp[26] = product[54]; tmp[27] = product[55];
1388 carry -= vli_sub(result, result, tmp);
1389
1390 /* d2 */
1391 tmp[0] = product[44]; tmp[1] = product[45]; tmp[2] = product[46]; tmp[3] = product[47];
1392 tmp[4] = product[48]; tmp[5] = product[49]; tmp[6] = product[50]; tmp[7] = product[51];
1393 tmp[8] = product[52]; tmp[9] = product[53]; tmp[10] = product[54]; tmp[11] = product[55];
1394 tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0;
1395 tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
1396 tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
1397 tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
1398 carry -= vli_sub(result, result, tmp);
1399
1400 if (carry < 0) {
1401 do {
1402 carry += vli_add(result, result, curve_p);
1403 } while (carry < 0);
1404 } else {
1405 while (carry || vli_cmp(curve_p, result) != 1) {
1406 carry -= vli_sub(result, result, curve_p);
1407 }
1408 }
1409 }
1410 #elif uECC_WORD_SIZE == 4
vli_mmod_fast(uint32_t * RESTRICT result,uint32_t * RESTRICT product)1411 void vli_mmod_fast(uint32_t *RESTRICT result, uint32_t *RESTRICT product)
1412 {
1413 uint32_t tmp[uECC_WORDS];
1414 int carry;
1415
1416 /* t */
1417 vli_set(result, product);
1418
1419 /* s1 */
1420 tmp[0] = tmp[1] = tmp[2] = 0;
1421 tmp[3] = product[7];
1422 tmp[4] = product[8];
1423 tmp[5] = product[9];
1424 tmp[6] = product[10];
1425 carry = vli_add(result, result, tmp);
1426
1427 /* s2 */
1428 tmp[3] = product[11];
1429 tmp[4] = product[12];
1430 tmp[5] = product[13];
1431 tmp[6] = 0;
1432 carry += vli_add(result, result, tmp);
1433
1434 /* d1 */
1435 tmp[0] = product[7];
1436 tmp[1] = product[8];
1437 tmp[2] = product[9];
1438 tmp[3] = product[10];
1439 tmp[4] = product[11];
1440 tmp[5] = product[12];
1441 tmp[6] = product[13];
1442 carry -= vli_sub(result, result, tmp);
1443
1444 /* d2 */
1445 tmp[0] = product[11];
1446 tmp[1] = product[12];
1447 tmp[2] = product[13];
1448 tmp[3] = tmp[4] = tmp[5] = tmp[6] = 0;
1449 carry -= vli_sub(result, result, tmp);
1450
1451 if (carry < 0) {
1452 do {
1453 carry += vli_add(result, result, curve_p);
1454 } while (carry < 0);
1455 } else {
1456 while (carry || vli_cmp(curve_p, result) != 1) {
1457 carry -= vli_sub(result, result, curve_p);
1458 }
1459 }
1460 }
1461 #endif /* uECC_WORD_SIZE */
1462
1463 #endif /* uECC_CURVE */
1464 #endif /* !asm_mmod_fast */
1465
1466 /* Computes result = (left * right) % curve_p. */
vli_modMult_fast(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right)1467 static void vli_modMult_fast(uECC_word_t *result,
1468 const uECC_word_t *left,
1469 const uECC_word_t *right) {
1470 uECC_word_t product[2 * uECC_WORDS];
1471 vli_mult(product, left, right);
1472 vli_mmod_fast(result, product);
1473 }
1474
1475 #if uECC_SQUARE_FUNC
1476
1477 /* Computes result = left^2 % curve_p. */
vli_modSquare_fast(uECC_word_t * result,const uECC_word_t * left)1478 static void vli_modSquare_fast(uECC_word_t *result, const uECC_word_t *left) {
1479 uECC_word_t product[2 * uECC_WORDS];
1480 vli_square(product, left);
1481 vli_mmod_fast(result, product);
1482 }
1483
1484 #else /* uECC_SQUARE_FUNC */
1485
1486 #define vli_modSquare_fast(result, left) vli_modMult_fast((result), (left), (left))
1487
1488 #endif /* uECC_SQUARE_FUNC */
1489
1490
1491 #define EVEN(vli) (!(vli[0] & 1))
1492 /* Computes result = (1 / input) % mod. All VLIs are the same size.
1493 See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
1494 labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf */
1495 #if !asm_modInv
vli_modInv(uECC_word_t * result,const uECC_word_t * input,const uECC_word_t * mod)1496 static void vli_modInv(uECC_word_t *result, const uECC_word_t *input, const uECC_word_t *mod) {
1497 uECC_word_t a[uECC_WORDS], b[uECC_WORDS], u[uECC_WORDS], v[uECC_WORDS];
1498 uECC_word_t carry;
1499 cmpresult_t cmpResult;
1500
1501 if (vli_isZero(input)) {
1502 vli_clear(result);
1503 return;
1504 }
1505
1506 vli_set(a, input);
1507 vli_set(b, mod);
1508 vli_clear(u);
1509 u[0] = 1;
1510 vli_clear(v);
1511 while ((cmpResult = vli_cmp(a, b)) != 0) {
1512 carry = 0;
1513 if (EVEN(a)) {
1514 vli_rshift1(a);
1515 if (!EVEN(u)) {
1516 carry = vli_add(u, u, mod);
1517 }
1518 vli_rshift1(u);
1519 if (carry) {
1520 u[uECC_WORDS - 1] |= HIGH_BIT_SET;
1521 }
1522 } else if (EVEN(b)) {
1523 vli_rshift1(b);
1524 if (!EVEN(v)) {
1525 carry = vli_add(v, v, mod);
1526 }
1527 vli_rshift1(v);
1528 if (carry) {
1529 v[uECC_WORDS - 1] |= HIGH_BIT_SET;
1530 }
1531 } else if (cmpResult > 0) {
1532 vli_sub(a, a, b);
1533 vli_rshift1(a);
1534 if (vli_cmp(u, v) < 0) {
1535 vli_add(u, u, mod);
1536 }
1537 vli_sub(u, u, v);
1538 if (!EVEN(u)) {
1539 carry = vli_add(u, u, mod);
1540 }
1541 vli_rshift1(u);
1542 if (carry) {
1543 u[uECC_WORDS - 1] |= HIGH_BIT_SET;
1544 }
1545 } else {
1546 vli_sub(b, b, a);
1547 vli_rshift1(b);
1548 if (vli_cmp(v, u) < 0) {
1549 vli_add(v, v, mod);
1550 }
1551 vli_sub(v, v, u);
1552 if (!EVEN(v)) {
1553 carry = vli_add(v, v, mod);
1554 }
1555 vli_rshift1(v);
1556 if (carry) {
1557 v[uECC_WORDS - 1] |= HIGH_BIT_SET;
1558 }
1559 }
1560 }
1561 vli_set(result, u);
1562 }
1563 #endif /* !asm_modInv */
1564
1565 /* ------ Point operations ------ */
1566
1567 /* Returns 1 if 'point' is the point at infinity, 0 otherwise. */
EccPoint_isZero(const EccPoint * point)1568 static cmpresult_t EccPoint_isZero(const EccPoint *point) {
1569 return (vli_isZero(point->x) && vli_isZero(point->y));
1570 }
1571
1572 /* Point multiplication algorithm using Montgomery's ladder with co-Z coordinates.
1573 From eprint.iacr.org/2011/338.pdf
1574 */
1575
1576 /* Double in place */
1577 #if (uECC_CURVE == uECC_secp256k1)
EccPoint_double_jacobian(uECC_word_t * RESTRICT X1,uECC_word_t * RESTRICT Y1,uECC_word_t * RESTRICT Z1)1578 static void EccPoint_double_jacobian(uECC_word_t * RESTRICT X1,
1579 uECC_word_t * RESTRICT Y1,
1580 uECC_word_t * RESTRICT Z1) {
1581 /* t1 = X, t2 = Y, t3 = Z */
1582 uECC_word_t t4[uECC_WORDS];
1583 uECC_word_t t5[uECC_WORDS];
1584
1585 if (vli_isZero(Z1)) {
1586 return;
1587 }
1588
1589 vli_modSquare_fast(t5, Y1); /* t5 = y1^2 */
1590 vli_modMult_fast(t4, X1, t5); /* t4 = x1*y1^2 = A */
1591 vli_modSquare_fast(X1, X1); /* t1 = x1^2 */
1592 vli_modSquare_fast(t5, t5); /* t5 = y1^4 */
1593 vli_modMult_fast(Z1, Y1, Z1); /* t3 = y1*z1 = z3 */
1594
1595 vli_modAdd(Y1, X1, X1, curve_p); /* t2 = 2*x1^2 */
1596 vli_modAdd(Y1, Y1, X1, curve_p); /* t2 = 3*x1^2 */
1597 if (vli_testBit(Y1, 0)) {
1598 uECC_word_t carry = vli_add(Y1, Y1, curve_p);
1599 vli_rshift1(Y1);
1600 Y1[uECC_WORDS - 1] |= carry << (uECC_WORD_BITS - 1);
1601 } else {
1602 vli_rshift1(Y1);
1603 }
1604 /* t2 = 3/2*(x1^2) = B */
1605
1606 vli_modSquare_fast(X1, Y1); /* t1 = B^2 */
1607 vli_modSub(X1, X1, t4, curve_p); /* t1 = B^2 - A */
1608 vli_modSub(X1, X1, t4, curve_p); /* t1 = B^2 - 2A = x3 */
1609
1610 vli_modSub(t4, t4, X1, curve_p); /* t4 = A - x3 */
1611 vli_modMult_fast(Y1, Y1, t4); /* t2 = B * (A - x3) */
1612 vli_modSub(Y1, Y1, t5, curve_p); /* t2 = B * (A - x3) - y1^4 = y3 */
1613 }
1614 #else
EccPoint_double_jacobian(uECC_word_t * RESTRICT X1,uECC_word_t * RESTRICT Y1,uECC_word_t * RESTRICT Z1)1615 static void EccPoint_double_jacobian(uECC_word_t * RESTRICT X1,
1616 uECC_word_t * RESTRICT Y1,
1617 uECC_word_t * RESTRICT Z1) {
1618 /* t1 = X, t2 = Y, t3 = Z */
1619 uECC_word_t t4[uECC_WORDS];
1620 uECC_word_t t5[uECC_WORDS];
1621
1622 if (vli_isZero(Z1)) {
1623 return;
1624 }
1625
1626 vli_modSquare_fast(t4, Y1); /* t4 = y1^2 */
1627 vli_modMult_fast(t5, X1, t4); /* t5 = x1*y1^2 = A */
1628 vli_modSquare_fast(t4, t4); /* t4 = y1^4 */
1629 vli_modMult_fast(Y1, Y1, Z1); /* t2 = y1*z1 = z3 */
1630 vli_modSquare_fast(Z1, Z1); /* t3 = z1^2 */
1631
1632 vli_modAdd(X1, X1, Z1, curve_p); /* t1 = x1 + z1^2 */
1633 vli_modAdd(Z1, Z1, Z1, curve_p); /* t3 = 2*z1^2 */
1634 vli_modSub_fast(Z1, X1, Z1); /* t3 = x1 - z1^2 */
1635 vli_modMult_fast(X1, X1, Z1); /* t1 = x1^2 - z1^4 */
1636
1637 vli_modAdd(Z1, X1, X1, curve_p); /* t3 = 2*(x1^2 - z1^4) */
1638 vli_modAdd(X1, X1, Z1, curve_p); /* t1 = 3*(x1^2 - z1^4) */
1639 if (vli_testBit(X1, 0)) {
1640 uECC_word_t l_carry = vli_add(X1, X1, curve_p);
1641 vli_rshift1(X1);
1642 X1[uECC_WORDS - 1] |= l_carry << (uECC_WORD_BITS - 1);
1643 } else {
1644 vli_rshift1(X1);
1645 }
1646 /* t1 = 3/2*(x1^2 - z1^4) = B */
1647
1648 vli_modSquare_fast(Z1, X1); /* t3 = B^2 */
1649 vli_modSub_fast(Z1, Z1, t5); /* t3 = B^2 - A */
1650 vli_modSub_fast(Z1, Z1, t5); /* t3 = B^2 - 2A = x3 */
1651 vli_modSub_fast(t5, t5, Z1); /* t5 = A - x3 */
1652 vli_modMult_fast(X1, X1, t5); /* t1 = B * (A - x3) */
1653 vli_modSub_fast(t4, X1, t4); /* t4 = B * (A - x3) - y1^4 = y3 */
1654
1655 vli_set(X1, Z1);
1656 vli_set(Z1, Y1);
1657 vli_set(Y1, t4);
1658 }
1659 #endif
1660
1661 /* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */
apply_z(uECC_word_t * RESTRICT X1,uECC_word_t * RESTRICT Y1,const uECC_word_t * RESTRICT Z)1662 static void apply_z(uECC_word_t * RESTRICT X1,
1663 uECC_word_t * RESTRICT Y1,
1664 const uECC_word_t * RESTRICT Z) {
1665 uECC_word_t t1[uECC_WORDS];
1666
1667 vli_modSquare_fast(t1, Z); /* z^2 */
1668 vli_modMult_fast(X1, X1, t1); /* x1 * z^2 */
1669 vli_modMult_fast(t1, t1, Z); /* z^3 */
1670 vli_modMult_fast(Y1, Y1, t1); /* y1 * z^3 */
1671 }
1672
1673 /* P = (x1, y1) => 2P, (x2, y2) => P' */
XYcZ_initial_double(uECC_word_t * RESTRICT X1,uECC_word_t * RESTRICT Y1,uECC_word_t * RESTRICT X2,uECC_word_t * RESTRICT Y2,const uECC_word_t * RESTRICT initial_Z)1674 static void XYcZ_initial_double(uECC_word_t * RESTRICT X1,
1675 uECC_word_t * RESTRICT Y1,
1676 uECC_word_t * RESTRICT X2,
1677 uECC_word_t * RESTRICT Y2,
1678 const uECC_word_t * RESTRICT initial_Z) {
1679 uECC_word_t z[uECC_WORDS];
1680 if (initial_Z) {
1681 vli_set(z, initial_Z);
1682 } else {
1683 vli_clear(z);
1684 z[0] = 1;
1685 }
1686
1687 vli_set(X2, X1);
1688 vli_set(Y2, Y1);
1689
1690 apply_z(X1, Y1, z);
1691 EccPoint_double_jacobian(X1, Y1, z);
1692 apply_z(X2, Y2, z);
1693 }
1694
1695 /* Input P = (x1, y1, Z), Q = (x2, y2, Z)
1696 Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
1697 or P => P', Q => P + Q
1698 */
XYcZ_add(uECC_word_t * RESTRICT X1,uECC_word_t * RESTRICT Y1,uECC_word_t * RESTRICT X2,uECC_word_t * RESTRICT Y2)1699 static void XYcZ_add(uECC_word_t * RESTRICT X1,
1700 uECC_word_t * RESTRICT Y1,
1701 uECC_word_t * RESTRICT X2,
1702 uECC_word_t * RESTRICT Y2) {
1703 /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
1704 uECC_word_t t5[uECC_WORDS];
1705
1706 vli_modSub_fast(t5, X2, X1); /* t5 = x2 - x1 */
1707 vli_modSquare_fast(t5, t5); /* t5 = (x2 - x1)^2 = A */
1708 vli_modMult_fast(X1, X1, t5); /* t1 = x1*A = B */
1709 vli_modMult_fast(X2, X2, t5); /* t3 = x2*A = C */
1710 vli_modSub_fast(Y2, Y2, Y1); /* t4 = y2 - y1 */
1711 vli_modSquare_fast(t5, Y2); /* t5 = (y2 - y1)^2 = D */
1712
1713 vli_modSub_fast(t5, t5, X1); /* t5 = D - B */
1714 vli_modSub_fast(t5, t5, X2); /* t5 = D - B - C = x3 */
1715 vli_modSub_fast(X2, X2, X1); /* t3 = C - B */
1716 vli_modMult_fast(Y1, Y1, X2); /* t2 = y1*(C - B) */
1717 vli_modSub_fast(X2, X1, t5); /* t3 = B - x3 */
1718 vli_modMult_fast(Y2, Y2, X2); /* t4 = (y2 - y1)*(B - x3) */
1719 vli_modSub_fast(Y2, Y2, Y1); /* t4 = y3 */
1720
1721 vli_set(X2, t5);
1722 }
1723
1724 /* Input P = (x1, y1, Z), Q = (x2, y2, Z)
1725 Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
1726 or P => P - Q, Q => P + Q
1727 */
XYcZ_addC(uECC_word_t * RESTRICT X1,uECC_word_t * RESTRICT Y1,uECC_word_t * RESTRICT X2,uECC_word_t * RESTRICT Y2)1728 static void XYcZ_addC(uECC_word_t * RESTRICT X1,
1729 uECC_word_t * RESTRICT Y1,
1730 uECC_word_t * RESTRICT X2,
1731 uECC_word_t * RESTRICT Y2) {
1732 /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
1733 uECC_word_t t5[uECC_WORDS];
1734 uECC_word_t t6[uECC_WORDS];
1735 uECC_word_t t7[uECC_WORDS];
1736
1737 vli_modSub_fast(t5, X2, X1); /* t5 = x2 - x1 */
1738 vli_modSquare_fast(t5, t5); /* t5 = (x2 - x1)^2 = A */
1739 vli_modMult_fast(X1, X1, t5); /* t1 = x1*A = B */
1740 vli_modMult_fast(X2, X2, t5); /* t3 = x2*A = C */
1741 vli_modAdd(t5, Y2, Y1, curve_p); /* t5 = y2 + y1 */
1742 vli_modSub_fast(Y2, Y2, Y1); /* t4 = y2 - y1 */
1743
1744 vli_modSub_fast(t6, X2, X1); /* t6 = C - B */
1745 vli_modMult_fast(Y1, Y1, t6); /* t2 = y1 * (C - B) = E */
1746 vli_modAdd(t6, X1, X2, curve_p); /* t6 = B + C */
1747 vli_modSquare_fast(X2, Y2); /* t3 = (y2 - y1)^2 = D */
1748 vli_modSub_fast(X2, X2, t6); /* t3 = D - (B + C) = x3 */
1749
1750 vli_modSub_fast(t7, X1, X2); /* t7 = B - x3 */
1751 vli_modMult_fast(Y2, Y2, t7); /* t4 = (y2 - y1)*(B - x3) */
1752 vli_modSub_fast(Y2, Y2, Y1); /* t4 = (y2 - y1)*(B - x3) - E = y3 */
1753
1754 vli_modSquare_fast(t7, t5); /* t7 = (y2 + y1)^2 = F */
1755 vli_modSub_fast(t7, t7, t6); /* t7 = F - (B + C) = x3' */
1756 vli_modSub_fast(t6, t7, X1); /* t6 = x3' - B */
1757 vli_modMult_fast(t6, t6, t5); /* t6 = (y2 + y1)*(x3' - B) */
1758 vli_modSub_fast(Y1, t6, Y1); /* t2 = (y2 + y1)*(x3' - B) - E = y3' */
1759
1760 vli_set(X1, t7);
1761 }
1762
EccPoint_mult(EccPoint * RESTRICT result,const EccPoint * RESTRICT point,const uECC_word_t * RESTRICT scalar,const uECC_word_t * RESTRICT initialZ,bitcount_t numBits)1763 static void EccPoint_mult(EccPoint * RESTRICT result,
1764 const EccPoint * RESTRICT point,
1765 const uECC_word_t * RESTRICT scalar,
1766 const uECC_word_t * RESTRICT initialZ,
1767 bitcount_t numBits) {
1768 /* R0 and R1 */
1769 uECC_word_t Rx[2][uECC_WORDS];
1770 uECC_word_t Ry[2][uECC_WORDS];
1771 uECC_word_t z[uECC_WORDS];
1772 bitcount_t i;
1773 uECC_word_t nb;
1774
1775 vli_set(Rx[1], point->x);
1776 vli_set(Ry[1], point->y);
1777
1778 XYcZ_initial_double(Rx[1], Ry[1], Rx[0], Ry[0], initialZ);
1779
1780 for (i = numBits - 2; i > 0; --i) {
1781 nb = !vli_testBit(scalar, i);
1782 XYcZ_addC(Rx[1 - nb], Ry[1 - nb], Rx[nb], Ry[nb]);
1783 XYcZ_add(Rx[nb], Ry[nb], Rx[1 - nb], Ry[1 - nb]);
1784 }
1785
1786 nb = !vli_testBit(scalar, 0);
1787 XYcZ_addC(Rx[1 - nb], Ry[1 - nb], Rx[nb], Ry[nb]);
1788
1789 /* Find final 1/Z value. */
1790 vli_modSub_fast(z, Rx[1], Rx[0]); /* X1 - X0 */
1791 vli_modMult_fast(z, z, Ry[1 - nb]); /* Yb * (X1 - X0) */
1792 vli_modMult_fast(z, z, point->x); /* xP * Yb * (X1 - X0) */
1793 vli_modInv(z, z, curve_p); /* 1 / (xP * Yb * (X1 - X0)) */
1794 vli_modMult_fast(z, z, point->y); /* yP / (xP * Yb * (X1 - X0)) */
1795 vli_modMult_fast(z, z, Rx[1 - nb]); /* Xb * yP / (xP * Yb * (X1 - X0)) */
1796 /* End 1/Z calculation */
1797
1798 XYcZ_add(Rx[nb], Ry[nb], Rx[1 - nb], Ry[1 - nb]);
1799 apply_z(Rx[0], Ry[0], z);
1800
1801 vli_set(result->x, Rx[0]);
1802 vli_set(result->y, Ry[0]);
1803 }
1804
EccPoint_compute_public_key(EccPoint * result,uECC_word_t * private)1805 static int EccPoint_compute_public_key(EccPoint *result, uECC_word_t *private) {
1806 uECC_word_t tmp1[uECC_WORDS];
1807 uECC_word_t tmp2[uECC_WORDS];
1808 uECC_word_t *p2[2] = {tmp1, tmp2};
1809 uECC_word_t carry;
1810
1811 /* Make sure the private key is in the range [1, n-1]. */
1812 if (vli_isZero(private)) {
1813 return 0;
1814 }
1815
1816 #if (uECC_CURVE == uECC_secp160r1)
1817 // Don't regularize the bitcount for secp160r1, since it would have a larger performance
1818 // impact (about 2% slower on average) and requires the vli_xxx_n functions, leading to
1819 // a significant increase in code size.
1820
1821 EccPoint_mult(result, &curve_G, private, NULL, vli_numBits(private, uECC_WORDS));
1822 #else
1823 if (vli_cmp(curve_n, private) != 1) {
1824 return 0;
1825 }
1826
1827 // Regularize the bitcount for the private key so that attackers cannot use a side channel
1828 // attack to learn the number of leading zeros.
1829 carry = vli_add(tmp1, private, curve_n);
1830 vli_add(tmp2, tmp1, curve_n);
1831 EccPoint_mult(result, &curve_G, p2[!carry], NULL, (uECC_BYTES * 8) + 1);
1832 #endif
1833
1834 if (EccPoint_isZero(result)) {
1835 return 0;
1836 }
1837 return 1;
1838 }
1839
1840 #ifdef ENABLE_MICRO_ECC_COMPRESSION
1841
1842 #if uECC_CURVE == uECC_secp224r1
1843
1844 /* Routine 3.2.4 RS; from http://www.nsa.gov/ia/_files/nist-routines.pdf */
mod_sqrt_secp224r1_rs(uECC_word_t * d1,uECC_word_t * e1,uECC_word_t * f1,const uECC_word_t * d0,const uECC_word_t * e0,const uECC_word_t * f0)1845 static void mod_sqrt_secp224r1_rs(uECC_word_t *d1,
1846 uECC_word_t *e1,
1847 uECC_word_t *f1,
1848 const uECC_word_t *d0,
1849 const uECC_word_t *e0,
1850 const uECC_word_t *f0) {
1851 uECC_word_t t[uECC_WORDS];
1852
1853 vli_modSquare_fast(t, d0); /* t <-- d0 ^ 2 */
1854 vli_modMult_fast(e1, d0, e0); /* e1 <-- d0 * e0 */
1855 vli_modAdd(d1, t, f0, curve_p); /* d1 <-- t + f0 */
1856 vli_modAdd(e1, e1, e1, curve_p); /* e1 <-- e1 + e1 */
1857 vli_modMult_fast(f1, t, f0); /* f1 <-- t * f0 */
1858 vli_modAdd(f1, f1, f1, curve_p); /* f1 <-- f1 + f1 */
1859 vli_modAdd(f1, f1, f1, curve_p); /* f1 <-- f1 + f1 */
1860 }
1861
1862 /* Routine 3.2.5 RSS; from http://www.nsa.gov/ia/_files/nist-routines.pdf */
mod_sqrt_secp224r1_rss(uECC_word_t * d1,uECC_word_t * e1,uECC_word_t * f1,const uECC_word_t * d0,const uECC_word_t * e0,const uECC_word_t * f0,const bitcount_t j)1863 static void mod_sqrt_secp224r1_rss(uECC_word_t *d1,
1864 uECC_word_t *e1,
1865 uECC_word_t *f1,
1866 const uECC_word_t *d0,
1867 const uECC_word_t *e0,
1868 const uECC_word_t *f0,
1869 const bitcount_t j) {
1870 bitcount_t i;
1871
1872 vli_set(d1, d0); /* d1 <-- d0 */
1873 vli_set(e1, e0); /* e1 <-- e0 */
1874 vli_set(f1, f0); /* f1 <-- f0 */
1875 for (i = 1; i <= j; i++) {
1876 mod_sqrt_secp224r1_rs(d1, e1, f1, d1, e1, f1); /* RS (d1,e1,f1,d1,e1,f1) */
1877 }
1878 }
1879
1880 /* Routine 3.2.6 RM; from http://www.nsa.gov/ia/_files/nist-routines.pdf */
mod_sqrt_secp224r1_rm(uECC_word_t * d2,uECC_word_t * e2,uECC_word_t * f2,const uECC_word_t * c,const uECC_word_t * d0,const uECC_word_t * e0,const uECC_word_t * d1,const uECC_word_t * e1)1881 static void mod_sqrt_secp224r1_rm(uECC_word_t *d2,
1882 uECC_word_t *e2,
1883 uECC_word_t *f2,
1884 const uECC_word_t *c,
1885 const uECC_word_t *d0,
1886 const uECC_word_t *e0,
1887 const uECC_word_t *d1,
1888 const uECC_word_t *e1) {
1889 uECC_word_t t1[uECC_WORDS];
1890 uECC_word_t t2[uECC_WORDS];
1891
1892 vli_modMult_fast(t1, e0, e1); /* t1 <-- e0 * e1 */
1893 vli_modMult_fast(t1, t1, c); /* t1 <-- t1 * c */
1894 vli_modSub_fast(t1, curve_p, t1); /* t1 <-- p - t1 */
1895 vli_modMult_fast(t2, d0, d1); /* t2 <-- d0 * d1 */
1896 vli_modAdd(t2, t2, t1, curve_p); /* t2 <-- t2 + t1 */
1897 vli_modMult_fast(t1, d0, e1); /* t1 <-- d0 * e1 */
1898 vli_modMult_fast(e2, d1, e0); /* e2 <-- d1 * e0 */
1899 vli_modAdd(e2, e2, t1, curve_p); /* e2 <-- e2 + t1 */
1900 vli_modSquare_fast(f2, e2); /* f2 <-- e2^2 */
1901 vli_modMult_fast(f2, f2, c); /* f2 <-- f2 * c */
1902 vli_modSub_fast(f2, curve_p, f2); /* f2 <-- p - f2 */
1903 vli_set(d2, t2); /* d2 <-- t2 */
1904 }
1905
1906 /* Routine 3.2.7 RP; from http://www.nsa.gov/ia/_files/nist-routines.pdf */
mod_sqrt_secp224r1_rp(uECC_word_t * d1,uECC_word_t * e1,uECC_word_t * f1,const uECC_word_t * c,const uECC_word_t * r)1907 static void mod_sqrt_secp224r1_rp(uECC_word_t *d1,
1908 uECC_word_t *e1,
1909 uECC_word_t *f1,
1910 const uECC_word_t *c,
1911 const uECC_word_t *r) {
1912 wordcount_t i;
1913 wordcount_t pow2i = 1;
1914 uECC_word_t d0[uECC_WORDS];
1915 uECC_word_t e0[uECC_WORDS] = {1}; /* e0 <-- 1 */
1916 uECC_word_t f0[uECC_WORDS];
1917
1918 vli_set(d0, r); /* d0 <-- r */
1919 vli_modSub_fast(f0, curve_p, c); /* f0 <-- p - c */
1920 for (i = 0; i <= 6; i++) {
1921 mod_sqrt_secp224r1_rss(d1, e1, f1, d0, e0, f0, pow2i); /* RSS (d1,e1,f1,d0,e0,f0,2^i) */
1922 mod_sqrt_secp224r1_rm(d1, e1, f1, c, d1, e1, d0, e0); /* RM (d1,e1,f1,c,d1,e1,d0,e0) */
1923 vli_set(d0, d1); /* d0 <-- d1 */
1924 vli_set(e0, e1); /* e0 <-- e1 */
1925 vli_set(f0, f1); /* f0 <-- f1 */
1926 pow2i *= 2;
1927 }
1928 }
1929
1930 /* Compute a = sqrt(a) (mod curve_p). */
1931 /* Routine 3.2.8 mp_mod_sqrt_224; from http://www.nsa.gov/ia/_files/nist-routines.pdf */
mod_sqrt(uECC_word_t * a)1932 static void mod_sqrt(uECC_word_t *a) {
1933 bitcount_t i;
1934 uECC_word_t e1[uECC_WORDS];
1935 uECC_word_t f1[uECC_WORDS];
1936 uECC_word_t d0[uECC_WORDS];
1937 uECC_word_t e0[uECC_WORDS];
1938 uECC_word_t f0[uECC_WORDS];
1939 uECC_word_t d1[uECC_WORDS];
1940
1941 // s = a; using constant instead of random value
1942 mod_sqrt_secp224r1_rp(d0, e0, f0, a, a); /* RP (d0, e0, f0, c, s) */
1943 mod_sqrt_secp224r1_rs(d1, e1, f1, d0, e0, f0); /* RS (d1, e1, f1, d0, e0, f0) */
1944 for (i = 1; i <= 95; i++) {
1945 vli_set(d0, d1); /* d0 <-- d1 */
1946 vli_set(e0, e1); /* e0 <-- e1 */
1947 vli_set(f0, f1); /* f0 <-- f1 */
1948 mod_sqrt_secp224r1_rs(d1, e1, f1, d0, e0, f0); /* RS (d1, e1, f1, d0, e0, f0) */
1949 if (vli_isZero(d1)) { /* if d1 == 0 */
1950 break;
1951 }
1952 }
1953 vli_modInv(f1, e0, curve_p); /* f1 <-- 1 / e0 */
1954 vli_modMult_fast(a, d0, f1); /* a <-- d0 / e0 */
1955 }
1956
1957 #else /* uECC_CURVE */
1958
1959 /* Compute a = sqrt(a) (mod curve_p). */
mod_sqrt(uECC_word_t * a)1960 static void mod_sqrt(uECC_word_t *a) {
1961 bitcount_t i;
1962 uECC_word_t p1[uECC_WORDS] = {1};
1963 uECC_word_t l_result[uECC_WORDS] = {1};
1964
1965 /* Since curve_p == 3 (mod 4) for all supported curves, we can
1966 compute sqrt(a) = a^((curve_p + 1) / 4) (mod curve_p). */
1967 vli_add(p1, curve_p, p1); /* p1 = curve_p + 1 */
1968 for (i = vli_numBits(p1, uECC_WORDS) - 1; i > 1; --i) {
1969 vli_modSquare_fast(l_result, l_result);
1970 if (vli_testBit(p1, i)) {
1971 vli_modMult_fast(l_result, l_result, a);
1972 }
1973 }
1974 vli_set(a, l_result);
1975 }
1976 #endif /* uECC_CURVE */
1977
1978 #endif /* ENABLE_MICRO_ECC_COMPRESSION */
1979
1980
1981 #if uECC_WORD_SIZE == 1
1982
vli_nativeToBytes(uint8_t * RESTRICT dest,const uint8_t * RESTRICT src)1983 static void vli_nativeToBytes(uint8_t * RESTRICT dest, const uint8_t * RESTRICT src) {
1984 uint8_t i;
1985 for (i = 0; i < uECC_BYTES; ++i) {
1986 dest[i] = src[(uECC_BYTES - 1) - i];
1987 }
1988 }
1989
1990 #define vli_bytesToNative(dest, src) vli_nativeToBytes((dest), (src))
1991
1992 #elif uECC_WORD_SIZE == 4
1993
vli_nativeToBytes(uint8_t * bytes,const uint32_t * native)1994 static void vli_nativeToBytes(uint8_t *bytes, const uint32_t *native) {
1995 unsigned i;
1996 for (i = 0; i < uECC_WORDS; ++i) {
1997 uint8_t *digit = bytes + 4 * (uECC_WORDS - 1 - i);
1998 digit[0] = native[i] >> 24;
1999 digit[1] = native[i] >> 16;
2000 digit[2] = native[i] >> 8;
2001 digit[3] = native[i];
2002 }
2003 }
2004
vli_bytesToNative(uint32_t * native,const uint8_t * bytes)2005 static void vli_bytesToNative(uint32_t *native, const uint8_t *bytes) {
2006 unsigned i;
2007 for (i = 0; i < uECC_WORDS; ++i) {
2008 const uint8_t *digit = bytes + 4 * (uECC_WORDS - 1 - i);
2009 native[i] = ((uint32_t)digit[0] << 24) | ((uint32_t)digit[1] << 16) |
2010 ((uint32_t)digit[2] << 8) | (uint32_t)digit[3];
2011 }
2012 }
2013
2014 #else
2015
vli_nativeToBytes(uint8_t * bytes,const uint64_t * native)2016 static void vli_nativeToBytes(uint8_t *bytes, const uint64_t *native) {
2017 unsigned i;
2018 for (i = 0; i < uECC_WORDS; ++i) {
2019 uint8_t *digit = bytes + 8 * (uECC_WORDS - 1 - i);
2020 digit[0] = native[i] >> 56;
2021 digit[1] = native[i] >> 48;
2022 digit[2] = native[i] >> 40;
2023 digit[3] = native[i] >> 32;
2024 digit[4] = native[i] >> 24;
2025 digit[5] = native[i] >> 16;
2026 digit[6] = native[i] >> 8;
2027 digit[7] = native[i];
2028 }
2029 }
2030
vli_bytesToNative(uint64_t * native,const uint8_t * bytes)2031 static void vli_bytesToNative(uint64_t *native, const uint8_t *bytes) {
2032 unsigned i;
2033 for (i = 0; i < uECC_WORDS; ++i) {
2034 const uint8_t *digit = bytes + 8 * (uECC_WORDS - 1 - i);
2035 native[i] = ((uint64_t)digit[0] << 56) | ((uint64_t)digit[1] << 48) |
2036 ((uint64_t)digit[2] << 40) | ((uint64_t)digit[3] << 32) |
2037 ((uint64_t)digit[4] << 24) | ((uint64_t)digit[5] << 16) |
2038 ((uint64_t)digit[6] << 8) | (uint64_t)digit[7];
2039 }
2040 }
2041
2042 #endif /* uECC_WORD_SIZE */
2043
uECC_make_key(uint8_t public_key[uECC_BYTES * 2],uint8_t private_key[uECC_BYTES])2044 int uECC_make_key(uint8_t public_key[uECC_BYTES*2], uint8_t private_key[uECC_BYTES]) {
2045 uECC_word_t private[uECC_WORDS];
2046 EccPoint public;
2047 uECC_word_t tries;
2048 for (tries = 0; tries < MAX_TRIES; ++tries) {
2049 if (g_rng_function((uint8_t *)private, sizeof(private)) &&
2050 EccPoint_compute_public_key(&public, private)) {
2051 vli_nativeToBytes(private_key, private);
2052 vli_nativeToBytes(public_key, public.x);
2053 vli_nativeToBytes(public_key + uECC_BYTES, public.y);
2054 return 1;
2055 }
2056 }
2057 return 0;
2058 }
2059
uECC_shared_secret(const uint8_t public_key[uECC_BYTES * 2],const uint8_t private_key[uECC_BYTES],uint8_t secret[uECC_BYTES])2060 int uECC_shared_secret(const uint8_t public_key[uECC_BYTES*2],
2061 const uint8_t private_key[uECC_BYTES],
2062 uint8_t secret[uECC_BYTES]) {
2063 EccPoint public;
2064 EccPoint product;
2065 uECC_word_t private[uECC_WORDS];
2066 uECC_word_t tmp[uECC_WORDS];
2067 uECC_word_t *p2[2] = {private, tmp};
2068 uECC_word_t random[uECC_WORDS];
2069 uECC_word_t *initial_Z = NULL;
2070 uECC_word_t tries;
2071 uECC_word_t carry;
2072
2073 // Try to get a random initial Z value to improve protection against side-channel
2074 // attacks. If the RNG fails every time (eg it was not defined), we continue so that
2075 // uECC_shared_secret() can still work without an RNG defined.
2076 for (tries = 0; tries < MAX_TRIES; ++tries) {
2077 if (g_rng_function((uint8_t *)random, sizeof(random)) && !vli_isZero(random)) {
2078 initial_Z = random;
2079 break;
2080 }
2081 }
2082
2083 vli_bytesToNative(private, private_key);
2084 vli_bytesToNative(public.x, public_key);
2085 vli_bytesToNative(public.y, public_key + uECC_BYTES);
2086
2087 #if (uECC_CURVE == uECC_secp160r1)
2088 // Don't regularize the bitcount for secp160r1.
2089 EccPoint_mult(&product, &public, private, initial_Z, vli_numBits(private, uECC_WORDS));
2090 #else
2091 // Regularize the bitcount for the private key so that attackers cannot use a side channel
2092 // attack to learn the number of leading zeros.
2093 carry = vli_add(private, private, curve_n);
2094 vli_add(tmp, private, curve_n);
2095 EccPoint_mult(&product, &public, p2[!carry], initial_Z, (uECC_BYTES * 8) + 1);
2096 #endif
2097
2098 vli_nativeToBytes(secret, product.x);
2099 return !EccPoint_isZero(&product);
2100 }
2101
2102 #ifdef ENABLE_MICRO_ECC_COMPRESSION
2103
uECC_compress(const uint8_t public_key[uECC_BYTES * 2],uint8_t compressed[uECC_BYTES+1])2104 void uECC_compress(const uint8_t public_key[uECC_BYTES*2], uint8_t compressed[uECC_BYTES+1]) {
2105 wordcount_t i;
2106 for (i = 0; i < uECC_BYTES; ++i) {
2107 compressed[i+1] = public_key[i];
2108 }
2109 compressed[0] = 2 + (public_key[uECC_BYTES * 2 - 1] & 0x01);
2110 }
2111
2112 #endif
2113
2114 /* Computes result = x^3 + ax + b. result must not overlap x. */
curve_x_side(uECC_word_t * RESTRICT result,const uECC_word_t * RESTRICT x)2115 static void curve_x_side(uECC_word_t * RESTRICT result, const uECC_word_t * RESTRICT x) {
2116 static const uECC_word_t curve_b[uECC_WORDS] = uECC_CONCAT(Curve_B_, uECC_CURVE);
2117 #if (uECC_CURVE == uECC_secp256k1)
2118 vli_modSquare_fast(result, x); /* r = x^2 */
2119 vli_modMult_fast(result, result, x); /* r = x^3 */
2120 vli_modAdd(result, result, curve_b, curve_p); /* r = x^3 + b */
2121 #else
2122 uECC_word_t _3[uECC_WORDS] = {3}; /* -a = 3 */
2123
2124 vli_modSquare_fast(result, x); /* r = x^2 */
2125 vli_modSub_fast(result, result, _3); /* r = x^2 - 3 */
2126 vli_modMult_fast(result, result, x); /* r = x^3 - 3x */
2127 vli_modAdd(result, result, curve_b, curve_p); /* r = x^3 - 3x + b */
2128 #endif
2129 }
2130
2131 #ifdef ENABLE_MICRO_ECC_COMPRESSION
2132
uECC_decompress(const uint8_t compressed[uECC_BYTES+1],uint8_t public_key[uECC_BYTES * 2])2133 void uECC_decompress(const uint8_t compressed[uECC_BYTES+1], uint8_t public_key[uECC_BYTES*2]) {
2134 EccPoint point;
2135 vli_bytesToNative(point.x, compressed + 1);
2136 curve_x_side(point.y, point.x);
2137 mod_sqrt(point.y);
2138
2139 if ((point.y[0] & 0x01) != (compressed[0] & 0x01)) {
2140 vli_sub(point.y, curve_p, point.y);
2141 }
2142
2143 vli_nativeToBytes(public_key, point.x);
2144 vli_nativeToBytes(public_key + uECC_BYTES, point.y);
2145 }
2146
2147 #endif /* ENABLE_MICRO_ECC_COMPRESSION */
2148
uECC_valid_public_key(const uint8_t public_key[uECC_BYTES * 2])2149 int uECC_valid_public_key(const uint8_t public_key[uECC_BYTES*2]) {
2150 uECC_word_t tmp1[uECC_WORDS];
2151 uECC_word_t tmp2[uECC_WORDS];
2152 EccPoint public;
2153
2154 vli_bytesToNative(public.x, public_key);
2155 vli_bytesToNative(public.y, public_key + uECC_BYTES);
2156
2157 // The point at infinity is invalid.
2158 if (EccPoint_isZero(&public)) {
2159 return 0;
2160 }
2161
2162 // x and y must be smaller than p.
2163 if (vli_cmp(curve_p, public.x) != 1 || vli_cmp(curve_p, public.y) != 1) {
2164 return 0;
2165 }
2166
2167 vli_modSquare_fast(tmp1, public.y); /* tmp1 = y^2 */
2168 curve_x_side(tmp2, public.x); /* tmp2 = x^3 + ax + b */
2169
2170 /* Make sure that y^2 == x^3 + ax + b */
2171 return (vli_cmp(tmp1, tmp2) == 0);
2172 }
2173
uECC_compute_public_key(const uint8_t private_key[uECC_BYTES],uint8_t public_key[uECC_BYTES * 2])2174 int uECC_compute_public_key(const uint8_t private_key[uECC_BYTES],
2175 uint8_t public_key[uECC_BYTES * 2]) {
2176 uECC_word_t private[uECC_WORDS];
2177 EccPoint public;
2178
2179 vli_bytesToNative(private, private_key);
2180
2181 if (!EccPoint_compute_public_key(&public, private)) {
2182 return 0;
2183 }
2184
2185 vli_nativeToBytes(public_key, public.x);
2186 vli_nativeToBytes(public_key + uECC_BYTES, public.y);
2187 return 1;
2188 }
2189
uECC_bytes(void)2190 int uECC_bytes(void) {
2191 return uECC_BYTES;
2192 }
2193
uECC_curve(void)2194 int uECC_curve(void) {
2195 return uECC_CURVE;
2196 }
2197
2198 /* -------- ECDSA code -------- */
2199
2200 #ifdef ENABLE_MICRO_ECC_ECDSA
2201
2202 #if (uECC_CURVE == uECC_secp160r1)
vli_clear_n(uECC_word_t * vli)2203 static void vli_clear_n(uECC_word_t *vli) {
2204 vli_clear(vli);
2205 vli[uECC_N_WORDS - 1] = 0;
2206 }
2207
vli_isZero_n(const uECC_word_t * vli)2208 static uECC_word_t vli_isZero_n(const uECC_word_t *vli) {
2209 if (vli[uECC_N_WORDS - 1]) {
2210 return 0;
2211 }
2212 return vli_isZero(vli);
2213 }
2214
vli_set_n(uECC_word_t * dest,const uECC_word_t * src)2215 static void vli_set_n(uECC_word_t *dest, const uECC_word_t *src) {
2216 vli_set(dest, src);
2217 dest[uECC_N_WORDS - 1] = src[uECC_N_WORDS - 1];
2218 }
2219
vli_cmp_n(const uECC_word_t * left,const uECC_word_t * right)2220 static cmpresult_t vli_cmp_n(const uECC_word_t *left, const uECC_word_t *right) {
2221 if (left[uECC_N_WORDS - 1] > right[uECC_N_WORDS - 1]) {
2222 return 1;
2223 } else if (left[uECC_N_WORDS - 1] < right[uECC_N_WORDS - 1]) {
2224 return -1;
2225 }
2226 return vli_cmp(left, right);
2227 }
2228
vli_rshift1_n(uECC_word_t * vli)2229 static void vli_rshift1_n(uECC_word_t *vli) {
2230 vli_rshift1(vli);
2231 vli[uECC_N_WORDS - 2] |= vli[uECC_N_WORDS - 1] << (uECC_WORD_BITS - 1);
2232 vli[uECC_N_WORDS - 1] = vli[uECC_N_WORDS - 1] >> 1;
2233 }
2234
vli_add_n(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right)2235 static uECC_word_t vli_add_n(uECC_word_t *result,
2236 const uECC_word_t *left,
2237 const uECC_word_t *right) {
2238 uECC_word_t carry = vli_add(result, left, right);
2239 uECC_word_t sum = left[uECC_N_WORDS - 1] + right[uECC_N_WORDS - 1] + carry;
2240 if (sum != left[uECC_N_WORDS - 1]) {
2241 carry = (sum < left[uECC_N_WORDS - 1]);
2242 }
2243 result[uECC_N_WORDS - 1] = sum;
2244 return carry;
2245 }
2246
vli_sub_n(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right)2247 static uECC_word_t vli_sub_n(uECC_word_t *result,
2248 const uECC_word_t *left,
2249 const uECC_word_t *right) {
2250 uECC_word_t borrow = vli_sub(result, left, right);
2251 uECC_word_t diff = left[uECC_N_WORDS - 1] - right[uECC_N_WORDS - 1] - borrow;
2252 if (diff != left[uECC_N_WORDS - 1]) {
2253 borrow = (diff > left[uECC_N_WORDS - 1]);
2254 }
2255 result[uECC_N_WORDS - 1] = diff;
2256 return borrow;
2257 }
2258
2259 #if !muladd_exists
muladd(uECC_word_t a,uECC_word_t b,uECC_word_t * r0,uECC_word_t * r1,uECC_word_t * r2)2260 static void muladd(uECC_word_t a,
2261 uECC_word_t b,
2262 uECC_word_t *r0,
2263 uECC_word_t *r1,
2264 uECC_word_t *r2) {
2265 uECC_dword_t p = (uECC_dword_t)a * b;
2266 uECC_dword_t r01 = ((uECC_dword_t)(*r1) << uECC_WORD_BITS) | *r0;
2267 r01 += p;
2268 *r2 += (r01 < p);
2269 *r1 = r01 >> uECC_WORD_BITS;
2270 *r0 = (uECC_word_t)r01;
2271 }
2272 #define muladd_exists 1
2273 #endif
2274
vli_mult_n(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right)2275 static void vli_mult_n(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right) {
2276 uECC_word_t r0 = 0;
2277 uECC_word_t r1 = 0;
2278 uECC_word_t r2 = 0;
2279 wordcount_t i, k;
2280
2281 for (k = 0; k < uECC_N_WORDS * 2 - 1; ++k) {
2282 wordcount_t min = (k < uECC_N_WORDS ? 0 : (k + 1) - uECC_N_WORDS);
2283 wordcount_t max = (k < uECC_N_WORDS ? k : uECC_N_WORDS - 1);
2284 for (i = min; i <= max; ++i) {
2285 muladd(left[i], right[k - i], &r0, &r1, &r2);
2286 }
2287 result[k] = r0;
2288 r0 = r1;
2289 r1 = r2;
2290 r2 = 0;
2291 }
2292 result[uECC_N_WORDS * 2 - 1] = r0;
2293 }
2294
vli_modAdd_n(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right,const uECC_word_t * mod)2295 static void vli_modAdd_n(uECC_word_t *result,
2296 const uECC_word_t *left,
2297 const uECC_word_t *right,
2298 const uECC_word_t *mod) {
2299 uECC_word_t carry = vli_add_n(result, left, right);
2300 if (carry || vli_cmp_n(result, mod) >= 0) {
2301 vli_sub_n(result, result, mod);
2302 }
2303 }
2304
vli_modInv_n(uECC_word_t * result,const uECC_word_t * input,const uECC_word_t * mod)2305 static void vli_modInv_n(uECC_word_t *result, const uECC_word_t *input, const uECC_word_t *mod) {
2306 uECC_word_t a[uECC_N_WORDS], b[uECC_N_WORDS], u[uECC_N_WORDS], v[uECC_N_WORDS];
2307 uECC_word_t carry;
2308 cmpresult_t cmpResult;
2309
2310 if (vli_isZero_n(input)) {
2311 vli_clear_n(result);
2312 return;
2313 }
2314
2315 vli_set_n(a, input);
2316 vli_set_n(b, mod);
2317 vli_clear_n(u);
2318 u[0] = 1;
2319 vli_clear_n(v);
2320 while ((cmpResult = vli_cmp_n(a, b)) != 0) {
2321 carry = 0;
2322 if (EVEN(a)) {
2323 vli_rshift1_n(a);
2324 if (!EVEN(u)) {
2325 carry = vli_add_n(u, u, mod);
2326 }
2327 vli_rshift1_n(u);
2328 if (carry) {
2329 u[uECC_N_WORDS - 1] |= HIGH_BIT_SET;
2330 }
2331 } else if (EVEN(b)) {
2332 vli_rshift1_n(b);
2333 if (!EVEN(v)) {
2334 carry = vli_add_n(v, v, mod);
2335 }
2336 vli_rshift1_n(v);
2337 if (carry) {
2338 v[uECC_N_WORDS - 1] |= HIGH_BIT_SET;
2339 }
2340 } else if (cmpResult > 0) {
2341 vli_sub_n(a, a, b);
2342 vli_rshift1_n(a);
2343 if (vli_cmp_n(u, v) < 0) {
2344 vli_add_n(u, u, mod);
2345 }
2346 vli_sub_n(u, u, v);
2347 if (!EVEN(u)) {
2348 carry = vli_add_n(u, u, mod);
2349 }
2350 vli_rshift1_n(u);
2351 if (carry) {
2352 u[uECC_N_WORDS - 1] |= HIGH_BIT_SET;
2353 }
2354 } else {
2355 vli_sub_n(b, b, a);
2356 vli_rshift1_n(b);
2357 if (vli_cmp_n(v, u) < 0) {
2358 vli_add_n(v, v, mod);
2359 }
2360 vli_sub_n(v, v, u);
2361 if (!EVEN(v)) {
2362 carry = vli_add_n(v, v, mod);
2363 }
2364 vli_rshift1_n(v);
2365 if (carry) {
2366 v[uECC_N_WORDS - 1] |= HIGH_BIT_SET;
2367 }
2368 }
2369 }
2370 vli_set_n(result, u);
2371 }
2372
vli2_rshift1_n(uECC_word_t * vli)2373 static void vli2_rshift1_n(uECC_word_t *vli) {
2374 vli_rshift1_n(vli);
2375 vli[uECC_N_WORDS - 1] |= vli[uECC_N_WORDS] << (uECC_WORD_BITS - 1);
2376 vli_rshift1_n(vli + uECC_N_WORDS);
2377 }
2378
vli2_sub_n(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right)2379 static uECC_word_t vli2_sub_n(uECC_word_t *result,
2380 const uECC_word_t *left,
2381 const uECC_word_t *right) {
2382 uECC_word_t borrow = 0;
2383 wordcount_t i;
2384 for (i = 0; i < uECC_N_WORDS * 2; ++i) {
2385 uECC_word_t diff = left[i] - right[i] - borrow;
2386 if (diff != left[i]) {
2387 borrow = (diff > left[i]);
2388 }
2389 result[i] = diff;
2390 }
2391 return borrow;
2392 }
2393
2394 /* Computes result = (left * right) % curve_n. */
vli_modMult_n(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right)2395 static void vli_modMult_n(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right) {
2396 bitcount_t i;
2397 uECC_word_t product[2 * uECC_N_WORDS];
2398 uECC_word_t modMultiple[2 * uECC_N_WORDS];
2399 uECC_word_t tmp[2 * uECC_N_WORDS];
2400 uECC_word_t *v[2] = {tmp, product};
2401 uECC_word_t index = 1;
2402
2403 vli_mult_n(product, left, right);
2404 vli_clear_n(modMultiple);
2405 vli_set(modMultiple + uECC_N_WORDS + 1, curve_n);
2406 vli_rshift1(modMultiple + uECC_N_WORDS + 1);
2407 modMultiple[2 * uECC_N_WORDS - 1] |= HIGH_BIT_SET;
2408 modMultiple[uECC_N_WORDS] = HIGH_BIT_SET;
2409
2410 for (i = 0;
2411 i <= ((((bitcount_t)uECC_N_WORDS) << uECC_WORD_BITS_SHIFT) + (uECC_WORD_BITS - 1));
2412 ++i) {
2413 uECC_word_t borrow = vli2_sub_n(v[1 - index], v[index], modMultiple);
2414 index = !(index ^ borrow); /* Swap the index if there was no borrow */
2415 vli2_rshift1_n(modMultiple);
2416 }
2417 vli_set_n(result, v[index]);
2418 }
2419
2420 #else
2421
2422 #define vli_cmp_n vli_cmp
2423 #define vli_modInv_n vli_modInv
2424 #define vli_modAdd_n vli_modAdd
2425
vli2_rshift1(uECC_word_t * vli)2426 static void vli2_rshift1(uECC_word_t *vli) {
2427 vli_rshift1(vli);
2428 vli[uECC_WORDS - 1] |= vli[uECC_WORDS] << (uECC_WORD_BITS - 1);
2429 vli_rshift1(vli + uECC_WORDS);
2430 }
2431
vli2_sub(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right)2432 static uECC_word_t vli2_sub(uECC_word_t *result,
2433 const uECC_word_t *left,
2434 const uECC_word_t *right) {
2435 uECC_word_t borrow = 0;
2436 wordcount_t i;
2437 for (i = 0; i < uECC_WORDS * 2; ++i) {
2438 uECC_word_t diff = left[i] - right[i] - borrow;
2439 if (diff != left[i]) {
2440 borrow = (diff > left[i]);
2441 }
2442 result[i] = diff;
2443 }
2444 return borrow;
2445 }
2446
2447 /* Computes result = (left * right) % curve_n. */
vli_modMult_n(uECC_word_t * result,const uECC_word_t * left,const uECC_word_t * right)2448 static void vli_modMult_n(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right) {
2449 uECC_word_t product[2 * uECC_WORDS];
2450 uECC_word_t modMultiple[2 * uECC_WORDS];
2451 uECC_word_t tmp[2 * uECC_WORDS];
2452 uECC_word_t *v[2] = {tmp, product};
2453 bitcount_t i;
2454 uECC_word_t index = 1;
2455
2456 vli_mult(product, left, right);
2457 vli_set(modMultiple + uECC_WORDS, curve_n); /* works if curve_n has its highest bit set */
2458 vli_clear(modMultiple);
2459
2460 for (i = 0; i <= uECC_BYTES * 8; ++i) {
2461 uECC_word_t borrow = vli2_sub(v[1 - index], v[index], modMultiple);
2462 index = !(index ^ borrow); /* Swap the index if there was no borrow */
2463 vli2_rshift1(modMultiple);
2464 }
2465 vli_set(result, v[index]);
2466 }
2467 #endif /* (uECC_CURVE != uECC_secp160r1) */
2468
uECC_sign_with_k(const uint8_t private_key[uECC_BYTES],const uint8_t message_hash[uECC_BYTES],uECC_word_t k[uECC_N_WORDS],uint8_t signature[uECC_BYTES * 2])2469 static int uECC_sign_with_k(const uint8_t private_key[uECC_BYTES],
2470 const uint8_t message_hash[uECC_BYTES],
2471 uECC_word_t k[uECC_N_WORDS],
2472 uint8_t signature[uECC_BYTES*2]) {
2473 uECC_word_t tmp[uECC_N_WORDS];
2474 uECC_word_t s[uECC_N_WORDS];
2475 uECC_word_t *k2[2] = {tmp, s};
2476 EccPoint p;
2477 uECC_word_t carry;
2478 uECC_word_t tries;
2479
2480 /* Make sure 0 < k < curve_n */
2481 if (vli_isZero(k) || vli_cmp_n(curve_n, k) != 1) {
2482 return 0;
2483 }
2484
2485 #if (uECC_CURVE == uECC_secp160r1)
2486 /* Make sure that we don't leak timing information about k.
2487 See http://eprint.iacr.org/2011/232.pdf */
2488 vli_add_n(tmp, k, curve_n);
2489 carry = (tmp[uECC_WORDS] & 0x02);
2490 vli_add_n(s, tmp, curve_n);
2491
2492 /* p = k * G */
2493 EccPoint_mult(&p, &curve_G, k2[!carry], NULL, (uECC_BYTES * 8) + 2);
2494 #else
2495 /* Make sure that we don't leak timing information about k.
2496 See http://eprint.iacr.org/2011/232.pdf */
2497 carry = vli_add(tmp, k, curve_n);
2498 vli_add(s, tmp, curve_n);
2499
2500 /* p = k * G */
2501 EccPoint_mult(&p, &curve_G, k2[!carry], NULL, (uECC_BYTES * 8) + 1);
2502
2503 /* r = x1 (mod n) */
2504 if (vli_cmp(curve_n, p.x) != 1) {
2505 vli_sub(p.x, p.x, curve_n);
2506 }
2507 #endif
2508 if (vli_isZero(p.x)) {
2509 return 0;
2510 }
2511
2512 // Attempt to get a random number to prevent side channel analysis of k.
2513 // If the RNG fails every time (eg it was not defined), we continue so that
2514 // deterministic signing can still work (with reduced security) without
2515 // an RNG defined.
2516 carry = 0; // use to signal that the RNG succeeded at least once.
2517 for (tries = 0; tries < MAX_TRIES; ++tries) {
2518 if (!g_rng_function((uint8_t *)tmp, sizeof(tmp))) {
2519 continue;
2520 }
2521 carry = 1;
2522 if (!vli_isZero(tmp)) {
2523 break;
2524 }
2525 }
2526 if (!carry) {
2527 vli_clear(tmp);
2528 tmp[0] = 1;
2529 }
2530
2531 /* Prevent side channel analysis of vli_modInv() to determine
2532 bits of k / the private key by premultiplying by a random number */
2533 vli_modMult_n(k, k, tmp); /* k' = rand * k */
2534 vli_modInv_n(k, k, curve_n); /* k = 1 / k' */
2535 vli_modMult_n(k, k, tmp); /* k = 1 / k */
2536
2537 vli_nativeToBytes(signature, p.x); /* store r */
2538
2539 tmp[uECC_N_WORDS - 1] = 0;
2540 vli_bytesToNative(tmp, private_key); /* tmp = d */
2541 s[uECC_N_WORDS - 1] = 0;
2542 vli_set(s, p.x);
2543 vli_modMult_n(s, tmp, s); /* s = r*d */
2544
2545 vli_bytesToNative(tmp, message_hash);
2546 vli_modAdd_n(s, tmp, s, curve_n); /* s = e + r*d */
2547 vli_modMult_n(s, s, k); /* s = (e + r*d) / k */
2548 #if (uECC_CURVE == uECC_secp160r1)
2549 if (s[uECC_N_WORDS - 1]) {
2550 return 0;
2551 }
2552 #endif
2553 vli_nativeToBytes(signature + uECC_BYTES, s);
2554 return 1;
2555 }
2556
uECC_sign(const uint8_t private_key[uECC_BYTES],const uint8_t message_hash[uECC_BYTES],uint8_t signature[uECC_BYTES * 2])2557 int uECC_sign(const uint8_t private_key[uECC_BYTES],
2558 const uint8_t message_hash[uECC_BYTES],
2559 uint8_t signature[uECC_BYTES*2]) {
2560 uECC_word_t k[uECC_N_WORDS];
2561 uECC_word_t tries;
2562
2563 for (tries = 0; tries < MAX_TRIES; ++tries) {
2564 if(g_rng_function((uint8_t *)k, sizeof(k))) {
2565 #if (uECC_CURVE == uECC_secp160r1)
2566 k[uECC_WORDS] &= 0x01;
2567 #endif
2568 if (uECC_sign_with_k(private_key, message_hash, k, signature)) {
2569 return 1;
2570 }
2571 }
2572 }
2573 return 0;
2574 }
2575
2576 /* Compute an HMAC using K as a key (as in RFC 6979). Note that K is always
2577 the same size as the hash result size. */
HMAC_init(uECC_HashContext * hash_context,const uint8_t * K)2578 static void HMAC_init(uECC_HashContext *hash_context, const uint8_t *K) {
2579 uint8_t *pad = hash_context->tmp + 2 * hash_context->result_size;
2580 unsigned i;
2581 for (i = 0; i < hash_context->result_size; ++i)
2582 pad[i] = K[i] ^ 0x36;
2583 for (; i < hash_context->block_size; ++i)
2584 pad[i] = 0x36;
2585
2586 hash_context->init_hash(hash_context);
2587 hash_context->update_hash(hash_context, pad, hash_context->block_size);
2588 }
2589
HMAC_update(uECC_HashContext * hash_context,const uint8_t * message,unsigned message_size)2590 static void HMAC_update(uECC_HashContext *hash_context,
2591 const uint8_t *message,
2592 unsigned message_size) {
2593 hash_context->update_hash(hash_context, message, message_size);
2594 }
2595
HMAC_finish(uECC_HashContext * hash_context,const uint8_t * K,uint8_t * result)2596 static void HMAC_finish(uECC_HashContext *hash_context, const uint8_t *K, uint8_t *result) {
2597 uint8_t *pad = hash_context->tmp + 2 * hash_context->result_size;
2598 unsigned i;
2599 for (i = 0; i < hash_context->result_size; ++i)
2600 pad[i] = K[i] ^ 0x5c;
2601 for (; i < hash_context->block_size; ++i)
2602 pad[i] = 0x5c;
2603
2604 hash_context->finish_hash(hash_context, result);
2605
2606 hash_context->init_hash(hash_context);
2607 hash_context->update_hash(hash_context, pad, hash_context->block_size);
2608 hash_context->update_hash(hash_context, result, hash_context->result_size);
2609 hash_context->finish_hash(hash_context, result);
2610 }
2611
2612 /* V = HMAC_K(V) */
update_V(uECC_HashContext * hash_context,uint8_t * K,uint8_t * V)2613 static void update_V(uECC_HashContext *hash_context, uint8_t *K, uint8_t *V) {
2614 HMAC_init(hash_context, K);
2615 HMAC_update(hash_context, V, hash_context->result_size);
2616 HMAC_finish(hash_context, K, V);
2617 }
2618
2619 /* Deterministic signing, similar to RFC 6979. Differences are:
2620 * We just use (truncated) H(m) directly rather than bits2octets(H(m))
2621 (it is not reduced modulo curve_n).
2622 * We generate a value for k (aka T) directly rather than converting endianness.
2623
2624 Layout of hash_context->tmp: <K> | <V> | (1 byte overlapped 0x00 or 0x01) / <HMAC pad> */
uECC_sign_deterministic(const uint8_t private_key[uECC_BYTES],const uint8_t message_hash[uECC_BYTES],uECC_HashContext * hash_context,uint8_t signature[uECC_BYTES * 2])2625 int uECC_sign_deterministic(const uint8_t private_key[uECC_BYTES],
2626 const uint8_t message_hash[uECC_BYTES],
2627 uECC_HashContext *hash_context,
2628 uint8_t signature[uECC_BYTES*2]) {
2629 uint8_t *K = hash_context->tmp;
2630 uint8_t *V = K + hash_context->result_size;
2631 uECC_word_t tries;
2632 unsigned i;
2633 for (i = 0; i < hash_context->result_size; ++i) {
2634 V[i] = 0x01;
2635 K[i] = 0;
2636 }
2637
2638 // K = HMAC_K(V || 0x00 || int2octets(x) || h(m))
2639 HMAC_init(hash_context, K);
2640 V[hash_context->result_size] = 0x00;
2641 HMAC_update(hash_context, V, hash_context->result_size + 1);
2642 HMAC_update(hash_context, private_key, uECC_BYTES);
2643 HMAC_update(hash_context, message_hash, uECC_BYTES);
2644 HMAC_finish(hash_context, K, K);
2645
2646 update_V(hash_context, K, V);
2647
2648 // K = HMAC_K(V || 0x01 || int2octets(x) || h(m))
2649 HMAC_init(hash_context, K);
2650 V[hash_context->result_size] = 0x01;
2651 HMAC_update(hash_context, V, hash_context->result_size + 1);
2652 HMAC_update(hash_context, private_key, uECC_BYTES);
2653 HMAC_update(hash_context, message_hash, uECC_BYTES);
2654 HMAC_finish(hash_context, K, K);
2655
2656 update_V(hash_context, K, V);
2657
2658 for (tries = 0; tries < MAX_TRIES; ++tries) {
2659 uECC_word_t T[uECC_N_WORDS];
2660 uint8_t *T_ptr = (uint8_t *)T;
2661 unsigned T_bytes = 0;
2662 while (T_bytes < sizeof(T)) {
2663 update_V(hash_context, K, V);
2664 for (i = 0; i < hash_context->result_size && T_bytes < sizeof(T); ++i, ++T_bytes) {
2665 T_ptr[T_bytes] = V[i];
2666 }
2667 }
2668 #if (uECC_CURVE == uECC_secp160r1)
2669 T[uECC_WORDS] &= 0x01;
2670 #endif
2671
2672 if (uECC_sign_with_k(private_key, message_hash, T, signature)) {
2673 return 1;
2674 }
2675
2676 // K = HMAC_K(V || 0x00)
2677 HMAC_init(hash_context, K);
2678 V[hash_context->result_size] = 0x00;
2679 HMAC_update(hash_context, V, hash_context->result_size + 1);
2680 HMAC_finish(hash_context, K, K);
2681
2682 update_V(hash_context, K, V);
2683 }
2684 return 0;
2685 }
2686
smax(bitcount_t a,bitcount_t b)2687 static bitcount_t smax(bitcount_t a, bitcount_t b) {
2688 return (a > b ? a : b);
2689 }
2690
uECC_verify(const uint8_t public_key[uECC_BYTES * 2],const uint8_t hash[uECC_BYTES],const uint8_t signature[uECC_BYTES * 2])2691 int uECC_verify(const uint8_t public_key[uECC_BYTES*2],
2692 const uint8_t hash[uECC_BYTES],
2693 const uint8_t signature[uECC_BYTES*2]) {
2694 uECC_word_t u1[uECC_N_WORDS], u2[uECC_N_WORDS];
2695 uECC_word_t z[uECC_N_WORDS];
2696 EccPoint public, sum;
2697 uECC_word_t rx[uECC_WORDS];
2698 uECC_word_t ry[uECC_WORDS];
2699 uECC_word_t tx[uECC_WORDS];
2700 uECC_word_t ty[uECC_WORDS];
2701 uECC_word_t tz[uECC_WORDS];
2702 const EccPoint *points[4];
2703 const EccPoint *point;
2704 bitcount_t numBits;
2705 bitcount_t i;
2706 uECC_word_t r[uECC_N_WORDS], s[uECC_N_WORDS];
2707 r[uECC_N_WORDS - 1] = 0;
2708 s[uECC_N_WORDS - 1] = 0;
2709
2710 vli_bytesToNative(public.x, public_key);
2711 vli_bytesToNative(public.y, public_key + uECC_BYTES);
2712 vli_bytesToNative(r, signature);
2713 vli_bytesToNative(s, signature + uECC_BYTES);
2714
2715 if (vli_isZero(r) || vli_isZero(s)) { /* r, s must not be 0. */
2716 return 0;
2717 }
2718
2719 #if (uECC_CURVE != uECC_secp160r1)
2720 if (vli_cmp(curve_n, r) != 1 || vli_cmp(curve_n, s) != 1) { /* r, s must be < n. */
2721 return 0;
2722 }
2723 #endif
2724
2725 /* Calculate u1 and u2. */
2726 vli_modInv_n(z, s, curve_n); /* Z = s^-1 */
2727 u1[uECC_N_WORDS - 1] = 0;
2728 vli_bytesToNative(u1, hash);
2729 vli_modMult_n(u1, u1, z); /* u1 = e/s */
2730 vli_modMult_n(u2, r, z); /* u2 = r/s */
2731
2732 /* Calculate sum = G + Q. */
2733 vli_set(sum.x, public.x);
2734 vli_set(sum.y, public.y);
2735 vli_set(tx, curve_G.x);
2736 vli_set(ty, curve_G.y);
2737 vli_modSub_fast(z, sum.x, tx); /* Z = x2 - x1 */
2738 XYcZ_add(tx, ty, sum.x, sum.y);
2739 vli_modInv(z, z, curve_p); /* Z = 1/Z */
2740 apply_z(sum.x, sum.y, z);
2741
2742 /* Use Shamir's trick to calculate u1*G + u2*Q */
2743 points[0] = 0;
2744 points[1] = &curve_G;
2745 points[2] = &public;
2746 points[3] = ∑
2747 numBits = smax(vli_numBits(u1, uECC_N_WORDS), vli_numBits(u2, uECC_N_WORDS));
2748
2749 point = points[(!!vli_testBit(u1, numBits - 1)) | ((!!vli_testBit(u2, numBits - 1)) << 1)];
2750 vli_set(rx, point->x);
2751 vli_set(ry, point->y);
2752 vli_clear(z);
2753 z[0] = 1;
2754
2755 for (i = numBits - 2; i >= 0; --i) {
2756 uECC_word_t index;
2757 EccPoint_double_jacobian(rx, ry, z);
2758
2759 index = (!!vli_testBit(u1, i)) | ((!!vli_testBit(u2, i)) << 1);
2760 point = points[index];
2761 if (point) {
2762 vli_set(tx, point->x);
2763 vli_set(ty, point->y);
2764 apply_z(tx, ty, z);
2765 vli_modSub_fast(tz, rx, tx); /* Z = x2 - x1 */
2766 XYcZ_add(tx, ty, rx, ry);
2767 vli_modMult_fast(z, z, tz);
2768 }
2769 }
2770
2771 vli_modInv(z, z, curve_p); /* Z = 1/Z */
2772 apply_z(rx, ry, z);
2773
2774 /* v = x1 (mod n) */
2775 #if (uECC_CURVE != uECC_secp160r1)
2776 if (vli_cmp(curve_n, rx) != 1) {
2777 vli_sub(rx, rx, curve_n);
2778 }
2779 #endif
2780
2781 /* Accept only if v == r. */
2782 return vli_equal(rx, r);
2783 }
2784
2785 #endif
2786