/* Sha256.c -- SHA-256 Hash
: Igor Pavlov : Public domain
This code is based on public domain code from Wei Dai's Crypto++ library. */
4
5 #include "Precomp.h"
6
7 #include <string.h>
8
9 #include "Sha256.h"
10 #include "RotateDefs.h"
11 #include "CpuArch.h"
12
/* Decide whether this compiler can build the hardware-accelerated
   code path (Sha256_UpdateBlocks_HW, declared below and defined elsewhere).
   If so, Z7_COMPILER_SHA256_SUPPORTED is defined. */
#ifdef MY_CPU_X86_OR_AMD64
  /* x86/x64: SHA-NI intrinsics require a sufficiently new compiler */
  #if defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
   || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
   || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
   || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) \
   || defined(_MSC_VER) && (_MSC_VER >= 1200)
    #define Z7_COMPILER_SHA256_SUPPORTED
  #endif
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)

  /* ARM/ARM64 (little-endian): crypto extension advertised directly ... */
  #if defined(__ARM_FEATURE_SHA2) \
   || defined(__ARM_FEATURE_CRYPTO)
    #define Z7_COMPILER_SHA256_SUPPORTED
  #else
    /* ... or assume the compiler can emit it when arch + FP support look OK */
    #if defined(MY_CPU_ARM64) \
     || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
     || defined(Z7_MSC_VER_ORIGINAL)
    #if defined(__ARM_FP) && \
        ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
       || defined(__GNUC__) && (__GNUC__ >= 6) \
        ) \
     || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
    /* NOTE(review): the Z7_CLANG_VERSION 170000..170001 exclusion below looks
       like a workaround for specific clang 17.0.x releases — confirm */
    #if defined(MY_CPU_ARM64) \
     || !defined(Z7_CLANG_VERSION) \
     || defined(__ARM_NEON) && \
        (Z7_CLANG_VERSION < 170000 || \
         Z7_CLANG_VERSION > 170001)
      #define Z7_COMPILER_SHA256_SUPPORTED
    #endif
    #endif
    #endif
  #endif
#endif
46
/* portable C implementation (defined later in this file) */
void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);

#ifdef Z7_COMPILER_SHA256_SUPPORTED
/* hardware-accelerated implementation (defined elsewhere) */
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);

/* process-wide defaults: _HW stays NULL until Sha256Prepare() detects CPU support */
static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS_HW;

/* per-context dispatch through the function pointer stored in the context */
#define SHA256_UPDATE_BLOCKS(p) p->v.vars.func_UpdateBlocks
#else
/* no HW path possible: always call the portable implementation directly */
#define SHA256_UPDATE_BLOCKS(p) Sha256_UpdateBlocks
#endif
59
60
Sha256_SetFunction(CSha256 * p,unsigned algo)61 BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
62 {
63 SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks;
64
65 #ifdef Z7_COMPILER_SHA256_SUPPORTED
66 if (algo != SHA256_ALGO_SW)
67 {
68 if (algo == SHA256_ALGO_DEFAULT)
69 func = g_SHA256_FUNC_UPDATE_BLOCKS;
70 else
71 {
72 if (algo != SHA256_ALGO_HW)
73 return False;
74 func = g_SHA256_FUNC_UPDATE_BLOCKS_HW;
75 if (!func)
76 return False;
77 }
78 }
79 #else
80 if (algo > 1)
81 return False;
82 #endif
83
84 p->v.vars.func_UpdateBlocks = func;
85 return True;
86 }
87
88
/* define it for speed optimization */

#ifdef Z7_SFX
/* small SFX build: favor code size over speed */
#define STEP_PRE 1
#define STEP_MAIN 1
#else
#define STEP_PRE 2
#define STEP_MAIN 4
// #define Z7_SHA256_UNROLL
#endif

/* use the full 64-word schedule W[64] unless STEP_MAIN == 16 selects
   the 16-word ring-buffer variant */
#undef Z7_SHA256_BIG_W
#if STEP_MAIN != 16
  #define Z7_SHA256_BIG_W
#endif
104
105
106
107
/* Resets the hash state to the SHA-256 initial vector and clears the
   processed-bytes counter. The selected update function is left unchanged. */
void Sha256_InitState(CSha256 *p)
{
  /* SHA-256 initial hash values (FIPS 180-4, section 5.3.3) */
  static const UInt32 k_Sha256_IV[8] = {
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
  };
  unsigned i;
  p->v.vars.count = 0;
  for (i = 0; i < 8; i++)
    p->state[i] = k_Sha256_IV[i];
}
120
121
122
123
124
125
126
127
/* Full initialization of a context: selects the process-wide default
   block-update implementation and resets the hash state. */
void Sha256_Init(CSha256 *p)
{
#ifdef Z7_COMPILER_SHA256_SUPPORTED
  /* default chosen by Sha256Prepare() (HW if detected) */
  p->v.vars.func_UpdateBlocks = g_SHA256_FUNC_UPDATE_BLOCKS;
#else
  p->v.vars.func_UpdateBlocks = NULL;
#endif
  Sha256_InitState(p);
}
138
/* SHA-256 big-sigma (round) and small-sigma (schedule) functions,
   FIPS 180-4 section 4.1.2 */
#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x,22))
#define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x,25))
#define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >>10))

/* choice and majority functions */
#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))


/* rounds 0..15: load a big-endian message word into the schedule */
#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe32(data + ((size_t)(j) + i) * 4))

/* schedule-expansion term for rounds 16..63 */
#define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))

#ifdef Z7_SHA256_BIG_W
// we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
#define w(j, i) W[(size_t)(j) + i]
#define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
#else
/* 16-word ring buffer: index wraps modulo 16 */
#if STEP_MAIN == 16
#define w(j, i) W[(i) & 15]
#else
#define w(j, i) W[((size_t)(j) + (i)) & 15]
#endif
#define blk2(j, i) (w(j, i) += blk2_main(j, i))
#endif

#define W_MAIN(i) blk2(j, i)


/* one SHA-256 round with explicit rotation of the working variables */
#define T1(wx, i) \
    tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    h = g; \
    g = f; \
    f = e; \
    e = d + tmp; \
    tmp += S0(a) + Maj(a, b, c); \
    d = c; \
    c = b; \
    b = a; \
    a = tmp; \

#define R1_PRE(i)  T1( W_PRE, i)
#define R1_MAIN(i) T1( W_MAIN, i)

#if (!defined(Z7_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
/* two rounds per macro invocation */
#define R2_MAIN(i) \
    R1_MAIN(i) \
    R1_MAIN(i + 1) \

#endif



#if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8

/* unrolled rounds: instead of rotating variables, successive invocations
   permute the variable names (see R4 / R8 argument order) */
#define T4( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    tmp = h; \
    h += d; \
    d = tmp + S0(a) + Maj(a, b, c); \

#define R4( wx, i) \
    T4 ( a,b,c,d,e,f,g,h, wx, (i  )); \
    T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
    T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
    T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \

#define R4_PRE(i)  R4( W_PRE, i)
#define R4_MAIN(i) R4( W_MAIN, i)


#define T8( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    d += h; \
    h += S0(a) + Maj(a, b, c); \

#define R8( wx, i) \
    T8 ( a,b,c,d,e,f,g,h, wx, i  ); \
    T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
    T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
    T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
    T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
    T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
    T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
    T8 ( b,c,d,e,f,g,h,a, wx, i+7); \

#define R8_PRE(i)  R8( W_PRE, i)
#define R8_MAIN(i) R8( W_MAIN, i)

#endif
229
230
/* SHA-256 round constants K[0..63] (FIPS 180-4, section 4.2.2).
   Kept non-static (with a preceding extern declaration) so the table
   can be shared outside this translation unit; 64-byte aligned. */
extern
MY_ALIGN(64) const UInt32 SHA256_K_ARRAY[64];
MY_ALIGN(64) const UInt32 SHA256_K_ARRAY[64] = {
  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};




/* short alias used by the round macros above */
#define K SHA256_K_ARRAY
257
Z7_NO_INLINE
/* Portable (software) SHA-256 compression: processes numBlocks complete
   64-byte blocks from data, updating state[8] in place.
   The round grouping is controlled by STEP_PRE / STEP_MAIN /
   Z7_SHA256_UNROLL configured above; all variants compute the same result. */
void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
{
  /* message schedule: full 64 words, or a 16-word ring (STEP_MAIN == 16) */
  UInt32 W
#ifdef Z7_SHA256_BIG_W
      [64];
#else
      [16];
#endif
  unsigned j;
  UInt32 a,b,c,d,e,f,g,h;
  /* tmp is only needed by the rotating-variable round macros */
#if !defined(Z7_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
  UInt32 tmp;
#endif

  if (numBlocks == 0) return;

  a = state[0];
  b = state[1];
  c = state[2];
  d = state[3];
  e = state[4];
  f = state[5];
  g = state[6];
  h = state[7];

  do
  {

  /* rounds 0..15: message words are loaded directly (W_PRE) */
  for (j = 0; j < 16; j += STEP_PRE)
  {
#if STEP_PRE > 4

#if STEP_PRE < 8
    R4_PRE(0);
#else
    R8_PRE(0);
#if STEP_PRE == 16
    R8_PRE(8);
#endif
#endif

#else

    R1_PRE(0)
#if STEP_PRE >= 2
    R1_PRE(1)
#if STEP_PRE >= 4
    R1_PRE(2)
    R1_PRE(3)
#endif
#endif

#endif
  }

  /* rounds 16..63: schedule words are expanded on the fly (W_MAIN) */
  for (j = 16; j < 64; j += STEP_MAIN)
  {
#if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8

#if STEP_MAIN < 8
    R4_MAIN(0)
#else
    R8_MAIN(0)
#if STEP_MAIN == 16
    R8_MAIN(8)
#endif
#endif

#else

    R1_MAIN(0)
#if STEP_MAIN >= 2
    R1_MAIN(1)
#if STEP_MAIN >= 4
    R2_MAIN(2)
#if STEP_MAIN >= 8
    R2_MAIN(4)
    R2_MAIN(6)
#if STEP_MAIN >= 16
    R2_MAIN(8)
    R2_MAIN(10)
    R2_MAIN(12)
    R2_MAIN(14)
#endif
#endif
#endif
#endif
#endif
  }

  /* feed-forward: add the compressed block into the chaining state */
  a += state[0]; state[0] = a;
  b += state[1]; state[1] = b;
  c += state[2]; state[2] = c;
  d += state[3]; state[3] = d;
  e += state[4]; state[4] = e;
  f += state[5]; state[5] = f;
  g += state[6]; state[6] = g;
  h += state[7]; state[7] = h;

  data += SHA256_BLOCK_SIZE;
  }
  while (--numBlocks);
}
362
363
364 #define Sha256_UpdateBlock(p) SHA256_UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
365
/* Feeds (data, size) into the hash. Partial blocks are accumulated in
   p->buffer; complete 64-byte blocks go to the selected update function. */
void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
{
  unsigned bufPos, avail;
  if (size == 0)
    return;
  bufPos = (unsigned)p->v.vars.count & (SHA256_BLOCK_SIZE - 1);
  avail = SHA256_BLOCK_SIZE - bufPos;   /* free bytes left in the buffer */
  p->v.vars.count += size;
  if (avail > size)
  {
    /* input does not complete a block: just buffer it */
    memcpy(p->buffer + bufPos, data, size);
    return;
  }
  if (bufPos != 0)
  {
    /* finish the partially filled block first */
    memcpy(p->buffer + bufPos, data, avail);
    data += avail;
    size -= avail;
    Sha256_UpdateBlock(p);
  }
  {
    /* bulk-process the whole blocks, then stash the remainder */
    const size_t numBlocks = size >> 6;
    SHA256_UPDATE_BLOCKS(p)(p->state, data, numBlocks);
    size &= SHA256_BLOCK_SIZE - 1;
    if (size != 0)
    {
      data += (numBlocks << 6);
      memcpy(p->buffer, data, size);
    }
  }
}
398
399
/* Finalizes the hash: appends the 0x80 pad byte, zero padding and the
   64-bit big-endian bit length, writes the 32-byte big-endian digest,
   then re-initializes the state for reuse. */
void Sha256_Final(CSha256 *p, Byte *digest)
{
  unsigned pos = (unsigned)p->v.vars.count & (SHA256_BLOCK_SIZE - 1);
  p->buffer[pos++] = 0x80;   /* mandatory first padding byte */
  /* if the 8-byte length field no longer fits, flush a fully padded block */
  if (pos > (SHA256_BLOCK_SIZE - 8))
  {
    memset(p->buffer + pos, 0, SHA256_BLOCK_SIZE - pos);
    Sha256_UpdateBlock(p);
    pos = 0;
  }
  memset(p->buffer + pos, 0, (SHA256_BLOCK_SIZE - 8) - pos);
  {
    /* total message length in bits, stored big-endian in the last 8 bytes */
    const UInt64 numBits = p->v.vars.count << 3;
    SetBe32(p->buffer + SHA256_BLOCK_SIZE - 8, (UInt32)(numBits >> 32))
    SetBe32(p->buffer + SHA256_BLOCK_SIZE - 4, (UInt32)(numBits))
  }
  Sha256_UpdateBlock(p);
#if 1 && defined(MY_CPU_BE)
  /* big-endian host: state already has the digest byte order */
  memcpy(digest, p->state, SHA256_DIGEST_SIZE);
#else
  {
    /* emit each state word big-endian */
    unsigned i;
    for (i = 0; i < 8; i++)
    {
      SetBe32(digest, p->state[i])
      digest += 4;
    }
  }
#endif
  Sha256_InitState(p);
}
439
440
/* One-time global initialization: probes CPU features and publishes the
   default and (if available) hardware block-update function pointers. */
void Sha256Prepare(void)
{
#ifdef Z7_COMPILER_SHA256_SUPPORTED
  SHA256_FUNC_UPDATE_BLOCKS f_hw = NULL;
#ifdef MY_CPU_X86_OR_AMD64
  const BoolInt hwOk = CPU_IsSupported_SHA()
      && CPU_IsSupported_SSSE3();
#else
  const BoolInt hwOk = CPU_IsSupported_SHA2();
#endif
  if (hwOk)
  {
    // printf("\n========== HW SHA256 ======== \n");
    f_hw = Sha256_UpdateBlocks_HW;
  }
  g_SHA256_FUNC_UPDATE_BLOCKS = f_hw ? f_hw : Sha256_UpdateBlocks;
  g_SHA256_FUNC_UPDATE_BLOCKS_HW = f_hw;
#endif
}
462
463 #undef U64C
464 #undef K
465 #undef S0
466 #undef S1
467 #undef s0
468 #undef s1
469 #undef Ch
470 #undef Maj
471 #undef W_MAIN
472 #undef W_PRE
473 #undef w
474 #undef blk2_main
475 #undef blk2
476 #undef T1
477 #undef T4
478 #undef T8
479 #undef R1_PRE
480 #undef R1_MAIN
481 #undef R2_MAIN
482 #undef R4
483 #undef R4_PRE
484 #undef R4_MAIN
485 #undef R8
486 #undef R8_PRE
487 #undef R8_MAIN
488 #undef STEP_PRE
489 #undef STEP_MAIN
490 #undef Z7_SHA256_BIG_W
491 #undef Z7_SHA256_UNROLL
492 #undef Z7_COMPILER_SHA256_SUPPORTED
493