
# Implement fast SHA-256 with SSSE3 instructions. (x86_64)
# This software is available to you under a choice of one of two
# - Redistributions of source code must retain the above
# - Redistributions in binary form must reproduce the above
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# This code is described in an Intel White-Paper:
# "Fast SHA-256 Implementations on Intel Architecture Processors"
# Add reg to mem using reg-mem add and store
SHUF_00BA = %xmm10		# shuffle xBxA -> 00BA
SHUF_DC00 = %xmm11		# shuffle xDxC -> DC00
e = %edx
a = %eax
_XMM_SAVE_SIZE = 0
_INP_END = 0
# Rotate values of symbols a...h
f = e
e = d
b = a
a = TMP_
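The reassignments above rename the symbols between rounds, so the a...h "rotation" costs no instructions at run time. As a point of reference, here is a minimal C sketch of my own (not part of the .S file) of the equivalent run-time rotation; t1 and t2 stand for the round's two temporaries:

#include <stdint.h>

/* Shift the working variables a..h (stored as v[0..7]) down one slot
 * after a round, which the symbol reassignment above does for free at
 * assembly time. */
static void rotate_state(uint32_t v[8], uint32_t t1, uint32_t t2)
{
	v[7] = v[6];		/* h = g */
	v[6] = v[5];		/* g = f */
	v[5] = v[4];		/* f = e */
	v[4] = v[3] + t1;	/* e = d + t1 */
	v[3] = v[2];		/* d = c */
	v[2] = v[1];		/* c = b */
	v[1] = v[0];		/* b = a */
	v[0] = t1 + t2;		/* a = t1 + t2 */
}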
## compute s0 four at a time and s1 two at a time
## compute W[-16] + W[-7] 4 at a time
	mov	e, y0			# y0 = e
	ror	$(25-11), y0		# y0 = e >> (25-11)
	mov	a, y1			# y1 = a
	palignr	$4, X2, XTMP0		# XTMP0 = W[-7]
	ror	$(22-13), y1		# y1 = a >> (22-13)
	xor	e, y0			# y0 = e ^ (e >> (25-11))
	ror	$(11-6), y0		# y0 = (e >> (11-6)) ^ (e >> (25-6))
	xor	a, y1			# y1 = a ^ (a >> (22-13))
	paddd	X0, XTMP0		# XTMP0 = W[-7] + W[-16]
	xor	e, y0			# y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
	and	e, y2			# y2 = (f^g)&e
	ror	$(13-2), y1		# y1 = (a >> (13-2)) ^ (a >> (22-2))
	palignr	$4, X0, XTMP1		# XTMP1 = W[-15]
	xor	a, y1			# y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
	xor	g, y2			# y2 = CH = ((f^g)&e)^g
	movdqa	XTMP1, XTMP2		# XTMP2 = W[-15]
	ror	$2, y1			# y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
	movdqa	XTMP1, XTMP3		# XTMP3 = W[-15]
	mov	a, y0			# y0 = a
	mov	a, y2			# y2 = a
	pslld	$(32-7), XTMP1		# XTMP1 = W[-15] << (32-7)
	or	c, y0			# y0 = a|c
	and	c, y2			# y2 = a&c
	psrld	$7, XTMP2		# XTMP2 = W[-15] >> 7
	and	b, y0			# y0 = (a|c)&b
	por	XTMP2, XTMP1		# XTMP1 = W[-15] ror 7
	or	y2, y0			# y0 = MAJ = ((a|c)&b)|(a&c)
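The y2 and y0 comments above use rearranged forms of CH and MAJ that each need one fewer register than the textbook definitions. A brief check of my own (not from the source) that they equal the FIPS 180-4 definitions bit for bit:

#include <assert.h>
#include <stdint.h>

/* Exhaustive one-bit check that the forms used above match FIPS 180-4:
 * CH(e,f,g)  = (e&f) ^ (~e&g)       vs  ((f^g)&e)^g
 * MAJ(a,b,c) = (a&b)^(a&c)^(b&c)    vs  ((a|c)&b)|(a&c)  */
int main(void)
{
	for (uint32_t x = 0; x < 2; x++)
	for (uint32_t y = 0; y < 2; y++)
	for (uint32_t z = 0; z < 2; z++) {
		assert(((x & y) ^ (~x & z & 1u)) == (((y ^ z) & x) ^ z));
		assert(((x & y) ^ (x & z) ^ (y & z)) ==
		       (((x | z) & y) | (x & z)));
	}
	return 0;
}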
	movdqa	XTMP3, XTMP2		# XTMP2 = W[-15]
	mov	e, y0			# y0 = e
	mov	a, y1			# y1 = a
	movdqa	XTMP3, XTMP4		# XTMP4 = W[-15]
	ror	$(25-11), y0		# y0 = e >> (25-11)
	xor	e, y0			# y0 = e ^ (e >> (25-11))
	ror	$(22-13), y1		# y1 = a >> (22-13)
	pslld	$(32-18), XTMP3		# XTMP3 = W[-15] << (32-18)
	xor	a, y1			# y1 = a ^ (a >> (22-13))
	ror	$(11-6), y0		# y0 = (e >> (11-6)) ^ (e >> (25-6))
	ror	$(13-2), y1		# y1 = (a >> (13-2)) ^ (a >> (22-2))
	xor	e, y0			# y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
	and	e, y2			# y2 = (f^g)&e
	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
	xor	a, y1			# y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
	xor	g, y2			# y2 = CH = ((f^g)&e)^g
	psrld	$3, XTMP4		# XTMP4 = W[-15] >> 3
	ror	$2, y1			# y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
	pxor	XTMP2, XTMP1		# XTMP1 = (W[-15] ror 7) ^ (W[-15] ror 18)
	mov	a, y0			# y0 = a
	mov	a, y2			# y2 = a
	or	c, y0			# y0 = a|c
	and	c, y2			# y2 = a&c
	pshufd	$0b11111010, X3, XTMP2	# XTMP2 = W[-2] {BBAA}
	and	b, y0			# y0 = (a|c)&b
	paddd	XTMP1, XTMP0		# XTMP0 = W[-16] + W[-7] + s0
	or	y2, y0			# y0 = MAJ = ((a|c)&b)|(a&c)
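Per dword, the pslld/psrld/por/pxor sequence above evaluates the small sigma-0 function; SSE has no 32-bit vector rotate, so each rotate is built from a shift pair. A scalar sketch of my own, not from the .S file:

#include <stdint.h>

/* ror via a shift pair, as pslld $(32-r) / psrld $r / por do per lane */
static uint32_t ror32(uint32_t x, unsigned r)
{
	return (x >> r) | (x << (32 - r));
}

/* the "s0" in the XTMP0 comment above: sigma0(W[-15]) */
static uint32_t sigma0(uint32_t w)
{
	return ror32(w, 7) ^ ror32(w, 18) ^ (w >> 3);
}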
	movdqa	XTMP2, XTMP3		# XTMP3 = W[-2] {BBAA}
	mov	e, y0			# y0 = e
	mov	a, y1			# y1 = a
	ror	$(25-11), y0		# y0 = e >> (25-11)
	movdqa	XTMP2, XTMP4		# XTMP4 = W[-2] {BBAA}
	xor	e, y0			# y0 = e ^ (e >> (25-11))
	ror	$(22-13), y1		# y1 = a >> (22-13)
	xor	a, y1			# y1 = a ^ (a >> (22-13))
	ror	$(11-6), y0		# y0 = (e >> (11-6)) ^ (e >> (25-6))
	psrlq	$17, XTMP2		# XTMP2 = W[-2] ror 17 {xBxA}
	psrlq	$19, XTMP3		# XTMP3 = W[-2] ror 19 {xBxA}
	xor	e, y0			# y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
	and	e, y2			# y2 = (f^g)&e
	psrld	$10, XTMP4		# XTMP4 = W[-2] >> 10 {BBAA}
	ror	$(13-2), y1		# y1 = (a >> (13-2)) ^ (a >> (22-2))
	xor	a, y1			# y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
	xor	g, y2			# y2 = CH = ((f^g)&e)^g
	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
	ror	$2, y1			# y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
	mov	a, y0			# y0 = a
	mov	a, y2			# y2 = a
	or	c, y0			# y0 = a|c
	and	c, y2			# y2 = a&c
	paddd	XTMP4, XTMP0		# XTMP0 = {..., ..., W[1], W[0]}
	and	b, y0			# y0 = (a|c)&b
	pshufd	$0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
	or	y2, y0			# y0 = MAJ = ((a|c)&b)|(a&c)
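The psrlq $17/$19 lines rotate 32-bit words with a 64-bit shift: the earlier pshufd duplicated each W[-2] dword into both halves of a qword (the {BBAA} layout), so a 64-bit logical right shift leaves the 32-bit rotation in the low dword, while the "x" positions hold garbage. A scalar model of my own, not from the source:

#include <stdint.h>

/* One {BBAA} qword lane: with w in both halves, q >> r carries
 * ror32(w, r) in its low dword, which is all the later shuffles keep. */
static uint32_t ror32_via_psrlq(uint32_t w, unsigned r)
{
	uint64_t q = ((uint64_t)w << 32) | w;
	return (uint32_t)(q >> r);
}

/* the s1 term being assembled above: sigma1(W[-2]) */
static uint32_t sigma1(uint32_t w)
{
	return ror32_via_psrlq(w, 17) ^ ror32_via_psrlq(w, 19) ^ (w >> 10);
}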
	movdqa	XTMP2, XTMP3		# XTMP3 = W[-2] {DDCC}
	mov	e, y0			# y0 = e
	ror	$(25-11), y0		# y0 = e >> (25-11)
	mov	a, y1			# y1 = a
	movdqa	XTMP2, X0		# X0 = W[-2] {DDCC}
	ror	$(22-13), y1		# y1 = a >> (22-13)
	xor	e, y0			# y0 = e ^ (e >> (25-11))
	ror	$(11-6), y0		# y0 = (e >> (11-6)) ^ (e >> (25-6))
	psrlq	$17, XTMP2		# XTMP2 = W[-2] ror 17 {xDxC}
	xor	a, y1			# y1 = a ^ (a >> (22-13))
	psrlq	$19, XTMP3		# XTMP3 = W[-2] ror 19 {xDxC}
	xor	e, y0			# y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
	and	e, y2			# y2 = (f^g)&e
	ror	$(13-2), y1		# y1 = (a >> (13-2)) ^ (a >> (22-2))
	psrld	$10, X0			# X0 = W[-2] >> 10 {DDCC}
	xor	a, y1			# y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
	xor	g, y2			# y2 = CH = ((f^g)&e)^g
	ror	$2, y1			# y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
	mov	a, y0			# y0 = a
	mov	a, y2			# y2 = a
	or	c, y0			# y0 = a|c
	and	c, y2			# y2 = a&c
	paddd	XTMP0, X0		# X0 = {W[3], W[2], W[1], W[0]}
	and	b, y0			# y0 = (a|c)&b
	or	y2, y0			# y0 = MAJ = ((a|c)&b)|(a&c)
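Altogether, the four interleaved blocks evaluate the FIPS 180-4 message-schedule recurrence for four W words per pass (the upper pair two at a time, since their W[i-2] inputs are themselves only just produced). A scalar sketch of my own, reusing sigma0 and sigma1 from the sketches above:

/* W[i] = W[i-16] + sigma0(W[i-15]) + W[i-7] + sigma1(W[i-2]) */
static void message_schedule(uint32_t W[64])
{
	for (int i = 16; i < 64; i++)
		W[i] = W[i-16] + sigma0(W[i-15]) + W[i-7] + sigma1(W[i-2]);
}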
	mov	e, y0			# y0 = e
	ror	$(25-11), y0		# y0 = e >> (25-11)
	mov	a, y1			# y1 = a
	xor	e, y0			# y0 = e ^ (e >> (25-11))
	ror	$(22-13), y1		# y1 = a >> (22-13)
	xor	a, y1			# y1 = a ^ (a >> (22-13))
	ror	$(11-6), y0		# y0 = (e >> (11-6)) ^ (e >> (25-6))
	xor	e, y0			# y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
	ror	$(13-2), y1		# y1 = (a >> (13-2)) ^ (a >> (22-2))
	and	e, y2			# y2 = (f^g)&e
	xor	a, y1			# y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
	xor	g, y2			# y2 = CH = ((f^g)&e)^g
	ror	$2, y1			# y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
	mov	a, y0			# y0 = a
	mov	a, y2			# y2 = a
	or	c, y0			# y0 = a|c
	and	c, y2			# y2 = a&c
	and	b, y0			# y0 = (a|c)&b
	or	y2, y0			# y0 = MAJ = ((a|c)&b)|(a&c)
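This last block is the same scalar round without any schedule work. A C model of my own for one round, as the y0/y1/y2 comments describe it, reusing ror32 and rotate_state from the earlier sketches; k and w are the round constant and schedule word:

static void do_round(uint32_t v[8], uint32_t k, uint32_t w)
{
	uint32_t a = v[0], b = v[1], c = v[2],
		 e = v[4], f = v[5], g = v[6], h = v[7];
	uint32_t s1  = ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25); /* y0 */
	uint32_t s0  = ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22); /* y1 */
	uint32_t ch  = ((f ^ g) & e) ^ g;                         /* y2 */
	uint32_t maj = ((a | c) & b) | (a & c);
	rotate_state(v, h + s1 + ch + k + w, s0 + maj);           /* t1, t2 */
}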
	mov	4*0(CTX), a
	mov	4*4(CTX), e
	mov	4*7(CTX), h
	COPY_XMM_AND_BSWAP	X0, 0*16(INP), BYTE_FLIP_MASK
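COPY_XMM_AND_BSWAP loads 16 message bytes and byte-swaps all four dwords at once with pshufb and the BYTE_FLIP_MASK defined below, because SHA-256 reads the message as big-endian words. The scalar equivalent for one word, as a sketch of my own:

#include <stdint.h>

/* Load one big-endian 32-bit message word from little-endian memory. */
static uint32_t load_be32(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
}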
	DO_ROUND	0
	DO_ROUND	0
	addm	(4*0)(CTX),a
	addm	(4*4)(CTX),e
	addm	(4*7)(CTX),h
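The addm lines are the Davies-Meyer feed-forward: after 64 rounds, each working variable is added back into the hash state in memory. In scalar C, as a sketch of my own with the state laid out as {a..h}:

static void feed_forward(uint32_t state[8], const uint32_t v[8])
{
	for (int i = 0; i < 8; i++)
		state[i] += v[i];	/* addm (4*i)(CTX), reg */
}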
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
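This table holds the standard SHA-256 round constants: the first 32 bits of the fractional parts of the cube roots of the first 64 primes (FIPS 180-4). A generator of my own for cross-checking the rows above; it assumes the double-precision cbrt is accurate enough for these 64 values, which it is on typical IEEE-754 implementations:

#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int n = 0;
	for (int p = 2; n < 64; p++) {
		int prime = 1;
		for (int d = 2; d * d <= p; d++)
			if (p % d == 0) { prime = 0; break; }
		if (!prime)
			continue;
		double f = cbrt((double)p);	/* fractional part -> K */
		printf("0x%08x%s", (uint32_t)((f - floor(f)) * 4294967296.0),
		       (++n % 4) ? "," : "\n");
	}
	return 0;
}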
	.octa	0x0c0d0e0f08090a0b0405060700010203
# shuffle xBxA -> 00BA
	.octa	0xFFFFFFFFFFFFFFFF0b0a090803020100
# shuffle xDxC -> DC00
	.octa	0x0b0a090803020100FFFFFFFFFFFFFFFF
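The pshufb semantics explain all three masks: a mask byte with bit 7 set zeroes the destination byte, otherwise its low four bits index a source byte. So BYTE_FLIP_MASK reverses the bytes within each dword, SHUF_00BA keeps the A and B dwords of an {xBxA} vector in the low half, giving {0,0,B,A}, and SHUF_DC00 moves the C and D dwords of an {xDxC} vector to the high half, giving {D,C,0,0}; a POR of the two results merges the four schedule words. A per-byte model of my own:

#include <stdint.h>

/* Model of pshufb on one 128-bit register. */
static void pshufb16(uint8_t dst[16], const uint8_t src[16],
		     const uint8_t mask[16])
{
	for (int i = 0; i < 16; i++)
		dst[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x0f];
}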