Lines Matching +full:- +full:2 +full:g

2 # Implement fast SHA-256 with AVX1 instructions. (x86_64)
13 # General Public License (GPL) Version 2, available from the file
21 # - Redistributions of source code must retain the above
25 # - Redistributions in binary form must reproduce the above
40 # This code is described in an Intel White-Paper:
41 # "Fast SHA-256 Implementations on Intel Architecture Processors"
59 # Add reg to mem using reg-mem add and store
67 shld $(32-(\p1)), \p2, \p2
94 SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA
95 SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00
99 INP = %rsi # 2nd arg
111 g = %r10d define
144 h = g
145 g = f define
156 ## compute W[-16] + W[-7] 4 at a time
159 MY_ROR (25-11), y0 # y0 = e >> (25-11)
161 vpalignr $4, X2, X3, XTMP0 # XTMP0 = W[-7]
162 MY_ROR (22-13), y1 # y1 = a >> (22-13)
163 xor e, y0 # y0 = e ^ (e >> (25-11))
165 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
166 xor a, y1 # y1 = a ^ (a >> (22-13)
167 xor g, y2 # y2 = f^g
168 vpaddd X0, XTMP0, XTMP0 # XTMP0 = W[-7] + W[-16]
169 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
170 and e, y2 # y2 = (f^g)&e
171 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
173 vpalignr $4, X0, X1, XTMP1 # XTMP1 = W[-15]
174 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
176 xor g, y2 # y2 = CH = ((f^g)&e)^g
177 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
187 vpslld $(32-7), XTMP1, XTMP3
190 vpor XTMP2, XTMP3, XTMP3 # XTMP1 = W[-15] MY_ROR 7
196 MY_ROR (25-11), y0 # y0 = e >> (25-11)
197 xor e, y0 # y0 = e ^ (e >> (25-11))
199 MY_ROR (22-13), y1 # y1 = a >> (22-13)
201 xor a, y1 # y1 = a ^ (a >> (22-13)
202 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
203 xor g, y2 # y2 = f^g
204 vpsrld $3, XTMP1, XTMP4 # XTMP4 = W[-15] >> 3
205 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
206 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
207 and e, y2 # y2 = (f^g)&e
209 vpslld $(32-18), XTMP1, XTMP1
210 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
211 xor g, y2 # y2 = CH = ((f^g)&e)^g
215 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
216 vpxor XTMP2, XTMP3, XTMP3 # XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR
225 vpshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
228 vpaddd XTMP1, XTMP0, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
234 MY_ROR (25-11), y0 # y0 = e >> (25-11)
235 xor e, y0 # y0 = e ^ (e >> (25-11))
236 MY_ROR (22-13), y1 # y1 = a >> (22-13)
238 xor a, y1 # y1 = a ^ (a >> (22-13)
239 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
240 vpsrld $10, XTMP2, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
241 xor g, y2 # y2 = f^g
242 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xBxA}
243 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
244 and e, y2 # y2 = (f^g)&e
245 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xBxA}
246 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
247 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
248 xor g, y2 # y2 = CH = ((f^g)&e)^g
252 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
253 add (2*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH
266 vpshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
271 MY_ROR (25-11), y0 # y0 = e >> (25-11)
273 MY_ROR (22-13), y1 # y1 = a >> (22-13)
274 xor e, y0 # y0 = e ^ (e >> (25-11))
276 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
277 vpsrld $10, XTMP2, XTMP5 # XTMP5 = W[-2] >> 10 {DDCC}
278 xor a, y1 # y1 = a ^ (a >> (22-13)
279 xor g, y2 # y2 = f^g
280 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xDxC}
281 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
282 and e, y2 # y2 = (f^g)&e
283 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
284 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xDxC}
285 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
287 xor g, y2 # y2 = CH = ((f^g)&e)^g
289 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
300 vpaddd XTMP0, XTMP5, X0 # X0 = {W[3], W[2], W[1], W[0]}
312 MY_ROR (25-11), y0 # y0 = e >> (25-11)
314 xor e, y0 # y0 = e ^ (e >> (25-11))
315 MY_ROR (22-13), y1 # y1 = a >> (22-13)
317 xor a, y1 # y1 = a ^ (a >> (22-13)
318 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
319 xor g, y2 # y2 = f^g
320 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
321 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
322 and e, y2 # y2 = (f^g)&e
323 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
325 xor g, y2 # y2 = CH = ((f^g)&e)^g
327 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
346 ## arg 2 : pointer to input data
370 mov 4*2(CTX), c
374 mov 4*6(CTX), g
386 COPY_XMM_AND_BSWAP X2, 2*16(INP), BYTE_FLIP_MASK
403 vpaddd 2*16(TBL), X0, XFER
415 mov $2, SRND
421 DO_ROUND 2
426 add $2*16, TBL
429 DO_ROUND 2
440 addm (4*2)(CTX),c
444 addm (4*6)(CTX),g
491 # shuffle xBxA -> 00BA
497 # shuffle xDxC -> DC00