Lines Matching +full:2 +full:w
13 # General Public License (GPL) Version 2, available from the file
92 INP = %rsi # 2nd arg
150 ## compute W[-16] + W[-7] 4 at a time
155 palignr $4, X2, XTMP0 # XTMP0 = W[-7]
163 paddd X0, XTMP0 # XTMP0 = W[-7] + W[-16]
166 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
168 palignr $4, X0, XTMP1 # XTMP1 = W[-15]
169 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
172 movdqa XTMP1, XTMP2 # XTMP2 = W[-15]
173 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
175 add _XFER(%rsp), y2 # y2 = k + w + S1 + CH
176 movdqa XTMP1, XTMP3 # XTMP3 = W[-15]
178 add y2, h # h = h + S1 + CH + k + w
182 add h, d # d = d + h + S1 + CH + k + w
186 add y1, h # h = h + S1 + CH + k + w + S0
187 por XTMP2, XTMP1 # XTMP1 = W[-15] ror 7
189 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
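
The comment trail above is the standard SHA-256 round rewritten in place: no T1/T2 temporaries, h and d are updated directly. A minimal C sketch of that form, under assumed names (rotr32, round_core; kw stands for the K[t] + W[t] value the code stages in _XFER):

#include <stdint.h>

static inline uint32_t rotr32(uint32_t x, unsigned n)
{
        return (x >> n) | (x << (32 - n));
}

/* In-place round: with T1 = h + S1 + CH + k + w and T2 = S0 + MAJ,
 *   h += S1 + CH + kw;   h now holds T1
 *   d += h;              d becomes the new e (d + T1)
 *   h += S0 + MAJ;       h becomes the new a (T1 + T2)
 */
static void round_core(uint32_t a, uint32_t b, uint32_t c, uint32_t *d,
                       uint32_t e, uint32_t f, uint32_t g, uint32_t *h,
                       uint32_t kw)
{
        uint32_t s1  = rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25);
        uint32_t ch  = (e & f) ^ (~e & g);
        uint32_t s0  = rotr32(a, 2) ^ rotr32(a, 13) ^ rotr32(a, 22);
        uint32_t maj = (a & b) ^ (a & c) ^ (b & c);

        *h += s1 + ch + kw;
        *d += *h;
        *h += s0 + maj;
}
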
192 movdqa XTMP3, XTMP2 # XTMP2 = W[-15]
195 movdqa XTMP3, XTMP4 # XTMP4 = W[-15]
205 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
210 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
212 psrld $3, XTMP4 # XTMP4 = W[-15] >> 3
214 add (1*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH
215 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
216 pxor XTMP2, XTMP1 # XTMP1 = W[-15] ror 7 ^ W[-15] ror 18
218 add y2, h # h = h + S1 + CH + k + w
222 add h, d # d = d + h + S1 + CH + k + w
225 pshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
227 add y1, h # h = h + S1 + CH + k + w + S0
228 paddd XTMP1, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
230 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
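
The por/pxor/psrld steps above assemble s0(W[-15]) = (W[-15] ror 7) ^ (W[-15] ror 18) ^ (W[-15] >> 3) from shift halves, since SSE has no packed dword rotate. A scalar sketch of the same function (sigma0 and rotr32 are illustrative names, not from the file):

#include <stdint.h>

static inline uint32_t rotr32(uint32_t x, unsigned n)
{
        return (x >> n) | (x << (32 - n));
}

/* s0 as the XTMP pipeline builds it, one lane at a time */
static uint32_t sigma0(uint32_t w15)        /* w15 = W[t-15] */
{
        return rotr32(w15, 7) ^ rotr32(w15, 18) ^ (w15 >> 3);
}
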
233 movdqa XTMP2, XTMP3 # XTMP3 = W[-2] {BBAA}
237 movdqa XTMP2, XTMP4 # XTMP4 = W[-2] {BBAA}
243 psrlq $17, XTMP2 # XTMP2 = W[-2] ror 17 {xBxA}
245 psrlq $19, XTMP3 # XTMP3 = W[-2] ror 19 {xBxA}
248 psrld $10, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
249 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
250 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
255 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
256 add (2*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH
259 add y2, h # h = h + S1 + CH + k + w
263 add h, d # d = d + h + S1 + CH + k + w
265 paddd XTMP4, XTMP0 # XTMP0 = {..., ..., W[1], W[0]}
267 add y1, h # h = h + S1 + CH + k + w + S0
269 pshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
271 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
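
The psrlq $17/$19 lines work because each W[-2] dword was duplicated within its qword lane by the preceding pshufd ({BBAA}, later {DDCC}): one 64-bit logical shift then leaves a 32-bit rotate in the low dword, with garbage in the x positions. A self-contained check of that identity (ror_via_qword is a hypothetical name):

#include <assert.h>
#include <stdint.h>

/* Duplicating a dword in a qword turns a 64-bit shift into a
 * 32-bit rotate in the low dword */
static uint32_t ror_via_qword(uint32_t x, unsigned n)
{
        uint64_t dup = ((uint64_t)x << 32) | x;
        return (uint32_t)(dup >> n);        /* low dword == ror(x, n) */
}

int main(void)
{
        uint32_t x = 0x12345678;
        assert(ror_via_qword(x, 17) == ((x >> 17) | (x << 15)));
        assert(ror_via_qword(x, 19) == ((x >> 19) | (x << 13)));
        return 0;
}
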
274 movdqa XTMP2, XTMP3 # XTMP3 = W[-2] {DDCC}
278 movdqa XTMP2, X0 # X0 = W[-2] {DDCC}
283 psrlq $17, XTMP2 # XTMP2 = W[-2] ror 17 {xDxC}
286 psrlq $19, XTMP3 # XTMP3 = W[-2] ror 19 {xDxC}
289 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
290 psrld $10, X0 # X0 = W[-2] >> 10 {DDCC}
291 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
292 ror $6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
295 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
297 add (3*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH
300 add y2, h # h = h + S1 + CH + k + w
304 add h, d # d = d + h + S1 + CH + k + w
306 paddd XTMP0, X0 # X0 = {W[3], W[2], W[1], W[0]}
308 add y1, h # h = h + S1 + CH + k + w + S0
310 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
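
After the final paddd, X0 holds the next four schedule words. This is the standard message-schedule recurrence, evaluated four lanes at a time; the upper two lanes' W[-2] inputs are themselves freshly computed, which is why s1 is done in two halves ({xBxA}, then {xDxC}). A scalar sketch under assumed names (sigma0, sigma1, sha256_schedule):

#include <stdint.h>

static inline uint32_t rotr32(uint32_t x, unsigned n)
{
        return (x >> n) | (x << (32 - n));
}

static uint32_t sigma0(uint32_t x) { return rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3); }
static uint32_t sigma1(uint32_t x) { return rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10); }

/* Scalar form of the recurrence the vector code computes 4 wide */
static void sha256_schedule(uint32_t w[64])
{
        for (int t = 16; t < 64; t++)
                w[t] = w[t - 16] + sigma0(w[t - 15]) + w[t - 7] + sigma1(w[t - 2]);
}
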
328 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
330 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
334 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
336 add offset(%rsp), y2 # y2 = k + w + S1 + CH
338 add y2, h # h = h + S1 + CH + k + w
341 add h, d # d = d + h + S1 + CH + k + w
344 add y1, h # h = h + S1 + CH + k + w + S0
346 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
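
DO_ROUND is the same round body with k + w read from the stack slot offset = round*4 + _XFER, and with the a..h renaming done through the macro's arguments rather than register moves. A hedged C equivalent (do_four_rounds and xfer are assumed names; an index rotation stands in for the renaming):

#include <stdint.h>

static inline uint32_t rotr32(uint32_t x, unsigned n)
{
        return (x >> n) | (x << (32 - n));
}

/* s[] holds a..h; xfer[round] plays the role of (round*4 + _XFER)(%rsp),
 * where K[t] + W[t] was pre-added by paddd before the rounds run. */
static void do_four_rounds(uint32_t s[8], const uint32_t xfer[4])
{
        for (int round = 0; round < 4; round++) {
                uint32_t a = s[0], b = s[1], c = s[2];
                uint32_t e = s[4], f = s[5], g = s[6];
                uint32_t s1  = rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25);
                uint32_t ch  = (e & f) ^ (~e & g);
                uint32_t s0  = rotr32(a, 2) ^ rotr32(a, 13) ^ rotr32(a, 22);
                uint32_t maj = (a & b) ^ (a & c) ^ (b & c);

                s[7] += s1 + ch + xfer[round];  /* h = h + S1 + CH + k + w */
                s[3] += s[7];                   /* d = d + h + ...         */
                s[7] += s0 + maj;               /* h = h + ... + S0 + MAJ  */

                /* (a..h) <- (h,a,b,c,d,e,f,g): the macro's argument shuffle */
                uint32_t t = s[7];
                for (int i = 7; i > 0; i--)
                        s[i] = s[i - 1];
                s[0] = t;
        }
}
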
355 ## arg 2 : pointer to input data
379 mov 4*2(CTX), c
396 COPY_XMM_AND_BSWAP X2, 2*16(INP), BYTE_FLIP_MASK
415 movdqa 2*16(TBL), XFER
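
COPY_XMM_AND_BSWAP loads 16 input bytes and applies the BYTE_FLIP_MASK shuffle so each dword is read in the big-endian order SHA-256 specifies. A scalar sketch (load_bswap4 is an assumed name; __builtin_bswap32 is GCC/Clang-specific):

#include <stdint.h>
#include <string.h>

/* Load four message words and byte-swap each dword; the pshufb with
 * BYTE_FLIP_MASK does the same for a whole xmm register at once. */
static void load_bswap4(uint32_t w[4], const unsigned char *inp)
{
        for (int i = 0; i < 4; i++) {
                uint32_t v;
                memcpy(&v, inp + 4 * i, sizeof(v));
                w[i] = __builtin_bswap32(v);
        }
}
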
429 mov $2, SRND
435 DO_ROUND 2
439 add $2*16, TBL
442 DO_ROUND 2
453 addm (4*2)(CTX),c
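
The addm lines fold the working variables back into the digest in CTX and store the sums, the usual feed-forward at the end of each block; (4*2)(CTX) is state word 2, i.e. c. As plain C (feed_forward is an assumed name):

#include <stdint.h>

/* state[] is the digest in CTX; s[] holds the working vars a..h */
static void feed_forward(uint32_t state[8], const uint32_t s[8])
{
        for (int i = 0; i < 8; i++)
                state[i] += s[i];
}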