Lines Matching +full:2 +full:w
13 # General Public License (GPL) Version 2, available from the file
48 # This code schedules 2 blocks at a time, with 4 lanes per block
94 INP = %rsi # 2nd arg
116 _XFER_SIZE = 2*64*4 # 2 blocks, 64 rounds, 4 bytes/round
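
The _XFER_SIZE line sizes a stack scratch area: 2 blocks x 64 rounds x 4 bytes/round = 512 bytes. Each 4-byte slot holds a precomputed K[t] + W[t] value (spilled by the vpaddd/vmovdqa pairs further down), so the round code folds both the constant and the message word into h with a single addl from the stack. A minimal C sketch of that idea; xfer and precompute_xfer are illustrative names, not from the kernel source:

    #include <stdint.h>

    #define XFER_SIZE (2 * 64 * 4)  /* 2 blocks x 64 rounds x 4 bytes = 512 */

    /* Precompute k[t] + w[t] once per block so each round needs only
     * one add against its stack slot (the "h = k + w + h" comments). */
    static void precompute_xfer(uint32_t xfer[64], const uint32_t k[64],
                                const uint32_t w[64])
    {
        for (int t = 0; t < 64; t++)
            xfer[t] = k[t] + w[t];
    }
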
161 addl \disp(%rsp, SRND), h # h = k + w + h # --
163 vpalignr $4, X2, X3, XTMP0 # XTMP0 = W[-7]
169 vpaddd X0, XTMP0, XTMP0 # XTMP0 = W[-7] + W[-16] # y1 = (e >> 6) # S1
175 add h, d # d = k + w + h + d # --
178 vpalignr $4, X0, X1, XTMP1 # XTMP1 = W[-15]
180 rorx $2, a, T1 # T1 = (a >> 2) # S0
184 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
191 add y1, h # h = k + w + h + S0 # --
193 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
194 vpor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] ror 7
197 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
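
The interleaved comments above trace the standard SHA-256 round: t1 = h + S1(e) + Ch(e,f,g) + k + w, then d += t1 and h = t1 + S0(a), with the Maj(a,b,c) term folded into h separately via the y3/old_h additions. Note that ">>" in the rorx comments is shorthand for a 32-bit rotate (rorx rotates, it does not shift). A scalar C sketch of one round under the usual FIPS 180-4 definitions; sha256_round and ror32 are illustrative names:

    #include <stdint.h>

    static inline uint32_t ror32(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32 - n));
    }

    /* One round, given the precomputed k[t] + w[t] value. */
    static void sha256_round(uint32_t s[8], uint32_t k_plus_w)
    {
        uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
        uint32_t e = s[4], f = s[5], g = s[6], h = s[7];

        uint32_t S1  = ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);
        uint32_t ch  = (e & f) ^ (~e & g);
        uint32_t t1  = h + S1 + ch + k_plus_w;   /* k + w + h + S1 + CH */
        uint32_t S0  = ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22);
        uint32_t maj = (a & b) ^ (a & c) ^ (b & c);

        s[7] = g; s[6] = f; s[5] = e;
        s[4] = d + t1;                           /* d = d + t1          */
        s[3] = c; s[2] = b; s[1] = a;
        s[0] = t1 + S0 + maj;                    /* h = t1 + S0 + MAJ   */
    }
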
209 addl offset(%rsp, SRND), h # h = k + w + h # --
213 vpsrld $3, XTMP1, XTMP4 # XTMP4 = W[-15] >> 3
224 add h, d # d = k + w + h + d # --
231 rorx $2, a, T1 # T1 = (a >> 2) # S0
234 vpxor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] ror 7 ^ W[-15] ror 18
235 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
241 vpshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
243 add y1, h # h = k + w + h + S0 # --
245 vpaddd XTMP1, XTMP0, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
246 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
247 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
250 vpsrld $10, XTMP2, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
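
The vector sequence above (vpalignr, vpslld/vpsrld/vpor, vpsrld $3, vpxor) computes sigma0(W[-15]) = ror(x, 7) ^ ror(x, 18) ^ (x >> 3) in each 32-bit lane, and the vpaddd folds it into W[-16] + W[-7]. The same step in scalar C, as a sketch (sigma0 and ror32 are illustrative names):

    #include <stdint.h>

    static inline uint32_t ror32(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32 - n));
    }

    /* sigma0: what the vpor/vpxor chain builds per 32-bit lane. */
    static inline uint32_t sigma0(uint32_t x)
    {
        return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
    }
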
255 ################################### RND N + 2 ############################
259 offset = \disp + 2*4
260 addl offset(%rsp, SRND), h # h = k + w + h # --
262 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] ror 19 {xBxA}
270 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] ror 17 {xBxA}
275 add h, d # d = k + w + h + d # --
285 rorx $2, a, T1 # T1 = (a >> 2) # S0
286 vpaddd XTMP4, XTMP0, XTMP0 # XTMP0 = {..., ..., W[1], W[0]}
288 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
292 vpshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
295 add y1, h # h = k + w + h + S0 # --
296 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
297 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
310 addl offset(%rsp, SRND), h # h = k + w + h # --
314 vpsrld $10, XTMP2, XTMP5 # XTMP5 = W[-2] >> 10 {DDCC}
321 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] ror 19 {xDxC}
324 add h, d # d = k + w + h + d # --
327 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] ror 17 {xDxC}
337 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
339 rorx $2, a, T1 # T1 = (a >> 2) # S0
342 vpaddd XTMP0, XTMP5, X0 # X0 = {W[3], W[2], W[1], W[0]}
343 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
348 add y1, h # h = k + w + h + S0 # --
349 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
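
The vpsrlq $17/$19 pairs emulate 32-bit rotates: the preceding vpshufd duplicates each W[-2] dword within a 64-bit lane ({BBAA}, {DDCC}), so a 64-bit right shift wraps the high copy's bits into the low dword, leaving exactly ror32(x, 17) or ror32(x, 19) there; the {xBxA}/{xDxC} high dwords are garbage and are discarded by the final shuffles. Combined with vpsrld $10 this is sigma1, completing the schedule recurrence that each macro invocation advances by four W values. A scalar sketch of both the trick and the recurrence (all names illustrative):

    #include <stdint.h>

    static inline uint32_t ror32(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32 - n));
    }

    /* The vpsrlq trick in scalar form: duplicate x into both halves of
     * a qword, then a 64-bit shift right acts as a 32-bit rotate in the
     * low dword. */
    static inline uint32_t ror_via_qword_shift(uint32_t x, unsigned n)
    {
        uint64_t q = ((uint64_t)x << 32) | x;  /* {AA} duplicated layout */
        return (uint32_t)(q >> n);             /* == ror32(x, n), n < 32 */
    }

    static inline uint32_t sigma0(uint32_t x)  /* ror 7 ^ ror 18 ^ >> 3  */
    {
        return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
    }

    static inline uint32_t sigma1(uint32_t x)  /* ror 17 ^ ror 19 ^ >> 10 */
    {
        return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
    }

    /* Each FOUR_ROUNDS_AND_SCHED leaves X0 = {W[t+3], W[t+2], W[t+1], W[t]}. */
    static void sha256_schedule(uint32_t w[64])
    {
        for (int t = 16; t < 64; t++)
            w[t] = sigma1(w[t - 2]) + w[t - 7]
                 + sigma0(w[t - 15]) + w[t - 16];
    }
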
375 rorx $2, a, T1 # T1 = (a >> 2) # S0
376 addl \disp(%rsp, SRND), h # h = k + w + h # --
379 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
386 add h, d # d = k + w + h + d # --
388 add y1, h # h = k + w + h + S0 # --
389 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
395 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
413 rorx $2, a, T1 # T1 = (a >> 2) # S0
415 addl offset(%rsp, SRND), h # h = k + w + h # --
418 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
425 add h, d # d = k + w + h + d # --
427 add y1, h # h = k + w + h + S0 # --
429 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
433 ################################### RND N + 2 ##############################
435 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
453 rorx $2, a, T1 # T1 = (a >> 2) # S0
454 offset = 4*2 + \disp
455 addl offset(%rsp, SRND), h # h = k + w + h # --
458 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
465 add h, d # d = k + w + h + d # --
467 add y1, h # h = k + w + h + S0 # --
469 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
475 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
493 rorx $2, a, T1 # T1 = (a >> 2) # S0
495 addl offset(%rsp, SRND), h # h = k + w + h # --
498 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
505 add h, d # d = k + w + h + d # --
507 add y1, h # h = k + w + h + S0 # --
509 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
512 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
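
DO_4ROUNDS is the schedule-free variant: the first 48 rounds run FOUR_ROUNDS_AND_SCHED to extend W on the fly, while the last 16 only consume the precomputed k + w slots, which is why these round bodies carry no vector instructions. In scalar form the split disappears once w[] is fully scheduled; a sketch of the resulting compression shape, reusing the illustrative sha256_round from the round sketch above:

    #include <stdint.h>

    void sha256_round(uint32_t s[8], uint32_t k_plus_w); /* sketch above */

    static void sha256_compress(uint32_t state[8], const uint32_t xfer[64])
    {
        uint32_t s[8];
        for (int i = 0; i < 8; i++)
            s[i] = state[i];             /* working copy of a..h       */

        for (int t = 0; t < 64; t++)     /* 48 scheduled + 16 plain    */
            sha256_round(s, xfer[t]);    /* xfer[t] = k[t] + w[t]      */

        for (int i = 0; i < 8; i++)      /* feed-forward: the addm ops */
            state[i] += s[i];
    }
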
523 ## arg 2 : pointer to input data
551 mov 4*2(CTX), c
568 VMOVDQ 2*32(INP), XTMP2
602 leaq K256+2*32(%rip), INP
604 vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
605 FOUR_ROUNDS_AND_SCHED (_XFER + 2*32)
627 add $2*32, SRND
640 addm (4*2)(CTX), c
656 add $2*32, SRND
666 addm (4*2)(CTX), c
680 VMOVDQ 2*16(INP), XWORD2
695 mov (4*2)(CTX), c
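
The addm lines implement the feed-forward: each working variable is added back into its state word in the digest context (byte offset 4*2 = 8 for c), and the trailing mov reloads it for the next block. The add-then-store-back pattern in C terms (addm here is an illustrative helper mirroring the macro, not the macro itself):

    #include <stdint.h>

    /* Mirrors "addm mem, reg": reg += mem, then mem = reg. */
    static inline void addm(uint32_t *mem, uint32_t *reg)
    {
        *reg += *mem;   /* fold the old state word into the register */
        *mem = *reg;    /* store the updated word back, e.g.
                           addm(&state[2], &c) for the lines above   */
    }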