Lines Matching +full:16 +full:- +full:17

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2024- IBM Inc.
22 # Hash keys = v3 - v14
29 # v31 - counter 1
32 # vs0 - round key 0
35 # This implementation uses a stitched AES-GCM approach to improve overall performance.
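"Stitched" here means the AES-CTR rounds for the current group of blocks are interleaved with the GHASH multiplications for the previous group, so neither the cipher nor the hash pipeline sits idle. A minimal C sketch of that schedule, assuming hypothetical aes_ctr_8blocks() and ghash_8blocks() helpers in place of the unrolled vcipher and vpmsumd sequences:

    /* Stitched main loop (sketch): encrypt the current 8 counter blocks
     * while folding the previous 8 ciphertext blocks into the digest. */
    while (len >= 128) {
            aes_ctr_8blocks(out, in, rk, ctr);   /* AES on blocks i..i+7     */
            ghash_8blocks(Xi, prev_ct, Hpow);    /* GHASH on blocks i-8..i-1 */
            prev_ct = out;
            in += 128; out += 128; ctr += 8;
            len -= 128;
    }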
65 std 0, 16(1)
66 stdu 1,-512(1)
70 SAVE_GPR 16, 128, 1
71 SAVE_GPR 17, 136, 1
82 SAVE_VRS 21, 16, 9
98 RESTORE_VRS 21, 16, 9
112 RESTORE_GPR 16, 128, 1
113 RESTORE_GPR 17, 136, 1
123 ld 0, 16(1)
175 # Hash keys = v3 - v14
176 # Scratch: v23 - v29
263 # Hash keys = v3 - v14
269 # load Hash - h^4, h^3, h^2, h
310 cmpdi 5, 16
314 li 10, 16
319 # Pre-load 8 AES rounds to scratch vectors.
320 xxlor 32+16, 1, 1
321 xxlor 32+17, 2, 2
329 addi 22, 23, -9 # remaining AES rounds
338 vcipher 15, 15, 16
339 vcipher 15, 15, 17
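The round keys are split: eight rounds run from pre-loaded scratch registers, the key-size-dependent remainder (rounds - 9, i.e. 1/3/5 iterations for AES-128/192/256) runs in a short loop, and vcipherlast finishes. A C sketch of that schedule; block128, aes_round() and aes_last_round() are hypothetical stand-ins for a 128-bit state and one vcipher/vcipherlast step:

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } block128;
    extern block128 aes_round(block128 st, block128 rk);      /* vcipher     */
    extern block128 aes_last_round(block128 st, block128 rk); /* vcipherlast */

    static block128 aes_encrypt(block128 st, const block128 *rk, int rounds)
    {
            st.hi ^= rk[0].hi;                   /* add round key 0      */
            st.lo ^= rk[0].lo;
            for (int r = 1; r <= 8; r++)         /* pre-loaded scratch   */
                    st = aes_round(st, rk[r]);
            for (int r = 9; r < rounds; r++)     /* 1/3/5 middle rounds  */
                    st = aes_round(st, rk[r]);
            return aes_last_round(st, rk[rounds]);
    }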
350 addi 10, 10, 16
358 addi 14, 14, 16
359 addi 9, 9, 16
368 addi 5, 5, -16
369 addi 11, 11, 16
373 addi 12, 12, -1
393 vspltisb 16, -1
394 li 12, 16
397 mtvsrdd 32+17, 0, 12
398 vslo 16, 16, 17 # partial block mask
401 xxland 11, 11, 32+16
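The vspltisb/vslo pair builds an all-ones vector and byte-shifts it so only the leading lanes stay set; the xxland then clears whatever was read past the end of the partial block. Scalar equivalent, a sketch with assumed names block/plen:

    #include <stdint.h>

    /* Zero every byte past a partial block of plen (< 16) bytes, the
     * scalar analogue of AND-ing with the shifted all-ones mask. */
    static void mask_partial(uint8_t block[16], unsigned plen)
    {
            for (unsigned i = plen; i < 16; i++)
                    block[i] = 0;
    }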
406 addi 22, 23, -1 # loop - 1
408 addi 10, 6, 16
413 addi 10, 10, 16
419 vand 15, 15, 16
426 addi 12, 9, -1
462 # - Compute ghash for a full block. Clear Partial_len and pblock. Update IV.
464 # - Don't compute ghash if not a full block. gcm_update will take care of it.
473 li 17, 0
474 li 16, 16
475 sub 22, 16, 12 # bytes to complete a block
476 sub 17, 22, 5 # remaining bytes in a block
477 cmpdi 5, 16
479 li 17, 0
485 li 17, 0
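The two subtractions are the partial-block bookkeeping: with plen bytes (r12) already buffered, 16 - plen more would complete the block, and whatever the caller's len cannot cover stays outstanding. In C terms (names assumed; the cmpdi above selects the len >= 16 path separately):

    unsigned need = 16 - plen;                      /* bytes to complete the block */
    unsigned rem  = (len < need) ? need - len : 0;  /* still missing afterwards    */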
490 vspltisb 16, -1
492 mtvsrdd 32+17, 0, 15
493 vslo 16, 16, 17
494 vsro 16, 16, 17
495 sldi 15, 17, 3
496 mtvsrdd 32+17, 0, 15
497 vsro 16, 16, 17
498 vslo 16, 16, 17 # mask
502 mtvsrdd 32+17, 0, 15
503 vsro 19, 19, 17 # 0x00..xxxx??..??
504 sldi 15, 17, 3
505 mtvsrdd 32+17, 0, 15
506 vsro 19, 19, 17 # 0x00..xxxx
507 vslo 19, 19, 17 # shift back to form 0x00..xxxx00..00
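The vsro/vslo pair (with the sldi converting the byte count to bits for the second shift amount) first shifts right to drop the trailing don't-care bytes, then shifts left to put the surviving field back at its offset, zero-filling both ends. A scalar sketch of the same isolation, with assumed names lo/n for the field position and width:

    #include <stdint.h>
    #include <string.h>

    /* Keep only bytes [lo, lo + n) of a 16-byte block, zeroing the rest,
     * as the shift-right-then-shift-left pair does in vector form. */
    static void keep_field(uint8_t blk[16], unsigned lo, unsigned n)
    {
            uint8_t tmp[16] = { 0 };

            memcpy(tmp + lo, blk + lo, n);
            memcpy(blk, tmp, 16);
    }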
512 addi 22, 23, -1 # loop - 1
514 addi 10, 6, 16
519 addi 10, 10, 16
525 vand 15, 15, 16
533 addi 15, 9, -1
555 lxvb16x 32+17, 10, 7 # load previous pblock
557 vxor 15, 15, 17 # combined pblock
559 cmpdi 12, 16
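Since the saved pblock and the freshly masked input occupy disjoint byte ranges, a plain vxor merges them, and the cmpdi checks whether the accumulated length has reached a full 16 bytes. Sketch with assumed names; ghash_update() is a hypothetical helper:

    /* Merge new bytes into the buffered partial block; hash once full. */
    for (int i = 0; i < 16; i++)
            pblock[i] ^= masked_in[i];   /* disjoint ranges, so XOR = merge */
    plen += n;
    if (plen == 16) {
            ghash_update(Xi, pblock, H); /* full block: fold into digest */
            plen = 0;
            memset(pblock, 0, 16);
    }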
581 # gcm_update(iv, Xi) - compute last hash
591 # load Hash - h^4, h^3, h^2, h
613 vpmsumd 17, 12, 2 # reduction
615 vsldoi 16, 1, 13, 8 # mH
617 vxor 14, 14, 16 # HH + HH
619 vpermxor 12, 12, 17, 15
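The vpmsumd instructions produce the carry-less partial products and the vpermxor folds in the reduction by the GHASH polynomial. For reference, the bitwise form of the same GF(2^128) multiply as specified in NIST SP 800-38D (the in-register layout in the assembly differs, but the math is this):

    #include <stdint.h>

    /* Bitwise GHASH multiply, x := x * h in GF(2^128) with the GCM
     * reduction polynomial R = 0xE1 || 0^120; word [0] holds the most
     * significant 64 bits in the spec's bit ordering. */
    static void gfmul(uint64_t x[2], const uint64_t h[2])
    {
            uint64_t z0 = 0, z1 = 0;
            uint64_t v0 = h[0], v1 = h[1];

            for (int i = 0; i < 128; i++) {
                    uint64_t bit = (i < 64) ? (x[0] >> (63 - i)) & 1
                                            : (x[1] >> (127 - i)) & 1;
                    if (bit) {
                            z0 ^= v0;
                            z1 ^= v1;
                    }
                    uint64_t lsb = v1 & 1;              /* V := V >> 1 */
                    v1 = (v1 >> 1) | (v0 << 63);
                    v0 >>= 1;
                    if (lsb)
                            v0 ^= 0xe100000000000000ULL;  /* ... ^ R   */
            }
            x[0] = z0;
            x[1] = z1;
    }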
637 # const char *rk, unsigned char iv[16], void *Xip);
639 # r3 - inp
640 # r4 - out
641 # r5 - len
642 # r6 - AES round keys
643 # r7 - iv and other data
644 # r8 - Xi, HPoli, hash keys
658 # initialize ICB: GHASH( IV ), IV - r7
659 lxvb16x 30+32, 0, 7 # load IV - v30
676 lxv 1, 16(6) # round key 1
685 # load rounds - 10 (128), 12 (192), 14 (256)
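The loaded round count follows the AES key size, e.g.:

    /* AES-128/192/256 use 10/12/14 rounds respectively. */
    static int aes_rounds(int keybits)
    {
            return 10 + 2 * ((keybits - 128) / 64);
    }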
705 divdu 12, 5, 10 # n 128-byte blocks
707 addi 12, 12, -1 # loop - 1
710 vadduwm 16, 15, 31 # state + counter
711 vadduwm 17, 16, 31
712 vadduwm 18, 17, 31
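v31 holds an "add one to the counter word" vector, so each vadduwm derives the next counter block from the previous one. GCM's inc32 touches only the last 32-bit word of the counter block, and vadduwm's per-word add (no carry across words) matches that exactly. Scalar form:

    #include <stdint.h>

    /* inc32: bump the low 32-bit word of the 128-bit counter block,
     * wrapping within that word only, per the GCM spec. */
    static void inc32(uint8_t ctr[16])
    {
            for (int i = 15; i >= 12; i--)
                    if (++ctr[i] != 0)
                            break;   /* stop once a byte doesn't wrap */
    }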
721 vxor 15, 15, 29 # IV + round key - add round key 0
722 vxor 16, 16, 29
723 vxor 17, 17, 29
730 li 15, 16
731 li 16, 32
732 li 17, 48
739 # Pre-compute the first 8 AES states and leave 1/3/5 more rounds
742 addi 22, 23, -9 # process 8 keys
751 addi 10, 10, 16
763 vcipherlast 16, 16, 1
764 vcipherlast 17, 17, 1
773 lxvb16x 32+25, 16, 14 # load block
774 lxvb16x 32+26, 17, 14 # load block
782 vxor 16, 16, 24
783 vxor 17, 17, 25
792 stxvb16x 49, 16, 9 # store output
793 stxvb16x 50, 17, 9 # store output
802 PPC_GHASH4x 0, 15, 16, 17, 18
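PPC_GHASH4x folds four blocks into the digest at once using the precomputed powers H^4..H, exploiting ((((Xi^C1)H ^ C2)H ^ C3)H ^ C4)H = (Xi^C1)H^4 ^ C2*H^3 ^ C3*H^2 ^ C4*H so the four multiplies are independent. A sketch building on gfmul() above; the macro additionally defers the reduction across the four products, but the result is the same:

    /* 4-way aggregated GHASH using Hpow[0..3] = H^1..H^4. */
    static void ghash4(uint64_t Xi[2], const uint64_t c[4][2],
                       const uint64_t Hpow[4][2])
    {
            uint64_t acc[2] = { Xi[0] ^ c[0][0], Xi[1] ^ c[0][1] };
            uint64_t t[2];

            gfmul(acc, Hpow[3]);                 /* (Xi ^ C1) * H^4 */
            for (int i = 1; i < 4; i++) {
                    t[0] = c[i][0];
                    t[1] = c[i][1];
                    gfmul(t, Hpow[3 - i]);       /* Ci * H^(4-i)    */
                    acc[0] ^= t[0];
                    acc[1] ^= t[1];
            }
            Xi[0] = acc[0];
            Xi[1] = acc[1];
    }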
809 vadduwm 16, 15, 31
810 vadduwm 17, 16, 31
811 vadduwm 18, 17, 31
819 vxor 15, 15, 27 # IV + round key - add round key 0
820 vxor 16, 16, 27
821 vxor 17, 17, 27
828 addi 5, 5, -128
837 addi 10, 10, 16
841 addi 12, 12, -1
847 vcipherlast 16, 16, 1
848 vcipherlast 17, 17, 1
857 lxvb16x 32+25, 16, 14 # load block
858 lxvb16x 32+26, 17, 14 # load block
866 vxor 16, 16, 24
867 vxor 17, 17, 25
876 stxvb16x 49, 16, 9 # store output
877 stxvb16x 50, 17, 9 # store output
885 PPC_GHASH4x 0, 15, 16, 17, 18
895 addi 5, 5, -128
924 # const char *rk, unsigned char iv[16], void *Xip);
936 # initialize ICB: GHASH( IV ), IV - r7
937 lxvb16x 30+32, 0, 7 # load IV - v30
954 lxv 1, 16(6) # round key 1
963 # load rounds - 10 (128), 12 (192), 14 (256)
983 divdu 12, 5, 10 # n 128-byte blocks
985 addi 12, 12, -1 # loop - 1
988 vadduwm 16, 15, 31 # state + counter
989 vadduwm 17, 16, 31
990 vadduwm 18, 17, 31
999 vxor 15, 15, 29 # IV + round key - add round key 0
1000 vxor 16, 16, 29
1001 vxor 17, 17, 29
1008 li 15, 16
1009 li 16, 32
1010 li 17, 48
1017 # Pre-compute the first 8 AES states and leave 1/3/5 more rounds
1020 addi 22, 23, -9 # process 8 keys
1029 addi 10, 10, 16
1041 vcipherlast 16, 16, 1
1042 vcipherlast 17, 17, 1
1051 lxvb16x 32+25, 16, 14 # load block
1052 lxvb16x 32+26, 17, 14 # load block
1060 vxor 16, 16, 24
1061 vxor 17, 17, 25
1070 stxvb16x 49, 16, 9 # store output
1071 stxvb16x 50, 17, 9 # store output
1080 vmr 16, 24
1081 vmr 17, 25
1090 PPC_GHASH4x 0, 15, 16, 17, 18
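The decrypt path is the same stitched loop with one difference, visible in the vmr copies above: GHASH always runs over the ciphertext, so the loaded input blocks are moved back into the hash registers instead of the XOR output. In effect (names assumed):

    /* GHASH covers the ciphertext on both sides of the channel. */
    const uint8_t *hash_src = decrypting ? ct_in : ct_out;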
1097 vadduwm 16, 15, 31
1098 vadduwm 17, 16, 31
1099 vadduwm 18, 17, 31
1107 vxor 15, 15, 27 # IV + round key - add round key 0
1108 vxor 16, 16, 27
1109 vxor 17, 17, 27
1116 addi 5, 5, -128
1125 addi 10, 10, 16
1129 addi 12, 12, -1
1135 vcipherlast 16, 16, 1
1136 vcipherlast 17, 17, 1
1145 lxvb16x 32+25, 16, 14 # load block
1146 lxvb16x 32+26, 17, 14 # load block
1154 vxor 16, 16, 24
1155 vxor 17, 17, 25
1164 stxvb16x 49, 16, 9 # store output
1165 stxvb16x 50, 17, 9 # store output
1174 vmr 16, 24
1175 vmr 17, 25
1183 PPC_GHASH4x 0, 15, 16, 17, 18
1193 addi 5, 5, -128