Lines Matching +full:2 +full:- +full:9

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2024- IBM Inc.
14 # X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
22 # Hash keys = v3 - v14
24 # ( H^2.l, H^2, H^2.h)
29 # v31 - counter 1
32 # vs0 - round key 0
35 # This implementation uses stitched AES-GCM approach to improve overall performance.
36 # AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
66 stdu 1,-512(1)
80 addi 9, 1, 256
81 SAVE_VRS 20, 0, 9
82 SAVE_VRS 21, 16, 9
83 SAVE_VRS 22, 32, 9
84 SAVE_VRS 23, 48, 9
85 SAVE_VRS 24, 64, 9
86 SAVE_VRS 25, 80, 9
87 SAVE_VRS 26, 96, 9
88 SAVE_VRS 27, 112, 9
89 SAVE_VRS 28, 128, 9
90 SAVE_VRS 29, 144, 9
91 SAVE_VRS 30, 160, 9
92 SAVE_VRS 31, 176, 9
96 addi 9, 1, 256
97 RESTORE_VRS 20, 0, 9
98 RESTORE_VRS 21, 16, 9
99 RESTORE_VRS 22, 32, 9
100 RESTORE_VRS 23, 48, 9
101 RESTORE_VRS 24, 64, 9
102 RESTORE_VRS 25, 80, 9
103 RESTORE_VRS 26, 96, 9
104 RESTORE_VRS 27, 112, 9
105 RESTORE_VRS 28, 128, 9
106 RESTORE_VRS 29, 144, 9
107 RESTORE_VRS 30, 160, 9
108 RESTORE_VRS 31, 176, 9
131 \_VCIPHER \ST+2, \ST+2, \r
139 \_VCIPHER \ST+2, \ST+2, \r
149 xxlor 32+24, 2, 2
175 # Hash keys = v3 - v14
176 # Scratch: v23 - v29
181 vpmsumd 24, 9, \S2
200 vpmsumd 28, 23, 2 # reduction
226 vpmsumd 23, 23, 2
243 vpmsumd 27, 22, 2 # reduction
254 vpmsumd 22, 22, 2 # reduction
263 # Hash keys = v3 - v14
269 # load Hash - h^4, h^3, h^2, h
271 lxvd2x 2+32, 10, 8 # H Poli
280 lxvd2x 6+32, 10, 8 # H^2l
282 lxvd2x 7+32, 10, 8 # H^2
284 lxvd2x 8+32, 10, 8 # H^2h
287 lxvd2x 9+32, 10, 8 # H^3l
319 # Pre-load 8 AES rounds to scratch vectors.
321 xxlor 32+17, 2, 2
329 addi 22, 23, -9 # remaining AES rounds
357 stxvb16x 32+15, 0, 9 # store output
359 addi 9, 9, 16
368 addi 5, 5, -16
373 addi 12, 12, -1
393 vspltisb 16, -1
406 addi 22, 23, -1 # loop - 1
426 addi 12, 9, -1
441 add 9, 9, 5
462 # - Compute ghash for a full block. Clear Partial_len and pblock. Update IV.
464 # - Don't compute ghash if not full block. gcm_update will take care of it
490 vspltisb 16, -1
512 addi 22, 23, -1 # loop - 1
533 addi 15, 9, -1
542 add 9, 9, 21
581 # gcm_update(iv, Xi) - compute last hash
591 # load Hash - h^4, h^3, h^2, h
593 lxvd2x 2+32, 10, 4 # H Poli
601 addis 11, 2, permx@toc@ha
605 li 9, 64
606 lxvb16x 32+6, 9, 3 # load pblock
613 vpmsumd 17, 12, 2 # reduction
621 vpmsumd 12, 12, 2 # reduction
626 #stxvb16x 32+0, 9, 3
639 # r3 - inp
640 # r4 - out
641 # r5 - len
642 # r6 - AES round keys
643 # r7 - iv and other data
644 # r8 - Xi, HPoli, hash keys
658 # initialize ICB: GHASH( IV ), IV - r7
659 lxvb16x 30+32, 0, 7 # load IV - v30
662 mr 9, 4
669 addis 11, 2, permx@toc@ha
674 # load 9 round keys to VSR
677 lxv 2, 32(6) # round key 2
685 # load rounds - 10 (128), 12 (192), 14 (256)
705 divdu 12, 5, 10 # n 128 bytes-blocks
707 addi 12, 12, -1 # loop - 1
717 xxlor 9, 32+22, 32+22 # save last state
721 vxor 15, 15, 29 # IV + round key - add round key 0
739 # Pre-compute first 8 AES state and leave 1/3/5 more rounds
742 addi 22, 23, -9 # process 8 keys
790 stxvb16x 47, 0, 9 # store output
791 stxvb16x 48, 15, 9 # store output
792 stxvb16x 49, 16, 9 # store output
793 stxvb16x 50, 17, 9 # store output
794 stxvb16x 51, 18, 9 # store output
795 stxvb16x 52, 19, 9 # store output
796 stxvb16x 53, 20, 9 # store output
797 stxvb16x 54, 21, 9 # store output
798 addi 9, 9, 128
807 xxlor 32+15, 9, 9 # last state
816 xxlor 9, 32+22, 32+22 # save last state
819 vxor 15, 15, 27 # IV + round key - add round key 0
828 addi 5, 5, -128
841 addi 12, 12, -1
874 stxvb16x 47, 0, 9 # store output
875 stxvb16x 48, 15, 9 # store output
876 stxvb16x 49, 16, 9 # store output
877 stxvb16x 50, 17, 9 # store output
878 stxvb16x 51, 18, 9 # store output
879 stxvb16x 52, 19, 9 # store output
880 stxvb16x 53, 20, 9 # store output
881 stxvb16x 54, 21, 9 # store output
882 addi 9, 9, 128
890 xxlor 30+32, 9, 9 # last ctr
895 addi 5, 5, -128
936 # initialize ICB: GHASH( IV ), IV - r7
937 lxvb16x 30+32, 0, 7 # load IV - v30
940 mr 9, 4
947 addis 11, 2, permx@toc@ha
952 # load 9 round keys to VSR
955 lxv 2, 32(6) # round key 2
963 # load rounds - 10 (128), 12 (192), 14 (256)
983 divdu 12, 5, 10 # n 128 bytes-blocks
985 addi 12, 12, -1 # loop - 1
995 xxlor 9, 32+22, 32+22 # save last state
999 vxor 15, 15, 29 # IV + round key - add round key 0
1017 # Pre-compute first 8 AES state and leave 1/3/5 more rounds
1020 addi 22, 23, -9 # process 8 keys
1068 stxvb16x 47, 0, 9 # store output
1069 stxvb16x 48, 15, 9 # store output
1070 stxvb16x 49, 16, 9 # store output
1071 stxvb16x 50, 17, 9 # store output
1072 stxvb16x 51, 18, 9 # store output
1073 stxvb16x 52, 19, 9 # store output
1074 stxvb16x 53, 20, 9 # store output
1075 stxvb16x 54, 21, 9 # store output
1077 addi 9, 9, 128
1095 xxlor 32+15, 9, 9 # last state
1104 xxlor 9, 32+22, 32+22 # save last state
1107 vxor 15, 15, 27 # IV + round key - add round key 0
1116 addi 5, 5, -128
1129 addi 12, 12, -1
1162 stxvb16x 47, 0, 9 # store output
1163 stxvb16x 48, 15, 9 # store output
1164 stxvb16x 49, 16, 9 # store output
1165 stxvb16x 50, 17, 9 # store output
1166 stxvb16x 51, 18, 9 # store output
1167 stxvb16x 52, 19, 9 # store output
1168 stxvb16x 53, 20, 9 # store output
1169 stxvb16x 54, 21, 9 # store output
1170 addi 9, 9, 128
1188 xxlor 30+32, 9, 9 # last ctr
1193 addi 5, 5, -128