Lines Matching +full:8 +full:- +full:9
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2024- IBM Inc.
22 # Hash keys = v3 - v14
29 # v31 - counter 1
32 # vs0 - round key 0
33 # v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
35 # This implementation uses stitched AES-GCM approach to improve overall performance.
36 # AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
66 stdu 1,-512(1)
80 addi 9, 1, 256
81 SAVE_VRS 20, 0, 9
82 SAVE_VRS 21, 16, 9
83 SAVE_VRS 22, 32, 9
84 SAVE_VRS 23, 48, 9
85 SAVE_VRS 24, 64, 9
86 SAVE_VRS 25, 80, 9
87 SAVE_VRS 26, 96, 9
88 SAVE_VRS 27, 112, 9
89 SAVE_VRS 28, 128, 9
90 SAVE_VRS 29, 144, 9
91 SAVE_VRS 30, 160, 9
92 SAVE_VRS 31, 176, 9
96 addi 9, 1, 256
97 RESTORE_VRS 20, 0, 9
98 RESTORE_VRS 21, 16, 9
99 RESTORE_VRS 22, 32, 9
100 RESTORE_VRS 23, 48, 9
101 RESTORE_VRS 24, 64, 9
102 RESTORE_VRS 25, 80, 9
103 RESTORE_VRS 26, 96, 9
104 RESTORE_VRS 27, 112, 9
105 RESTORE_VRS 28, 128, 9
106 RESTORE_VRS 29, 144, 9
107 RESTORE_VRS 30, 160, 9
108 RESTORE_VRS 31, 176, 9
135 # 8x loops
159 xxlor 32+26, 8, 8
175 # Hash keys = v3 - v14
176 # Scratch: v23 - v29
181 vpmsumd 24, 9, \S2
203 vsldoi 25, 24, 1, 8 # mL
204 vsldoi 1, 1, 24, 8 # mH
208 # vsldoi 23, 23, 23, 8 # swap
215 vpmsumd 28, 8, \S3
225 vsldoi 25, 23, 23, 8 # swap
245 vsldoi 25, 23, 1, 8 # mL
246 vsldoi 26, 1, 23, 8 # mH
253 vsldoi 23, 22, 22, 8 # swap
263 # Hash keys = v3 - v14
267 lxvb16x 32, 0, 8 # load Xi
269 # load Hash - h^4, h^3, h^2, h
271 lxvd2x 2+32, 10, 8 # H Poli
273 lxvd2x 3+32, 10, 8 # Hl
275 lxvd2x 4+32, 10, 8 # H
277 lxvd2x 5+32, 10, 8 # Hh
280 lxvd2x 6+32, 10, 8 # H^2l
282 lxvd2x 7+32, 10, 8 # H^2
284 lxvd2x 8+32, 10, 8 # H^2h
287 lxvd2x 9+32, 10, 8 # H^3l
289 lxvd2x 10+32, 10, 8 # H^3
291 lxvd2x 11+32, 10, 8 # H^3h
294 lxvd2x 12+32, 10, 8 # H^4l
296 lxvd2x 13+32, 10, 8 # H^4
298 lxvd2x 14+32, 10, 8 # H^4h
319 # Pre-load 8 AES rounds to scratch vectors.
327 xxlor 32+29, 8, 8
329 addi 22, 23, -9 # remaing AES rounds
357 stxvb16x 32+15, 0, 9 # store output
359 addi 9, 9, 16
368 addi 5, 5, -16
373 addi 12, 12, -1
378 stxvb16x 32+0, 0, 8 # update Xi
393 vspltisb 16, -1
406 addi 22, 23, -1 # loop - 1
426 addi 12, 9, -1
441 add 9, 9, 5
452 stxvb16x 32+0, 0, 8 # Update X1
462 # - Compute ghash for a full block. Clear Partial_len and pblock. Update IV.
464 # - Don't compute ghash if not full block. gcm_update will take care of it
490 vspltisb 16, -1
512 addi 22, 23, -1 # loop - 1
533 addi 15, 9, -1
542 add 9, 9, 21
576 stxvb16x 32+0, 0, 8 # update Xi
581 # gcm_update(iv, Xi) - compute last hash
591 # load Hash - h^4, h^3, h^2, h
605 li 9, 64
606 lxvb16x 32+6, 9, 3 # load pblock
614 vsldoi 15, 13, 1, 8 # mL
615 vsldoi 16, 1, 13, 8 # mH
620 vsldoi 13, 12, 12, 8 # swap
626 #stxvb16x 32+0, 9, 3
639 # r3 - inp
640 # r4 - out
641 # r5 - len
642 # r6 - AES round keys
643 # r7 - iv and other data
644 # r8 - Xi, HPoli, hash keys
658 # initialize ICB: GHASH( IV ), IV - r7
659 lxvb16x 30+32, 0, 7 # load IV - v30
662 mr 9, 4
674 # load 9 round keys to VSR
683 lxv 8, 128(6) # round key 8
685 # load rounds - 10 (128), 12 (192), 14 (256)
700 # Process 8x AES/GCM blocks
703 # 8x blocks
705 divdu 12, 5, 10 # n 128 bytes-blocks
707 addi 12, 12, -1 # loop - 1
717 xxlor 9, 32+22, 32+22 # save last state
721 vxor 15, 15, 29 # IV + round key - add round key 0
739 # Pre-compute first 8 AES state and leave 1/3/5 more rounds
742 addi 22, 23, -9 # process 8 keys
746 LOOP_8AES_STATE # process 8 AES keys
755 cmpdi 12, 0 # Only one loop (8 block)
759 # Loop 8x blocks and compute ghash
790 stxvb16x 47, 0, 9 # store output
791 stxvb16x 48, 15, 9 # store output
792 stxvb16x 49, 16, 9 # store output
793 stxvb16x 50, 17, 9 # store output
794 stxvb16x 51, 18, 9 # store output
795 stxvb16x 52, 19, 9 # store output
796 stxvb16x 53, 20, 9 # store output
797 stxvb16x 54, 21, 9 # store output
798 addi 9, 9, 128
807 xxlor 32+15, 9, 9 # last state
816 xxlor 9, 32+22, 32+22 # save last state
819 vxor 15, 15, 27 # IV + round key - add round key 0
828 addi 5, 5, -128
831 LOOP_8AES_STATE # process 8 AES keys
841 addi 12, 12, -1
874 stxvb16x 47, 0, 9 # store output
875 stxvb16x 48, 15, 9 # store output
876 stxvb16x 49, 16, 9 # store output
877 stxvb16x 50, 17, 9 # store output
878 stxvb16x 51, 18, 9 # store output
879 stxvb16x 52, 19, 9 # store output
880 stxvb16x 53, 20, 9 # store output
881 stxvb16x 54, 21, 9 # store output
882 addi 9, 9, 128
890 xxlor 30+32, 9, 9 # last ctr
893 stxvb16x 32+0, 0, 8 # update Xi
895 addi 5, 5, -128
899 # Done 8x blocks
925 # 8x Decrypt
936 # initialize ICB: GHASH( IV ), IV - r7
937 lxvb16x 30+32, 0, 7 # load IV - v30
940 mr 9, 4
952 # load 9 round keys to VSR
961 lxv 8, 128(6) # round key 8
963 # load rounds - 10 (128), 12 (192), 14 (256)
978 # Process 8x AES/GCM blocks
981 # 8x blocks
983 divdu 12, 5, 10 # n 128 bytes-blocks
985 addi 12, 12, -1 # loop - 1
995 xxlor 9, 32+22, 32+22 # save last state
999 vxor 15, 15, 29 # IV + round key - add round key 0
1017 # Pre-compute first 8 AES state and leave 1/3/5 more rounds
1020 addi 22, 23, -9 # process 8 keys
1024 LOOP_8AES_STATE # process 8 AES keys
1033 cmpdi 12, 0 # Only one loop (8 block)
1037 # Loop 8x blocks and compute ghash
1068 stxvb16x 47, 0, 9 # store output
1069 stxvb16x 48, 15, 9 # store output
1070 stxvb16x 49, 16, 9 # store output
1071 stxvb16x 50, 17, 9 # store output
1072 stxvb16x 51, 18, 9 # store output
1073 stxvb16x 52, 19, 9 # store output
1074 stxvb16x 53, 20, 9 # store output
1075 stxvb16x 54, 21, 9 # store output
1077 addi 9, 9, 128
1095 xxlor 32+15, 9, 9 # last state
1104 xxlor 9, 32+22, 32+22 # save last state
1107 vxor 15, 15, 27 # IV + round key - add round key 0
1116 addi 5, 5, -128
1119 LOOP_8AES_STATE # process 8 AES keys
1129 addi 12, 12, -1
1162 stxvb16x 47, 0, 9 # store output
1163 stxvb16x 48, 15, 9 # store output
1164 stxvb16x 49, 16, 9 # store output
1165 stxvb16x 50, 17, 9 # store output
1166 stxvb16x 51, 18, 9 # store output
1167 stxvb16x 52, 19, 9 # store output
1168 stxvb16x 53, 20, 9 # store output
1169 stxvb16x 54, 21, 9 # store output
1170 addi 9, 9, 128
1188 xxlor 30+32, 9, 9 # last ctr
1191 stxvb16x 32+0, 0, 8 # update Xi
1193 addi 5, 5, -128
1197 # Done 8x blocks