Lines Matching +full:8 +full:- +full:15

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2024- IBM Inc.
22 # Hash keys = v3 - v14
29 # v31 - counter 1
32 # vs0 - round key 0
33 # v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
35 # This implementation uses stitched AES-GCM approach to improve overall performance.
36 # AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
66 stdu 1,-512(1)
69 SAVE_GPR 15, 120, 1
111 RESTORE_GPR 15, 120, 1
135 # 8x loops
152 AES_CIPHER_8x vcipher, 15, 23
153 AES_CIPHER_8x vcipher, 15, 24
154 AES_CIPHER_8x vcipher, 15, 25
155 AES_CIPHER_8x vcipher, 15, 26
159 xxlor 32+26, 8, 8
160 AES_CIPHER_8x vcipher, 15, 23
161 AES_CIPHER_8x vcipher, 15, 24
162 AES_CIPHER_8x vcipher, 15, 25
163 AES_CIPHER_8x vcipher, 15, 26
175 # Hash keys = v3 - v14
176 # Scratch: v23 - v29
203 vsldoi 25, 24, 1, 8 # mL
204 vsldoi 1, 1, 24, 8 # mH
208 # vsldoi 23, 23, 23, 8 # swap
215 vpmsumd 28, 8, \S3
225 vsldoi 25, 23, 23, 8 # swap
245 vsldoi 25, 23, 1, 8 # mL
246 vsldoi 26, 1, 23, 8 # mH
253 vsldoi 23, 22, 22, 8 # swap
263 # Hash keys = v3 - v14
267 lxvb16x 32, 0, 8 # load Xi
269 # load Hash - h^4, h^3, h^2, h
271 lxvd2x 2+32, 10, 8 # H Poli
273 lxvd2x 3+32, 10, 8 # Hl
275 lxvd2x 4+32, 10, 8 # H
277 lxvd2x 5+32, 10, 8 # Hh
280 lxvd2x 6+32, 10, 8 # H^2l
282 lxvd2x 7+32, 10, 8 # H^2
284 lxvd2x 8+32, 10, 8 # H^2h
287 lxvd2x 9+32, 10, 8 # H^3l
289 lxvd2x 10+32, 10, 8 # H^3
291 lxvd2x 11+32, 10, 8 # H^3h
294 lxvd2x 12+32, 10, 8 # H^4l
296 lxvd2x 13+32, 10, 8 # H^4
298 lxvd2x 14+32, 10, 8 # H^4h
317 xxlxor 32+15, 32+30, 0
319 # Pre-load 8 AES rounds to scratch vectors.
327 xxlor 32+29, 8, 8
 329 addi 22, 23, -9 # remaining AES rounds
338 vcipher 15, 15, 16
339 vcipher 15, 15, 17
340 vcipher 15, 15, 18
341 vcipher 15, 15, 19
342 vcipher 15, 15, 20
343 vcipher 15, 15, 21
344 vcipher 15, 15, 28
345 vcipher 15, 15, 29
349 vcipher 15, 15, 1
354 vcipherlast 15, 15, 1
356 xxlxor 32+15, 32+15, 11
357 stxvb16x 32+15, 0, 9 # store output
363 xxlor 15+32, 11, 11
365 vxor 15, 15, 0
366 PPC_GHASH1x 0, 15
368 addi 5, 5, -16
372 xxlxor 32+15, 32+30, 0
373 addi 12, 12, -1
378 stxvb16x 32+0, 0, 8 # update Xi
393 vspltisb 16, -1
404 xxlxor 32+15, 32+30, 0
406 addi 22, 23, -1 # loop - 1
412 vcipher 15, 15, 1
416 vcipherlast 15, 15, 1
418 xxlxor 32+15, 32+15, 11
419 vand 15, 15, 16
424 stxvb16x 15+32, 10, 1 # write v15 to stack
426 addi 12, 9, -1
435 xxlor 32+15, 11, 11 # decrypt using the input block
437 #vxor 15, 15, 0 # ^ previous hash
438 #PPC_GHASH1x 0, 15
 452 stxvb16x 32+0, 0, 8 # Update Xi
453 stxvb16x 32+15, 10, 7 # Update pblock
462 # - Compute ghash for a full block. Clear Partial_len and pblock. Update IV.
464 # - Don't compute ghash if not full block. gcm_update will take care of it
490 vspltisb 16, -1
491 sldi 15, 12, 3
492 mtvsrdd 32+17, 0, 15
495 sldi 15, 17, 3
496 mtvsrdd 32+17, 0, 15
501 sldi 15, 12, 3
502 mtvsrdd 32+17, 0, 15
504 sldi 15, 17, 3
505 mtvsrdd 32+17, 0, 15
510 xxlxor 32+15, 32+30, 0
512 addi 22, 23, -1 # loop - 1
518 vcipher 15, 15, 1
522 vcipherlast 15, 15, 1
524 vxor 15, 15, 19
525 vand 15, 15, 16
530 stxvb16x 15+32, 10, 1 # write v15 to stack
533 addi 15, 9, -1
537 stbu 22, 1(15)
548 vmr 15, 19 # decrypt using the input block
557 vxor 15, 15, 17 # combined pblock
562 stxvb16x 32+15, 10, 7 # Update current pblock
573 vxor 15, 15, 0
574 PPC_GHASH1x 0, 15
576 stxvb16x 32+0, 0, 8 # update Xi
581 # gcm_update(iv, Xi) - compute last hash
591 # load Hash - h^4, h^3, h^2, h
614 vsldoi 15, 13, 1, 8 # mL
615 vsldoi 16, 1, 13, 8 # mH
616 vxor 12, 12, 15 # LL + LL
618 xxlor 32+15, 10, 10
619 vpermxor 12, 12, 17, 15
620 vsldoi 13, 12, 12, 8 # swap
639 # r3 - inp
640 # r4 - out
641 # r5 - len
642 # r6 - AES round keys
643 # r7 - iv and other data
644 # r8 - Xi, HPoli, hash keys
658 # initialize ICB: GHASH( IV ), IV - r7
659 lxvb16x 30+32, 0, 7 # load IV - v30
683 lxv 8, 128(6) # round key 8
685 # load rounds - 10 (128), 12 (192), 14 (256)
700 # Process 8x AES/GCM blocks
 703 # 8x blocks
705 divdu 12, 5, 10 # n 128 bytes-blocks
707 addi 12, 12, -1 # loop - 1
709 vmr 15, 30 # first state: IV
710 vadduwm 16, 15, 31 # state + counter
721 vxor 15, 15, 29 # IV + round key - add round key 0
730 li 15, 16
739 # Pre-compute first 8 AES state and leave 1/3/5 more rounds
742 addi 22, 23, -9 # process 8 keys
746 LOOP_8AES_STATE # process 8 AES keys
750 AES_CIPHER_8x vcipher 15 1
755 cmpdi 12, 0 # Only one loop (8 block)
759 # Loop 8x blocks and compute ghash
762 vcipherlast 15, 15, 1
772 lxvb16x 32+24, 15, 14 # load block
781 vxor 15, 15, 23
791 stxvb16x 48, 15, 9 # store output
801 vxor 15, 15, 0
802 PPC_GHASH4x 0, 15, 16, 17, 18
807 xxlor 32+15, 9, 9 # last state
808 vadduwm 15, 15, 31 # state + counter
809 vadduwm 16, 15, 31
819 vxor 15, 15, 27 # IV + round key - add round key 0
828 addi 5, 5, -128
831 LOOP_8AES_STATE # process 8 AES keys
836 AES_CIPHER_8x vcipher 15 1
841 addi 12, 12, -1
846 vcipherlast 15, 15, 1
856 lxvb16x 32+24, 15, 14 # load block
865 vxor 15, 15, 23
875 stxvb16x 48, 15, 9 # store output
884 vxor 15, 15, 0
885 PPC_GHASH4x 0, 15, 16, 17, 18
893 stxvb16x 32+0, 0, 8 # update Xi
895 addi 5, 5, -128
899 # Done 8x blocks
925 # 8x Decrypt
936 # initialize ICB: GHASH( IV ), IV - r7
937 lxvb16x 30+32, 0, 7 # load IV - v30
961 lxv 8, 128(6) # round key 8
963 # load rounds - 10 (128), 12 (192), 14 (256)
978 # Process 8x AES/GCM blocks
 981 # 8x blocks
983 divdu 12, 5, 10 # n 128 bytes-blocks
985 addi 12, 12, -1 # loop - 1
987 vmr 15, 30 # first state: IV
988 vadduwm 16, 15, 31 # state + counter
999 vxor 15, 15, 29 # IV + round key - add round key 0
1008 li 15, 16
1017 # Pre-compute first 8 AES state and leave 1/3/5 more rounds
1020 addi 22, 23, -9 # process 8 keys
1024 LOOP_8AES_STATE # process 8 AES keys
1028 AES_CIPHER_8x vcipher 15 1
1033 cmpdi 12, 0 # Only one loop (8 block)
1037 # Loop 8x blocks and compute ghash
1040 vcipherlast 15, 15, 1
1050 lxvb16x 32+24, 15, 14 # load block
1059 vxor 15, 15, 23
1069 stxvb16x 48, 15, 9 # store output
1079 vmr 15, 23
1089 vxor 15, 15, 0
1090 PPC_GHASH4x 0, 15, 16, 17, 18
1095 xxlor 32+15, 9, 9 # last state
1096 vadduwm 15, 15, 31 # state + counter
1097 vadduwm 16, 15, 31
1107 vxor 15, 15, 27 # IV + round key - add round key 0
1116 addi 5, 5, -128
1119 LOOP_8AES_STATE # process 8 AES keys
1124 AES_CIPHER_8x vcipher 15 1
1129 addi 12, 12, -1
1134 vcipherlast 15, 15, 1
1144 lxvb16x 32+24, 15, 14 # load block
1153 vxor 15, 15, 23
1163 stxvb16x 48, 15, 9 # store output
1172 #vmr 15, 23
1173 vxor 15, 23, 0
1182 #vxor 15, 15, 0
1183 PPC_GHASH4x 0, 15, 16, 17, 18
1191 stxvb16x 32+0, 0, 8 # update Xi
1193 addi 5, 5, -128
1197 # Done 8x blocks