Lines Matching +full:0 +full:- +full:15

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2024- IBM Inc.
22 # Hash keys = v3 - v14
29 # v31 - counter 1
32 # vs0 - round key 0
35 # This implementation uses a stitched AES-GCM approach to improve overall performance.
64 mflr 0
65 std 0, 16(1)
66 stdu 1,-512(1)
69 SAVE_GPR 15, 120, 1
81 SAVE_VRS 20, 0, 9
97 RESTORE_VRS 20, 0, 9
111 RESTORE_GPR 15, 120, 1
123 ld 0, 16(1)
124 mtlr 0
152 AES_CIPHER_8x vcipher, 15, 23
153 AES_CIPHER_8x vcipher, 15, 24
154 AES_CIPHER_8x vcipher, 15, 25
155 AES_CIPHER_8x vcipher, 15, 26
160 AES_CIPHER_8x vcipher, 15, 23
161 AES_CIPHER_8x vcipher, 15, 24
162 AES_CIPHER_8x vcipher, 15, 25
163 AES_CIPHER_8x vcipher, 15, 26
175 # Hash keys = v3 - v14
176 # Scratch: v23 - v29
263 # Hash keys = v3 - v14
267 lxvb16x 32, 0, 8 # load Xi
269 # load Hash - h^4, h^3, h^2, h
305 # vs0: roundkey 0
317 xxlxor 32+15, 32+30, 0
319 # Pre-load 8 AES rounds to scratch vectors.
329 addi 22, 23, -9 # remaining AES rounds
331 cmpdi 12, 0
338 vcipher 15, 15, 16
339 vcipher 15, 15, 17
340 vcipher 15, 15, 18
341 vcipher 15, 15, 19
342 vcipher 15, 15, 20
343 vcipher 15, 15, 21
344 vcipher 15, 15, 28
345 vcipher 15, 15, 29
348 lxv 32+1, 0(10)
349 vcipher 15, 15, 1
352 lxv 32+1, 0(10) # last round key
353 lxvb16x 11, 0, 14 # load input block
354 vcipherlast 15, 15, 1
356 xxlxor 32+15, 32+15, 11
357 stxvb16x 32+15, 0, 9 # store output
361 cmpdi 24, 0 # decrypt?
363 xxlor 15+32, 11, 11
365 vxor 15, 15, 0
366 PPC_GHASH1x 0, 15
368 addi 5, 5, -16
372 xxlxor 32+15, 32+30, 0
373 addi 12, 12, -1
374 cmpdi 12, 0
377 stxvb16x 32+30, 0, 7 # update IV
378 stxvb16x 32+0, 0, 8 # update Xi
393 vspltisb 16, -1
397 mtvsrdd 32+17, 0, 12
400 lxvb16x 11, 0, 14 # load partial block
404 xxlxor 32+15, 32+30, 0
406 addi 22, 23, -1 # loop - 1
411 lxv 32+1, 0(10)
412 vcipher 15, 15, 1
415 lxv 32+1, 0(10) # last round key
416 vcipherlast 15, 15, 1
418 xxlxor 32+15, 32+15, 11
419 vand 15, 15, 16
424 stxvb16x 15+32, 10, 1 # write v15 to stack
426 addi 12, 9, -1
433 cmpdi 24, 0 # decrypt?
435 xxlor 32+15, 11, 11 # decrypt using the input block
437 #vxor 15, 15, 0 # ^ previous hash
438 #PPC_GHASH1x 0, 15
444 li 5, 0 # done last byte
450 stxvb16x 32+30, 0, 7 # update IV
452 stxvb16x 32+0, 0, 8 # update Xi
453 stxvb16x 32+15, 10, 7 # Update pblock
462 # - Compute ghash for a full block. Clear Partial_len and pblock. Update IV.
464 # - Don't compute ghash if it is not a full block; gcm_update will take care of it.
473 li 17, 0
479 li 17, 0
485 li 17, 0
490 vspltisb 16, -1
491 sldi 15, 12, 3
492 mtvsrdd 32+17, 0, 15
495 sldi 15, 17, 3
496 mtvsrdd 32+17, 0, 15
500 lxvb16x 32+19, 0, 14 # load partial block
501 sldi 15, 12, 3
502 mtvsrdd 32+17, 0, 15
503 vsro 19, 19, 17 # 0x00..xxxx??..??
504 sldi 15, 17, 3
505 mtvsrdd 32+17, 0, 15
506 vsro 19, 19, 17 # 0x00..xxxx
507 vslo 19, 19, 17 # shift back to form 0x00..xxxx00..00
510 xxlxor 32+15, 32+30, 0
512 addi 22, 23, -1 # loop - 1
517 lxv 32+1, 0(10)
518 vcipher 15, 15, 1
521 lxv 32+1, 0(10) # last round key
522 vcipherlast 15, 15, 1
524 vxor 15, 15, 19
525 vand 15, 15, 16
530 stxvb16x 15+32, 10, 1 # write v15 to stack
533 addi 15, 9, -1
537 stbu 22, 1(15)
546 cmpdi 24, 0 # decrypt?
548 vmr 15, 19 # decrypt using the input block
557 vxor 15, 15, 17 # combined pblock
562 stxvb16x 32+15, 10, 7 # Update current pblock
566 li 12, 0
570 stxvb16x 32+30, 0, 7 # update IV
573 vxor 15, 15, 0
574 PPC_GHASH1x 0, 15
575 stxvb16x 32+0, 10, 7 # update pblock for debug?
576 stxvb16x 32+0, 0, 8 # update Xi
581 # gcm_update(iv, Xi) - compute last hash
587 cmpdi 10, 0
590 lxvb16x 32, 0, 4 # load Xi
591 # load Hash - h^4, h^3, h^2, h
603 lxv 10, 0(11) # vs10: vpermxor vector
607 vxor 6, 6, 0
614 vsldoi 15, 13, 1, 8 # mL
616 vxor 12, 12, 15 # LL + LL
618 xxlor 32+15, 10, 10
619 vpermxor 12, 12, 17, 15
625 #vxor 0, 0, 0
626 #stxvb16x 32+0, 9, 3
627 li 10, 0
629 stxvb16x 32+7, 0, 4
639 # r3 - inp
640 # r4 - out
641 # r5 - len
642 # r6 - AES round keys
643 # r7 - iv and other data
644 # r8 - Xi, HPoli, hash keys
647 # Xi is at 0 in gcm_table (Xip).
652 cmpdi 5, 0
658 # initialize ICB: GHASH( IV ), IV - r7
659 lxvb16x 30+32, 0, 7 # load IV - v30
671 lxv 10, 0(11) # vs10: vpermxor vector
672 li 11, 0
675 lxv 0, 0(6) # round key 0
685 # load rounds - 10 (128), 12 (192), 14 (256)
694 cmpdi 12, 0
705 divdu 12, 5, 10 # number of 128-byte blocks
707 addi 12, 12, -1 # loop - 1
709 vmr 15, 30 # first state: IV
710 vadduwm 16, 15, 31 # state + counter
720 xxlor 32+29, 0, 0
721 vxor 15, 15, 29 # IV + round key - add round key 0
730 li 15, 16
739 # Pre-compute first 8 AES state and leave 1/3/5 more rounds
742 addi 22, 23, -9 # process 8 keys
749 lxv 32+1, 0(10) # round key
750 AES_CIPHER_8x vcipher 15 1
753 lxv 32+1, 0(10) # last round key (v1)
755 cmpdi 12, 0 # Only one loop (8 blocks)
762 vcipherlast 15, 15, 1
771 lxvb16x 32+23, 0, 14 # load block
772 lxvb16x 32+24, 15, 14 # load block
781 vxor 15, 15, 23
790 stxvb16x 47, 0, 9 # store output
791 stxvb16x 48, 15, 9 # store output
801 vxor 15, 15, 0
802 PPC_GHASH4x 0, 15, 16, 17, 18
804 vxor 19, 19, 0
805 PPC_GHASH4x 0, 19, 20, 21, 22
807 xxlor 32+15, 9, 9 # last state
808 vadduwm 15, 15, 31 # state + counter
809 vadduwm 16, 15, 31
818 xxlor 32+27, 0, 0 # restore roundkey 0
819 vxor 15, 15, 27 # IV + round key - add round key 0
828 addi 5, 5, -128
835 lxv 32+1, 0(10) # round key
836 AES_CIPHER_8x vcipher 15 1
839 lxv 32+1, 0(10) # last round key (v1)
841 addi 12, 12, -1
842 cmpdi 12, 0
846 vcipherlast 15, 15, 1
855 lxvb16x 32+23, 0, 14 # load block
856 lxvb16x 32+24, 15, 14 # load block
865 vxor 15, 15, 23
874 stxvb16x 47, 0, 9 # store output
875 stxvb16x 48, 15, 9 # store output
884 vxor 15, 15, 0
885 PPC_GHASH4x 0, 15, 16, 17, 18
887 vxor 19, 19, 0
888 PPC_GHASH4x 0, 19, 20, 21, 22
892 stxvb16x 32+30, 0, 7 # update IV
893 stxvb16x 32+0, 0, 8 # update Xi
895 addi 5, 5, -128
902 cmpdi 5, 0
908 cmpdi 5, 0
912 cmpdi 5, 0
916 cmpdi 5, 0
930 cmpdi 5, 0
936 # initialize ICB: GHASH( IV ), IV - r7
937 lxvb16x 30+32, 0, 7 # load IV - v30
949 lxv 10, 0(11) # vs10: vpermxor vector
950 li 11, 0
953 lxv 0, 0(6) # round key 0
963 # load rounds - 10 (128), 12 (192), 14 (256)
965 li 24, 0 # decrypt
972 cmpdi 12, 0
983 divdu 12, 5, 10 # number of 128-byte blocks
985 addi 12, 12, -1 # loop - 1
987 vmr 15, 30 # first state: IV
988 vadduwm 16, 15, 31 # state + counter
998 xxlor 32+29, 0, 0
999 vxor 15, 15, 29 # IV + round key - add round key 0
1008 li 15, 16
1017 # Pre-compute first 8 AES state and leave 1/3/5 more rounds
1020 addi 22, 23, -9 # process 8 keys
1027 lxv 32+1, 0(10) # round key
1028 AES_CIPHER_8x vcipher 15 1
1031 lxv 32+1, 0(10) # last round key (v1)
1033 cmpdi 12, 0 # Only one loop (8 blocks)
1040 vcipherlast 15, 15, 1
1049 lxvb16x 32+23, 0, 14 # load block
1050 lxvb16x 32+24, 15, 14 # load block
1059 vxor 15, 15, 23
1068 stxvb16x 47, 0, 9 # store output
1069 stxvb16x 48, 15, 9 # store output
1079 vmr 15, 23
1089 vxor 15, 15, 0
1090 PPC_GHASH4x 0, 15, 16, 17, 18
1092 vxor 19, 19, 0
1093 PPC_GHASH4x 0, 19, 20, 21, 22
1095 xxlor 32+15, 9, 9 # last state
1096 vadduwm 15, 15, 31 # state + counter
1097 vadduwm 16, 15, 31
1106 xxlor 32+27, 0, 0 # restore roundkey 0
1107 vxor 15, 15, 27 # IV + round key - add round key 0
1116 addi 5, 5, -128
1123 lxv 32+1, 0(10) # round key
1124 AES_CIPHER_8x vcipher 15 1
1127 lxv 32+1, 0(10) # last round key (v1)
1129 addi 12, 12, -1
1130 cmpdi 12, 0
1134 vcipherlast 15, 15, 1
1143 lxvb16x 32+23, 0, 14 # load block
1144 lxvb16x 32+24, 15, 14 # load block
1153 vxor 15, 15, 23
1162 stxvb16x 47, 0, 9 # store output
1163 stxvb16x 48, 15, 9 # store output
1172 #vmr 15, 23
1173 vxor 15, 23, 0
1182 #vxor 15, 15, 0
1183 PPC_GHASH4x 0, 15, 16, 17, 18
1185 vxor 19, 19, 0
1186 PPC_GHASH4x 0, 19, 20, 21, 22
1190 stxvb16x 32+30, 0, 7 # update IV
1191 stxvb16x 32+0, 0, 8 # update Xi
1193 addi 5, 5, -128
1200 cmpdi 5, 0
1204 li 24, 0 # decrypt
1206 cmpdi 5, 0
1210 cmpdi 5, 0
1214 cmpdi 5, 0
1227 li 3, 0
1235 .long 0x4c5d6e7f, 0x08192a3b, 0xc4d5e6f7, 0x8091a2b3