1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2024- IBM Inc.
22 # Hash keys = v3 - v14
29 # v31 - counter 1
32 # vs0 - round key 0
35 # This implementation uses a stitched AES-GCM approach to improve overall performance.
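Not part of the listed file: "stitched" here means the AES-CTR encryption of each batch of counter blocks and the GHASH folding of the previously produced ciphertext are interleaved in one loop, so the vector cipher (vcipher) and carry-less multiply (vpmsumd) pipelines are kept busy together. Below is a minimal C sketch of the data flow that gets fused, one block per step for readability; aes_encrypt_block() and ghash_mul() are placeholders standing in for the real vcipher/vpmsumd work.

#include <stdint.h>
#include <stddef.h>

/* Placeholder only: the real code runs the AES rounds with vcipher/vcipherlast. */
static void aes_encrypt_block(const uint8_t rk[][16], int rounds,
                              const uint8_t in[16], uint8_t out[16])
{
        for (int i = 0; i < 16; i++)
                out[i] = in[i] ^ rk[0][i];      /* stand-in, not real AES */
        (void)rounds;
}

/* Placeholder only: the real code multiplies in GF(2^128) with vpmsumd. */
static void ghash_mul(const uint8_t h[16], uint8_t xi[16])
{
        for (int i = 0; i < 16; i++)
                xi[i] ^= h[i];                  /* stand-in, not a real GF multiply */
}

/* CTR encryption plus GHASH accumulation for n blocks; the assembly processes
 * 8 blocks per iteration and interleaves the two stages instead of running
 * them back to back as this scalar sketch does. */
static void ctr_ghash_sketch(const uint8_t rk[][16], int rounds,
                             const uint8_t h[16], uint8_t xi[16],
                             uint8_t ctr[16], const uint8_t *in, uint8_t *out,
                             size_t nblocks)
{
        uint8_t ks[16];

        for (size_t b = 0; b < nblocks; b++) {
                aes_encrypt_block(rk, rounds, ctr, ks);   /* keystream = E_K(ctr) */
                for (int i = 0; i < 16; i++) {
                        out[16 * b + i] = in[16 * b + i] ^ ks[i];
                        xi[i] ^= out[16 * b + i];         /* fold ciphertext into Xi */
                }
                ghash_mul(h, xi);                         /* Xi = (Xi ^ C_b) * H */
                /* bump the 32-bit big-endian counter in the last IV word */
                for (int i = 15; i >= 12 && ++ctr[i] == 0; i--)
                        ;
        }
}

Because the ciphertext is what gets hashed, the GHASH of one batch can be issued while the AES rounds of the next batch are still in flight, which is what the stitched assembly exploits.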
64 mflr 0
65 std 0, 16(1)
66 stdu 1,-512(1)
80 addi 9, 1, 256
81 SAVE_VRS 20, 0, 9
82 SAVE_VRS 21, 16, 9
83 SAVE_VRS 22, 32, 9
84 SAVE_VRS 23, 48, 9
85 SAVE_VRS 24, 64, 9
86 SAVE_VRS 25, 80, 9
87 SAVE_VRS 26, 96, 9
88 SAVE_VRS 27, 112, 9
89 SAVE_VRS 28, 128, 9
90 SAVE_VRS 29, 144, 9
91 SAVE_VRS 30, 160, 9
92 SAVE_VRS 31, 176, 9
96 addi 9, 1, 256
97 RESTORE_VRS 20, 0, 9
98 RESTORE_VRS 21, 16, 9
99 RESTORE_VRS 22, 32, 9
100 RESTORE_VRS 23, 48, 9
101 RESTORE_VRS 24, 64, 9
102 RESTORE_VRS 25, 80, 9
103 RESTORE_VRS 26, 96, 9
104 RESTORE_VRS 27, 112, 9
105 RESTORE_VRS 28, 128, 9
106 RESTORE_VRS 29, 144, 9
107 RESTORE_VRS 30, 160, 9
108 RESTORE_VRS 31, 176, 9
123 ld 0, 16(1)
124 mtlr 0
175 # Hash keys = v3 - v14
176 # Scratch: v23 - v29
181 vpmsumd 24, 9, \S2
263 # Hash keys = v3 - v14
267 lxvb16x 32, 0, 8 # load Xi
269 # load Hash - h^4, h^3, h^2, h
287 lxvd2x 9+32, 10, 8 # H^3l
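Not part of the listed file: the table loaded here holds precomputed powers of the hash key (H, H^2, H^3, H^4), which let four ciphertext blocks be folded into Xi at once, as Xi' = ((Xi ^ C1)*H^4) ^ (C2*H^3) ^ (C3*H^2) ^ (C4*H). For reference, a small self-contained C sketch of how such powers can be derived from H using the standard bit-reflected GF(2^128) multiplication of the GCM specification; the code that fills this table is outside the lines shown here.

#include <stdint.h>
#include <string.h>

/* Reference bit-reflected GF(2^128) multiply (GCM/NIST SP 800-38D style):
 * z = x * y, reducing with the constant 0xe1 in the top byte. */
static void gf128_mul(const uint8_t x[16], const uint8_t y[16], uint8_t z[16])
{
        uint8_t v[16];

        memcpy(v, y, 16);
        memset(z, 0, 16);
        for (int i = 0; i < 128; i++) {
                /* bit i of x, most-significant bit of each byte first */
                if ((x[i / 8] >> (7 - (i % 8))) & 1)
                        for (int j = 0; j < 16; j++)
                                z[j] ^= v[j];
                /* v = v * x: shift right one bit, reduce if a bit falls off */
                int carry = v[15] & 1;
                for (int j = 15; j > 0; j--)
                        v[j] = (uint8_t)((v[j] >> 1) | (v[j - 1] << 7));
                v[0] >>= 1;
                if (carry)
                        v[0] ^= 0xe1;
        }
}

/* hpow[0..3] = H, H^2, H^3, H^4 -- four blocks can then be folded per step. */
static void ghash_key_powers(const uint8_t h[16], uint8_t hpow[4][16])
{
        memcpy(hpow[0], h, 16);
        for (int i = 1; i < 4; i++)
                gf128_mul(hpow[i - 1], h, hpow[i]);
}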
305 # vs0: roundkey 0
317 xxlxor 32+15, 32+30, 0
319 # Pre-load 8 AES rounds to scratch vectors.
329 addi 22, 23, -9 # remaining AES rounds
331 cmpdi 12, 0
348 lxv 32+1, 0(10)
352 lxv 32+1, 0(10) # last round key
353 lxvb16x 11, 0, 14 # load input block
357 stxvb16x 32+15, 0, 9 # store output
359 addi 9, 9, 16
361 cmpdi 24, 0 # decrypt?
365 vxor 15, 15, 0
366 PPC_GHASH1x 0, 15
368 addi 5, 5, -16
372 xxlxor 32+15, 32+30, 0
373 addi 12, 12, -1
374 cmpdi 12, 0
377 stxvb16x 32+30, 0, 7 # update IV
378 stxvb16x 32+0, 0, 8 # update Xi
393 vspltisb 16, -1
397 mtvsrdd 32+17, 0, 12
400 lxvb16x 11, 0, 14 # load partial block
404 xxlxor 32+15, 32+30, 0
406 addi 22, 23, -1 # loop - 1
411 lxv 32+1, 0(10)
415 lxv 32+1, 0(10) # last round key
426 addi 12, 9, -1
433 cmpdi 24, 0 # decrypt?
437 #vxor 15, 15, 0 # ^ previous hash
438 #PPC_GHASH1x 0, 15
441 add 9, 9, 5
444 li 5, 0 # done last byte
450 stxvb16x 32+30, 0, 7 # update IV
452 stxvb16x 32+0, 0, 8 # update Xi
462 # - Compute ghash for a full block. Clear Partial_len and pblock. Update IV.
464 # - Don't compute ghash if it is not a full block. gcm_update will take care of it.
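Not part of the listed file: a hedged C sketch of the buffering rule the two comments above describe. Bytes are collected into a pending block (pblock); only when it fills up is it folded into the hash and the counter/IV advanced, otherwise the leftover bytes stay buffered for a later gcm_update()-style finalization. The struct fields and helper names below are illustrative, not the kernel's.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

struct pgcm_state {                      /* illustrative names only          */
        uint8_t xi[16];                  /* running GHASH value (Xi)         */
        uint8_t pblock[16];              /* pending partial ciphertext block */
        unsigned int partial_len;        /* valid bytes held in pblock       */
};

/* Placeholder: the real code computes Xi = (Xi ^ blk) * H with vpmsumd. */
static void ghash_block(struct pgcm_state *st, const uint8_t blk[16])
{
        for (int i = 0; i < 16; i++)
                st->xi[i] ^= blk[i];
}

/* Advance the 32-bit big-endian counter in the last four IV bytes. */
static void ctr_inc(uint8_t iv[16])
{
        for (int i = 15; i >= 12 && ++iv[i] == 0; i--)
                ;
}

static void gcm_add_partial(struct pgcm_state *st, uint8_t iv[16],
                            const uint8_t *ct, size_t len)
{
        while (len) {
                size_t n = 16 - st->partial_len;

                if (n > len)
                        n = len;
                memcpy(st->pblock + st->partial_len, ct, n);
                st->partial_len += n;
                ct += n;
                len -= n;

                if (st->partial_len == 16) {
                        ghash_block(st, st->pblock);  /* full block: hash it  */
                        memset(st->pblock, 0, 16);    /* clear pblock         */
                        st->partial_len = 0;          /* clear Partial_len    */
                        ctr_inc(iv);                  /* update IV            */
                }
                /* anything short of 16 bytes stays buffered until the final
                 * gcm_update() folds it into Xi */
        }
}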
473 li 17, 0
479 li 17, 0
485 li 17, 0
490 vspltisb 16, -1
492 mtvsrdd 32+17, 0, 15
496 mtvsrdd 32+17, 0, 15
500 lxvb16x 32+19, 0, 14 # load partial block
502 mtvsrdd 32+17, 0, 15
503 vsro 19, 19, 17 # 0x00..xxxx??..??
505 mtvsrdd 32+17, 0, 15
506 vsro 19, 19, 17 # 0x00..xxxx
507 vslo 19, 19, 17 # shift back to form 0x00..xxxx00..00
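Not part of the listed file: the vsro/vslo pair above masks a partially filled final block so that only its valid bytes contribute, with the remaining byte positions forced to zero before the keystream XOR and hash. A byte-wise sketch of that effect, assuming the valid bytes sit at the start of the buffer (the assembly achieves the same masking with octet shifts on the big-endian-loaded vector):

#include <stdint.h>
#include <string.h>

/* Keep only the first valid_len bytes of a 16-byte block and zero the rest,
 * so garbage past the end of the partial block never reaches the hash/XOR. */
static void mask_partial_block(uint8_t blk[16], unsigned int valid_len)
{
        if (valid_len < 16)
                memset(blk + valid_len, 0, 16 - valid_len);
}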
510 xxlxor 32+15, 32+30, 0
512 addi 22, 23, -1 # loop - 1
517 lxv 32+1, 0(10)
521 lxv 32+1, 0(10) # last round key
533 addi 15, 9, -1
542 add 9, 9, 21
546 cmpdi 24, 0 # decrypt?
566 li 12, 0
570 stxvb16x 32+30, 0, 7 # update IV
573 vxor 15, 15, 0
574 PPC_GHASH1x 0, 15
575 stxvb16x 32+0, 10, 7 # update pblock for debug?
576 stxvb16x 32+0, 0, 8 # update Xi
581 # gcm_update(iv, Xi) - compute last hash
587 cmpdi 10, 0
590 lxvb16x 32, 0, 4 # load Xi
591 # load Hash - h^4, h^3, h^2, h
603 lxv 10, 0(11) # vs10: vpermxor vector
605 li 9, 64
606 lxvb16x 32+6, 9, 3 # load pblock
607 vxor 6, 6, 0
625 #vxor 0, 0, 0
626 #stxvb16x 32+0, 9, 3
627 li 10, 0
629 stxvb16x 32+7, 0, 4
639 # r3 - inp
640 # r4 - out
641 # r5 - len
642 # r6 - AES round keys
643 # r7 - iv and other data
644 # r8 - Xi, HPoly, hash keys
647 # Xi is at offset 0 in gcm_table (Xip).
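Not part of the listed file: the register comments above (r3..r8) describe the arguments of the encrypt/decrypt entry points. A hedged sketch of the C-side prototypes they imply, inferred only from these comments; the aes_p10_gcm_* names and the exact parameter types are assumptions, and the kernel's glue code may declare them differently.

/* Assumed names/types, derived from the r3..r8 comments above. */
int aes_p10_gcm_encrypt(unsigned char *inp, unsigned char *out,
                        unsigned long len, void *aes_round_keys,
                        unsigned char *iv, void *xi_and_hash_keys);
int aes_p10_gcm_decrypt(unsigned char *inp, unsigned char *out,
                        unsigned long len, void *aes_round_keys,
                        unsigned char *iv, void *xi_and_hash_keys);
/* gcm_update(iv, Xi) from the comment further up: folds any buffered
 * partial block into Xi at the end of the message. */
void gcm_update(unsigned char *iv, void *xi);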
652 cmpdi 5, 0
658 # initialize ICB: GHASH( IV ), IV - r7
659 lxvb16x 30+32, 0, 7 # load IV - v30
662 mr 9, 4
671 lxv 10, 0(11) # vs10: vpermxor vector
672 li 11, 0
674 # load 9 round keys to VSR
675 lxv 0, 0(6) # round key 0
685 # load rounds - 10 (128), 12 (192), 14 (256)
694 cmpdi 12, 0
705 divdu 12, 5, 10 # n 128-byte blocks
707 addi 12, 12, -1 # loop - 1
717 xxlor 9, 32+22, 32+22 # save last state
720 xxlor 32+29, 0, 0
721 vxor 15, 15, 29 # IV + round key - add round key 0
739 # Pre-compute the first 8 AES rounds and leave 1/3/5 more rounds
742 addi 22, 23, -9 # process 8 keys
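Not part of the listed file: the -9 here encodes the round bookkeeping noted just above. AES uses 10/12/14 rounds for 128/192/256-bit keys; the round-0 key add is done up front and the last round key is loaded separately (see the "last round key" loads below), 8 middle rounds are pre-loaded, so rounds - 9 = 1/3/5 rounds are left in between. A one-function sketch of that arithmetic:

/* rounds = key_bits/32 + 6 gives 10/12/14; after the 8 pre-loaded middle
 * rounds and the separately handled final round, 1/3/5 rounds remain. */
static int remaining_middle_rounds(int key_bits)
{
        int rounds = key_bits / 32 + 6;

        return rounds - 8 - 1;          /* 1 for AES-128, 3 for 192, 5 for 256 */
}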
749 lxv 32+1, 0(10) # round key
753 lxv 32+1, 0(10) # last round key (v1)
755 cmpdi 12, 0 # Only one loop (8 blocks)
771 lxvb16x 32+23, 0, 14 # load block
790 stxvb16x 47, 0, 9 # store output
791 stxvb16x 48, 15, 9 # store output
792 stxvb16x 49, 16, 9 # store output
793 stxvb16x 50, 17, 9 # store output
794 stxvb16x 51, 18, 9 # store output
795 stxvb16x 52, 19, 9 # store output
796 stxvb16x 53, 20, 9 # store output
797 stxvb16x 54, 21, 9 # store output
798 addi 9, 9, 128
801 vxor 15, 15, 0
802 PPC_GHASH4x 0, 15, 16, 17, 18
804 vxor 19, 19, 0
805 PPC_GHASH4x 0, 19, 20, 21, 22
807 xxlor 32+15, 9, 9 # last state
816 xxlor 9, 32+22, 32+22 # save last state
818 xxlor 32+27, 0, 0 # restore roundkey 0
819 vxor 15, 15, 27 # IV + round key - add round key 0
828 addi 5, 5, -128
835 lxv 32+1, 0(10) # round key
839 lxv 32+1, 0(10) # last round key (v1)
841 addi 12, 12, -1
842 cmpdi 12, 0
855 lxvb16x 32+23, 0, 14 # load block
874 stxvb16x 47, 0, 9 # store output
875 stxvb16x 48, 15, 9 # store output
876 stxvb16x 49, 16, 9 # store output
877 stxvb16x 50, 17, 9 # store output
878 stxvb16x 51, 18, 9 # store output
879 stxvb16x 52, 19, 9 # store output
880 stxvb16x 53, 20, 9 # store output
881 stxvb16x 54, 21, 9 # store output
882 addi 9, 9, 128
884 vxor 15, 15, 0
885 PPC_GHASH4x 0, 15, 16, 17, 18
887 vxor 19, 19, 0
888 PPC_GHASH4x 0, 19, 20, 21, 22
890 xxlor 30+32, 9, 9 # last ctr
892 stxvb16x 32+30, 0, 7 # update IV
893 stxvb16x 32+0, 0, 8 # update Xi
895 addi 5, 5, -128
902 cmpdi 5, 0
908 cmpdi 5, 0
912 cmpdi 5, 0
916 cmpdi 5, 0
930 cmpdi 5, 0
936 # initialize ICB: GHASH( IV ), IV - r7
937 lxvb16x 30+32, 0, 7 # load IV - v30
940 mr 9, 4
949 lxv 10, 0(11) # vs10: vpermxor vector
950 li 11, 0
952 # load 9 round keys to VSR
953 lxv 0, 0(6) # round key 0
963 # load rounds - 10 (128), 12 (192), 14 (256)
965 li 24, 0 # decrypt
972 cmpdi 12, 0
983 divdu 12, 5, 10 # n 128-byte blocks
985 addi 12, 12, -1 # loop - 1
995 xxlor 9, 32+22, 32+22 # save last state
998 xxlor 32+29, 0, 0
999 vxor 15, 15, 29 # IV + round key - add round key 0
1017 # Pre-compute the first 8 AES rounds and leave 1/3/5 more rounds
1020 addi 22, 23, -9 # process 8 keys
1027 lxv 32+1, 0(10) # round key
1031 lxv 32+1, 0(10) # last round key (v1)
1033 cmpdi 12, 0 # Only one loop (8 blocks)
1049 lxvb16x 32+23, 0, 14 # load block
1068 stxvb16x 47, 0, 9 # store output
1069 stxvb16x 48, 15, 9 # store output
1070 stxvb16x 49, 16, 9 # store output
1071 stxvb16x 50, 17, 9 # store output
1072 stxvb16x 51, 18, 9 # store output
1073 stxvb16x 52, 19, 9 # store output
1074 stxvb16x 53, 20, 9 # store output
1075 stxvb16x 54, 21, 9 # store output
1077 addi 9, 9, 128
1089 vxor 15, 15, 0
1090 PPC_GHASH4x 0, 15, 16, 17, 18
1092 vxor 19, 19, 0
1093 PPC_GHASH4x 0, 19, 20, 21, 22
1095 xxlor 32+15, 9, 9 # last state
1104 xxlor 9, 32+22, 32+22 # save last state
1106 xxlor 32+27, 0, 0 # restore roundkey 0
1107 vxor 15, 15, 27 # IV + round key - add round key 0
1116 addi 5, 5, -128
1123 lxv 32+1, 0(10) # round key
1127 lxv 32+1, 0(10) # last round key (v1)
1129 addi 12, 12, -1
1130 cmpdi 12, 0
1143 lxvb16x 32+23, 0, 14 # load block
1162 stxvb16x 47, 0, 9 # store output
1163 stxvb16x 48, 15, 9 # store output
1164 stxvb16x 49, 16, 9 # store output
1165 stxvb16x 50, 17, 9 # store output
1166 stxvb16x 51, 18, 9 # store output
1167 stxvb16x 52, 19, 9 # store output
1168 stxvb16x 53, 20, 9 # store output
1169 stxvb16x 54, 21, 9 # store output
1170 addi 9, 9, 128
1173 vxor 15, 23, 0
1182 #vxor 15, 15, 0
1183 PPC_GHASH4x 0, 15, 16, 17, 18
1185 vxor 19, 19, 0
1186 PPC_GHASH4x 0, 19, 20, 21, 22
1188 xxlor 30+32, 9, 9 # last ctr
1190 stxvb16x 32+30, 0, 7 # update IV
1191 stxvb16x 32+0, 0, 8 # update Xi
1193 addi 5, 5, -128
1200 cmpdi 5, 0
1204 li 24, 0 # decrypt
1206 cmpdi 5, 0
1210 cmpdi 5, 0
1214 cmpdi 5, 0
1227 li 3, 0
1235 .long 0x4c5d6e7f, 0x08192a3b, 0xc4d5e6f7, 0x8091a2b3