Lines Matching +full:0 +full:- +full:32

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2024- IBM Inc.
22 # Hash keys = v3 - v14
29 # v31 - counter 1
32 # vs0 - round key 0
35 # This implementation uses a stitched AES-GCM approach to improve overall performance.
52 stxv \VRS+32, \OFFSET(\FRAME)
60 lxv \VRS+32, \OFFSET(\FRAME)
64 mflr 0
65 std 0, 16(1)
66 stdu 1,-512(1)
81 SAVE_VRS 20, 0, 9
83 SAVE_VRS 22, 32, 9
97 RESTORE_VRS 20, 0, 9
99 RESTORE_VRS 22, 32, 9
123 ld 0, 16(1)
124 mtlr 0
148 xxlor 32+23, 1, 1
149 xxlor 32+24, 2, 2
150 xxlor 32+25, 3, 3
151 xxlor 32+26, 4, 4
156 xxlor 32+23, 5, 5
157 xxlor 32+24, 6, 6
158 xxlor 32+25, 7, 7
159 xxlor 32+26, 8, 8
175 # Hash keys = v3 - v14
176 # Scratch: v23 - v29
210 xxlor 32+25, 10, 10
250 xxlor 32+25, 10, 10
263 # Hash keys = v3 - v14
267 lxvb16x 32, 0, 8 # load Xi
269 # load Hash - h^4, h^3, h^2, h
270 li 10, 32
271 lxvd2x 2+32, 10, 8 # H Poli
273 lxvd2x 3+32, 10, 8 # Hl
275 lxvd2x 4+32, 10, 8 # H
277 lxvd2x 5+32, 10, 8 # Hh
280 lxvd2x 6+32, 10, 8 # H^2l
282 lxvd2x 7+32, 10, 8 # H^2
284 lxvd2x 8+32, 10, 8 # H^2h
287 lxvd2x 9+32, 10, 8 # H^3l
289 lxvd2x 10+32, 10, 8 # H^3
291 lxvd2x 11+32, 10, 8 # H^3h
294 lxvd2x 12+32, 10, 8 # H^4l
296 lxvd2x 13+32, 10, 8 # H^4
298 lxvd2x 14+32, 10, 8 # H^4h
305 # vs0: roundkey 0
317 xxlxor 32+15, 32+30, 0
319 # Pre-load 8 AES rounds to scratch vectors.
320 xxlor 32+16, 1, 1
321 xxlor 32+17, 2, 2
322 xxlor 32+18, 3, 3
323 xxlor 32+19, 4, 4
324 xxlor 32+20, 5, 5
325 xxlor 32+21, 6, 6
326 xxlor 32+28, 7, 7
327 xxlor 32+29, 8, 8
329 addi 22, 23, -9 # remaining AES rounds
331 cmpdi 12, 0
348 lxv 32+1, 0(10)
352 lxv 32+1, 0(10) # last round key
353 lxvb16x 11, 0, 14 # load input block
356 xxlxor 32+15, 32+15, 11
357 stxvb16x 32+15, 0, 9 # store output
361 cmpdi 24, 0 # decrypt?
363 xxlor 15+32, 11, 11
365 vxor 15, 15, 0
366 PPC_GHASH1x 0, 15
368 addi 5, 5, -16
372 xxlxor 32+15, 32+30, 0
373 addi 12, 12, -1
374 cmpdi 12, 0
377 stxvb16x 32+30, 0, 7 # update IV
378 stxvb16x 32+0, 0, 8 # update Xi
393 vspltisb 16, -1
397 mtvsrdd 32+17, 0, 12
400 lxvb16x 11, 0, 14 # load partial block
401 xxland 11, 11, 32+16
404 xxlxor 32+15, 32+30, 0
406 addi 22, 23, -1 # loop - 1
411 lxv 32+1, 0(10)
415 lxv 32+1, 0(10) # last round key
418 xxlxor 32+15, 32+15, 11
424 stxvb16x 15+32, 10, 1 # write v15 to stack
426 addi 12, 9, -1
433 cmpdi 24, 0 # decrypt?
435 xxlor 32+15, 11, 11 # decrypt using the input block
437 #vxor 15, 15, 0 # ^ previous hash
438 #PPC_GHASH1x 0, 15
444 li 5, 0 # done last byte
450 stxvb16x 32+30, 0, 7 # update IV
452 stxvb16x 32+0, 0, 8 # Update Xi
453 stxvb16x 32+15, 10, 7 # Update pblock
462 # - Compute ghash for a full block. Clear Partial_len and pblock. Update IV.
464 # - Don't compute ghash if not full block. gcm_update will take care of it
473 li 17, 0
479 li 17, 0
485 li 17, 0
490 vspltisb 16, -1
492 mtvsrdd 32+17, 0, 15
496 mtvsrdd 32+17, 0, 15
500 lxvb16x 32+19, 0, 14 # load partial block
502 mtvsrdd 32+17, 0, 15
503 vsro 19, 19, 17 # 0x00..xxxx??..??
505 mtvsrdd 32+17, 0, 15
506 vsro 19, 19, 17 # 0x00..xxxx
507 vslo 19, 19, 17 # shift back to form 0x00..xxxx00..00
510 xxlxor 32+15, 32+30, 0
512 addi 22, 23, -1 # loop - 1
517 lxv 32+1, 0(10)
521 lxv 32+1, 0(10) # last round key
530 stxvb16x 15+32, 10, 1 # write v15 to stack
533 addi 15, 9, -1
546 cmpdi 24, 0 # decrypt?
555 lxvb16x 32+17, 10, 7 # load previous pblock
562 stxvb16x 32+15, 10, 7 # Update current pblock
566 li 12, 0
570 stxvb16x 32+30, 0, 7 # update IV
573 vxor 15, 15, 0
574 PPC_GHASH1x 0, 15
575 stxvb16x 32+0, 10, 7 # update pblock for debug?
576 stxvb16x 32+0, 0, 8 # update Xi
581 # gcm_update(iv, Xi) - compute last hash
587 cmpdi 10, 0
590 lxvb16x 32, 0, 4 # load Xi
591 # load Hash - h^4, h^3, h^2, h
592 li 10, 32
593 lxvd2x 2+32, 10, 4 # H Poli
595 lxvd2x 3+32, 10, 4 # Hl
597 lxvd2x 4+32, 10, 4 # H
599 lxvd2x 5+32, 10, 4 # Hh
603 lxv 10, 0(11) # vs10: vpermxor vector
606 lxvb16x 32+6, 9, 3 # load pblock
607 vxor 6, 6, 0
618 xxlor 32+15, 10, 10
625 #vxor 0, 0, 0
626 #stxvb16x 32+0, 9, 3
627 li 10, 0
629 stxvb16x 32+7, 0, 4
639 # r3 - inp
640 # r4 - out
641 # r5 - len
642 # r6 - AES round keys
643 # r7 - iv and other data
644 # r8 - Xi, HPoli, hash keys
647 # Xi is at 0 in gcm_table (Xip).
652 cmpdi 5, 0
658 # initialize ICB: GHASH( IV ), IV - r7
659 lxvb16x 30+32, 0, 7 # load IV - v30
671 lxv 10, 0(11) # vs10: vpermxor vector
672 li 11, 0
675 lxv 0, 0(6) # round key 0
677 lxv 2, 32(6) # round key 2
685 # load rounds - 10 (128), 12 (192), 14 (256)
694 cmpdi 12, 0
705 divdu 12, 5, 10 # n = number of 128-byte blocks
707 addi 12, 12, -1 # loop - 1
717 xxlor 9, 32+22, 32+22 # save last state
720 xxlor 32+29, 0, 0
721 vxor 15, 15, 29 # IV + round key - add round key 0
731 li 16, 32
739 # Pre-compute first 8 AES state and leave 1/3/5 more rounds
742 addi 22, 23, -9 # process 8 keys
749 lxv 32+1, 0(10) # round key
753 lxv 32+1, 0(10) # last round key (v1)
755 cmpdi 12, 0 # Only one loop (8 block)
771 lxvb16x 32+23, 0, 14 # load block
772 lxvb16x 32+24, 15, 14 # load block
773 lxvb16x 32+25, 16, 14 # load block
774 lxvb16x 32+26, 17, 14 # load block
775 lxvb16x 32+27, 18, 14 # load block
776 lxvb16x 32+28, 19, 14 # load block
777 lxvb16x 32+29, 20, 14 # load block
778 lxvb16x 32+30, 21, 14 # load block
790 stxvb16x 47, 0, 9 # store output
801 vxor 15, 15, 0
802 PPC_GHASH4x 0, 15, 16, 17, 18
804 vxor 19, 19, 0
805 PPC_GHASH4x 0, 19, 20, 21, 22
807 xxlor 32+15, 9, 9 # last state
816 xxlor 9, 32+22, 32+22 # save last state
818 xxlor 32+27, 0, 0 # restore roundkey 0
819 vxor 15, 15, 27 # IV + round key - add round key 0
828 addi 5, 5, -128
835 lxv 32+1, 0(10) # round key
839 lxv 32+1, 0(10) # last round key (v1)
841 addi 12, 12, -1
842 cmpdi 12, 0
855 lxvb16x 32+23, 0, 14 # load block
856 lxvb16x 32+24, 15, 14 # load block
857 lxvb16x 32+25, 16, 14 # load block
858 lxvb16x 32+26, 17, 14 # load block
859 lxvb16x 32+27, 18, 14 # load block
860 lxvb16x 32+28, 19, 14 # load block
861 lxvb16x 32+29, 20, 14 # load block
862 lxvb16x 32+30, 21, 14 # load block
874 stxvb16x 47, 0, 9 # store output
884 vxor 15, 15, 0
885 PPC_GHASH4x 0, 15, 16, 17, 18
887 vxor 19, 19, 0
888 PPC_GHASH4x 0, 19, 20, 21, 22
890 xxlor 30+32, 9, 9 # last ctr
892 stxvb16x 32+30, 0, 7 # update IV
893 stxvb16x 32+0, 0, 8 # update Xi
895 addi 5, 5, -128
902 cmpdi 5, 0
908 cmpdi 5, 0
912 cmpdi 5, 0
916 cmpdi 5, 0
930 cmpdi 5, 0
936 # initialize ICB: GHASH( IV ), IV - r7
937 lxvb16x 30+32, 0, 7 # load IV - v30
949 lxv 10, 0(11) # vs10: vpermxor vector
950 li 11, 0
953 lxv 0, 0(6) # round key 0
955 lxv 2, 32(6) # round key 2
963 # load rounds - 10 (128), 12 (192), 14 (256)
965 li 24, 0 # decrypt
972 cmpdi 12, 0
983 divdu 12, 5, 10 # n = number of 128-byte blocks
985 addi 12, 12, -1 # loop - 1
995 xxlor 9, 32+22, 32+22 # save last state
998 xxlor 32+29, 0, 0
999 vxor 15, 15, 29 # IV + round key - add round key 0
1009 li 16, 32
1017 # Pre-compute first 8 AES state and leave 1/3/5 more rounds
1020 addi 22, 23, -9 # process 8 keys
1027 lxv 32+1, 0(10) # round key
1031 lxv 32+1, 0(10) # last round key (v1)
1033 cmpdi 12, 0 # Only one loop (8 block)
1049 lxvb16x 32+23, 0, 14 # load block
1050 lxvb16x 32+24, 15, 14 # load block
1051 lxvb16x 32+25, 16, 14 # load block
1052 lxvb16x 32+26, 17, 14 # load block
1053 lxvb16x 32+27, 18, 14 # load block
1054 lxvb16x 32+28, 19, 14 # load block
1055 lxvb16x 32+29, 20, 14 # load block
1056 lxvb16x 32+30, 21, 14 # load block
1068 stxvb16x 47, 0, 9 # store output
1089 vxor 15, 15, 0
1090 PPC_GHASH4x 0, 15, 16, 17, 18
1092 vxor 19, 19, 0
1093 PPC_GHASH4x 0, 19, 20, 21, 22
1095 xxlor 32+15, 9, 9 # last state
1104 xxlor 9, 32+22, 32+22 # save last state
1106 xxlor 32+27, 0, 0 # restore roundkey 0
1107 vxor 15, 15, 27 # IV + round key - add round key 0
1116 addi 5, 5, -128
1123 lxv 32+1, 0(10) # round key
1127 lxv 32+1, 0(10) # last round key (v1)
1129 addi 12, 12, -1
1130 cmpdi 12, 0
1143 lxvb16x 32+23, 0, 14 # load block
1144 lxvb16x 32+24, 15, 14 # load block
1145 lxvb16x 32+25, 16, 14 # load block
1146 lxvb16x 32+26, 17, 14 # load block
1147 lxvb16x 32+27, 18, 14 # load block
1148 lxvb16x 32+28, 19, 14 # load block
1149 lxvb16x 32+29, 20, 14 # load block
1150 lxvb16x 32+30, 21, 14 # load block
1162 stxvb16x 47, 0, 9 # store output
1173 vxor 15, 23, 0
1182 #vxor 15, 15, 0
1183 PPC_GHASH4x 0, 15, 16, 17, 18
1185 vxor 19, 19, 0
1186 PPC_GHASH4x 0, 19, 20, 21, 22
1188 xxlor 30+32, 9, 9 # last ctr
1190 stxvb16x 32+30, 0, 7 # update IV
1191 stxvb16x 32+0, 0, 8 # update Xi
1193 addi 5, 5, -128
1200 cmpdi 5, 0
1204 li 24, 0 # decrypt
1206 cmpdi 5, 0
1210 cmpdi 5, 0
1214 cmpdi 5, 0
1227 li 3, 0
1235 .long 0x4c5d6e7f, 0x08192a3b, 0xc4d5e6f7, 0x8091a2b3