Lines Matching +full:- +full:32

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2024- IBM Inc.
22 # Hash keys = v3 - v14
29 # v31 - counter 1
32 # vs0 - round key 0
35 # This implementation uses a stitched AES-GCM approach to improve overall performance.
52 stxv \VRS+32, \OFFSET(\FRAME)
60 lxv \VRS+32, \OFFSET(\FRAME)
66 stdu 1,-512(1)
83 SAVE_VRS 22, 32, 9
99 RESTORE_VRS 22, 32, 9
148 xxlor 32+23, 1, 1
149 xxlor 32+24, 2, 2
150 xxlor 32+25, 3, 3
151 xxlor 32+26, 4, 4
156 xxlor 32+23, 5, 5
157 xxlor 32+24, 6, 6
158 xxlor 32+25, 7, 7
159 xxlor 32+26, 8, 8
175 # Hash keys = v3 - v14
176 # Scratch: v23 - v29
210 xxlor 32+25, 10, 10
250 xxlor 32+25, 10, 10
263 # Hash keys = v3 - v14
267 lxvb16x 32, 0, 8 # load Xi
269 # load Hash - h^4, h^3, h^2, h
270 li 10, 32
271 lxvd2x 2+32, 10, 8 # H Poli
273 lxvd2x 3+32, 10, 8 # Hl
275 lxvd2x 4+32, 10, 8 # H
277 lxvd2x 5+32, 10, 8 # Hh
280 lxvd2x 6+32, 10, 8 # H^2l
282 lxvd2x 7+32, 10, 8 # H^2
284 lxvd2x 8+32, 10, 8 # H^2h
287 lxvd2x 9+32, 10, 8 # H^3l
289 lxvd2x 10+32, 10, 8 # H^3
291 lxvd2x 11+32, 10, 8 # H^3h
294 lxvd2x 12+32, 10, 8 # H^4l
296 lxvd2x 13+32, 10, 8 # H^4
298 lxvd2x 14+32, 10, 8 # H^4h
317 xxlxor 32+15, 32+30, 0
319 # Pre-load 8 AES rounds to scratch vectors.
320 xxlor 32+16, 1, 1
321 xxlor 32+17, 2, 2
322 xxlor 32+18, 3, 3
323 xxlor 32+19, 4, 4
324 xxlor 32+20, 5, 5
325 xxlor 32+21, 6, 6
326 xxlor 32+28, 7, 7
327 xxlor 32+29, 8, 8
329 addi 22, 23, -9 # remaining AES rounds
348 lxv 32+1, 0(10)
352 lxv 32+1, 0(10) # last round key
356 xxlxor 32+15, 32+15, 11
357 stxvb16x 32+15, 0, 9 # store output
363 xxlor 15+32, 11, 11
368 addi 5, 5, -16
372 xxlxor 32+15, 32+30, 0
373 addi 12, 12, -1
377 stxvb16x 32+30, 0, 7 # update IV
378 stxvb16x 32+0, 0, 8 # update Xi
393 vspltisb 16, -1
397 mtvsrdd 32+17, 0, 12
401 xxland 11, 11, 32+16
404 xxlxor 32+15, 32+30, 0
406 addi 22, 23, -1 # loop - 1
411 lxv 32+1, 0(10)
415 lxv 32+1, 0(10) # last round key
418 xxlxor 32+15, 32+15, 11
424 stxvb16x 15+32, 10, 1 # write v15 to stack
426 addi 12, 9, -1
435 xxlor 32+15, 11, 11 # decrypt using the input block
450 stxvb16x 32+30, 0, 7 # update IV
452 stxvb16x 32+0, 0, 8 # update Xi
453 stxvb16x 32+15, 10, 7 # Update pblock
462 # - Compute ghash for a full block. Clear Partial_len and pblock. Update IV.
464 # - Don't compute ghash if not a full block; gcm_update will take care of it.
490 vspltisb 16, -1
492 mtvsrdd 32+17, 0, 15
496 mtvsrdd 32+17, 0, 15
500 lxvb16x 32+19, 0, 14 # load partial block
502 mtvsrdd 32+17, 0, 15
505 mtvsrdd 32+17, 0, 15
510 xxlxor 32+15, 32+30, 0
512 addi 22, 23, -1 # loop - 1
517 lxv 32+1, 0(10)
521 lxv 32+1, 0(10) # last round key
530 stxvb16x 15+32, 10, 1 # write v15 to stack
533 addi 15, 9, -1
555 lxvb16x 32+17, 10, 7 # load previous pblock
562 stxvb16x 32+15, 10, 7 # Update current pblock
570 stxvb16x 32+30, 0, 7 # update IV
575 stxvb16x 32+0, 10, 7 # update pblock for debug?
576 stxvb16x 32+0, 0, 8 # update Xi
581 # gcm_update(iv, Xi) - compute last hash
590 lxvb16x 32, 0, 4 # load Xi
591 # load Hash - h^4, h^3, h^2, h
592 li 10, 32
593 lxvd2x 2+32, 10, 4 # H Poli
595 lxvd2x 3+32, 10, 4 # Hl
597 lxvd2x 4+32, 10, 4 # H
599 lxvd2x 5+32, 10, 4 # Hh
606 lxvb16x 32+6, 9, 3 # load pblock
618 xxlor 32+15, 10, 10
626 #stxvb16x 32+0, 9, 3
629 stxvb16x 32+7, 0, 4
639 # r3 - inp
640 # r4 - out
641 # r5 - len
642 # r6 - AES round keys
643 # r7 - iv and other data
644 # r8 - Xi, HPoli, hash keys
658 # initialize ICB: GHASH( IV ), IV - r7
659 lxvb16x 30+32, 0, 7 # load IV - v30
677 lxv 2, 32(6) # round key 2
685 # load rounds - 10 (128), 12 (192), 14 (256)
705 divdu 12, 5, 10 # number of 128-byte blocks
707 addi 12, 12, -1 # loop - 1
717 xxlor 9, 32+22, 32+22 # save last state
720 xxlor 32+29, 0, 0
721 vxor 15, 15, 29 # IV + round key - add round key 0
731 li 16, 32
739 # Pre-compute first 8 AES state and leave 1/3/5 more rounds
742 addi 22, 23, -9 # process 8 keys
749 lxv 32+1, 0(10) # round key
753 lxv 32+1, 0(10) # last round key (v1)
771 lxvb16x 32+23, 0, 14 # load block
772 lxvb16x 32+24, 15, 14 # load block
773 lxvb16x 32+25, 16, 14 # load block
774 lxvb16x 32+26, 17, 14 # load block
775 lxvb16x 32+27, 18, 14 # load block
776 lxvb16x 32+28, 19, 14 # load block
777 lxvb16x 32+29, 20, 14 # load block
778 lxvb16x 32+30, 21, 14 # load block
807 xxlor 32+15, 9, 9 # last state
816 xxlor 9, 32+22, 32+22 # save last state
818 xxlor 32+27, 0, 0 # restore roundkey 0
819 vxor 15, 15, 27 # IV + round key - add round key 0
828 addi 5, 5, -128
835 lxv 32+1, 0(10) # round key
839 lxv 32+1, 0(10) # last round key (v1)
841 addi 12, 12, -1
855 lxvb16x 32+23, 0, 14 # load block
856 lxvb16x 32+24, 15, 14 # load block
857 lxvb16x 32+25, 16, 14 # load block
858 lxvb16x 32+26, 17, 14 # load block
859 lxvb16x 32+27, 18, 14 # load block
860 lxvb16x 32+28, 19, 14 # load block
861 lxvb16x 32+29, 20, 14 # load block
862 lxvb16x 32+30, 21, 14 # load block
890 xxlor 30+32, 9, 9 # last ctr
892 stxvb16x 32+30, 0, 7 # update IV
893 stxvb16x 32+0, 0, 8 # update Xi
895 addi 5, 5, -128
936 # initialize ICB: GHASH( IV ), IV - r7
937 lxvb16x 30+32, 0, 7 # load IV - v30
955 lxv 2, 32(6) # round key 2
963 # load rounds - 10 (128), 12 (192), 14 (256)
983 divdu 12, 5, 10 # number of 128-byte blocks
985 addi 12, 12, -1 # loop - 1
995 xxlor 9, 32+22, 32+22 # save last state
998 xxlor 32+29, 0, 0
999 vxor 15, 15, 29 # IV + round key - add round key 0
1009 li 16, 32
1017 # Pre-compute first 8 AES state and leave 1/3/5 more rounds
1020 addi 22, 23, -9 # process 8 keys
1027 lxv 32+1, 0(10) # round key
1031 lxv 32+1, 0(10) # last round key (v1)
1049 lxvb16x 32+23, 0, 14 # load block
1050 lxvb16x 32+24, 15, 14 # load block
1051 lxvb16x 32+25, 16, 14 # load block
1052 lxvb16x 32+26, 17, 14 # load block
1053 lxvb16x 32+27, 18, 14 # load block
1054 lxvb16x 32+28, 19, 14 # load block
1055 lxvb16x 32+29, 20, 14 # load block
1056 lxvb16x 32+30, 21, 14 # load block
1095 xxlor 32+15, 9, 9 # last state
1104 xxlor 9, 32+22, 32+22 # save last state
1106 xxlor 32+27, 0, 0 # restore roundkey 0
1107 vxor 15, 15, 27 # IV + round key - add round key 0
1116 addi 5, 5, -128
1123 lxv 32+1, 0(10) # round key
1127 lxv 32+1, 0(10) # last round key (v1)
1129 addi 12, 12, -1
1143 lxvb16x 32+23, 0, 14 # load block
1144 lxvb16x 32+24, 15, 14 # load block
1145 lxvb16x 32+25, 16, 14 # load block
1146 lxvb16x 32+26, 17, 14 # load block
1147 lxvb16x 32+27, 18, 14 # load block
1148 lxvb16x 32+28, 19, 14 # load block
1149 lxvb16x 32+29, 20, 14 # load block
1150 lxvb16x 32+30, 21, 14 # load block
1188 xxlor 30+32, 9, 9 # last ctr
1190 stxvb16x 32+30, 0, 7 # update IV
1191 stxvb16x 32+0, 0, 8 # update Xi
1193 addi 5, 5, -128