aes-gcm-p10.S - OpenGrok cross reference for /linux-6.14.4/arch/powerpc/crypto/aes-gcm-p10.S

Lines Matching +full:2 +full:- +full:9
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2024- IBM Inc.
14 #    X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
22 # Hash keys = v3 - v14
24 #     ( H^2.l, H^2, H^2.h)
29 # v31 - counter 1
32 #     vs0 - round key 0
35 # This implementation uses stitched AES-GCM approach to improve overall performance.
36 # AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
66 	stdu 1,-512(1)
80 	addi	9, 1, 256
81 	SAVE_VRS 20, 0, 9
82 	SAVE_VRS 21, 16, 9
83 	SAVE_VRS 22, 32, 9
84 	SAVE_VRS 23, 48, 9
85 	SAVE_VRS 24, 64, 9
86 	SAVE_VRS 25, 80, 9
87 	SAVE_VRS 26, 96, 9
88 	SAVE_VRS 27, 112, 9
89 	SAVE_VRS 28, 128, 9
90 	SAVE_VRS 29, 144, 9
91 	SAVE_VRS 30, 160, 9
92 	SAVE_VRS 31, 176, 9
96 	addi	9, 1, 256
97 	RESTORE_VRS 20, 0, 9
98 	RESTORE_VRS 21, 16, 9
99 	RESTORE_VRS 22, 32, 9
100 	RESTORE_VRS 23, 48, 9
101 	RESTORE_VRS 24, 64, 9
102 	RESTORE_VRS 25, 80, 9
103 	RESTORE_VRS 26, 96, 9
104 	RESTORE_VRS 27, 112, 9
105 	RESTORE_VRS 28, 128, 9
106 	RESTORE_VRS 29, 144, 9
107 	RESTORE_VRS 30, 160, 9
108 	RESTORE_VRS 31, 176, 9
131 	\_VCIPHER	\ST+2, \ST+2, \r
139 	\_VCIPHER	\ST+2, \ST+2, \r
149 	xxlor	32+24, 2, 2
175 # Hash keys = v3 - v14
176 # Scratch: v23 - v29
181 	vpmsumd	24, 9, \S2
200 	vpmsumd	28, 23, 2		# reduction
226 	vpmsumd	23, 23, 2
243 	vpmsumd	27, 22, 2		# reduction
254 	vpmsumd	22, 22, 2		# reduction
263 # Hash keys = v3 - v14
269 	# load Hash - h^4, h^3, h^2, h
271 	lxvd2x	2+32, 10, 8	# H Poli
280 	lxvd2x	6+32, 10, 8	# H^2l
282 	lxvd2x	7+32, 10, 8	# H^2
284 	lxvd2x	8+32, 10, 8	# H^2h
287 	lxvd2x	9+32, 10, 8	# H^3l
319 	# Pre-load 8 AES rounds to scratch vectors.
321 	xxlor	32+17, 2, 2
329 	addi	22, 23, -9	# remaining AES rounds
357 	stxvb16x 32+15, 0, 9	# store output
359 	addi	9, 9, 16
368 	addi	5, 5, -16
373 	addi	12, 12, -1
393 	vspltisb 16, -1
406 	addi	22, 23, -1		# loop - 1
426 	addi	12, 9, -1
441 	add	9, 9, 5
462 # - Compute ghash for a full block. Clear Partial_len and pblock. Update IV.
464 # - Don't compute ghash if not full block.  gcm_update will take care of it
490 	vspltisb 16, -1
512 	addi	22, 23, -1	# loop - 1
533 	addi	15, 9, -1
542 	add	9, 9, 21
581 # gcm_update(iv, Xi) - compute last hash
591 	# load Hash - h^4, h^3, h^2, h
593 	lxvd2x	2+32, 10, 4	# H Poli
601 	addis	11, 2, permx@toc@ha
605 	li	9, 64
606 	lxvb16x 32+6, 9, 3		# load pblock
613 	vpmsumd	17, 12, 2		# reduction
621 	vpmsumd	12, 12, 2		# reduction
626 	#stxvb16x 32+0, 9, 3
639 #    r3 - inp
640 #    r4 - out
641 #    r5 - len
642 #    r6 - AES round keys
643 #    r7 - iv and other data
644 #    r8 - Xi, HPoli, hash keys
658 	# initialize ICB: GHASH( IV ), IV - r7
659 	lxvb16x	30+32, 0, 7	# load IV  - v30
662 	mr	9, 4
669 	addis	11, 2, permx@toc@ha
674 	# load 9 round keys to VSR
677 	lxv	2, 32(6)		# round key 2
685 	# load rounds - 10 (128), 12 (192), 14 (256)
705 	divdu	12, 5, 10	# n 128 bytes-blocks
707 	addi	12, 12, -1	# loop - 1
717 	xxlor	9, 32+22, 32+22	# save last state
721         vxor    15, 15, 29      # IV + round key - add round key 0
739 	# Pre-compute first 8 AES state and leave 1/3/5 more rounds
742 	addi	22, 23, -9		# process 8 keys
790 	stxvb16x 47, 0, 9	# store output
791 	stxvb16x 48, 15, 9	# store output
792 	stxvb16x 49, 16, 9	# store output
793 	stxvb16x 50, 17, 9	# store output
794 	stxvb16x 51, 18, 9	# store output
795 	stxvb16x 52, 19, 9	# store output
796 	stxvb16x 53, 20, 9	# store output
797 	stxvb16x 54, 21, 9	# store output
798 	addi	9, 9, 128
807 	xxlor	32+15, 9, 9		# last state
816 	xxlor	9, 32+22, 32+22		# save last state
819         vxor    15, 15, 27		# IV + round key - add round key 0
828 	addi    5, 5, -128
841 	addi	12, 12, -1
874 	stxvb16x 47, 0, 9	# store output
875 	stxvb16x 48, 15, 9	# store output
876 	stxvb16x 49, 16, 9	# store output
877 	stxvb16x 50, 17, 9	# store output
878 	stxvb16x 51, 18, 9	# store output
879 	stxvb16x 52, 19, 9	# store output
880 	stxvb16x 53, 20, 9	# store output
881 	stxvb16x 54, 21, 9	# store output
882 	addi	9, 9, 128
890 	xxlor	30+32, 9, 9		# last ctr
895 	addi    5, 5, -128
936 	# initialize ICB: GHASH( IV ), IV - r7
937 	lxvb16x	30+32, 0, 7	# load IV  - v30
940 	mr	9, 4
947 	addis	11, 2, permx@toc@ha
952 	# load 9 round keys to VSR
955 	lxv	2, 32(6)		# round key 2
963 	# load rounds - 10 (128), 12 (192), 14 (256)
983 	divdu	12, 5, 10	# n 128 bytes-blocks
985 	addi	12, 12, -1	# loop - 1
995 	xxlor	9, 32+22, 32+22	# save last state
999         vxor    15, 15, 29      # IV + round key - add round key 0
1017 	# Pre-compute first 8 AES state and leave 1/3/5 more rounds
1020 	addi	22, 23, -9		# process 8 keys
1068 	stxvb16x 47, 0, 9	# store output
1069 	stxvb16x 48, 15, 9	# store output
1070 	stxvb16x 49, 16, 9	# store output
1071 	stxvb16x 50, 17, 9	# store output
1072 	stxvb16x 51, 18, 9	# store output
1073 	stxvb16x 52, 19, 9	# store output
1074 	stxvb16x 53, 20, 9	# store output
1075 	stxvb16x 54, 21, 9	# store output
1077 	addi	9, 9, 128
1095 	xxlor	32+15, 9, 9		# last state
1104 	xxlor	9, 32+22, 32+22		# save last state
1107         vxor    15, 15, 27		# IV + round key - add round key 0
1116 	addi    5, 5, -128
1129 	addi	12, 12, -1
1162 	stxvb16x 47, 0, 9	# store output
1163 	stxvb16x 48, 15, 9	# store output
1164 	stxvb16x 49, 16, 9	# store output
1165 	stxvb16x 50, 17, 9	# store output
1166 	stxvb16x 51, 18, 9	# store output
1167 	stxvb16x 52, 19, 9	# store output
1168 	stxvb16x 53, 20, 9	# store output
1169 	stxvb16x 54, 21, 9	# store output
1170 	addi	9, 9, 128
1188 	xxlor	30+32, 9, 9		# last ctr
1193 	addi    5, 5, -128