// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <[email protected]> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

// Original code can be found at the link below:
// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl

// Some function names were changed to be consistent with Go function
// names. For instance, the functions aes_p8_set_{en,de}crypt_key became
// set{En,De}cryptKeyAsm. setEncryptKeyAsm was also split in two parts,
// and a new function (doEncryptKeyAsm) was created. This was necessary to
// avoid overwriting the arguments when setDecryptKeyAsm calls setEncryptKeyAsm.
// There were other modifications as well, but the functionality is the same.

#include "textflag.h"

// For expandKeyAsm
#define INP     R3
#define BITS    R4
#define OUTENC  R5 // Pointer to next expanded encrypt key
#define PTR     R6
#define CNT     R7
#define ROUNDS  R8
#define OUTDEC  R9  // Pointer to next expanded decrypt key
#define TEMP    R19
#define ZERO    V0
#define IN0     V1
#define IN1     V2
#define KEY     V3
#define RCON    V4
#define MASK    V5
#define TMP     V6
#define STAGE   V7
#define OUTPERM V8
#define OUTMASK V9
#define OUTHEAD V10
#define OUTTAIL V11

// For P9 instruction emulation
#define ESPERM  V21  // Endian swapping permute into BE
#define TMP2    V22  // Temporary for P8_STXVB16X

// For {en,de}cryptBlockAsm
#define BLK_INP    R3
#define BLK_OUT    R4
#define BLK_KEY    R5
#define BLK_ROUNDS R6
#define BLK_IDX    R7

DATA ·rcon+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // Permute for vector doubleword endian swap
DATA ·rcon+0x08(SB)/8, $0x0706050403020100
DATA ·rcon+0x10(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x18(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x20(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x28(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x30(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x38(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x40(SB)/8, $0x0000000000000000
DATA ·rcon+0x48(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $80

#ifdef GOARCH_ppc64le
#  ifdef GOPPC64_power9
#define P8_LXVB16X(RA,RB,VT)  LXVB16X	(RA+RB), VT
#define P8_STXVB16X(VS,RA,RB) STXVB16X	VS, (RA+RB)
#define XXBRD_ON_LE(VA,VT)    XXBRD	VA, VT
#  else
// On POWER8/ppc64le, emulate the POWER9 instructions by loading the
// vector with LXVD2X/STXVD2X and permuting its bytes into BE order.
#define NEEDS_ESPERM
#define P8_LXVB16X(RA,RB,VT) \
	LXVD2X	(RA+RB), VT \
	VPERM	VT, VT, ESPERM, VT

#define P8_STXVB16X(VS,RA,RB) \
	VPERM	VS, VS, ESPERM, TMP2 \
	STXVD2X	TMP2, (RA+RB)

#define XXBRD_ON_LE(VA,VT) \
	VPERM	VA, VA, ESPERM, VT

#  endif // defined(GOPPC64_power9)
#else
#define P8_LXVB16X(RA,RB,VT)  LXVD2X	(RA+RB), VT
#define P8_STXVB16X(VS,RA,RB) STXVD2X	VS, (RA+RB)
#define XXBRD_ON_LE(VA, VT)
#endif // defined(GOARCH_ppc64le)

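// In all configurations these macros provide BE-ordered vector accesses:
// P8_LXVB16X(RA,RB,VT) loads the 16 bytes at RA+RB into VT in big-endian
// byte order, and P8_STXVB16X(VS,RA,RB) stores VS back the same way.
// XXBRD_ON_LE(VA,VT) reorders bytes only on little-endian hosts; on
// big-endian targets it expands to nothing.
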
// func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
	// Load the arguments into registers
	MOVD	nr+0(FP), ROUNDS
	MOVD	key+8(FP), INP
	MOVD	enc+16(FP), OUTENC
	MOVD	dec+24(FP), OUTDEC

#ifdef NEEDS_ESPERM
	MOVD	$·rcon(SB), PTR // PTR points to rcon addr
	LVX	(PTR), ESPERM
	ADD	$0x10, PTR
#else
	MOVD	$·rcon+0x10(SB), PTR // PTR points to rcon addr (skipping permute vector)
#endif

	// Get key from memory and write aligned into VR
	P8_LXVB16X(INP, R0, IN0)
	ADD	$0x10, INP, INP
	MOVD	$0x20, TEMP

	CMPW	ROUNDS, $12
	LVX	(PTR)(R0), RCON    // lvx   4,0,6      Load first 16 bytes into RCON
	LVX	(PTR)(TEMP), MASK
	ADD	$0x10, PTR, PTR    // addi  6,6,0x10   PTR to next 16 bytes of RCON
	MOVD	$8, CNT            // li    7,8        CNT = 8
	VXOR	ZERO, ZERO, ZERO   // vxor  0,0,0      Zero to be zero :)
	MOVD	CNT, CTR           // mtctr 7          Set the counter to 8 (rounds)

	// The expanded decrypt key is the expanded encrypt key stored in reverse order.
	// Move OUTDEC to the last key location, and store in descending order.
	ADD	$160, OUTDEC, OUTDEC
	BLT	loop128            // ROUNDS < 12: AES-128
	ADD	$32, OUTDEC, OUTDEC
	BEQ	l192               // ROUNDS == 12: AES-192
	ADD	$32, OUTDEC, OUTDEC
	JMP	l256               // ROUNDS == 14: AES-256
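	// Whichever path is taken, OUTDEC now points at the slot of the last
	// round key (dec + 16*ROUNDS), so the decrypt keys can be stored in
	// descending order while OUTENC advances in ascending order.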

loop128:
	// Key schedule (Round 1 to 8)
	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5         Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0       // vxor 1,1,3
	BDNZ	loop128

	LVX	(PTR)(R0), RCON // lvx 4,0,6     Last two round keys

	// Key schedule (Round 9)
	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5   Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	// Key schedule (Round 10)
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0       // vxor 1,1,3

	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5   Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	// Key schedule (Round 11)
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)

	RET

l192:
	LXSDX	(INP+R0), IN1                    // Load next 8 bytes into upper half of VSR.
	XXBRD_ON_LE(IN1, IN1)                    // and convert to BE ordering on LE hosts.
	MOVD	$4, CNT                          // li 7,4
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	VSPLTISB	$8, KEY                  // vspltisb 3,8
	MOVD	CNT, CTR                         // mtctr 7
	VSUBUBM	MASK, KEY, MASK                  // vsububm 5,5,3   Adjust the rotate mask for the 8-byte key tail.

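	// Each loop192 iteration derives three more round keys: two are
	// assembled in STAGE from the 8-byte tail of IN1 combined with IN0,
	// and one comes from IN0 directly. Four iterations plus the initial
	// key stored above yield the 13 round keys of AES-192.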
loop192:
	VPERM	IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VCIPHERLAST	KEY, RCON, KEY      // vcipherlast 3,3,4

	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6

	VSLDOI	$8, ZERO, IN1, STAGE  // vsldoi 7,0,2,8
	VSPLTW	$3, IN0, TMP          // vspltw 6,1,3
	VXOR	TMP, IN1, TMP         // vxor 6,6,2
	VSLDOI	$12, ZERO, IN1, IN1   // vsldoi 2,0,2,12
	VADDUWM	RCON, RCON, RCON      // vadduwm 4,4,4
	VXOR	IN1, TMP, IN1         // vxor 2,2,6
	VXOR	IN0, KEY, IN0         // vxor 1,1,3
	VXOR	IN1, KEY, IN1         // vxor 2,2,3
	VSLDOI	$8, STAGE, IN0, STAGE // vsldoi 7,7,1,8

	VPERM	IN1, IN1, MASK, KEY              // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	STXVD2X	STAGE, (R0+OUTENC)
	STXVD2X	STAGE, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VSLDOI	$8, IN0, IN1, STAGE              // vsldoi 7,1,2,8
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	STXVD2X	STAGE, (R0+OUTENC)
	STXVD2X	STAGE, (R0+OUTDEC)
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VSPLTW	$3, IN0, TMP                     // vspltw 6,1,3
	VXOR	TMP, IN1, TMP                    // vxor 6,6,2
	VSLDOI	$12, ZERO, IN1, IN1              // vsldoi 2,0,2,12
	VADDUWM	RCON, RCON, RCON                 // vadduwm 4,4,4
	VXOR	IN1, TMP, IN1                    // vxor 2,2,6
	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
	VXOR	IN1, KEY, IN1                    // vxor 2,2,3
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	BDNZ	loop192

	RET

l256:
	P8_LXVB16X(INP, R0, IN1)
	MOVD	$7, CNT                          // li 7,7
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	MOVD	CNT, CTR                         // mtctr 7

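	// Each loop256 pass stores IN1 and then a freshly derived IN0. With
	// the IN0 store above, seven passes produce the 15 round keys of
	// AES-256; BDZ exits before deriving an IN1 that would go unused.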
loop256:
	VPERM	IN1, IN1, MASK, KEY              // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	STXVD2X	IN1, (R0+OUTENC)
	STXVD2X	IN1, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON                 // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	BDZ	done

	VSPLTW	$3, IN0, KEY        // vspltw 3,1,3
	VSLDOI	$12, ZERO, IN1, TMP // vsldoi 6,0,2,12
	VSBOX	KEY, KEY            // vsbox 3,3

	VXOR	IN1, TMP, IN1       // vxor 2,2,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN1, TMP, IN1       // vxor 2,2,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN1, TMP, IN1       // vxor 2,2,6

	VXOR	IN1, KEY, IN1 // vxor 2,2,3
	JMP	loop256       // b .Loop256

done:
	RET

// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	MOVD	nr+0(FP), R6   // Round count/Key size
	MOVD	xk+8(FP), R5   // Key pointer
	MOVD	dst+16(FP), R3 // Dest pointer
	MOVD	src+24(FP), R4 // Src pointer
#ifdef NEEDS_ESPERM
	MOVD	$·rcon(SB), R7
	LVX	(R7), ESPERM   // Permute value for P8_ macros.
#endif

	// Set CR{1,2,3}EQ to hold the key size information.
	CMPU	R6, $10, CR1
	CMPU	R6, $12, CR2
	CMPU	R6, $14, CR3

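	// R6-R12 hold the byte offsets 16..112 used for indexed
	// loads of the expanded key below.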
	MOVD	$16, R6
	MOVD	$32, R7
	MOVD	$48, R8
	MOVD	$64, R9
	MOVD	$80, R10
	MOVD	$96, R11
	MOVD	$112, R12

	// Load text in BE order
	P8_LXVB16X(R4, R0, V0)

	// V1, V2 will hold keys, V0 is a temp.
	// At completion, V2 will hold the ciphertext.
	// Load xk[0:3] and xor with text
	LXVD2X	(R0+R5), V1
	VXOR	V0, V1, V0

	// Load xk[4:11] and cipher
	LXVD2X	(R6+R5), V1
	LXVD2X	(R7+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[12:19] and cipher
	LXVD2X	(R8+R5), V1
	LXVD2X	(R9+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[20:27] and cipher
	LXVD2X	(R10+R5), V1
	LXVD2X	(R11+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Increment xk pointer to reuse constant offsets in R6-R12.
	ADD	$112, R5

	// Load xk[28:35] and cipher
	LXVD2X	(R0+R5), V1
	LXVD2X	(R6+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[36:43] and cipher
	LXVD2X	(R7+R5), V1
	LXVD2X	(R8+R5), V2
	BEQ	CR1, Lenc_tail // Key size 10?
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[44:51] and cipher
	LXVD2X	(R9+R5), V1
	LXVD2X	(R10+R5), V2
	BEQ	CR2, Lenc_tail // Key size 12?
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[52:59] and cipher
	LXVD2X	(R11+R5), V1
	LXVD2X	(R12+R5), V2
	BNE	CR3, Linvalid_key_len // Not key size 14?
	// Fallthrough to final cipher

Lenc_tail:
	// Cipher the last two keys so that key information is
	// cleared from V1 and V2.
	VCIPHER		V0, V1, V1
	VCIPHERLAST	V1, V2, V2

	// Store the result in BE order.
	P8_STXVB16X(V2, R3, R0)
	RET

Linvalid_key_len:
	// Segfault, this should never happen. Only 3 key sizes are created/used.
	MOVD	R0, 0(R0)
	RET

// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	MOVD	nr+0(FP), R6   // Round count/Key size
	MOVD	xk+8(FP), R5   // Key pointer
	MOVD	dst+16(FP), R3 // Dest pointer
	MOVD	src+24(FP), R4 // Src pointer
#ifdef NEEDS_ESPERM
	MOVD	$·rcon(SB), R7
	LVX	(R7), ESPERM   // Permute value for P8_ macros.
#endif

	// Set CR{1,2,3}EQ to hold the key size information.
	CMPU	R6, $10, CR1
	CMPU	R6, $12, CR2
	CMPU	R6, $14, CR3

	MOVD	$16, R6
	MOVD	$32, R7
	MOVD	$48, R8
	MOVD	$64, R9
	MOVD	$80, R10
	MOVD	$96, R11
	MOVD	$112, R12

	// Load text in BE order
	P8_LXVB16X(R4, R0, V0)

	// V1, V2 will hold keys, V0 is a temp.
	// At completion, V2 will hold the text.
	// Load xk[0:3] and xor with ciphertext
	LXVD2X	(R0+R5), V1
	VXOR	V0, V1, V0

	// Load xk[4:11] and cipher
	LXVD2X	(R6+R5), V1
	LXVD2X	(R7+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[12:19] and cipher
	LXVD2X	(R8+R5), V1
	LXVD2X	(R9+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[20:27] and cipher
	LXVD2X	(R10+R5), V1
	LXVD2X	(R11+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Increment xk pointer to reuse constant offsets in R6-R12.
	ADD	$112, R5

	// Load xk[28:35] and cipher
	LXVD2X	(R0+R5), V1
	LXVD2X	(R6+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[36:43] and cipher
	LXVD2X	(R7+R5), V1
	LXVD2X	(R8+R5), V2
	BEQ	CR1, Ldec_tail // Key size 10?
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[44:51] and cipher
	LXVD2X	(R9+R5), V1
	LXVD2X	(R10+R5), V2
	BEQ	CR2, Ldec_tail // Key size 12?
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[52:59] and cipher
	LXVD2X	(R11+R5), V1
	LXVD2X	(R12+R5), V2
	BNE	CR3, Linvalid_key_len // Not key size 14?
	// Fallthrough to final cipher

Ldec_tail:
	// Cipher the last two keys so that key information is
	// cleared from V1 and V2.
	VNCIPHER	V0, V1, V1
	VNCIPHERLAST	V1, V2, V2

	// Store the result in BE order.
	P8_STXVB16X(V2, R3, R0)
	RET

Linvalid_key_len:
	// Segfault, this should never happen. Only 3 key sizes are created/used.
	MOVD	R0, 0(R0)
	RET

// Remove defines from above so they can be redefined here.
#undef INP
#undef OUTENC
#undef ROUNDS
#undef KEY
#undef TMP

#define INP R3
#define OUTP R4
#define LEN R5
#define KEYP R6
#define ROUNDS R7
#define IVP R8
#define ENC R9

#define INOUT V2
#define TMP V3
#define IVEC V4

// Load the crypt key into VSRs.
//
// The expanded key is stored and loaded using
// STXVD2X/LXVD2X. The in-memory byte ordering
// depends on the endianness of the machine. The
// expanded keys are generated by expandKeyAsm above.
//
// Rkeyp holds the key pointer. It is clobbered. Once
// the expanded keys are loaded, it is not needed.
//
// R12, R14-R21 are scratch registers.
// For a key size of 10 rounds, V6, V11-V20 hold the expanded key.
// For a key size of 12 rounds, V6, V9-V20 hold the expanded key.
// For a key size of 14 rounds, V6, V7-V20 hold the expanded key.
#define LOAD_KEY(Rkeyp) \
	MOVD	$16, R12 \
	MOVD	$32, R14 \
	MOVD	$48, R15 \
	MOVD	$64, R16 \
	MOVD	$80, R17 \
	MOVD	$96, R18 \
	MOVD	$112, R19 \
	MOVD	$128, R20 \
	MOVD	$144, R21 \
	LXVD2X	(R0+Rkeyp), V6 \
	ADD	$16, Rkeyp \
	BEQ	CR1, L_start10 \
	BEQ	CR2, L_start12 \
	LXVD2X	(R0+Rkeyp), V7 \
	LXVD2X	(R12+Rkeyp), V8 \
	ADD	$32, Rkeyp \
	L_start12: \
	LXVD2X	(R0+Rkeyp), V9 \
	LXVD2X	(R12+Rkeyp), V10 \
	ADD	$32, Rkeyp \
	L_start10: \
	LXVD2X	(R0+Rkeyp), V11 \
	LXVD2X	(R12+Rkeyp), V12 \
	LXVD2X	(R14+Rkeyp), V13 \
	LXVD2X	(R15+Rkeyp), V14 \
	LXVD2X	(R16+Rkeyp), V15 \
	LXVD2X	(R17+Rkeyp), V16 \
	LXVD2X	(R18+Rkeyp), V17 \
	LXVD2X	(R19+Rkeyp), V18 \
	LXVD2X	(R20+Rkeyp), V19 \
	LXVD2X	(R21+Rkeyp), V20

// Perform the AES cipher operation for key sizes 10/12/14 using the keys
// loaded by LOAD_KEY, and the key size information held in CR1EQ/CR2EQ.
//
// Vxor is ideally V6 (Key[0-3]), but for slightly improved encryption
// performance V6 and IVEC can be swapped (xor is both associative and
// commutative) during encryption:
//
//	VXOR INOUT, IVEC, INOUT
//	VXOR INOUT, V6, INOUT
//
//	into
//
//	VXOR INOUT, V6, INOUT
//	VXOR INOUT, IVEC, INOUT
//
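// Lcbc_enc below uses that swapped form: it xors V6 into the block
// first, then passes IVEC as Vxor.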
#define CIPHER_BLOCK(Vin, Vxor, Vout, vcipher, vciphel, label10, label12) \
	VXOR	Vin, Vxor, Vout \
	BEQ	CR1, label10 \
	BEQ	CR2, label12 \
	vcipher	Vout, V7, Vout \
	vcipher	Vout, V8, Vout \
	label12: \
	vcipher	Vout, V9, Vout \
	vcipher	Vout, V10, Vout \
	label10: \
	vcipher	Vout, V11, Vout \
	vcipher	Vout, V12, Vout \
	vcipher	Vout, V13, Vout \
	vcipher	Vout, V14, Vout \
	vcipher	Vout, V15, Vout \
	vcipher	Vout, V16, Vout \
	vcipher	Vout, V17, Vout \
	vcipher	Vout, V18, Vout \
	vcipher	Vout, V19, Vout \
	vciphel	Vout, V20, Vout

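// Zero the VSRs that held expanded key material so no key bytes remain
// in registers on return.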
#define CLEAR_KEYS() \
	VXOR	V6, V6, V6 \
	VXOR	V7, V7, V7 \
	VXOR	V8, V8, V8 \
	VXOR	V9, V9, V9 \
	VXOR	V10, V10, V10 \
	VXOR	V11, V11, V11 \
	VXOR	V12, V12, V12 \
	VXOR	V13, V13, V13 \
	VXOR	V14, V14, V14 \
	VXOR	V15, V15, V15 \
	VXOR	V16, V16, V16 \
	VXOR	V17, V17, V17 \
	VXOR	V18, V18, V18 \
	VXOR	V19, V19, V19 \
	VXOR	V20, V20, V20

// func cryptBlocksChain(src, dst *byte, length int, key *uint32, iv *byte, enc int, nr int)
TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0
	MOVD	src+0(FP), INP
	MOVD	dst+8(FP), OUTP
	MOVD	length+16(FP), LEN
	MOVD	key+24(FP), KEYP
	MOVD	iv+32(FP), IVP
	MOVD	enc+40(FP), ENC
	MOVD	nr+48(FP), ROUNDS

#ifdef NEEDS_ESPERM
	MOVD	$·rcon(SB), R11
	LVX	(R11), ESPERM   // Permute value for P8_ macros.
#endif

	// Assume len > 0 && len % blockSize == 0.
	CMPW	ENC, $0
	P8_LXVB16X(IVP, R0, IVEC)
	CMPU	ROUNDS, $10, CR1
	CMPU	ROUNDS, $12, CR2 // Only sizes 10/12/14 are supported.

	// Setup key in VSRs, and set loop count in CTR.
	LOAD_KEY(KEYP)
	SRD	$4, LEN // Number of 16-byte blocks.
	MOVD	LEN, CTR

	BEQ	Lcbc_dec // enc == 0: decrypt

	PCALIGN $16
Lcbc_enc:
	P8_LXVB16X(INP, R0, INOUT)
	ADD	$16, INP
	VXOR	INOUT, V6, INOUT
	CIPHER_BLOCK(INOUT, IVEC, INOUT, VCIPHER, VCIPHERLAST, Lcbc_enc10, Lcbc_enc12)
	VOR	INOUT, INOUT, IVEC // ciphertext (INOUT) is IVEC for next block.
	P8_STXVB16X(INOUT, OUTP, R0)
	ADD	$16, OUTP
	BDNZ	Lcbc_enc

	P8_STXVB16X(INOUT, IVP, R0)
	CLEAR_KEYS()
	RET

	PCALIGN $16
Lcbc_dec:
	P8_LXVB16X(INP, R0, TMP)
	ADD	$16, INP
	CIPHER_BLOCK(TMP, V6, INOUT, VNCIPHER, VNCIPHERLAST, Lcbc_dec10, Lcbc_dec12)
	VXOR	INOUT, IVEC, INOUT
	VOR	TMP, TMP, IVEC // TMP is IVEC for next block.
	P8_STXVB16X(INOUT, OUTP, R0)
	ADD	$16, OUTP
	BDNZ	Lcbc_dec

	P8_STXVB16X(IVEC, IVP, R0)
	CLEAR_KEYS()
	RET