// xref: /aosp_15_r20/external/boringssl/src/gen/bcm/aesv8-armv7-linux.S (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
.fpu	neon
.code	32
#undef	__thumb2__
.align	5
@ Constants for the key expansion code, which reads them through r3:
@   row 0: 0x01 in all four lanes, the initial round constant (loaded to q1)
@   row 1: byte-index mask used with vtbl.8 (loaded to q2)
@   row 2: 0x1b in all four lanes, reloaded to q1 for the last two
@          AES-128 rounds
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

@ aes_hw_set_encrypt_key: expand a user key into an AES encryption key
@ schedule.
@
@ In:   r0 = user key bytes
@       r1 = key size in bits; only 128, 192 and 256 are accepted
@       r2 = key schedule output; the round count (10/12/14) is stored at
@            byte offset 240 from the start of the schedule
@ Out:  r0 = 0 on success, -2 if the key size is rejected
@ Uses: r1, r3, r12, q0-q3, q8-q10 and the condition flags
@
@ .Lenc_key is a local entry point for aes_hw_set_decrypt_key. On success
@ this code leaves r2 = schedule + 240 and r12 = round count, which that
@ caller depends on.
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	mov	r3,#-2			@ status for a rejected key size
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f		@ must be a multiple of 64 bits
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192			@ flags pick the 128/192/256 path below

	veor	q0,q0,q0		@ q0 = 0, used as aese key and vext filler
	vld1.8	{q3},[r0]!
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = round constant, q2 = vtbl mask

	blt	.Loop128
	beq	.L192
	b	.L256

.align	4
.Loop128:
	@ Eight iterations; q1 is doubled per byte each time (0x01 ... 0x80).
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]		@ switch to the 0x1b row of .Lrcon

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]		@ last round key, no post-increment
	add	r2,r2,#0x50		@ r2 = schedule + 240

	mov	r12,#10
	b	.Ldone

.align	4
.L192:
	vld1.8	{d16},[r0]!		@ remaining 8 bytes of the 192-bit key
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

.Loop192:
	@ Eight iterations, 24 bytes of schedule stored per iteration.
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ r2 = schedule + 240
	b	.Ldone

.align	4
.L256:
	vld1.8	{q8},[r0]		@ second half of the 256-bit key
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	@ Seven iterations; the last one leaves through the beq below with
	@ r2 = schedule + 240.
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ round count at schedule + 240
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

@ aes_hw_set_decrypt_key: build an AES decryption key schedule.
@
@ In:   r0 = user key bytes, r1 = key size in bits, r2 = key schedule output
@       (passed unchanged to the key expansion at .Lenc_key)
@ Out:  r0 = 0 on success, otherwise the non-zero status from .Lenc_key
@
@ After the encryption schedule is produced, the round keys are reversed
@ in place. The first and last round keys are only swapped; every other
@ round key also goes through aesimc.
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ stride for walking r0 backwards
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ Swap the first and last round keys without transforming them.
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

.Loop_imc:
	@ r2 walks up, r0 walks down; swap and transform until they meet.
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc

	@ Middle round key: transformed in place.
	vld1.32	{q0},[r2]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
@ aes_hw_encrypt: encrypt a single 16-byte block.
@
@ In:   r0 = input block
@       r1 = output block
@       r2 = key schedule; the round count is read from byte offset 240
@ Uses: r2, r3, q0-q2 and the condition flags
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_enc:
	@ Two rounds per iteration, alternating the keys held in q0 and q1.
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

	@ Last two rounds: the final one has no aesmc and ends with an xor
	@ of the last round key.
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_encrypt,.-aes_hw_encrypt
@ aes_hw_decrypt: decrypt a single 16-byte block.
@
@ In:   r0 = input block
@       r1 = output block
@       r2 = key schedule; the round count is read from byte offset 240
@ Uses: r2, r3, q0-q2 and the condition flags
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_dec:
	@ Two rounds per iteration, alternating the keys held in q0 and q1.
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

	@ Last two rounds: the final one has no aesimc and ends with an xor
	@ of the last round key.
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_decrypt,.-aes_hw_decrypt
@ aes_hw_cbc_encrypt: AES-CBC encryption or decryption of whole blocks.
@
@ In:   r0 = input
@       r1 = output
@       r2 = length in bytes; rounded down to a multiple of 16. If it is
@            below 16 the function returns without processing anything.
@       r3 = key schedule; the round count is read from byte offset 240
@       first stack argument  = pointer to the 16-byte IV (kept in r4)
@       second stack argument = direction (r5): zero decrypts,
@                               non-zero encrypts
@ Out:  the IV at r4 is overwritten with the chaining value for a
@       following call (not on the early-return path).
@ Saves and restores r4-r8 and d8-d15.
@
@ Register roles set up before the direction split:
@   q8,q9   = first two round keys        q10-q15 = six round keys before
@   q7      = last round key                        the last one
@   q6      = IV / chaining value         r8 = input post-increment
@   r5      = round count - 8
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lcbc_abort
	moveq	r8,#0			@ single block: do not advance r0

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec		@ flags still from cmp r5,#0 above

	cmp	r5,#2			@ r5 == 2 means a 10-round key
	veor	q0,q0,q6
	veor	q5,q8,q7		@ q5 = first round key ^ last round key
	beq	.Lcbc_enc128

	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	@ q8 here is the next input block already xored with q5, so this
	@ aese also folds in the previous ciphertext block.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4			@ r5 == 4 means a 12-round key
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0			@ last block loaded: stop advancing r0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block
	bhs	.Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

.align	5
.Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc128:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0			@ last block loaded: stop advancing r0
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done
.align	5
.Lcbc_dec:
	@ Decryption handles three blocks per iteration (q0, q1, q10) and
	@ keeps copies of the ciphertext in q2, q3, q11 for the chaining xor.
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

.Loop3x_cbc_dec:
.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7		@ chaining value ^ last round key
	subs	r2,r2,#0x30
	veor	q5,q2,q7
	movlo	r6,r2			@ r6 is zero at this point
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11		@ new chaining value = last ciphertext
	mov	r7,r3
.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec		@ flags from subs r2,r2,#0x30 above

	cmn	r2,#0x30		@ r2 == -0x30: nothing left over
	beq	.Lcbc_done
	nop

.Lcbc_dec_tail:
	@ One or two blocks remain, in q1 and q10.
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ r2 == -0x20: exactly one block left
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	.Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back to the IV
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
@ aes_hw_ctr32_encrypt_blocks: AES-CTR over whole blocks with a 32-bit
@ counter.
@
@ In:   r0 = input
@       r1 = output
@       r2 = number of 16-byte blocks
@       r3 = key schedule; the round count is read from byte offset 240
@       first stack argument = pointer to the 16-byte counter block (r4)
@ Saves and restores r4-r10 and d8-d15.
@
@ Only the last 32-bit word of the counter block is incremented; it is
@ byte-reversed on little-endian builds so the arithmetic is done on a
@ big-endian value. The counter block at r4 is only read, never written.
@
@ NOTE(review): r2 == 0 does not look like it is handled; the tail path
@ would still load input and store output. Confirm callers never pass
@ zero blocks.
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]		@ r8 = counter word
	vld1.32	{q0},[r4]

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2			@ flags consumed by movlo and bls below
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0			@ fewer than two blocks: tail re-reads block 0

	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	@ affected by silicon errata #1742098 [0] and #1655431 [1],
	@ respectively, where the second instruction of an aese/aesmc
	@ instruction pair may execute twice if an interrupt is taken right
	@ after the first instruction consumes an input register of which a
	@ single 32-bit lane has been updated the last time it was modified.
	@
	@ This function uses a counter in one 32-bit lane. The vmov.32 lines
	@ could write to q1 and q10 directly, but that trips these bugs.
	@ We write to q6 and copy to the final register as a workaround.
	@
	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	r8, r8
#endif
	add	r10, r8, #1
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d13[1],r10		@ d13[1] is the last word of q6
	add	r8, r8, #2
	vorr	q1,q6,q6
	bls	.Lctr32_tail		@ two blocks or fewer
	rev	r12, r8
	vmov.32	d13[1],r12
	sub	r2,r2,#3		@ bias
	vorr	q10,q6,q6
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	@ Three counter blocks per iteration: q0, q1, q10.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	add	r9,r8,#1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	rev	r9,r9
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	 @ Note the logic to update q0, q1, and q10 is written to work
	 @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	 @ 32-bit mode. See the errata comment earlier in this function.
	veor	q11,q11,q7
	vmov.32	d13[1], r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q0,q6,q6
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	vmov.32	d13[1], r10
	rev	r12,r8
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vorr	q1,q6,q6
	vmov.32	d13[1], r12
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q10,q6,q6
	subs	r2,r2,#3
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32		@ flags from subs r2,r2,#3 above

	adds	r2,r2,#3		@ undo the bias: r2 = blocks left (0..2)
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ one block left: tail re-reads it

.Lctr32_tail:
	@ One or two blocks, in q0 and q1.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12		@ r12 is 16, or 0 when one block is left
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done		@ exactly one block: skip the second store
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif  // __ARM_MAX_ARCH__>=7
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
