// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
#include <ring-core/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
.fpu	neon
.code	32
#undef	__thumb2__
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b
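@ Key-expansion constants: the first row is the initial round constant
@ (doubled each round with vshl.u8), the second row is the vtbl byte-index
@ pattern {13,14,15,12} that rotates the last 32-bit word and splats it
@ across the vector, and the third row (0x1b) seeds the round constants
@ for the final AES-128 expansion rounds.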

.text

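@ aes_hw_set_encrypt_key: expand a 128- or 256-bit AES key (192-bit support
@ was removed).  Arguments: r0 = user key bytes, r1 = key size in bits,
@ r2 = output key schedule.  Returns 0 in r0 on success, -1 if a pointer
@ argument is NULL, and -2 if the key size is unsupported.  The round count
@ (10 or 14) is stored after the round keys (see .Ldone).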
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	mov	r3,#-1
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192

	veor	q0,q0,q0
	vld1.8	{q3},[r0]!
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!

	blt	.Loop128
	@ 192-bit key support was removed.
	b	.L256

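@ AES-128 key expansion: each pass through .Loop128 derives one round key
@ in q3.  vtbl.8 with the rotate-n-splat pattern performs RotWord on the
@ last word, aese against an all-zero round key applies the S-box (SubWord;
@ ShiftRows is harmless on a splatted vector), the result is XORed with the
@ round constant in q1, and the vext/veor chain folds in the previous words.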
.align	4
.Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	bne	.Loop128

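@ The last two AES-128 rounds are unrolled below; they reload q1 from the
@ 0x1b row of .Lrcon, giving round constants 0x1b and (after the shift) 0x36.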
	vld1.32	{q1},[r3]

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]
	add	r2,r2,#0x50

	mov	r12,#10
	b	.Ldone

@ 192-bit key support was removed.

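@ AES-256 key expansion: the second 16 bytes of the key are loaded into q8,
@ and each pass through .Loop256 produces two round keys.  The first half
@ uses the RotWord/SubWord/rcon step as above; the second half splats the
@ last word with vdup.32 and applies only the S-box (no RotWord, no round
@ constant), as the AES-256 schedule requires.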
.align	4
.L256:
	vld1.8	{q8},[r0]
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

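@ Store the round count (10 for AES-128, 14 for AES-256) in the rounds
@ field that follows the round keys, and return 0.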
.Ldone:
	str	r12,[r2]
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
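@ aes_hw_encrypt: encrypt a single 16-byte block.  Arguments: r0 = input
@ block, r1 = output block, r2 = key schedule (round count at offset 240).
@ The aese/aesmc instructions are kept adjacent so cores that fuse the pair
@ can do so; the final round has no MixColumns, so the last aese is followed
@ by a plain XOR with the last round key.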
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_encrypt,.-aes_hw_encrypt
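@ aes_hw_ctr32_encrypt_blocks: CTR-mode encryption of whole 16-byte blocks.
@ Arguments: r0 = input, r1 = output, r2 = number of blocks, r3 = key
@ schedule; the fifth argument, the 16-byte counter block, is taken from the
@ caller's stack through ip.  Only the last 32-bit word of the counter block
@ (big-endian) is incremented.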
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]
	vld1.32	{q0},[r4]

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0

	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	@ affected by silicon errata #1742098 [0] and #1655431 [1],
	@ respectively, where the second instruction of an aese/aesmc
	@ instruction pair may execute twice if an interrupt is taken right
	@ after the first instruction consumes an input register of which a
	@ single 32-bit lane has been updated the last time it was modified.
	@
	@ This function uses a counter in one 32-bit lane. The vmov.32
	@ lines below could write to q1 and q10 directly, but that trips
	@ these bugs. We write to q6 and copy to the final register as a
	@ workaround.
	@
	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	r8, r8
#endif
	add	r10, r8, #1
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d13[1],r10
	add	r8, r8, #2
	vorr	q1,q6,q6
	bls	.Lctr32_tail
	rev	r12, r8
	vmov.32	d13[1],r12
	sub	r2,r2,#3		@ bias
	vorr	q10,q6,q6
	b	.Loop3x_ctr32

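@ Main loop: three blocks per iteration, with the aese/aesmc pairs of the
@ three states interleaved to hide their latency.  q6 carries the counter
@ block and is copied into q0/q1/q10 (see the errata note above); the last
@ round keys q12-q15 and q7 are applied outside the inner round loop.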
.align	4
.Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	add	r9,r8,#1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	rev	r9,r9
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	 @ Note the logic to update q0, q1, and q10 is written to work
	 @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	 @ 32-bit mode. See the comment above.
	veor	q11,q11,q7
	vmov.32	d13[1], r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q0,q6,q6
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	vmov.32	d13[1], r10
	rev	r12,r8
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vorr	q1,q6,q6
	vmov.32	d13[1], r12
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q10,q6,q6
	subs	r2,r2,#3
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32

	adds	r2,r2,#3
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0

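@ Tail: encrypt the final one or two counter blocks.  r12 is 16 when two
@ blocks remain and 0 when only one does, so the second vld1.8 re-reads the
@ same input block rather than reading past the end; its result is only
@ stored when two blocks were requested.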
.Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)