1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <ring-core/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
7#include <ring-core/arm_arch.h>
8
#if __ARM_MAX_ARCH__>=7
.text
.arch	armv8-a+crypto

// Constant table used by the key-expansion code below (addressed via
// adrp/add :lo12:.Lrcon).  Layout, one 16-byte row per .long line:
//   row 0: initial AES round constant 0x01, splatted across all four
//          lanes; the expansion loops double it each iteration with shl.
//   row 1: byte-index mask for tbl that rotates the last 32-bit word of
//          the previous round key and splats it into every lane
//          ("rotate-n-splat").
//   row 2: round constant 0x1b, reloaded for the last two 128-bit-key
//          iterations, where continuing the shl doubling would overflow
//          a byte.
.section	.rodata
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text
20
//-----------------------------------------------------------------------
// int aes_hw_set_encrypt_key(const uint8_t *user_key, // x0
//                            int bits,                // w1
//                            AES_KEY *key);           // x2
//
// Expands |user_key| into an encryption round-key schedule at |key|
// using the Armv8 Crypto Extensions.  Return value (x0):
//    0  success
//   -1  user_key or key is NULL
//   -2  bits out of range (accepted: 128 <= bits <= 256 with the low
//       six bits clear)
// The round count (10 for 128-bit, 14 for 256-bit keys) is stored at
// byte offset 240 of |key|, after the round keys themselves.
//
// NOTE(review): bits == 192 passes the range checks below but falls
// through to the 256-bit path (192-bit support was removed upstream);
// callers are expected never to pass 192 — confirm at call sites.
//
// Register roles after validation:
//   x3  .Lrcon pointer, later reused as the return value
//   w1  reused as the expansion loop counter
//   v0  always zero (so aese v*,v0 reduces to SubBytes+ShiftRows)
//   v1  current round constant, v2 rotate-n-splat tbl mask
//-----------------------------------------------------------------------
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// provisional return: NULL argument
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2			// provisional return: bad bit count
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f		// bits must be a multiple of 64
	b.ne	.Lenc_key_abort

	adrp	x3,.Lrcon		// x3 = &.Lrcon (overwrites error code)
	add	x3,x3,:lo12:.Lrcon
	cmp	w1,#192

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0, used as a null round key
	ld1	{v3.16b},[x0],#16	// first 128 bits of the user key
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon row, v2 = tbl mask

	b.lt	.Loop128
	// 192-bit key support was removed.
	b	.L256

// 128-bit keys: 8 loop iterations plus two unrolled ones below produce
// round keys 1..10; with the raw key that is the full 11-entry schedule.
// Per iteration: tbl+aese compute SubWord(RotWord(last word)) splatted
// into every lane, the three ext/eor pairs build the running XOR of the
// previous round key's words, and shl doubles the round constant.
.align	4
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b	// rotate-n-splat last word
	ext	v5.16b,v0.16b,v3.16b,#12	// previous key <<32 (zero fill)
	st1	{v3.4s},[x2],#16	// store current round key
	aese	v6.16b,v0.16b		// SubBytes (round key is zero)
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// add round constant
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// next rcon = rcon * 2
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	ld1	{v1.4s},[x3]		// reload rcon = 0x1b for last rounds

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16	// round key 9
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// 0x1b -> 0x36
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16	// round key 10
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// round key 11 (no post-increment)
	add	x2,x2,#0x50		// advance to offset 240: rounds field

	mov	w12,#10			// 10 rounds for AES-128
	b	.Ldone

// 192-bit key support was removed.

// 256-bit keys: v3/v4 hold the two key halves; each .Loop256 iteration
// stores one and derives the next.  Even steps use rotate-n-splat plus
// the round constant; odd steps (after the b.eq) use a plain splat of
// the last word (dup) with SubWord and no round constant.
.align	4
.L256:
	ld1	{v4.16b},[x0]		// second 128 bits of the user key
	mov	w1,#7			// 7 double-iterations
	mov	w12,#14			// 14 rounds for AES-256
	st1	{v3.4s},[x2],#16	// store round key 0 (first key half)

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16	// store second key half
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// add round constant
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16	// store derived key half
	b.eq	.Ldone			// 15th round key stored -> finished

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b		// SubWord, no rotate/rcon this step

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// key->rounds at byte offset 240
	mov	x3,#0			// success

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16
	ret
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
//-----------------------------------------------------------------------
// void aes_hw_encrypt(const uint8_t *in,    // x0
//                     uint8_t *out,         // x1
//                     const AES_KEY *key);  // x2
//
// Encrypts one 16-byte block with the expanded schedule at |key|.
// w3 = round count, read from byte offset 240 of |key|.  The loop
// performs two full rounds (aese+aesmc) per iteration while
// pre-loading the next two round keys; the final round (aese without
// MixColumns) plus the last-round-key XOR is done after the loop.
//-----------------------------------------------------------------------
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = rounds
	ld1	{v0.4s},[x2],#16	// round key 0
	ld1	{v2.16b},[x0]		// plaintext block
	sub	w3,w3,#2		// loop processes rounds in pairs
	ld1	{v1.4s},[x2],#16	// round key 1

.Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16	// pre-load next even round key
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16	// pre-load next odd round key
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b		// final round: no MixColumns
	eor	v2.16b,v2.16b,v0.16b	// final AddRoundKey

	st1	{v2.16b},[x1]		// write ciphertext
	ret
.size	aes_hw_encrypt,.-aes_hw_encrypt
//-----------------------------------------------------------------------
// void aes_hw_ctr32_encrypt_blocks(const uint8_t *in,      // x0
//                                  uint8_t *out,           // x1
//                                  size_t len,             // x2
//                                  const AES_KEY *key,     // x3
//                                  const uint8_t ivec[16]); // x4
//
// CTR-mode encryption with a 32-bit big-endian counter in the last
// four bytes of |ivec|.  |len| is a block count: the main loop eats
// three 16-byte blocks per iteration, with a 1- or 2-block tail
// (presumably len > 0; there is no explicit zero check — confirm at
// call sites).
//
// Register roles:
//   w8        counter value in host byte order
//   v0/v1/v18 the three in-flight counter blocks
//   v16/v17   rotating pair of round keys for the inner loops
//   v20..v23  the four round keys before the last, v7 the last one;
//             plaintext is XOR-ed with v7 up front so the final aese
//             (no aesmc) plus the keystream XOR completes each block
//   w5        rounds-6 (inner loops advance two rounds per pass, w6 is
//             the per-pass countdown reset from w5)
//   x12       input stride for the tail: 16, or 0 for a single block
//-----------------------------------------------------------------------
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	ldr	w5,[x3,#240]		// w5 = rounds

	ldr	w8, [x4, #12]		// big-endian counter word
	ld1	{v0.4s},[x4]		// whole IV/counter block

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// v7 = last round key
	add	x7,x3,#32		// rewind to round key 2
	mov	w6,w5
	csel	x12,xzr,x12,lo		// tail stride 0 if len < 2

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The vmov lines
	// could write to v1.16b and v18.16b directly, but that trips this bugs.
	// We write to v6.16b and copy to the final register as a workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __AARCH64EB__
	rev	w8, w8			// counter to host byte order
#endif
	add	w10, w8, #1		// second counter block = ctr+1
	orr	v6.16b,v0.16b,v0.16b	// staging copy (errata workaround)
	rev	w10, w10
	mov	v6.s[3],w10
	add	w8, w8, #2
	orr	v1.16b,v6.16b,v6.16b
	b.ls	.Lctr32_tail		// len <= 2: skip the 3-way loop
	rev	w12, w8
	mov	v6.s[3],w12		// third counter block = ctr+2
	sub	x2,x2,#3		// bias
	orr	v18.16b,v6.16b,v6.16b
	b	.Loop3x_ctr32

// Main loop: v0/v1/v18 carry three counter blocks through the cipher
// in parallel, two rounds per pass of the inner loop below.
.align	4
.Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	// Last six rounds, interleaved with input loads, counter updates
	// for the next three blocks, and the pre-XOR of plaintext with v7.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b		// move state to v4/v5 so v0/v1
	aese	v1.16b,v16.16b		// are free for the next counters
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16	// plaintext block 0
	add	w9,w8,#1
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16	// plaintext block 1
	rev	w9,w9
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16	// plaintext block 2
	mov	x7,x3			// rewind key-schedule pointer
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b		// third state continues in v17
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// pre-XOR with last round key
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	 // Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
	 // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	 // 32-bit mode. See the comment above.
	eor	v19.16b,v19.16b,v7.16b
	mov	v6.s[3], w9		// next counter blocks via v6 staging
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	orr	v0.16b,v6.16b,v6.16b
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	mov	v6.s[3], w10
	rev	w12,w8
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	orr	v1.16b,v6.16b,v6.16b
	mov	v6.s[3], w12
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	orr	v18.16b,v6.16b,v6.16b
	subs	x2,x2,#3
	aese	v4.16b,v23.16b		// final round: no MixColumns
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b	// (pt ^ v7) ^ state = ciphertext
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5			// reset inner round countdown
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32

	adds	x2,x2,#3		// undo bias; 0..2 blocks remain
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// stride 0 when only one block left

// Tail: encrypt the remaining one or two counter blocks (v0, v1).
// For a single block the zero stride makes v2 and v3 load the same
// input; the second result is computed but never stored.
.Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12	// advance only if two blocks remain
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b	// pre-XOR with last round key
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b		// final round: no MixColumns
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done		// single block: drop second result
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16
	ret
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
390#endif
391#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)
392