1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <ring-core/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32)
7#include <ring-core/arm_arch.h>
8
9#if __ARM_MAX_ARCH__>=7
.text
.arch	armv8-a+crypto
// Lrcon: constant table read by aes_hw_set_encrypt_key.
// Three rows of four 32-bit words each (48 bytes), aligned to 2^5 bytes:
//   row 0: 0x01 in every lane. Loaded into v1 and XORed into the
//          substituted word of each expansion step; v1 is doubled with a
//          byte-wise shl #1 after every step.
//   row 1: byte-index mask for tbl, loaded into v2. On a little-endian
//          target it selects source bytes 13,14,15,12 for every lane, i.e.
//          the last 32-bit word rotated by one byte and splatted.
//   row 2: 0x1b in every lane. Reloaded into v1 for the last two steps of
//          the 128-bit expansion, because a ninth byte-wise doubling of
//          0x01 would shift the set bit out of the byte.
.section	.rodata
.align	5
Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text
20
//-----------------------------------------------------------------------
// aes_hw_set_encrypt_key
//
// Expands a 128- or 256-bit AES key into an encryption key schedule using
// the Armv8 Crypto Extensions (tbl + aese with an all-zero round key).
//
// In:   x0 = pointer to the raw key bytes (16 bytes are read for a 128-bit
//            key, 32 bytes for a 256-bit key)
//       w1 = key length in bits; only 128 and 256 are accepted
//       x2 = pointer to the output key schedule. Round keys are written
//            from offset 0 (11 x 16 bytes for 128-bit, 15 x 16 bytes for
//            256-bit) and the round count (10 or 14) is stored as a 32-bit
//            word at offset 240.
// Out:  x0 =  0 on success
//            -1 if x0 or x2 is NULL
//            -2 if w1 is not 128 or 256 (this includes 192)
// Clobbers: x1, x2, x3, x12, v0-v6, condition flags
// Stack:    pushes x29/x30 (16 bytes); only x29 is reloaded on exit.
//
// NOTE: this file is generated. The 192-bit rejection below must also be
// applied to the generating Perl script or it is lost on regeneration.
//-----------------------------------------------------------------------
.globl	aes_hw_set_encrypt_key

.def aes_hw_set_encrypt_key
   .type 32
.endef
.align	5
aes_hw_set_encrypt_key:
Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// result for a NULL pointer argument
	cmp	x0,#0
	b.eq	Lenc_key_abort
	cmp	x2,#0
	b.eq	Lenc_key_abort
	mov	x3,#-2			// result for an unsupported key length
	cmp	w1,#128
	b.lt	Lenc_key_abort
	cmp	w1,#256
	b.gt	Lenc_key_abort
	tst	w1,#0x3f
	b.ne	Lenc_key_abort
	// The checks above still admit 192. No 192-bit expansion exists below,
	// so without this test a 192-bit request would fall into the 256-bit
	// path, read 32 bytes from a 24-byte key and report success. Reject it
	// while x3 still holds -2.
	cmp	w1,#192
	b.eq	Lenc_key_abort

	adrp	x3,Lrcon
	add	x3,x3,:lo12:Lrcon
	cmp	w1,#192			// flags consumed by b.lt below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0: zero round key and zero fill for ext
	ld1	{v3.16b},[x0],#16	// v3 = key bytes 0..15
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = 0x01 splat, v2 = rotate-n-splat mask

	b.lt	Loop128
	// 192-bit key support was removed.
	b	L256

	// 128-bit expansion. Each step stores the current round key in v3 and
	// derives the next one:
	//   v6 = last word of v3, byte-rotated and splatted (tbl), then passed
	//        through aese with a zero key. All four columns are equal, so
	//        only the byte substitution has an effect.
	//   v3 = running XOR of its own words (three ext/eor pairs, where ext
	//        shifts in one zero word), XORed with v6 and the constant v1.
	// The loop runs 8 times (w1 = 8), doubling v1 after each step.
.align	4
Loop128:
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1		// flags survive until b.ne below

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	b.ne	Loop128

	ld1	{v1.4s},[x3]		// v1 = 0x1b splat for the last two steps

	// Step 9: same computation as the loop body, constant 0x1b.
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// 0x1b -> 0x36
	eor	v3.16b,v3.16b,v6.16b

	// Step 10: constant 0x36, produces the final round key.
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// 11th round key at offset 160, no post-increment
	add	x2,x2,#0x50		// 160 + 0x50 = 240: slot for the round count

	mov	w12,#10
	b	Ldone

// 192-bit key support was removed.

	// 256-bit expansion. v3 holds key bytes 0..15 and v4 holds bytes 16..31.
	// Each loop pass stores v4, derives and stores the next v3 (rotate,
	// substitute, XOR with constant v1), then derives the next v4
	// (substitute only, no rotate and no constant). The loop leaves from
	// the middle on the 7th pass, after 1 + 7*2 = 15 round keys are stored,
	// which leaves x2 at offset 240.
.align	4
L256:
	ld1	{v4.16b},[x0]
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1		// flags survive until b.eq below

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	Ldone

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	Loop256

Ldone:
	str	w12,[x2]		// round count at schedule offset 240
	mov	x3,#0			// success

Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16
	ret
159
//-----------------------------------------------------------------------
// aes_hw_encrypt
//
// Encrypts one 16-byte block with the Armv8 AES instructions.
//
// In:   x0 = pointer to the 16-byte input block
//       x1 = pointer to the 16-byte output block
//       x2 = pointer to the key schedule: round keys from offset 0 and the
//            round count as a 32-bit word at offset 240
// Out:  16 bytes written to [x1]; no return value is set
// Clobbers: x2, w3, v0-v2, condition flags
// Leaf function, uses no stack.
// NOTE(review): the loop structure assumes an even round count of at
// least 4 (10 and 14 are what the key setup in this file stores) - confirm
// against callers.
//-----------------------------------------------------------------------
.globl	aes_hw_encrypt

.def aes_hw_encrypt
   .type 32
.endef
.align	5
aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = round count
	ld1	{v0.4s},[x2],#16	// v0 = round key 0
	ld1	{v2.16b},[x0]		// v2 = state (input block)
	sub	w3,w3,#2		// the last two rounds are done after the loop
	ld1	{v1.4s},[x2],#16	// v1 = round key 1

	// Two rounds per pass: v0 and v1 alternate as the current round key
	// while the next two keys are loaded.
Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_enc

	// Final two rounds: the last aese is not followed by aesmc, and the
	// last round key is applied with a plain eor.
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// v0 = last round key
	aese	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
192
//-----------------------------------------------------------------------
// aes_hw_ctr32_encrypt_blocks
//
// AES-CTR over whole 16-byte blocks with a 32-bit counter. The counter is
// the last 32-bit word of the 16-byte counter block, kept big-endian in
// memory; only that word is incremented (32-bit wrap-around).
//
// In:   x0 = input pointer
//       x1 = output pointer
//       x2 = number of 16-byte blocks
//       x3 = key schedule pointer (round count as 32-bit word at offset 240)
//       x4 = pointer to the 16-byte initial counter block (read only)
// Out:  x2 * 16 bytes written at [x1]; no return value is set
// Clobbers: x0, x1, x2, x5-x10, x12, v0-v7, v16-v23, condition flags
// Stack:    pushes x29/x30 (16 bytes); only x29 is reloaded on exit.
//
// Register roles:
//   v0, v1, v18  counter blocks being encrypted (v18 only in the 3x loop)
//   v6           staging copy of the counter block, see errata note below
//   v16, v17     rolling pair of round keys
//   v20-v23, v7  the last five round keys, loaded once
//   w8           counter in host byte order, w5 = rounds - 6, w6 = loop count
//   x12          post-increment for the first tail load (0 or 16)
//
// NOTE(review): x2 == 0 is not special-cased. It takes the tail path,
// which loads from [x0] and stores two blocks to [x1]. Callers presumably
// guarantee x2 >= 1 - confirm.
//-----------------------------------------------------------------------
.globl	aes_hw_ctr32_encrypt_blocks

.def aes_hw_ctr32_encrypt_blocks
   .type 32
.endef
.align	5
aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	ldr	w5,[x3,#240]		// w5 = round count

	ldr	w8, [x4, #12]		// counter word as stored in memory
	ld1	{v0.4s},[x4]		// v0 = first counter block

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2			// flags consumed by csel and b.ls below
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// v7 = last round key
	add	x7,x3,#32		// x7 -> round key 2
	mov	w6,w5
	csel	x12,xzr,x12,lo		// fewer than 2 blocks: do not advance x0 in the tail

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The lane-insert
	// (mov vN.s[3],wM) lines could write to v1.16b and v18.16b directly,
	// but that trips this bug.
	// We write to v6.16b and copy to the final register as a workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __AARCH64EB__
	rev	w8, w8
#endif
	add	w10, w8, #1
	orr	v6.16b,v0.16b,v0.16b	// v6 = copy of the counter block
	rev	w10, w10
	mov	v6.s[3],w10
	add	w8, w8, #2
	orr	v1.16b,v6.16b,v6.16b	// v1 = counter + 1
	b.ls	Lctr32_tail		// x2 <= 2 (flags from cmp x2,#2 above)
	rev	w12, w8
	mov	v6.s[3],w12
	sub	x2,x2,#3		// bias
	orr	v18.16b,v6.16b,v6.16b	// v18 = counter + 2
	b	Loop3x_ctr32

	// Main loop: three counter blocks per outer pass. The inner loop
	// applies two rounds per pass to v0, v1 and v18 and pre-loads the next
	// two round keys.
.align	4
Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_ctr32

	// Remaining rounds. The three states move to v4, v5 and v17 so that
	// v0, v1 and v18 can be refilled with the next three counter blocks
	// while the rounds finish. The inputs (v2, v3, v19) are XORed with the
	// last round key v7 up front, so one eor per block at the end produces
	// the output.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	add	w9,w8,#1		// next counter for v0
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	rev	w9,w9
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3			// rewind to round key 0
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b		// third state continues in v17
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b
	add	w10,w8,#2		// next counter for v1
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3		// next counter for v18
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	 // Note the logic to update v0.16b, v1.16b, and v18.16b is written to
	 // work around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	 // 32-bit mode. See the errata comment earlier in this function.
	eor	v19.16b,v19.16b,v7.16b
	mov	v6.s[3], w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	orr	v0.16b,v6.16b,v6.16b
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	mov	v6.s[3], w10
	rev	w12,w8
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	orr	v1.16b,v6.16b,v6.16b
	mov	v6.s[3], w12
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	orr	v18.16b,v6.16b,v6.16b
	subs	x2,x2,#3		// flags consumed by b.hs below
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5			// reset the inner round counter
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	Loop3x_ctr32		// at least three more blocks remain

	adds	x2,x2,#3		// undo the bias: x2 = blocks left (0, 1 or 2)
	b.eq	Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// one block left: do not advance x0 in the tail

	// Tail: one or two blocks. Two keystream blocks (v0, v1) are always
	// computed; the second output is stored only when x2 != 1.
Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12	// x12 = 0 makes the next load re-read this block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1			// flags consumed by b.eq below
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	Lctr32_done		// exactly one block: skip the second store
	st1	{v3.16b},[x1]

Lctr32_done:
	ldr	x29,[sp],#16
	ret
395
396#endif
397#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32)
398