1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <ring-core/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
7.text
8.extern	OPENSSL_ia32cap_P
9.hidden OPENSSL_ia32cap_P
// aes_hw_encrypt -- encrypt a single 16-byte block with AES-NI.
// Register use on entry, as read by the code below:
//   %rdi  pointer to the 16-byte input block
//   %rsi  pointer to the 16-byte output block
//   %rdx  pointer to the key schedule: 16-byte round keys starting at
//         offset 0 and a 32-bit loop count at offset 240
// Clobbers %eax, %rdx and flags; %xmm0-%xmm2 are cleared before returning.
10.globl	aes_hw_encrypt
11.hidden aes_hw_encrypt
12.type	aes_hw_encrypt,@function
13.align	16
14aes_hw_encrypt:
15.cfi_startproc
16_CET_ENDBR
17#ifdef BORINGSSL_DISPATCH_TEST
18.extern	BORINGSSL_function_hit
19.hidden BORINGSSL_function_hit
// Dispatch-test builds only: flag byte 1 of BORINGSSL_function_hit.
20	movb	$1,BORINGSSL_function_hit+1(%rip)
21#endif
// xmm2 = input block, eax = loop count, xmm0/xmm1 = first two round keys.
22	movups	(%rdi),%xmm2
23	movl	240(%rdx),%eax
24	movups	(%rdx),%xmm0
25	movups	16(%rdx),%xmm1
26	leaq	32(%rdx),%rdx
// Initial whitening with round key 0.
27	xorps	%xmm0,%xmm2
// One aesenc per iteration; the next round key is fetched while the
// flags from decl are still pending (movups/leaq do not touch flags).
28.Loop_enc1_1:
// aesenc %xmm1,%xmm2
29.byte	102,15,56,220,209
30	decl	%eax
31	movups	(%rdx),%xmm1
32	leaq	16(%rdx),%rdx
33	jnz	.Loop_enc1_1
// aesenclast %xmm1,%xmm2 -- final round with the last key loaded above.
34.byte	102,15,56,221,209
// Scrub key material from the registers, store the result, scrub it too.
35	pxor	%xmm0,%xmm0
36	pxor	%xmm1,%xmm1
37	movups	%xmm2,(%rsi)
38	pxor	%xmm2,%xmm2
39	ret
40.cfi_endproc
41.size	aes_hw_encrypt,.-aes_hw_encrypt
// _aesni_encrypt2 -- file-local helper: encrypt two blocks in parallel.
// Non-standard register interface, as read by the code below:
//   %rcx        key schedule pointer
//   %eax        loop count (same value the schedule keeps at offset 240)
//   %xmm2,%xmm3 blocks, encrypted in place
// Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
42.type	_aesni_encrypt2,@function
43.align	16
44_aesni_encrypt2:
45.cfi_startproc
46	movups	(%rcx),%xmm0
// Scale the count to a byte length (16 bytes per round key).
47	shll	$4,%eax
48	movups	16(%rcx),%xmm1
// Whitening with round key 0.
49	xorps	%xmm0,%xmm2
50	xorps	%xmm0,%xmm3
51	movups	32(%rcx),%xmm0
// Point %rcx past the keys and turn %rax into a negative offset that
// counts up to zero, so the addq in the loop doubles as the loop test.
52	leaq	32(%rcx,%rax,1),%rcx
53	negq	%rax
54	addq	$16,%rax
55
// Two rounds per iteration, alternating round keys in %xmm1 and %xmm0.
56.Lenc_loop2:
// aesenc %xmm1,%xmm2 / aesenc %xmm1,%xmm3
57.byte	102,15,56,220,209
58.byte	102,15,56,220,217
59	movups	(%rcx,%rax,1),%xmm1
60	addq	$32,%rax
// aesenc %xmm0,%xmm2 / aesenc %xmm0,%xmm3
61.byte	102,15,56,220,208
62.byte	102,15,56,220,216
63	movups	-16(%rcx,%rax,1),%xmm0
64	jnz	.Lenc_loop2
65
// Last aesenc with %xmm1, then aesenclast with %xmm0.
66.byte	102,15,56,220,209
67.byte	102,15,56,220,217
68.byte	102,15,56,221,208
69.byte	102,15,56,221,216
70	ret
71.cfi_endproc
72.size	_aesni_encrypt2,.-_aesni_encrypt2
// _aesni_encrypt3 -- file-local helper: encrypt three blocks in parallel.
// Non-standard register interface, as read by the code below:
//   %rcx          key schedule pointer
//   %eax          loop count (same value the schedule keeps at offset 240)
//   %xmm2-%xmm4   blocks, encrypted in place
// Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
73.type	_aesni_encrypt3,@function
74.align	16
75_aesni_encrypt3:
76.cfi_startproc
77	movups	(%rcx),%xmm0
// Count -> byte length of the round keys.
78	shll	$4,%eax
79	movups	16(%rcx),%xmm1
// Whitening with round key 0.
80	xorps	%xmm0,%xmm2
81	xorps	%xmm0,%xmm3
82	xorps	%xmm0,%xmm4
83	movups	32(%rcx),%xmm0
// %rcx -> past the keys; %rax = negative offset counting up to zero.
84	leaq	32(%rcx,%rax,1),%rcx
85	negq	%rax
86	addq	$16,%rax
87
// Two rounds per iteration: first with %xmm1, then with %xmm0.
88.Lenc_loop3:
// aesenc %xmm1 into %xmm2, %xmm3, %xmm4
89.byte	102,15,56,220,209
90.byte	102,15,56,220,217
91.byte	102,15,56,220,225
92	movups	(%rcx,%rax,1),%xmm1
93	addq	$32,%rax
// aesenc %xmm0 into %xmm2, %xmm3, %xmm4
94.byte	102,15,56,220,208
95.byte	102,15,56,220,216
96.byte	102,15,56,220,224
97	movups	-16(%rcx,%rax,1),%xmm0
98	jnz	.Lenc_loop3
99
// Last aesenc with %xmm1, then aesenclast with %xmm0.
100.byte	102,15,56,220,209
101.byte	102,15,56,220,217
102.byte	102,15,56,220,225
103.byte	102,15,56,221,208
104.byte	102,15,56,221,216
105.byte	102,15,56,221,224
106	ret
107.cfi_endproc
108.size	_aesni_encrypt3,.-_aesni_encrypt3
// _aesni_encrypt4 -- file-local helper: encrypt four blocks in parallel.
// Non-standard register interface, as read by the code below:
//   %rcx          key schedule pointer
//   %eax          loop count (same value the schedule keeps at offset 240)
//   %xmm2-%xmm5   blocks, encrypted in place
// Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
109.type	_aesni_encrypt4,@function
110.align	16
111_aesni_encrypt4:
112.cfi_startproc
113	movups	(%rcx),%xmm0
// Count -> byte length of the round keys.
114	shll	$4,%eax
115	movups	16(%rcx),%xmm1
// Whitening with round key 0.
116	xorps	%xmm0,%xmm2
117	xorps	%xmm0,%xmm3
118	xorps	%xmm0,%xmm4
119	xorps	%xmm0,%xmm5
120	movups	32(%rcx),%xmm0
// %rcx -> past the keys; %rax = negative offset counting up to zero.
121	leaq	32(%rcx,%rax,1),%rcx
122	negq	%rax
// 3-byte NOP (0f 1f 00): padding only.
123.byte	0x0f,0x1f,0x00
124	addq	$16,%rax
125
// Two rounds per iteration: first with %xmm1, then with %xmm0.
126.Lenc_loop4:
// aesenc %xmm1 into %xmm2..%xmm5
127.byte	102,15,56,220,209
128.byte	102,15,56,220,217
129.byte	102,15,56,220,225
130.byte	102,15,56,220,233
131	movups	(%rcx,%rax,1),%xmm1
132	addq	$32,%rax
// aesenc %xmm0 into %xmm2..%xmm5
133.byte	102,15,56,220,208
134.byte	102,15,56,220,216
135.byte	102,15,56,220,224
136.byte	102,15,56,220,232
137	movups	-16(%rcx,%rax,1),%xmm0
138	jnz	.Lenc_loop4
139
// Last aesenc with %xmm1, then aesenclast with %xmm0.
140.byte	102,15,56,220,209
141.byte	102,15,56,220,217
142.byte	102,15,56,220,225
143.byte	102,15,56,220,233
144.byte	102,15,56,221,208
145.byte	102,15,56,221,216
146.byte	102,15,56,221,224
147.byte	102,15,56,221,232
148	ret
149.cfi_endproc
150.size	_aesni_encrypt4,.-_aesni_encrypt4
// _aesni_encrypt6 -- file-local helper: encrypt six blocks in parallel.
// Non-standard register interface, as read by the code below:
//   %rcx          key schedule pointer
//   %eax          loop count (same value the schedule keeps at offset 240)
//   %xmm2-%xmm7   blocks, encrypted in place
// Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
// The inner label .Lenc_loop6 is also entered with `call` from
// aes_hw_ctr32_encrypt_blocks, which sets up %rax, %rcx, %xmm0 and %xmm1
// itself; the loop body and the `ret` below therefore serve both callers.
151.type	_aesni_encrypt6,@function
152.align	16
153_aesni_encrypt6:
154.cfi_startproc
155	movups	(%rcx),%xmm0
156	shll	$4,%eax
157	movups	16(%rcx),%xmm1
// Whitening with round key 0 is interleaved with the first aesenc round
// (round key in %xmm1) for the blocks that are already whitened.
158	xorps	%xmm0,%xmm2
159	pxor	%xmm0,%xmm3
160	pxor	%xmm0,%xmm4
// aesenc %xmm1,%xmm2
161.byte	102,15,56,220,209
// %rcx -> past the keys; %rax = negative offset counting up to zero.
162	leaq	32(%rcx,%rax,1),%rcx
163	negq	%rax
// aesenc %xmm1,%xmm3
164.byte	102,15,56,220,217
165	pxor	%xmm0,%xmm5
166	pxor	%xmm0,%xmm6
// aesenc %xmm1,%xmm4
167.byte	102,15,56,220,225
168	pxor	%xmm0,%xmm7
169	movups	(%rcx,%rax,1),%xmm0
170	addq	$16,%rax
// Skip the three aesenc already issued above and finish the first round
// for %xmm5..%xmm7 inside the loop body.
171	jmp	.Lenc_loop6_enter
172.align	16
// Two rounds per iteration: first with %xmm1, then with %xmm0.
173.Lenc_loop6:
174.byte	102,15,56,220,209
175.byte	102,15,56,220,217
176.byte	102,15,56,220,225
177.Lenc_loop6_enter:
178.byte	102,15,56,220,233
179.byte	102,15,56,220,241
180.byte	102,15,56,220,249
181	movups	(%rcx,%rax,1),%xmm1
182	addq	$32,%rax
// aesenc %xmm0 into %xmm2..%xmm7
183.byte	102,15,56,220,208
184.byte	102,15,56,220,216
185.byte	102,15,56,220,224
186.byte	102,15,56,220,232
187.byte	102,15,56,220,240
188.byte	102,15,56,220,248
189	movups	-16(%rcx,%rax,1),%xmm0
190	jnz	.Lenc_loop6
191
// Last aesenc with %xmm1, then aesenclast with %xmm0.
192.byte	102,15,56,220,209
193.byte	102,15,56,220,217
194.byte	102,15,56,220,225
195.byte	102,15,56,220,233
196.byte	102,15,56,220,241
197.byte	102,15,56,220,249
198.byte	102,15,56,221,208
199.byte	102,15,56,221,216
200.byte	102,15,56,221,224
201.byte	102,15,56,221,232
202.byte	102,15,56,221,240
203.byte	102,15,56,221,248
204	ret
205.cfi_endproc
206.size	_aesni_encrypt6,.-_aesni_encrypt6
// _aesni_encrypt8 -- file-local helper: encrypt eight blocks in parallel.
// Non-standard register interface, as read by the code below:
//   %rcx          key schedule pointer
//   %eax          loop count (same value the schedule keeps at offset 240)
//   %xmm2-%xmm9   blocks, encrypted in place
// Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
// The inner label .Lenc_loop8_enter is also entered with `call` from the
// tail path of aes_hw_ctr32_encrypt_blocks, which prepares %rax, %rcx,
// %xmm0 and %xmm1 itself.
207.type	_aesni_encrypt8,@function
208.align	16
209_aesni_encrypt8:
210.cfi_startproc
211	movups	(%rcx),%xmm0
212	shll	$4,%eax
213	movups	16(%rcx),%xmm1
// Whitening with round key 0, interleaved with the first aesenc round
// (round key in %xmm1) for %xmm2 and %xmm3.
214	xorps	%xmm0,%xmm2
215	xorps	%xmm0,%xmm3
216	pxor	%xmm0,%xmm4
217	pxor	%xmm0,%xmm5
218	pxor	%xmm0,%xmm6
// %rcx -> past the keys; %rax = negative offset counting up to zero.
219	leaq	32(%rcx,%rax,1),%rcx
220	negq	%rax
// aesenc %xmm1,%xmm2
221.byte	102,15,56,220,209
222	pxor	%xmm0,%xmm7
223	pxor	%xmm0,%xmm8
// aesenc %xmm1,%xmm3
224.byte	102,15,56,220,217
225	pxor	%xmm0,%xmm9
226	movups	(%rcx,%rax,1),%xmm0
227	addq	$16,%rax
// Finish the first round for %xmm4..%xmm9 inside the loop body.
228	jmp	.Lenc_loop8_inner
229.align	16
// Two rounds per iteration: first with %xmm1, then with %xmm0.
230.Lenc_loop8:
231.byte	102,15,56,220,209
232.byte	102,15,56,220,217
233.Lenc_loop8_inner:
234.byte	102,15,56,220,225
235.byte	102,15,56,220,233
236.byte	102,15,56,220,241
237.byte	102,15,56,220,249
// REX-prefixed forms: aesenc %xmm1,%xmm8 / aesenc %xmm1,%xmm9
238.byte	102,68,15,56,220,193
239.byte	102,68,15,56,220,201
240.Lenc_loop8_enter:
241	movups	(%rcx,%rax,1),%xmm1
242	addq	$32,%rax
// aesenc %xmm0 into %xmm2..%xmm9
243.byte	102,15,56,220,208
244.byte	102,15,56,220,216
245.byte	102,15,56,220,224
246.byte	102,15,56,220,232
247.byte	102,15,56,220,240
248.byte	102,15,56,220,248
249.byte	102,68,15,56,220,192
250.byte	102,68,15,56,220,200
251	movups	-16(%rcx,%rax,1),%xmm0
252	jnz	.Lenc_loop8
253
// Last aesenc with %xmm1, then aesenclast with %xmm0.
254.byte	102,15,56,220,209
255.byte	102,15,56,220,217
256.byte	102,15,56,220,225
257.byte	102,15,56,220,233
258.byte	102,15,56,220,241
259.byte	102,15,56,220,249
260.byte	102,68,15,56,220,193
261.byte	102,68,15,56,220,201
262.byte	102,15,56,221,208
263.byte	102,15,56,221,216
264.byte	102,15,56,221,224
265.byte	102,15,56,221,232
266.byte	102,15,56,221,240
267.byte	102,15,56,221,248
268.byte	102,68,15,56,221,192
269.byte	102,68,15,56,221,200
270	ret
271.cfi_endproc
272.size	_aesni_encrypt8,.-_aesni_encrypt8
// aes_hw_ctr32_encrypt_blocks -- CTR-mode encryption of whole 16-byte
// blocks, with a 32-bit big-endian counter in the last dword of the
// counter block.
// Register use on entry, as read by the code below:
//   %rdi  input pointer
//   %rsi  output pointer
//   %rdx  number of 16-byte blocks
//   %rcx  key schedule (round keys from offset 0, loop count at offset 240)
//   %r8   16-byte initial counter block; it is only read, never written back
// Paths:
//   * exactly one block: straight single-block encrypt, no stack frame;
//   * .Lctr32_bulk: builds eight pre-whitened counter blocks in a 128-byte
//     aligned stack area, then runs either the 8-wide loop or, when the
//     capability test selects it, the 6-wide loop that uses movbe stores;
//   * .Lctr32_tail: handles the remaining 1..7 blocks.
// All %xmm registers and the stack scratch area are zeroed at .Lctr32_done.
273.globl	aes_hw_ctr32_encrypt_blocks
274.hidden aes_hw_ctr32_encrypt_blocks
275.type	aes_hw_ctr32_encrypt_blocks,@function
276.align	16
277aes_hw_ctr32_encrypt_blocks:
278.cfi_startproc
279_CET_ENDBR
280#ifdef BORINGSSL_DISPATCH_TEST
// Dispatch-test builds only: flag byte 0 of BORINGSSL_function_hit.
281	movb	$1,BORINGSSL_function_hit(%rip)
282#endif
283	cmpq	$1,%rdx
284	jne	.Lctr32_bulk
285
286
287
// ---- Single-block path: out = in XOR E(counter block) ----
288	movups	(%r8),%xmm2
289	movups	(%rdi),%xmm3
290	movl	240(%rcx),%edx
291	movups	(%rcx),%xmm0
292	movups	16(%rcx),%xmm1
293	leaq	32(%rcx),%rcx
294	xorps	%xmm0,%xmm2
295.Loop_enc1_2:
// aesenc %xmm1,%xmm2
296.byte	102,15,56,220,209
297	decl	%edx
298	movups	(%rcx),%xmm1
299	leaq	16(%rcx),%rcx
300	jnz	.Loop_enc1_2
// aesenclast %xmm1,%xmm2
301.byte	102,15,56,221,209
// Scrub round keys, XOR keystream with the input, store, scrub the rest.
302	pxor	%xmm0,%xmm0
303	pxor	%xmm1,%xmm1
304	xorps	%xmm3,%xmm2
305	pxor	%xmm3,%xmm3
306	movups	%xmm2,(%rsi)
307	xorps	%xmm2,%xmm2
308	jmp	.Lctr32_epilogue
309
310.align	16
// ---- Bulk path ----
// %r11 keeps the entry %rsp for the epilogue; %rbp is saved just below it.
// The frame is 128 bytes, 16-byte aligned, used as eight 16-byte slots.
311.Lctr32_bulk:
312	leaq	(%rsp),%r11
313.cfi_def_cfa_register	%r11
314	pushq	%rbp
315.cfi_offset	%rbp,-16
316	subq	$128,%rsp
317	andq	$-16,%rsp
318
319
320
321
// xmm2 = counter block XOR round key 0 (pre-whitened template).
// r8d  = counter dword, converted to host order by the bswap below.
// ebp  = last dword of round key 0, so that a new counter value can be
//        whitened with a plain 32-bit xor before it is inserted.
322	movdqu	(%r8),%xmm2
323	movdqu	(%rcx),%xmm0
324	movl	12(%r8),%r8d
325	pxor	%xmm0,%xmm2
326	movl	12(%rcx),%ebp
327	movdqa	%xmm2,0(%rsp)
328	bswapl	%r8d
329	movdqa	%xmm2,%xmm3
330	movdqa	%xmm2,%xmm4
331	movdqa	%xmm2,%xmm5
332	movdqa	%xmm2,64(%rsp)
333	movdqa	%xmm2,80(%rsp)
334	movdqa	%xmm2,96(%rsp)
// %rdx is borrowed as scratch below; park the block count in %r10.
335	movq	%rdx,%r10
336	movdqa	%xmm2,112(%rsp)
337
// Build counter+1 .. counter+7: big-endian value XOR ebp goes into dword 3
// of each template copy (registers for slots 1-3, memory for slots 4-7).
338	leaq	1(%r8),%rax
339	leaq	2(%r8),%rdx
340	bswapl	%eax
341	bswapl	%edx
342	xorl	%ebp,%eax
343	xorl	%ebp,%edx
// pinsrd $3,%eax,%xmm3
344.byte	102,15,58,34,216,3
345	leaq	3(%r8),%rax
346	movdqa	%xmm3,16(%rsp)
// pinsrd $3,%edx,%xmm4
347.byte	102,15,58,34,226,3
348	bswapl	%eax
// Block count back into %rdx.
349	movq	%r10,%rdx
350	leaq	4(%r8),%r10
351	movdqa	%xmm4,32(%rsp)
352	xorl	%ebp,%eax
353	bswapl	%r10d
// pinsrd $3,%eax,%xmm5
354.byte	102,15,58,34,232,3
355	xorl	%ebp,%r10d
356	movdqa	%xmm5,48(%rsp)
357	leaq	5(%r8),%r9
358	movl	%r10d,64+12(%rsp)
359	bswapl	%r9d
360	leaq	6(%r8),%r10
// eax = loop count from the key schedule.
361	movl	240(%rcx),%eax
362	xorl	%ebp,%r9d
363	bswapl	%r10d
364	movl	%r9d,80+12(%rsp)
365	xorl	%ebp,%r10d
366	leaq	7(%r8),%r9
367	movl	%r10d,96+12(%rsp)
368	bswapl	%r9d
// r10d = second dword of OPENSSL_ia32cap_P masked with 0x04400000
// (bits 22 and 26).
// NOTE(review): presumably the MOVBE and XSAVE feature bits -- the 6-wide
// path selected below does use movbe; confirm against the capability
// vector layout.
369	leaq	OPENSSL_ia32cap_P(%rip),%r10
370	movl	4(%r10),%r10d
371	xorl	%ebp,%r9d
372	andl	$71303168,%r10d
373	movl	%r9d,112+12(%rsp)
374
// xmm1 = round key 1; xmm6/xmm7 = counter blocks 4 and 5.
375	movups	16(%rcx),%xmm1
376
377	movdqa	64(%rsp),%xmm6
378	movdqa	80(%rsp),%xmm7
379
// Fewer than 8 blocks: go straight to the tail.
380	cmpq	$8,%rdx
381	jb	.Lctr32_tail
382
// Only bit 22 set (0x00400000) selects the 6-wide loop; everything else
// uses the 8-wide loop with %rcx biased by +128 and %rdx reduced by 8.
383	subq	$6,%rdx
384	cmpl	$4194304,%r10d
385	je	.Lctr32_6x
386
387	leaq	128(%rcx),%rcx
388	subq	$2,%rdx
389	jmp	.Lctr32_loop8
390
391.align	16
// ---- 6-wide setup ----
// eax -> byte length of the rounds, %rcx -> past the keys, and
// %r10 = 48 - length, the offset handed to .Lenc_loop6 in %rax.
// ebp is byte-swapped because the counters are now stored with movbe,
// which swaps the (host-order counter XOR ebp) value on the way out.
392.Lctr32_6x:
393	shll	$4,%eax
394	movl	$48,%r10d
395	bswapl	%ebp
396	leaq	32(%rcx,%rax,1),%rcx
397	subq	%rax,%r10
398	jmp	.Lctr32_loop6
399
400.align	16
// ---- 6-wide main loop ----
// The first two rounds are issued inline, interleaved with writing the
// next six counter values into stack slots 0-5; the remaining rounds run
// in .Lenc_loop6 (inside _aesni_encrypt6).
401.Lctr32_loop6:
402	addl	$6,%r8d
403	movups	-48(%rcx,%r10,1),%xmm0
404.byte	102,15,56,220,209
405	movl	%r8d,%eax
406	xorl	%ebp,%eax
407.byte	102,15,56,220,217
// movbe %eax,12(%rsp)
408.byte	0x0f,0x38,0xf1,0x44,0x24,12
409	leal	1(%r8),%eax
410.byte	102,15,56,220,225
411	xorl	%ebp,%eax
// movbe %eax,28(%rsp)
412.byte	0x0f,0x38,0xf1,0x44,0x24,28
413.byte	102,15,56,220,233
414	leal	2(%r8),%eax
415	xorl	%ebp,%eax
416.byte	102,15,56,220,241
// movbe %eax,44(%rsp)
417.byte	0x0f,0x38,0xf1,0x44,0x24,44
418	leal	3(%r8),%eax
419.byte	102,15,56,220,249
420	movups	-32(%rcx,%r10,1),%xmm1
421	xorl	%ebp,%eax
422
423.byte	102,15,56,220,208
// movbe %eax,60(%rsp)
424.byte	0x0f,0x38,0xf1,0x44,0x24,60
425	leal	4(%r8),%eax
426.byte	102,15,56,220,216
427	xorl	%ebp,%eax
// movbe %eax,76(%rsp)
428.byte	0x0f,0x38,0xf1,0x44,0x24,76
429.byte	102,15,56,220,224
430	leal	5(%r8),%eax
431	xorl	%ebp,%eax
432.byte	102,15,56,220,232
// movbe %eax,92(%rsp)
433.byte	0x0f,0x38,0xf1,0x44,0x24,92
// %rax = key offset expected by .Lenc_loop6.
434	movq	%r10,%rax
435.byte	102,15,56,220,240
436.byte	102,15,56,220,248
437	movups	-16(%rcx,%r10,1),%xmm0
438
// Remaining rounds for %xmm2..%xmm7; returns through _aesni_encrypt6's ret.
439	call	.Lenc_loop6
440
// XOR keystream with six input blocks, reload the next six counter
// blocks from the stack, reload round key 1 into %xmm1, store the output.
441	movdqu	(%rdi),%xmm8
442	movdqu	16(%rdi),%xmm9
443	movdqu	32(%rdi),%xmm10
444	movdqu	48(%rdi),%xmm11
445	movdqu	64(%rdi),%xmm12
446	movdqu	80(%rdi),%xmm13
447	leaq	96(%rdi),%rdi
448	movups	-64(%rcx,%r10,1),%xmm1
449	pxor	%xmm2,%xmm8
450	movaps	0(%rsp),%xmm2
451	pxor	%xmm3,%xmm9
452	movaps	16(%rsp),%xmm3
453	pxor	%xmm4,%xmm10
454	movaps	32(%rsp),%xmm4
455	pxor	%xmm5,%xmm11
456	movaps	48(%rsp),%xmm5
457	pxor	%xmm6,%xmm12
458	movaps	64(%rsp),%xmm6
459	pxor	%xmm7,%xmm13
460	movaps	80(%rsp),%xmm7
461	movdqu	%xmm8,(%rsi)
462	movdqu	%xmm9,16(%rsi)
463	movdqu	%xmm10,32(%rsi)
464	movdqu	%xmm11,48(%rsi)
465	movdqu	%xmm12,64(%rsi)
466	movdqu	%xmm13,80(%rsi)
467	leaq	96(%rsi),%rsi
468
// Loop while at least six more blocks remain (no borrow from the subq).
469	subq	$6,%rdx
470	jnc	.Lctr32_loop6
471
472	addq	$6,%rdx
473	jz	.Lctr32_done
474
// Leftover blocks: recover the loop count in eax from %r10 and point
// %rcx back at the start of the key schedule before entering the tail.
475	leal	-48(%r10),%eax
476	leaq	-80(%rcx,%r10,1),%rcx
477	negl	%eax
478	shrl	$4,%eax
479	jmp	.Lctr32_tail
480
481.align	32
// ---- 8-wide main loop ----
// %rcx is biased by +128, so round key k is at (16*k - 128)(%rcx).
// Rounds are fully unrolled and interleaved with computing the next eight
// counter values (bswap, xor ebp) into dword 3 of the stack slots.
482.Lctr32_loop8:
483	addl	$8,%r8d
484	movdqa	96(%rsp),%xmm8
485.byte	102,15,56,220,209
486	movl	%r8d,%r9d
487	movdqa	112(%rsp),%xmm9
488.byte	102,15,56,220,217
489	bswapl	%r9d
490	movups	32-128(%rcx),%xmm0
491.byte	102,15,56,220,225
492	xorl	%ebp,%r9d
493	nop
494.byte	102,15,56,220,233
495	movl	%r9d,0+12(%rsp)
496	leaq	1(%r8),%r9
497.byte	102,15,56,220,241
498.byte	102,15,56,220,249
499.byte	102,68,15,56,220,193
500.byte	102,68,15,56,220,201
501	movups	48-128(%rcx),%xmm1
502	bswapl	%r9d
503.byte	102,15,56,220,208
504.byte	102,15,56,220,216
505	xorl	%ebp,%r9d
// 66 90: two-byte NOP, padding only (same for the repeats below).
506.byte	0x66,0x90
507.byte	102,15,56,220,224
508.byte	102,15,56,220,232
509	movl	%r9d,16+12(%rsp)
510	leaq	2(%r8),%r9
511.byte	102,15,56,220,240
512.byte	102,15,56,220,248
513.byte	102,68,15,56,220,192
514.byte	102,68,15,56,220,200
515	movups	64-128(%rcx),%xmm0
516	bswapl	%r9d
517.byte	102,15,56,220,209
518.byte	102,15,56,220,217
519	xorl	%ebp,%r9d
520.byte	0x66,0x90
521.byte	102,15,56,220,225
522.byte	102,15,56,220,233
523	movl	%r9d,32+12(%rsp)
524	leaq	3(%r8),%r9
525.byte	102,15,56,220,241
526.byte	102,15,56,220,249
527.byte	102,68,15,56,220,193
528.byte	102,68,15,56,220,201
529	movups	80-128(%rcx),%xmm1
530	bswapl	%r9d
531.byte	102,15,56,220,208
532.byte	102,15,56,220,216
533	xorl	%ebp,%r9d
534.byte	0x66,0x90
535.byte	102,15,56,220,224
536.byte	102,15,56,220,232
537	movl	%r9d,48+12(%rsp)
538	leaq	4(%r8),%r9
539.byte	102,15,56,220,240
540.byte	102,15,56,220,248
541.byte	102,68,15,56,220,192
542.byte	102,68,15,56,220,200
543	movups	96-128(%rcx),%xmm0
544	bswapl	%r9d
545.byte	102,15,56,220,209
546.byte	102,15,56,220,217
547	xorl	%ebp,%r9d
548.byte	0x66,0x90
549.byte	102,15,56,220,225
550.byte	102,15,56,220,233
551	movl	%r9d,64+12(%rsp)
552	leaq	5(%r8),%r9
553.byte	102,15,56,220,241
554.byte	102,15,56,220,249
555.byte	102,68,15,56,220,193
556.byte	102,68,15,56,220,201
557	movups	112-128(%rcx),%xmm1
558	bswapl	%r9d
559.byte	102,15,56,220,208
560.byte	102,15,56,220,216
561	xorl	%ebp,%r9d
562.byte	0x66,0x90
563.byte	102,15,56,220,224
564.byte	102,15,56,220,232
565	movl	%r9d,80+12(%rsp)
566	leaq	6(%r8),%r9
567.byte	102,15,56,220,240
568.byte	102,15,56,220,248
569.byte	102,68,15,56,220,192
570.byte	102,68,15,56,220,200
571	movups	128-128(%rcx),%xmm0
572	bswapl	%r9d
573.byte	102,15,56,220,209
574.byte	102,15,56,220,217
575	xorl	%ebp,%r9d
576.byte	0x66,0x90
577.byte	102,15,56,220,225
578.byte	102,15,56,220,233
579	movl	%r9d,96+12(%rsp)
580	leaq	7(%r8),%r9
581.byte	102,15,56,220,241
582.byte	102,15,56,220,249
583.byte	102,68,15,56,220,193
584.byte	102,68,15,56,220,201
585	movups	144-128(%rcx),%xmm1
586	bswapl	%r9d
587.byte	102,15,56,220,208
588.byte	102,15,56,220,216
589.byte	102,15,56,220,224
590	xorl	%ebp,%r9d
591	movdqu	0(%rdi),%xmm10
592.byte	102,15,56,220,232
593	movl	%r9d,112+12(%rsp)
// The flags from this compare are consumed by the jb below; nothing in
// between modifies them.
594	cmpl	$11,%eax
595.byte	102,15,56,220,240
596.byte	102,15,56,220,248
597.byte	102,68,15,56,220,192
598.byte	102,68,15,56,220,200
599	movups	160-128(%rcx),%xmm0
600
// Loop count below 11: the key at offset 160 (now in %xmm0) is the last
// round key. Otherwise run four more rounds and finish with the key at
// offset 224.
601	jb	.Lctr32_enc_done
602
603.byte	102,15,56,220,209
604.byte	102,15,56,220,217
605.byte	102,15,56,220,225
606.byte	102,15,56,220,233
607.byte	102,15,56,220,241
608.byte	102,15,56,220,249
609.byte	102,68,15,56,220,193
610.byte	102,68,15,56,220,201
611	movups	176-128(%rcx),%xmm1
612
613.byte	102,15,56,220,208
614.byte	102,15,56,220,216
615.byte	102,15,56,220,224
616.byte	102,15,56,220,232
617.byte	102,15,56,220,240
618.byte	102,15,56,220,248
619.byte	102,68,15,56,220,192
620.byte	102,68,15,56,220,200
621	movups	192-128(%rcx),%xmm0
622
623
624
625.byte	102,15,56,220,209
626.byte	102,15,56,220,217
627.byte	102,15,56,220,225
628.byte	102,15,56,220,233
629.byte	102,15,56,220,241
630.byte	102,15,56,220,249
631.byte	102,68,15,56,220,193
632.byte	102,68,15,56,220,201
633	movups	208-128(%rcx),%xmm1
634
635.byte	102,15,56,220,208
636.byte	102,15,56,220,216
637.byte	102,15,56,220,224
638.byte	102,15,56,220,232
639.byte	102,15,56,220,240
640.byte	102,15,56,220,248
641.byte	102,68,15,56,220,192
642.byte	102,68,15,56,220,200
643	movups	224-128(%rcx),%xmm0
644	jmp	.Lctr32_enc_done
645
646.align	16
// ---- Final rounds of the 8-wide loop ----
// Each input block is XORed with the last round key (%xmm0) first, and
// the result is used as the aesenclast operand, so aesenclast directly
// yields input XOR keystream.
647.Lctr32_enc_done:
648	movdqu	16(%rdi),%xmm11
649	pxor	%xmm0,%xmm10
650	movdqu	32(%rdi),%xmm12
651	pxor	%xmm0,%xmm11
652	movdqu	48(%rdi),%xmm13
653	pxor	%xmm0,%xmm12
654	movdqu	64(%rdi),%xmm14
655	pxor	%xmm0,%xmm13
656	movdqu	80(%rdi),%xmm15
657	pxor	%xmm0,%xmm14
658	prefetcht0	448(%rdi)
659	prefetcht0	512(%rdi)
660	pxor	%xmm0,%xmm15
// Last aesenc round with %xmm1 for all eight blocks.
661.byte	102,15,56,220,209
662.byte	102,15,56,220,217
663.byte	102,15,56,220,225
664.byte	102,15,56,220,233
665.byte	102,15,56,220,241
666.byte	102,15,56,220,249
667.byte	102,68,15,56,220,193
668.byte	102,68,15,56,220,201
// %xmm1 is free now: reuse it for the 7th input block.
669	movdqu	96(%rdi),%xmm1
670	leaq	128(%rdi),%rdi
671
// aesenclast %xmm10..%xmm15 into %xmm2..%xmm7, %xmm1 into %xmm8 and
// (reloaded) %xmm10 into %xmm9, interleaved with reloading the next
// counter blocks from stack slots 0-5.
672.byte	102,65,15,56,221,210
673	pxor	%xmm0,%xmm1
674	movdqu	112-128(%rdi),%xmm10
675.byte	102,65,15,56,221,219
676	pxor	%xmm0,%xmm10
677	movdqa	0(%rsp),%xmm11
678.byte	102,65,15,56,221,228
679.byte	102,65,15,56,221,237
680	movdqa	16(%rsp),%xmm12
681	movdqa	32(%rsp),%xmm13
682.byte	102,65,15,56,221,246
683.byte	102,65,15,56,221,255
684	movdqa	48(%rsp),%xmm14
685	movdqa	64(%rsp),%xmm15
686.byte	102,68,15,56,221,193
687	movdqa	80(%rsp),%xmm0
// Round key 1 back into %xmm1 for the next iteration or the tail.
688	movups	16-128(%rcx),%xmm1
689.byte	102,69,15,56,221,202
690
// Store eight output blocks; move the next counter blocks into
// %xmm2..%xmm7.
691	movups	%xmm2,(%rsi)
692	movdqa	%xmm11,%xmm2
693	movups	%xmm3,16(%rsi)
694	movdqa	%xmm12,%xmm3
695	movups	%xmm4,32(%rsi)
696	movdqa	%xmm13,%xmm4
697	movups	%xmm5,48(%rsi)
698	movdqa	%xmm14,%xmm5
699	movups	%xmm6,64(%rsi)
700	movdqa	%xmm15,%xmm6
701	movups	%xmm7,80(%rsi)
702	movdqa	%xmm0,%xmm7
703	movups	%xmm8,96(%rsi)
704	movups	%xmm9,112(%rsi)
705	leaq	128(%rsi),%rsi
706
707	subq	$8,%rdx
708	jnc	.Lctr32_loop8
709
710	addq	$8,%rdx
711	jz	.Lctr32_done
// Undo the +128 bias so %rcx points at the start of the key schedule.
712	leaq	-128(%rcx),%rcx
713
// ---- Tail: 1..7 blocks left ----
// On entry %rcx = key schedule, eax = loop count, %xmm1 = round key 1 and
// %xmm2..%xmm7 hold the next pre-whitened counter blocks.
714.Lctr32_tail:
715
716
717	leaq	16(%rcx),%rcx
718	cmpq	$4,%rdx
719	jb	.Lctr32_loop3
720	je	.Lctr32_loop4
721
722
// 5..7 blocks: first round inline for %xmm2..%xmm8, then the remaining
// rounds via .Lenc_loop8_enter (inside _aesni_encrypt8). %xmm9 is zeroed
// and is never stored on this path.
723	shll	$4,%eax
724	movdqa	96(%rsp),%xmm8
725	pxor	%xmm9,%xmm9
726
727	movups	16(%rcx),%xmm0
728.byte	102,15,56,220,209
729.byte	102,15,56,220,217
730	leaq	32-16(%rcx,%rax,1),%rcx
731	negq	%rax
732.byte	102,15,56,220,225
733	addq	$16,%rax
734	movups	(%rdi),%xmm10
735.byte	102,15,56,220,233
736.byte	102,15,56,220,241
737	movups	16(%rdi),%xmm11
738	movups	32(%rdi),%xmm12
739.byte	102,15,56,220,249
740.byte	102,68,15,56,220,193
741
742	call	.Lenc_loop8_enter
743
// Five blocks are always stored here; the 6th and 7th depend on %rdx.
// The flags from cmpq $6 feed both the jb and the later je, since the
// SSE moves and xors in between do not modify flags.
744	movdqu	48(%rdi),%xmm13
745	pxor	%xmm10,%xmm2
746	movdqu	64(%rdi),%xmm10
747	pxor	%xmm11,%xmm3
748	movdqu	%xmm2,(%rsi)
749	pxor	%xmm12,%xmm4
750	movdqu	%xmm3,16(%rsi)
751	pxor	%xmm13,%xmm5
752	movdqu	%xmm4,32(%rsi)
753	pxor	%xmm10,%xmm6
754	movdqu	%xmm5,48(%rsi)
755	movdqu	%xmm6,64(%rsi)
756	cmpq	$6,%rdx
757	jb	.Lctr32_done
758
759	movups	80(%rdi),%xmm11
760	xorps	%xmm11,%xmm7
761	movups	%xmm7,80(%rsi)
762	je	.Lctr32_done
763
764	movups	96(%rdi),%xmm12
765	xorps	%xmm12,%xmm8
766	movups	%xmm8,96(%rsi)
767	jmp	.Lctr32_done
768
769.align	32
// Exactly four blocks: one round per iteration on %xmm2..%xmm5, counted
// down in eax (decl sets the flags that jnz tests).
770.Lctr32_loop4:
771.byte	102,15,56,220,209
772	leaq	16(%rcx),%rcx
773	decl	%eax
774.byte	102,15,56,220,217
775.byte	102,15,56,220,225
776.byte	102,15,56,220,233
777	movups	(%rcx),%xmm1
778	jnz	.Lctr32_loop4
// aesenclast %xmm1 into %xmm2..%xmm5, interleaved with input loads.
779.byte	102,15,56,221,209
780.byte	102,15,56,221,217
781	movups	(%rdi),%xmm10
782	movups	16(%rdi),%xmm11
783.byte	102,15,56,221,225
784.byte	102,15,56,221,233
785	movups	32(%rdi),%xmm12
786	movups	48(%rdi),%xmm13
787
788	xorps	%xmm10,%xmm2
789	movups	%xmm2,(%rsi)
790	xorps	%xmm11,%xmm3
791	movups	%xmm3,16(%rsi)
792	pxor	%xmm12,%xmm4
793	movdqu	%xmm4,32(%rsi)
794	pxor	%xmm13,%xmm5
795	movdqu	%xmm5,48(%rsi)
796	jmp	.Lctr32_done
797
798.align	32
// One to three blocks: always encrypts %xmm2..%xmm4, then stores only as
// many results as %rdx asks for.
799.Lctr32_loop3:
800.byte	102,15,56,220,209
801	leaq	16(%rcx),%rcx
802	decl	%eax
803.byte	102,15,56,220,217
804.byte	102,15,56,220,225
805	movups	(%rcx),%xmm1
806	jnz	.Lctr32_loop3
807.byte	102,15,56,221,209
808.byte	102,15,56,221,217
809.byte	102,15,56,221,225
810
811	movups	(%rdi),%xmm10
812	xorps	%xmm10,%xmm2
813	movups	%xmm2,(%rsi)
814	cmpq	$2,%rdx
815	jb	.Lctr32_done
816
817	movups	16(%rdi),%xmm11
818	xorps	%xmm11,%xmm3
819	movups	%xmm3,16(%rsi)
820	je	.Lctr32_done
821
822	movups	32(%rdi),%xmm12
823	xorps	%xmm12,%xmm4
824	movups	%xmm4,32(%rsi)
825
// ---- Common exit of the bulk path ----
// Zero every %xmm register, ebp and the 128-byte stack scratch area, then
// restore %rbp and %rsp from the frame anchored at %r11.
826.Lctr32_done:
827	xorps	%xmm0,%xmm0
828	xorl	%ebp,%ebp
829	pxor	%xmm1,%xmm1
830	pxor	%xmm2,%xmm2
831	pxor	%xmm3,%xmm3
832	pxor	%xmm4,%xmm4
833	pxor	%xmm5,%xmm5
834	pxor	%xmm6,%xmm6
835	pxor	%xmm7,%xmm7
836	movaps	%xmm0,0(%rsp)
837	pxor	%xmm8,%xmm8
838	movaps	%xmm0,16(%rsp)
839	pxor	%xmm9,%xmm9
840	movaps	%xmm0,32(%rsp)
841	pxor	%xmm10,%xmm10
842	movaps	%xmm0,48(%rsp)
843	pxor	%xmm11,%xmm11
844	movaps	%xmm0,64(%rsp)
845	pxor	%xmm12,%xmm12
846	movaps	%xmm0,80(%rsp)
847	pxor	%xmm13,%xmm13
848	movaps	%xmm0,96(%rsp)
849	pxor	%xmm14,%xmm14
850	movaps	%xmm0,112(%rsp)
851	pxor	%xmm15,%xmm15
852	movq	-8(%r11),%rbp
853.cfi_restore	%rbp
854	leaq	(%r11),%rsp
855.cfi_def_cfa_register	%rsp
// The single-block path jumps here directly; it never set up a frame.
856.Lctr32_epilogue:
857	ret
858.cfi_endproc
859.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
// aes_hw_set_encrypt_key -- expand a 128- or 256-bit AES key into an
// encryption key schedule. __aesni_set_encrypt_key is a second label on
// the same entry point.
// Register use on entry, as read by the code below:
//   %rdi  pointer to the user key
//   %esi  key length in bits (only 128 and 256 are accepted)
//   %rdx  pointer to the output key schedule
// Result in %rax: 0 on success, -1 if %rdi or %rdx is NULL, -2 for any
// other key length.
// Output layout: 16-byte round keys from offset 0, and the loop count used
// by the encrypt routines (9 for 128-bit, 13 for 256-bit) as a 32-bit
// value at offset 240.
// Each key size has two implementations, chosen from OPENSSL_ia32cap_P:
// one based on aeskeygenassist plus the .Lkey_expansion_* helpers, and an
// "_alt" one based on pshufb + aesenclast.
// %xmm0-%xmm5 are zeroed on every exit path.
860.globl	aes_hw_set_encrypt_key
861.hidden aes_hw_set_encrypt_key
862.type	aes_hw_set_encrypt_key,@function
863.align	16
864aes_hw_set_encrypt_key:
865__aesni_set_encrypt_key:
866.cfi_startproc
867_CET_ENDBR
868#ifdef BORINGSSL_DISPATCH_TEST
// Dispatch-test builds only: flag byte 3 of BORINGSSL_function_hit.
869	movb	$1,BORINGSSL_function_hit+3(%rip)
870#endif
// subq $8,%rsp -- keeps %rsp 16-byte aligned for the internal calls.
871.byte	0x48,0x83,0xEC,0x08
872.cfi_adjust_cfa_offset	8
// Default result -1, returned if either pointer is NULL.
873	movq	$-1,%rax
874	testq	%rdi,%rdi
875	jz	.Lenc_key_ret
876	testq	%rdx,%rdx
877	jz	.Lenc_key_ret
878
// xmm0 = first 16 key bytes; xmm4 = 0 (scratch for the shufps helpers).
// r10d = second dword of OPENSSL_ia32cap_P masked with 0x10000800
// (bits 28 and 11).
// NOTE(review): presumably the AVX and XOP feature bits -- confirm
// against the capability vector layout.
// %rax = output cursor, starting at round key 1.
879	movups	(%rdi),%xmm0
880	xorps	%xmm4,%xmm4
881	leaq	OPENSSL_ia32cap_P(%rip),%r10
882	movl	4(%r10),%r10d
883	andl	$268437504,%r10d
884	leaq	16(%rdx),%rax
885	cmpl	$256,%esi
886	je	.L14rounds
887
888	cmpl	$128,%esi
889	jne	.Lbad_keybits
890
// ---- 128-bit key ----
891.L10rounds:
// esi is reused for the loop count stored at offset 240.
892	movl	$9,%esi
// Only bit 28 set (0x10000000) selects the pshufb/aesenclast variant.
893	cmpl	$268435456,%r10d
894	je	.L10rounds_alt
895
// Round key 0 is the user key itself. Each step below is
// aeskeygenassist $rcon,%xmm0,%xmm1 followed by a helper call; the first
// call uses the _cold entry because round key 0 is already stored.
896	movups	%xmm0,(%rdx)
897.byte	102,15,58,223,200,1
898	call	.Lkey_expansion_128_cold
899.byte	102,15,58,223,200,2
900	call	.Lkey_expansion_128
901.byte	102,15,58,223,200,4
902	call	.Lkey_expansion_128
903.byte	102,15,58,223,200,8
904	call	.Lkey_expansion_128
905.byte	102,15,58,223,200,16
906	call	.Lkey_expansion_128
907.byte	102,15,58,223,200,32
908	call	.Lkey_expansion_128
909.byte	102,15,58,223,200,64
910	call	.Lkey_expansion_128
911.byte	102,15,58,223,200,128
912	call	.Lkey_expansion_128
913.byte	102,15,58,223,200,27
914	call	.Lkey_expansion_128
915.byte	102,15,58,223,200,54
916	call	.Lkey_expansion_128
// Store the last round key (offset 160) and the loop count; %rax points
// at offset 160 here, so 80(%rax) is offset 240.
917	movups	%xmm0,(%rax)
918	movl	%esi,80(%rax)
919	xorl	%eax,%eax
920	jmp	.Lenc_key_ret
921
922.align	16
// ---- 128-bit key, pshufb/aesenclast variant ----
// xmm5 = byte-rotate mask, xmm4 = round constant (doubled with pslld each
// round), xmm2 = copy of the previous round key, r10d = 8 loop rounds.
923.L10rounds_alt:
924	movdqa	.Lkey_rotate(%rip),%xmm5
925	movl	$8,%r10d
926	movdqa	.Lkey_rcon1(%rip),%xmm4
927	movdqa	%xmm0,%xmm2
928	movdqu	%xmm0,(%rdx)
929	jmp	.Loop_key128
930
931.align	16
932.Loop_key128:
// pshufb %xmm5,%xmm0 ; aesenclast %xmm4,%xmm0
933.byte	102,15,56,0,197
934.byte	102,15,56,221,196
935	pslld	$1,%xmm4
936	leaq	16(%rax),%rax
937
// xmm2 = running XOR of the previous key's dwords (prefix XOR via three
// 4-byte shifts).
938	movdqa	%xmm2,%xmm3
939	pslldq	$4,%xmm2
940	pxor	%xmm2,%xmm3
941	pslldq	$4,%xmm2
942	pxor	%xmm2,%xmm3
943	pslldq	$4,%xmm2
944	pxor	%xmm3,%xmm2
945
946	pxor	%xmm2,%xmm0
947	movdqu	%xmm0,-16(%rax)
948	movdqa	%xmm0,%xmm2
949
950	decl	%r10d
951	jnz	.Loop_key128
952
// Last two round keys use constants 0x1b and, after the pslld, 0x36.
953	movdqa	.Lkey_rcon1b(%rip),%xmm4
954
955.byte	102,15,56,0,197
956.byte	102,15,56,221,196
957	pslld	$1,%xmm4
958
959	movdqa	%xmm2,%xmm3
960	pslldq	$4,%xmm2
961	pxor	%xmm2,%xmm3
962	pslldq	$4,%xmm2
963	pxor	%xmm2,%xmm3
964	pslldq	$4,%xmm2
965	pxor	%xmm3,%xmm2
966
967	pxor	%xmm2,%xmm0
968	movdqu	%xmm0,(%rax)
969
970	movdqa	%xmm0,%xmm2
971.byte	102,15,56,0,197
972.byte	102,15,56,221,196
973
974	movdqa	%xmm2,%xmm3
975	pslldq	$4,%xmm2
976	pxor	%xmm2,%xmm3
977	pslldq	$4,%xmm2
978	pxor	%xmm2,%xmm3
979	pslldq	$4,%xmm2
980	pxor	%xmm3,%xmm2
981
982	pxor	%xmm2,%xmm0
983	movdqu	%xmm0,16(%rax)
984
// %rax points at offset 144 here, so 96(%rax) is offset 240.
985	movl	%esi,96(%rax)
986	xorl	%eax,%eax
987	jmp	.Lenc_key_ret
988
989
990
991.align	16
// ---- 256-bit key ----
// xmm2 = second 16 key bytes; output cursor moves to round key 2.
992.L14rounds:
993	movups	16(%rdi),%xmm2
994	movl	$13,%esi
995	leaq	16(%rax),%rax
996	cmpl	$268435456,%r10d
997	je	.L14rounds_alt
998
// Round keys 0 and 1 are the user key. The steps alternate between
// aeskeygenassist $rcon,%xmm2,%xmm1 + 256a (next even key in %xmm0) and
// aeskeygenassist $rcon,%xmm0,%xmm1 + 256b (next odd key in %xmm2).
999	movups	%xmm0,(%rdx)
1000	movups	%xmm2,16(%rdx)
1001.byte	102,15,58,223,202,1
1002	call	.Lkey_expansion_256a_cold
1003.byte	102,15,58,223,200,1
1004	call	.Lkey_expansion_256b
1005.byte	102,15,58,223,202,2
1006	call	.Lkey_expansion_256a
1007.byte	102,15,58,223,200,2
1008	call	.Lkey_expansion_256b
1009.byte	102,15,58,223,202,4
1010	call	.Lkey_expansion_256a
1011.byte	102,15,58,223,200,4
1012	call	.Lkey_expansion_256b
1013.byte	102,15,58,223,202,8
1014	call	.Lkey_expansion_256a
1015.byte	102,15,58,223,200,8
1016	call	.Lkey_expansion_256b
1017.byte	102,15,58,223,202,16
1018	call	.Lkey_expansion_256a
1019.byte	102,15,58,223,200,16
1020	call	.Lkey_expansion_256b
1021.byte	102,15,58,223,202,32
1022	call	.Lkey_expansion_256a
1023.byte	102,15,58,223,200,32
1024	call	.Lkey_expansion_256b
1025.byte	102,15,58,223,202,64
1026	call	.Lkey_expansion_256a
// Last round key at offset 224, loop count at offset 240.
1027	movups	%xmm0,(%rax)
1028	movl	%esi,16(%rax)
1029	xorq	%rax,%rax
1030	jmp	.Lenc_key_ret
1031
1032.align	16
// ---- 256-bit key, pshufb/aesenclast variant ----
// xmm0/xmm1 track the previous even/odd round keys, xmm2 is the working
// register, xmm4 the round constant; r10d counts 7 even-key steps.
1033.L14rounds_alt:
1034	movdqa	.Lkey_rotate(%rip),%xmm5
1035	movdqa	.Lkey_rcon1(%rip),%xmm4
1036	movl	$7,%r10d
1037	movdqu	%xmm0,0(%rdx)
1038	movdqa	%xmm2,%xmm1
1039	movdqu	%xmm2,16(%rdx)
1040	jmp	.Loop_key256
1041
1042.align	16
1043.Loop_key256:
// pshufb %xmm5,%xmm2 ; aesenclast %xmm4,%xmm2
1044.byte	102,15,56,0,213
1045.byte	102,15,56,221,212
1046
1047	movdqa	%xmm0,%xmm3
1048	pslldq	$4,%xmm0
1049	pxor	%xmm0,%xmm3
1050	pslldq	$4,%xmm0
1051	pxor	%xmm0,%xmm3
1052	pslldq	$4,%xmm0
1053	pxor	%xmm3,%xmm0
1054	pslld	$1,%xmm4
1055
1056	pxor	%xmm2,%xmm0
1057	movdqu	%xmm0,(%rax)
1058
// The 7th even key is the last round key: stop before deriving another
// odd key.
1059	decl	%r10d
1060	jz	.Ldone_key256
1061
// Odd key: broadcast the last dword of the new even key, then
// aesenclast %xmm3,%xmm2 with %xmm3 = 0 (no round constant is mixed in).
1062	pshufd	$0xff,%xmm0,%xmm2
1063	pxor	%xmm3,%xmm3
1064.byte	102,15,56,221,211
1065
1066	movdqa	%xmm1,%xmm3
1067	pslldq	$4,%xmm1
1068	pxor	%xmm1,%xmm3
1069	pslldq	$4,%xmm1
1070	pxor	%xmm1,%xmm3
1071	pslldq	$4,%xmm1
1072	pxor	%xmm3,%xmm1
1073
1074	pxor	%xmm1,%xmm2
1075	movdqu	%xmm2,16(%rax)
1076	leaq	32(%rax),%rax
1077	movdqa	%xmm2,%xmm1
1078
1079	jmp	.Loop_key256
1080
// %rax points at offset 224 here, so 16(%rax) is offset 240.
1081.Ldone_key256:
1082	movl	%esi,16(%rax)
1083	xorl	%eax,%eax
1084	jmp	.Lenc_key_ret
1085
1086.align	16
1087.Lbad_keybits:
1088	movq	$-2,%rax
// Common exit: scrub key-derived registers, release the 8-byte adjustment.
1089.Lenc_key_ret:
1090	pxor	%xmm0,%xmm0
1091	pxor	%xmm1,%xmm1
1092	pxor	%xmm2,%xmm2
1093	pxor	%xmm3,%xmm3
1094	pxor	%xmm4,%xmm4
1095	pxor	%xmm5,%xmm5
1096	addq	$8,%rsp
1097.cfi_adjust_cfa_offset	-8
1098	ret
1099.cfi_endproc
// Not referenced anywhere in the visible part of this file.
1100.LSEH_end_set_encrypt_key:
1101
// ---- Key-expansion helpers (reached only via `call` from above) ----
// Shared conventions: %rax = output cursor, %xmm1 = aeskeygenassist
// result, %xmm4 = scratch for the shufps-based prefix XOR.

1102.align	16
// Store the current round key (%xmm0), advance the cursor, then derive
// the next round key in %xmm0. The _cold entry skips the store.
1103.Lkey_expansion_128:
1104	movups	%xmm0,(%rax)
1105	leaq	16(%rax),%rax
1106.Lkey_expansion_128_cold:
1107	shufps	$16,%xmm0,%xmm4
1108	xorps	%xmm4,%xmm0
1109	shufps	$140,%xmm0,%xmm4
1110	xorps	%xmm4,%xmm0
// Broadcast dword 3 of the aeskeygenassist result.
1111	shufps	$255,%xmm1,%xmm1
1112	xorps	%xmm1,%xmm0
1113	ret
1114
// The .Lkey_expansion_192* helpers below are not called from any code
// visible in this file.
1115.align	16
1116.Lkey_expansion_192a:
1117	movups	%xmm0,(%rax)
1118	leaq	16(%rax),%rax
1119.Lkey_expansion_192a_cold:
1120	movaps	%xmm2,%xmm5
1121.Lkey_expansion_192b_warm:
1122	shufps	$16,%xmm0,%xmm4
1123	movdqa	%xmm2,%xmm3
1124	xorps	%xmm4,%xmm0
1125	shufps	$140,%xmm0,%xmm4
1126	pslldq	$4,%xmm3
1127	xorps	%xmm4,%xmm0
1128	pshufd	$85,%xmm1,%xmm1
1129	pxor	%xmm3,%xmm2
1130	pxor	%xmm1,%xmm0
1131	pshufd	$255,%xmm0,%xmm3
1132	pxor	%xmm3,%xmm2
1133	ret
1134
1135.align	16
1136.Lkey_expansion_192b:
1137	movaps	%xmm0,%xmm3
1138	shufps	$68,%xmm0,%xmm5
1139	movups	%xmm5,(%rax)
1140	shufps	$78,%xmm2,%xmm3
1141	movups	%xmm3,16(%rax)
1142	leaq	32(%rax),%rax
1143	jmp	.Lkey_expansion_192b_warm
1144
1145.align	16
// Store the current odd round key (%xmm2), advance the cursor, then
// derive the next even round key in %xmm0. The _cold entry skips the
// store.
1146.Lkey_expansion_256a:
1147	movups	%xmm2,(%rax)
1148	leaq	16(%rax),%rax
1149.Lkey_expansion_256a_cold:
1150	shufps	$16,%xmm0,%xmm4
1151	xorps	%xmm4,%xmm0
1152	shufps	$140,%xmm0,%xmm4
1153	xorps	%xmm4,%xmm0
// Broadcast dword 3 of the aeskeygenassist result.
1154	shufps	$255,%xmm1,%xmm1
1155	xorps	%xmm1,%xmm0
1156	ret
1157
1158.align	16
// Store the current even round key (%xmm0), advance the cursor, then
// derive the next odd round key in %xmm2.
1159.Lkey_expansion_256b:
1160	movups	%xmm0,(%rax)
1161	leaq	16(%rax),%rax
1162
1163	shufps	$16,%xmm2,%xmm4
1164	xorps	%xmm4,%xmm2
1165	shufps	$140,%xmm2,%xmm4
1166	xorps	%xmm4,%xmm2
// Broadcast dword 2 of the aeskeygenassist result.
1167	shufps	$170,%xmm1,%xmm1
1168	xorps	%xmm1,%xmm2
1169	ret
1170.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
1171.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
// Read-only constants, 64-byte aligned. Of these, only .Lkey_rotate,
// .Lkey_rcon1 and .Lkey_rcon1b are referenced by the code visible in this
// file (the pshufb/aesenclast key-schedule variants); the others have no
// visible users here.
1172.section	.rodata
1173.align	64
// Byte indices 15..0: a full 16-byte reversal when used as a shuffle mask.
1174.Lbswap_mask:
1175.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1176.Lincrement32:
1177.long	6,6,6,0
1178.Lincrement64:
1179.long	1,0,0,0
1180.Lincrement1:
1181.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
// pshufb mask: every dword selects source bytes 13,14,15,12, i.e. the
// last dword of the source rotated by one byte and broadcast.
1182.Lkey_rotate:
1183.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
// Same pattern applied to source bytes 5,6,7,4.
1184.Lkey_rotate192:
1185.long	0x04070605,0x04070605,0x04070605,0x04070605
// Initial round constant 1 in every dword; the code doubles it with pslld.
1186.Lkey_rcon1:
1187.long	1,1,1,1
// Round constant 0x1b in every dword, loaded after the eight doublings.
1188.Lkey_rcon1b:
1189.long	0x1b,0x1b,0x1b,0x1b
1190
// NUL-terminated ASCII string:
// "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
1191.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1192.align	64
// Switch back to the code section.
1193.text
1194#endif
1195