xref: /aosp_15_r20/external/cronet/third_party/boringssl/src/gen/bcm/aesni-x86_64-apple.S (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
7.text
8
// _aes_hw_encrypt: encrypt one 16-byte block with AES-NI.
//   In:  %rdi = address of the 16 input bytes
//        %rsi = address that receives the 16 output bytes
//        %rdx = address of the key schedule; the 32-bit value at offset 240
//               is used as the number of aesenc rounds executed before the
//               final aesenclast (presumably the "rounds" field of the key
//               structure -- confirm against the C declaration).
//   Uses %eax, %xmm0-%xmm2 and advances %rdx; the three xmm registers are
//   zeroed before returning.
9.globl	_aes_hw_encrypt
10.private_extern _aes_hw_encrypt
11
12.p2align	4
13_aes_hw_encrypt:
14
15_CET_ENDBR
16#ifdef BORINGSSL_DISPATCH_TEST
17
// Dispatch-test builds only: set byte 1 of _BORINGSSL_function_hit.
18	movb	$1,_BORINGSSL_function_hit+1(%rip)
19#endif
// xmm2 = input block, eax = loop count, xmm0/xmm1 = first two round keys.
20	movups	(%rdi),%xmm2
21	movl	240(%rdx),%eax
22	movups	(%rdx),%xmm0
23	movups	16(%rdx),%xmm1
24	leaq	32(%rdx),%rdx
// Initial whitening: XOR the block with round key 0.
25	xorps	%xmm0,%xmm2
// One aesenc per iteration; the next round key is fetched into %xmm1 while
// %eax counts down. The .byte sequence encodes "aesenc %xmm1,%xmm2".
26L$oop_enc1_1:
27.byte	102,15,56,220,209
28	decl	%eax
29	movups	(%rdx),%xmm1
30	leaq	16(%rdx),%rdx
31	jnz	L$oop_enc1_1
// Final round: "aesenclast %xmm1,%xmm2".
32.byte	102,15,56,221,209
// Clear key material from registers, store the result, then clear it too.
33	pxor	%xmm0,%xmm0
34	pxor	%xmm1,%xmm1
35	movups	%xmm2,(%rsi)
36	pxor	%xmm2,%xmm2
37	ret
38
39
40
// _aes_hw_decrypt: decrypt one 16-byte block with AES-NI.
//   In:  %rdi = address of the 16 input bytes
//        %rsi = address that receives the 16 output bytes
//        %rdx = address of the key schedule; the 32-bit value at offset 240
//               is used as the number of aesdec rounds executed before the
//               final aesdeclast (presumably the "rounds" field -- confirm
//               against the C declaration).
//   Uses %eax, %xmm0-%xmm2 and advances %rdx; the three xmm registers are
//   zeroed before returning.
41.globl	_aes_hw_decrypt
42.private_extern _aes_hw_decrypt
43
44.p2align	4
45_aes_hw_decrypt:
46
47_CET_ENDBR
// xmm2 = input block, eax = loop count, xmm0/xmm1 = first two round keys.
48	movups	(%rdi),%xmm2
49	movl	240(%rdx),%eax
50	movups	(%rdx),%xmm0
51	movups	16(%rdx),%xmm1
52	leaq	32(%rdx),%rdx
// Initial whitening: XOR the block with round key 0.
53	xorps	%xmm0,%xmm2
// One aesdec per iteration; the .byte sequence encodes "aesdec %xmm1,%xmm2".
54L$oop_dec1_2:
55.byte	102,15,56,222,209
56	decl	%eax
57	movups	(%rdx),%xmm1
58	leaq	16(%rdx),%rdx
59	jnz	L$oop_dec1_2
// Final round: "aesdeclast %xmm1,%xmm2".
60.byte	102,15,56,223,209
// Clear key material from registers, store the result, then clear it too.
61	pxor	%xmm0,%xmm0
62	pxor	%xmm1,%xmm1
63	movups	%xmm2,(%rsi)
64	pxor	%xmm2,%xmm2
65	ret
66
67
68
// _aesni_encrypt2 (file-local helper): encrypt two blocks in parallel.
//   In:  %xmm2,%xmm3 = blocks; %rcx = key schedule; %eax = round count as
//        read from offset 240 of the schedule by the callers in this file.
//   Out: %xmm2,%xmm3 = processed blocks.
//   Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
69.p2align	4
70_aesni_encrypt2:
71
// Whiten both blocks with round key 0. %rcx is then moved to
// 32 + 16*%eax bytes past the schedule start and %rax becomes a negative
// byte offset that climbs by 32 (two round keys) per loop iteration,
// so the loop ends exactly when %rax reaches zero.
72	movups	(%rcx),%xmm0
73	shll	$4,%eax
74	movups	16(%rcx),%xmm1
75	xorps	%xmm0,%xmm2
76	xorps	%xmm0,%xmm3
77	movups	32(%rcx),%xmm0
78	leaq	32(%rcx,%rax,1),%rcx
79	negq	%rax
80	addq	$16,%rax
81
// Two rounds per iteration: aesenc with %xmm1, then aesenc with %xmm0,
// reloading each key register right after it has been used.
82L$enc_loop2:
83.byte	102,15,56,220,209
84.byte	102,15,56,220,217
85	movups	(%rcx,%rax,1),%xmm1
86	addq	$32,%rax
87.byte	102,15,56,220,208
88.byte	102,15,56,220,216
89	movups	-16(%rcx,%rax,1),%xmm0
90	jnz	L$enc_loop2
91
// Last aesenc round (key in %xmm1) followed by aesenclast (key in %xmm0).
92.byte	102,15,56,220,209
93.byte	102,15,56,220,217
94.byte	102,15,56,221,208
95.byte	102,15,56,221,216
96	ret
97
98
99
// _aesni_decrypt2 (file-local helper): decrypt two blocks in parallel.
//   In:  %xmm2,%xmm3 = blocks; %rcx = key schedule; %eax = round count as
//        read from offset 240 of the schedule by the callers in this file.
//   Out: %xmm2,%xmm3 = processed blocks.
//   Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
100.p2align	4
101_aesni_decrypt2:
102
// Whiten with round key 0; %rcx is advanced past the keys the loop will
// consume and %rax becomes a negative byte offset that reaches zero at the
// end of the loop.
103	movups	(%rcx),%xmm0
104	shll	$4,%eax
105	movups	16(%rcx),%xmm1
106	xorps	%xmm0,%xmm2
107	xorps	%xmm0,%xmm3
108	movups	32(%rcx),%xmm0
109	leaq	32(%rcx,%rax,1),%rcx
110	negq	%rax
111	addq	$16,%rax
112
// Two rounds per iteration: aesdec with %xmm1, then aesdec with %xmm0.
113L$dec_loop2:
114.byte	102,15,56,222,209
115.byte	102,15,56,222,217
116	movups	(%rcx,%rax,1),%xmm1
117	addq	$32,%rax
118.byte	102,15,56,222,208
119.byte	102,15,56,222,216
120	movups	-16(%rcx,%rax,1),%xmm0
121	jnz	L$dec_loop2
122
// Last aesdec round (key in %xmm1) followed by aesdeclast (key in %xmm0).
123.byte	102,15,56,222,209
124.byte	102,15,56,222,217
125.byte	102,15,56,223,208
126.byte	102,15,56,223,216
127	ret
128
129
130
// _aesni_encrypt3 (file-local helper): encrypt three blocks in parallel.
//   In:  %xmm2-%xmm4 = blocks; %rcx = key schedule; %eax = round count as
//        read from offset 240 of the schedule by the callers in this file.
//   Out: %xmm2-%xmm4 = processed blocks.
//   Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
131.p2align	4
132_aesni_encrypt3:
133
// Whiten with round key 0; %rcx is advanced past the keys the loop will
// consume and %rax becomes a negative byte offset that reaches zero at the
// end of the loop.
134	movups	(%rcx),%xmm0
135	shll	$4,%eax
136	movups	16(%rcx),%xmm1
137	xorps	%xmm0,%xmm2
138	xorps	%xmm0,%xmm3
139	xorps	%xmm0,%xmm4
140	movups	32(%rcx),%xmm0
141	leaq	32(%rcx,%rax,1),%rcx
142	negq	%rax
143	addq	$16,%rax
144
// Two rounds per iteration: aesenc with %xmm1 on all three blocks, then
// aesenc with %xmm0 on all three blocks.
145L$enc_loop3:
146.byte	102,15,56,220,209
147.byte	102,15,56,220,217
148.byte	102,15,56,220,225
149	movups	(%rcx,%rax,1),%xmm1
150	addq	$32,%rax
151.byte	102,15,56,220,208
152.byte	102,15,56,220,216
153.byte	102,15,56,220,224
154	movups	-16(%rcx,%rax,1),%xmm0
155	jnz	L$enc_loop3
156
// Last aesenc round (key in %xmm1) followed by aesenclast (key in %xmm0).
157.byte	102,15,56,220,209
158.byte	102,15,56,220,217
159.byte	102,15,56,220,225
160.byte	102,15,56,221,208
161.byte	102,15,56,221,216
162.byte	102,15,56,221,224
163	ret
164
165
166
// _aesni_decrypt3 (file-local helper): decrypt three blocks in parallel.
//   In:  %xmm2-%xmm4 = blocks; %rcx = key schedule; %eax = round count as
//        read from offset 240 of the schedule by the callers in this file.
//   Out: %xmm2-%xmm4 = processed blocks.
//   Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
167.p2align	4
168_aesni_decrypt3:
169
// Whiten with round key 0; %rcx is advanced past the keys the loop will
// consume and %rax becomes a negative byte offset that reaches zero at the
// end of the loop.
170	movups	(%rcx),%xmm0
171	shll	$4,%eax
172	movups	16(%rcx),%xmm1
173	xorps	%xmm0,%xmm2
174	xorps	%xmm0,%xmm3
175	xorps	%xmm0,%xmm4
176	movups	32(%rcx),%xmm0
177	leaq	32(%rcx,%rax,1),%rcx
178	negq	%rax
179	addq	$16,%rax
180
// Two rounds per iteration: aesdec with %xmm1 on all three blocks, then
// aesdec with %xmm0 on all three blocks.
181L$dec_loop3:
182.byte	102,15,56,222,209
183.byte	102,15,56,222,217
184.byte	102,15,56,222,225
185	movups	(%rcx,%rax,1),%xmm1
186	addq	$32,%rax
187.byte	102,15,56,222,208
188.byte	102,15,56,222,216
189.byte	102,15,56,222,224
190	movups	-16(%rcx,%rax,1),%xmm0
191	jnz	L$dec_loop3
192
// Last aesdec round (key in %xmm1) followed by aesdeclast (key in %xmm0).
193.byte	102,15,56,222,209
194.byte	102,15,56,222,217
195.byte	102,15,56,222,225
196.byte	102,15,56,223,208
197.byte	102,15,56,223,216
198.byte	102,15,56,223,224
199	ret
200
201
202
// _aesni_encrypt4 (file-local helper): encrypt four blocks in parallel.
//   In:  %xmm2-%xmm5 = blocks; %rcx = key schedule; %eax = round count as
//        read from offset 240 of the schedule by the callers in this file.
//   Out: %xmm2-%xmm5 = processed blocks.
//   Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
203.p2align	4
204_aesni_encrypt4:
205
// Whiten with round key 0; %rcx is advanced past the keys the loop will
// consume and %rax becomes a negative byte offset that reaches zero at the
// end of the loop.
206	movups	(%rcx),%xmm0
207	shll	$4,%eax
208	movups	16(%rcx),%xmm1
209	xorps	%xmm0,%xmm2
210	xorps	%xmm0,%xmm3
211	xorps	%xmm0,%xmm4
212	xorps	%xmm0,%xmm5
213	movups	32(%rcx),%xmm0
214	leaq	32(%rcx,%rax,1),%rcx
215	negq	%rax
// 0x0f,0x1f,0x00 is a 3-byte NOP ("nopl (%rax)") used as padding.
216.byte	0x0f,0x1f,0x00
217	addq	$16,%rax
218
// Two rounds per iteration: aesenc with %xmm1 on all four blocks, then
// aesenc with %xmm0 on all four blocks.
219L$enc_loop4:
220.byte	102,15,56,220,209
221.byte	102,15,56,220,217
222.byte	102,15,56,220,225
223.byte	102,15,56,220,233
224	movups	(%rcx,%rax,1),%xmm1
225	addq	$32,%rax
226.byte	102,15,56,220,208
227.byte	102,15,56,220,216
228.byte	102,15,56,220,224
229.byte	102,15,56,220,232
230	movups	-16(%rcx,%rax,1),%xmm0
231	jnz	L$enc_loop4
232
// Last aesenc round (key in %xmm1) followed by aesenclast (key in %xmm0).
233.byte	102,15,56,220,209
234.byte	102,15,56,220,217
235.byte	102,15,56,220,225
236.byte	102,15,56,220,233
237.byte	102,15,56,221,208
238.byte	102,15,56,221,216
239.byte	102,15,56,221,224
240.byte	102,15,56,221,232
241	ret
242
243
244
// _aesni_decrypt4 (file-local helper): decrypt four blocks in parallel.
//   In:  %xmm2-%xmm5 = blocks; %rcx = key schedule; %eax = round count as
//        read from offset 240 of the schedule by the callers in this file.
//   Out: %xmm2-%xmm5 = processed blocks.
//   Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
245.p2align	4
246_aesni_decrypt4:
247
// Whiten with round key 0; %rcx is advanced past the keys the loop will
// consume and %rax becomes a negative byte offset that reaches zero at the
// end of the loop.
248	movups	(%rcx),%xmm0
249	shll	$4,%eax
250	movups	16(%rcx),%xmm1
251	xorps	%xmm0,%xmm2
252	xorps	%xmm0,%xmm3
253	xorps	%xmm0,%xmm4
254	xorps	%xmm0,%xmm5
255	movups	32(%rcx),%xmm0
256	leaq	32(%rcx,%rax,1),%rcx
257	negq	%rax
// 0x0f,0x1f,0x00 is a 3-byte NOP ("nopl (%rax)") used as padding.
258.byte	0x0f,0x1f,0x00
259	addq	$16,%rax
260
// Two rounds per iteration: aesdec with %xmm1 on all four blocks, then
// aesdec with %xmm0 on all four blocks.
261L$dec_loop4:
262.byte	102,15,56,222,209
263.byte	102,15,56,222,217
264.byte	102,15,56,222,225
265.byte	102,15,56,222,233
266	movups	(%rcx,%rax,1),%xmm1
267	addq	$32,%rax
268.byte	102,15,56,222,208
269.byte	102,15,56,222,216
270.byte	102,15,56,222,224
271.byte	102,15,56,222,232
272	movups	-16(%rcx,%rax,1),%xmm0
273	jnz	L$dec_loop4
274
// Last aesdec round (key in %xmm1) followed by aesdeclast (key in %xmm0).
275.byte	102,15,56,222,209
276.byte	102,15,56,222,217
277.byte	102,15,56,222,225
278.byte	102,15,56,222,233
279.byte	102,15,56,223,208
280.byte	102,15,56,223,216
281.byte	102,15,56,223,224
282.byte	102,15,56,223,232
283	ret
284
285
286
// _aesni_encrypt6 (file-local helper): encrypt six blocks in parallel.
//   In:  %xmm2-%xmm7 = blocks; %rcx = key schedule; %eax = round count as
//        read from offset 240 of the schedule by the callers in this file.
//   Out: %xmm2-%xmm7 = processed blocks.
//   Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
287.p2align	4
288_aesni_encrypt6:
289
// Prologue: whitening with round key 0 is interleaved with the first
// aesenc round (key in %xmm1) on %xmm2-%xmm4. The remaining three blocks
// get that first round at L$enc_loop6_enter, which is why the prologue
// jumps into the middle of the loop body.
290	movups	(%rcx),%xmm0
291	shll	$4,%eax
292	movups	16(%rcx),%xmm1
293	xorps	%xmm0,%xmm2
294	pxor	%xmm0,%xmm3
295	pxor	%xmm0,%xmm4
296.byte	102,15,56,220,209
297	leaq	32(%rcx,%rax,1),%rcx
298	negq	%rax
299.byte	102,15,56,220,217
300	pxor	%xmm0,%xmm5
301	pxor	%xmm0,%xmm6
302.byte	102,15,56,220,225
303	pxor	%xmm0,%xmm7
304	movups	(%rcx,%rax,1),%xmm0
305	addq	$16,%rax
306	jmp	L$enc_loop6_enter
307.p2align	4
// Two rounds per iteration: aesenc with %xmm1 on all six blocks, then
// aesenc with %xmm0 on all six blocks. %rax climbs by 32 towards zero.
308L$enc_loop6:
309.byte	102,15,56,220,209
310.byte	102,15,56,220,217
311.byte	102,15,56,220,225
312L$enc_loop6_enter:
313.byte	102,15,56,220,233
314.byte	102,15,56,220,241
315.byte	102,15,56,220,249
316	movups	(%rcx,%rax,1),%xmm1
317	addq	$32,%rax
318.byte	102,15,56,220,208
319.byte	102,15,56,220,216
320.byte	102,15,56,220,224
321.byte	102,15,56,220,232
322.byte	102,15,56,220,240
323.byte	102,15,56,220,248
324	movups	-16(%rcx,%rax,1),%xmm0
325	jnz	L$enc_loop6
326
// Last aesenc round (key in %xmm1) followed by aesenclast (key in %xmm0).
327.byte	102,15,56,220,209
328.byte	102,15,56,220,217
329.byte	102,15,56,220,225
330.byte	102,15,56,220,233
331.byte	102,15,56,220,241
332.byte	102,15,56,220,249
333.byte	102,15,56,221,208
334.byte	102,15,56,221,216
335.byte	102,15,56,221,224
336.byte	102,15,56,221,232
337.byte	102,15,56,221,240
338.byte	102,15,56,221,248
339	ret
340
341
342
// _aesni_decrypt6 (file-local helper): decrypt six blocks in parallel.
//   In:  %xmm2-%xmm7 = blocks; %rcx = key schedule; %eax = round count as
//        read from offset 240 of the schedule by the callers in this file.
//   Out: %xmm2-%xmm7 = processed blocks.
//   Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
343.p2align	4
344_aesni_decrypt6:
345
// Prologue: whitening with round key 0 is interleaved with the first
// aesdec round (key in %xmm1) on %xmm2-%xmm4. The remaining three blocks
// get that first round at L$dec_loop6_enter, which is why the prologue
// jumps into the middle of the loop body.
346	movups	(%rcx),%xmm0
347	shll	$4,%eax
348	movups	16(%rcx),%xmm1
349	xorps	%xmm0,%xmm2
350	pxor	%xmm0,%xmm3
351	pxor	%xmm0,%xmm4
352.byte	102,15,56,222,209
353	leaq	32(%rcx,%rax,1),%rcx
354	negq	%rax
355.byte	102,15,56,222,217
356	pxor	%xmm0,%xmm5
357	pxor	%xmm0,%xmm6
358.byte	102,15,56,222,225
359	pxor	%xmm0,%xmm7
360	movups	(%rcx,%rax,1),%xmm0
361	addq	$16,%rax
362	jmp	L$dec_loop6_enter
363.p2align	4
// Two rounds per iteration: aesdec with %xmm1 on all six blocks, then
// aesdec with %xmm0 on all six blocks. %rax climbs by 32 towards zero.
364L$dec_loop6:
365.byte	102,15,56,222,209
366.byte	102,15,56,222,217
367.byte	102,15,56,222,225
368L$dec_loop6_enter:
369.byte	102,15,56,222,233
370.byte	102,15,56,222,241
371.byte	102,15,56,222,249
372	movups	(%rcx,%rax,1),%xmm1
373	addq	$32,%rax
374.byte	102,15,56,222,208
375.byte	102,15,56,222,216
376.byte	102,15,56,222,224
377.byte	102,15,56,222,232
378.byte	102,15,56,222,240
379.byte	102,15,56,222,248
380	movups	-16(%rcx,%rax,1),%xmm0
381	jnz	L$dec_loop6
382
// Last aesdec round (key in %xmm1) followed by aesdeclast (key in %xmm0).
383.byte	102,15,56,222,209
384.byte	102,15,56,222,217
385.byte	102,15,56,222,225
386.byte	102,15,56,222,233
387.byte	102,15,56,222,241
388.byte	102,15,56,222,249
389.byte	102,15,56,223,208
390.byte	102,15,56,223,216
391.byte	102,15,56,223,224
392.byte	102,15,56,223,232
393.byte	102,15,56,223,240
394.byte	102,15,56,223,248
395	ret
396
397
398
// _aesni_encrypt8 (file-local helper): encrypt eight blocks in parallel.
//   In:  %xmm2-%xmm9 = blocks; %rcx = key schedule; %eax = round count as
//        read from offset 240 of the schedule by the callers in this file.
//   Out: %xmm2-%xmm9 = processed blocks.
//   Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
// The six-byte .byte sequences (with the 68 REX prefix) are the aesenc /
// aesenclast forms whose destination is %xmm8 or %xmm9.
// NOTE: L$enc_loop8_enter is also used as a call target by the tail path
// of _aes_hw_ctr32_encrypt_blocks, which performs its own prologue.
399.p2align	4
400_aesni_encrypt8:
401
// Prologue: whitening with round key 0 is interleaved with the first
// aesenc round (key in %xmm1) on %xmm2 and %xmm3; the other six blocks
// get that round at L$enc_loop8_inner.
402	movups	(%rcx),%xmm0
403	shll	$4,%eax
404	movups	16(%rcx),%xmm1
405	xorps	%xmm0,%xmm2
406	xorps	%xmm0,%xmm3
407	pxor	%xmm0,%xmm4
408	pxor	%xmm0,%xmm5
409	pxor	%xmm0,%xmm6
410	leaq	32(%rcx,%rax,1),%rcx
411	negq	%rax
412.byte	102,15,56,220,209
413	pxor	%xmm0,%xmm7
414	pxor	%xmm0,%xmm8
415.byte	102,15,56,220,217
416	pxor	%xmm0,%xmm9
417	movups	(%rcx,%rax,1),%xmm0
418	addq	$16,%rax
419	jmp	L$enc_loop8_inner
420.p2align	4
// Two rounds per iteration: aesenc with %xmm1 on all eight blocks, then
// aesenc with %xmm0 on all eight blocks. %rax climbs by 32 towards zero.
421L$enc_loop8:
422.byte	102,15,56,220,209
423.byte	102,15,56,220,217
424L$enc_loop8_inner:
425.byte	102,15,56,220,225
426.byte	102,15,56,220,233
427.byte	102,15,56,220,241
428.byte	102,15,56,220,249
429.byte	102,68,15,56,220,193
430.byte	102,68,15,56,220,201
431L$enc_loop8_enter:
432	movups	(%rcx,%rax,1),%xmm1
433	addq	$32,%rax
434.byte	102,15,56,220,208
435.byte	102,15,56,220,216
436.byte	102,15,56,220,224
437.byte	102,15,56,220,232
438.byte	102,15,56,220,240
439.byte	102,15,56,220,248
440.byte	102,68,15,56,220,192
441.byte	102,68,15,56,220,200
442	movups	-16(%rcx,%rax,1),%xmm0
443	jnz	L$enc_loop8
444
// Last aesenc round (key in %xmm1) followed by aesenclast (key in %xmm0).
445.byte	102,15,56,220,209
446.byte	102,15,56,220,217
447.byte	102,15,56,220,225
448.byte	102,15,56,220,233
449.byte	102,15,56,220,241
450.byte	102,15,56,220,249
451.byte	102,68,15,56,220,193
452.byte	102,68,15,56,220,201
453.byte	102,15,56,221,208
454.byte	102,15,56,221,216
455.byte	102,15,56,221,224
456.byte	102,15,56,221,232
457.byte	102,15,56,221,240
458.byte	102,15,56,221,248
459.byte	102,68,15,56,221,192
460.byte	102,68,15,56,221,200
461	ret
462
463
464
// _aesni_decrypt8 (file-local helper): decrypt eight blocks in parallel.
//   In:  %xmm2-%xmm9 = blocks; %rcx = key schedule; %eax = round count as
//        read from offset 240 of the schedule by the callers in this file.
//   Out: %xmm2-%xmm9 = processed blocks.
//   Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
// The six-byte .byte sequences (with the 68 REX prefix) are the aesdec /
// aesdeclast forms whose destination is %xmm8 or %xmm9.
465.p2align	4
466_aesni_decrypt8:
467
// Prologue: whitening with round key 0 is interleaved with the first
// aesdec round (key in %xmm1) on %xmm2 and %xmm3; the other six blocks
// get that round at L$dec_loop8_inner.
468	movups	(%rcx),%xmm0
469	shll	$4,%eax
470	movups	16(%rcx),%xmm1
471	xorps	%xmm0,%xmm2
472	xorps	%xmm0,%xmm3
473	pxor	%xmm0,%xmm4
474	pxor	%xmm0,%xmm5
475	pxor	%xmm0,%xmm6
476	leaq	32(%rcx,%rax,1),%rcx
477	negq	%rax
478.byte	102,15,56,222,209
479	pxor	%xmm0,%xmm7
480	pxor	%xmm0,%xmm8
481.byte	102,15,56,222,217
482	pxor	%xmm0,%xmm9
483	movups	(%rcx,%rax,1),%xmm0
484	addq	$16,%rax
485	jmp	L$dec_loop8_inner
486.p2align	4
// Two rounds per iteration: aesdec with %xmm1 on all eight blocks, then
// aesdec with %xmm0 on all eight blocks. %rax climbs by 32 towards zero.
487L$dec_loop8:
488.byte	102,15,56,222,209
489.byte	102,15,56,222,217
490L$dec_loop8_inner:
491.byte	102,15,56,222,225
492.byte	102,15,56,222,233
493.byte	102,15,56,222,241
494.byte	102,15,56,222,249
495.byte	102,68,15,56,222,193
496.byte	102,68,15,56,222,201
497L$dec_loop8_enter:
498	movups	(%rcx,%rax,1),%xmm1
499	addq	$32,%rax
500.byte	102,15,56,222,208
501.byte	102,15,56,222,216
502.byte	102,15,56,222,224
503.byte	102,15,56,222,232
504.byte	102,15,56,222,240
505.byte	102,15,56,222,248
506.byte	102,68,15,56,222,192
507.byte	102,68,15,56,222,200
508	movups	-16(%rcx,%rax,1),%xmm0
509	jnz	L$dec_loop8
510
// Last aesdec round (key in %xmm1) followed by aesdeclast (key in %xmm0).
511.byte	102,15,56,222,209
512.byte	102,15,56,222,217
513.byte	102,15,56,222,225
514.byte	102,15,56,222,233
515.byte	102,15,56,222,241
516.byte	102,15,56,222,249
517.byte	102,68,15,56,222,193
518.byte	102,68,15,56,222,201
519.byte	102,15,56,223,208
520.byte	102,15,56,223,216
521.byte	102,15,56,223,224
522.byte	102,15,56,223,232
523.byte	102,15,56,223,240
524.byte	102,15,56,223,248
525.byte	102,68,15,56,223,192
526.byte	102,68,15,56,223,200
527	ret
528
529
// _aes_hw_ecb_encrypt: process a buffer block-by-block (ECB) with AES-NI.
//   In:  %rdi = input pointer
//        %rsi = output pointer
//        %rdx = length in bytes; rounded down to a multiple of 16 here, and
//               a resulting length of zero returns immediately
//        %rcx = key schedule (32-bit round count read from offset 240)
//        %r8d = direction flag: non-zero takes the encrypt path, zero the
//               decrypt path
//   Scratch: %rax, %r10 (saved round count), %r11 (saved key pointer),
//            %xmm0-%xmm9, flags.
//   Structure: an 8-blocks-per-iteration main loop while at least 0x80
//   bytes remain, then a tail that dispatches on the remaining 1..7 blocks.
//   The decrypt path zeroes each data register after storing it; the
//   encrypt path does not.
530.globl	_aes_hw_ecb_encrypt
531.private_extern _aes_hw_ecb_encrypt
532
533.p2align	4
534_aes_hw_ecb_encrypt:
535
536_CET_ENDBR
// Drop any partial trailing block; nothing to do if no whole block remains.
537	andq	$-16,%rdx
538	jz	L$ecb_ret
539
// Keep copies of the key pointer and round count: the helpers clobber
// %rcx and %eax, so both are restored from %r11/%r10d before each reuse.
540	movl	240(%rcx),%eax
541	movups	(%rcx),%xmm0
542	movq	%rcx,%r11
543	movl	%eax,%r10d
544	testl	%r8d,%r8d
545	jz	L$ecb_decrypt
546
// ---- Encrypt path ----
547	cmpq	$0x80,%rdx
548	jb	L$ecb_enc_tail
549
// Preload the first eight input blocks and enter the loop at the call.
550	movdqu	(%rdi),%xmm2
551	movdqu	16(%rdi),%xmm3
552	movdqu	32(%rdi),%xmm4
553	movdqu	48(%rdi),%xmm5
554	movdqu	64(%rdi),%xmm6
555	movdqu	80(%rdi),%xmm7
556	movdqu	96(%rdi),%xmm8
557	movdqu	112(%rdi),%xmm9
558	leaq	128(%rdi),%rdi
559	subq	$0x80,%rdx
560	jmp	L$ecb_enc_loop8_enter
561.p2align	4
// Loop body: store the eight results of the previous iteration while
// loading the next eight inputs, and restore %rcx/%eax for the helper.
562L$ecb_enc_loop8:
563	movups	%xmm2,(%rsi)
564	movq	%r11,%rcx
565	movdqu	(%rdi),%xmm2
566	movl	%r10d,%eax
567	movups	%xmm3,16(%rsi)
568	movdqu	16(%rdi),%xmm3
569	movups	%xmm4,32(%rsi)
570	movdqu	32(%rdi),%xmm4
571	movups	%xmm5,48(%rsi)
572	movdqu	48(%rdi),%xmm5
573	movups	%xmm6,64(%rsi)
574	movdqu	64(%rdi),%xmm6
575	movups	%xmm7,80(%rsi)
576	movdqu	80(%rdi),%xmm7
577	movups	%xmm8,96(%rsi)
578	movdqu	96(%rdi),%xmm8
579	movups	%xmm9,112(%rsi)
580	leaq	128(%rsi),%rsi
581	movdqu	112(%rdi),%xmm9
582	leaq	128(%rdi),%rdi
583L$ecb_enc_loop8_enter:
584
585	call	_aesni_encrypt8
586
// Continue while another full 0x80 bytes remain (no borrow from the sub).
587	subq	$0x80,%rdx
588	jnc	L$ecb_enc_loop8
589
// Store the final group of eight and undo the last subtraction so %rdx
// holds the number of leftover bytes (0..0x70).
590	movups	%xmm2,(%rsi)
591	movq	%r11,%rcx
592	movups	%xmm3,16(%rsi)
593	movl	%r10d,%eax
594	movups	%xmm4,32(%rsi)
595	movups	%xmm5,48(%rsi)
596	movups	%xmm6,64(%rsi)
597	movups	%xmm7,80(%rsi)
598	movups	%xmm8,96(%rsi)
599	movups	%xmm9,112(%rsi)
600	leaq	128(%rsi),%rsi
601	addq	$0x80,%rdx
602	jz	L$ecb_ret
603
// Tail dispatch for 1..7 remaining blocks. Each cmpq feeds two branches:
// the movups between "jb" and "je" does not modify flags, so "je" still
// tests the same comparison.
604L$ecb_enc_tail:
605	movups	(%rdi),%xmm2
606	cmpq	$0x20,%rdx
607	jb	L$ecb_enc_one
608	movups	16(%rdi),%xmm3
609	je	L$ecb_enc_two
610	movups	32(%rdi),%xmm4
611	cmpq	$0x40,%rdx
612	jb	L$ecb_enc_three
613	movups	48(%rdi),%xmm5
614	je	L$ecb_enc_four
615	movups	64(%rdi),%xmm6
616	cmpq	$0x60,%rdx
617	jb	L$ecb_enc_five
618	movups	80(%rdi),%xmm7
619	je	L$ecb_enc_six
// Seven blocks: run the 8-way helper with a zeroed eighth lane (%xmm9)
// whose result is not stored.
620	movdqu	96(%rdi),%xmm8
621	xorps	%xmm9,%xmm9
622	call	_aesni_encrypt8
623	movups	%xmm2,(%rsi)
624	movups	%xmm3,16(%rsi)
625	movups	%xmm4,32(%rsi)
626	movups	%xmm5,48(%rsi)
627	movups	%xmm6,64(%rsi)
628	movups	%xmm7,80(%rsi)
629	movups	%xmm8,96(%rsi)
630	jmp	L$ecb_ret
631.p2align	4
// One block: inline single-block loop (aesenc %xmm1,%xmm2 per iteration,
// then aesenclast %xmm1,%xmm2).
632L$ecb_enc_one:
633	movups	(%rcx),%xmm0
634	movups	16(%rcx),%xmm1
635	leaq	32(%rcx),%rcx
636	xorps	%xmm0,%xmm2
637L$oop_enc1_3:
638.byte	102,15,56,220,209
639	decl	%eax
640	movups	(%rcx),%xmm1
641	leaq	16(%rcx),%rcx
642	jnz	L$oop_enc1_3
643.byte	102,15,56,221,209
644	movups	%xmm2,(%rsi)
645	jmp	L$ecb_ret
646.p2align	4
647L$ecb_enc_two:
648	call	_aesni_encrypt2
649	movups	%xmm2,(%rsi)
650	movups	%xmm3,16(%rsi)
651	jmp	L$ecb_ret
652.p2align	4
653L$ecb_enc_three:
654	call	_aesni_encrypt3
655	movups	%xmm2,(%rsi)
656	movups	%xmm3,16(%rsi)
657	movups	%xmm4,32(%rsi)
658	jmp	L$ecb_ret
659.p2align	4
660L$ecb_enc_four:
661	call	_aesni_encrypt4
662	movups	%xmm2,(%rsi)
663	movups	%xmm3,16(%rsi)
664	movups	%xmm4,32(%rsi)
665	movups	%xmm5,48(%rsi)
666	jmp	L$ecb_ret
667.p2align	4
// Five blocks: run the 6-way helper with a zeroed sixth lane (%xmm7)
// whose result is not stored.
668L$ecb_enc_five:
669	xorps	%xmm7,%xmm7
670	call	_aesni_encrypt6
671	movups	%xmm2,(%rsi)
672	movups	%xmm3,16(%rsi)
673	movups	%xmm4,32(%rsi)
674	movups	%xmm5,48(%rsi)
675	movups	%xmm6,64(%rsi)
676	jmp	L$ecb_ret
677.p2align	4
678L$ecb_enc_six:
679	call	_aesni_encrypt6
680	movups	%xmm2,(%rsi)
681	movups	%xmm3,16(%rsi)
682	movups	%xmm4,32(%rsi)
683	movups	%xmm5,48(%rsi)
684	movups	%xmm6,64(%rsi)
685	movups	%xmm7,80(%rsi)
686	jmp	L$ecb_ret
687
688.p2align	4
// ---- Decrypt path (same shape as the encrypt path) ----
689L$ecb_decrypt:
690	cmpq	$0x80,%rdx
691	jb	L$ecb_dec_tail
692
// Preload the first eight input blocks and enter the loop at the call.
693	movdqu	(%rdi),%xmm2
694	movdqu	16(%rdi),%xmm3
695	movdqu	32(%rdi),%xmm4
696	movdqu	48(%rdi),%xmm5
697	movdqu	64(%rdi),%xmm6
698	movdqu	80(%rdi),%xmm7
699	movdqu	96(%rdi),%xmm8
700	movdqu	112(%rdi),%xmm9
701	leaq	128(%rdi),%rdi
702	subq	$0x80,%rdx
703	jmp	L$ecb_dec_loop8_enter
704.p2align	4
// Loop body: store the previous eight results while loading the next
// eight inputs, and restore %rcx/%eax for the helper.
705L$ecb_dec_loop8:
706	movups	%xmm2,(%rsi)
707	movq	%r11,%rcx
708	movdqu	(%rdi),%xmm2
709	movl	%r10d,%eax
710	movups	%xmm3,16(%rsi)
711	movdqu	16(%rdi),%xmm3
712	movups	%xmm4,32(%rsi)
713	movdqu	32(%rdi),%xmm4
714	movups	%xmm5,48(%rsi)
715	movdqu	48(%rdi),%xmm5
716	movups	%xmm6,64(%rsi)
717	movdqu	64(%rdi),%xmm6
718	movups	%xmm7,80(%rsi)
719	movdqu	80(%rdi),%xmm7
720	movups	%xmm8,96(%rsi)
721	movdqu	96(%rdi),%xmm8
722	movups	%xmm9,112(%rsi)
723	leaq	128(%rsi),%rsi
724	movdqu	112(%rdi),%xmm9
725	leaq	128(%rdi),%rdi
726L$ecb_dec_loop8_enter:
727
728	call	_aesni_decrypt8
729
// %xmm0 = first 16 bytes of the key schedule (via the saved pointer).
730	movups	(%r11),%xmm0
731	subq	$0x80,%rdx
732	jnc	L$ecb_dec_loop8
733
// Store the final group of eight, zeroing each register right after its
// store, then undo the last subtraction to get the leftover byte count.
734	movups	%xmm2,(%rsi)
735	pxor	%xmm2,%xmm2
736	movq	%r11,%rcx
737	movups	%xmm3,16(%rsi)
738	pxor	%xmm3,%xmm3
739	movl	%r10d,%eax
740	movups	%xmm4,32(%rsi)
741	pxor	%xmm4,%xmm4
742	movups	%xmm5,48(%rsi)
743	pxor	%xmm5,%xmm5
744	movups	%xmm6,64(%rsi)
745	pxor	%xmm6,%xmm6
746	movups	%xmm7,80(%rsi)
747	pxor	%xmm7,%xmm7
748	movups	%xmm8,96(%rsi)
749	pxor	%xmm8,%xmm8
750	movups	%xmm9,112(%rsi)
751	pxor	%xmm9,%xmm9
752	leaq	128(%rsi),%rsi
753	addq	$0x80,%rdx
754	jz	L$ecb_ret
755
// Tail dispatch for 1..7 remaining blocks; as in the encrypt tail, each
// cmpq feeds both the "jb" and the following "je".
756L$ecb_dec_tail:
757	movups	(%rdi),%xmm2
758	cmpq	$0x20,%rdx
759	jb	L$ecb_dec_one
760	movups	16(%rdi),%xmm3
761	je	L$ecb_dec_two
762	movups	32(%rdi),%xmm4
763	cmpq	$0x40,%rdx
764	jb	L$ecb_dec_three
765	movups	48(%rdi),%xmm5
766	je	L$ecb_dec_four
767	movups	64(%rdi),%xmm6
768	cmpq	$0x60,%rdx
769	jb	L$ecb_dec_five
770	movups	80(%rdi),%xmm7
771	je	L$ecb_dec_six
// Seven blocks: run the 8-way helper with a zeroed eighth lane (%xmm9)
// whose result is discarded.
772	movups	96(%rdi),%xmm8
773	movups	(%rcx),%xmm0
774	xorps	%xmm9,%xmm9
775	call	_aesni_decrypt8
776	movups	%xmm2,(%rsi)
777	pxor	%xmm2,%xmm2
778	movups	%xmm3,16(%rsi)
779	pxor	%xmm3,%xmm3
780	movups	%xmm4,32(%rsi)
781	pxor	%xmm4,%xmm4
782	movups	%xmm5,48(%rsi)
783	pxor	%xmm5,%xmm5
784	movups	%xmm6,64(%rsi)
785	pxor	%xmm6,%xmm6
786	movups	%xmm7,80(%rsi)
787	pxor	%xmm7,%xmm7
788	movups	%xmm8,96(%rsi)
789	pxor	%xmm8,%xmm8
790	pxor	%xmm9,%xmm9
791	jmp	L$ecb_ret
792.p2align	4
// One block: inline single-block loop (aesdec %xmm1,%xmm2 per iteration,
// then aesdeclast %xmm1,%xmm2).
793L$ecb_dec_one:
794	movups	(%rcx),%xmm0
795	movups	16(%rcx),%xmm1
796	leaq	32(%rcx),%rcx
797	xorps	%xmm0,%xmm2
798L$oop_dec1_4:
799.byte	102,15,56,222,209
800	decl	%eax
801	movups	(%rcx),%xmm1
802	leaq	16(%rcx),%rcx
803	jnz	L$oop_dec1_4
804.byte	102,15,56,223,209
805	movups	%xmm2,(%rsi)
806	pxor	%xmm2,%xmm2
807	jmp	L$ecb_ret
808.p2align	4
809L$ecb_dec_two:
810	call	_aesni_decrypt2
811	movups	%xmm2,(%rsi)
812	pxor	%xmm2,%xmm2
813	movups	%xmm3,16(%rsi)
814	pxor	%xmm3,%xmm3
815	jmp	L$ecb_ret
816.p2align	4
817L$ecb_dec_three:
818	call	_aesni_decrypt3
819	movups	%xmm2,(%rsi)
820	pxor	%xmm2,%xmm2
821	movups	%xmm3,16(%rsi)
822	pxor	%xmm3,%xmm3
823	movups	%xmm4,32(%rsi)
824	pxor	%xmm4,%xmm4
825	jmp	L$ecb_ret
826.p2align	4
827L$ecb_dec_four:
828	call	_aesni_decrypt4
829	movups	%xmm2,(%rsi)
830	pxor	%xmm2,%xmm2
831	movups	%xmm3,16(%rsi)
832	pxor	%xmm3,%xmm3
833	movups	%xmm4,32(%rsi)
834	pxor	%xmm4,%xmm4
835	movups	%xmm5,48(%rsi)
836	pxor	%xmm5,%xmm5
837	jmp	L$ecb_ret
838.p2align	4
// Five blocks: run the 6-way helper with a zeroed sixth lane (%xmm7)
// whose result is discarded.
839L$ecb_dec_five:
840	xorps	%xmm7,%xmm7
841	call	_aesni_decrypt6
842	movups	%xmm2,(%rsi)
843	pxor	%xmm2,%xmm2
844	movups	%xmm3,16(%rsi)
845	pxor	%xmm3,%xmm3
846	movups	%xmm4,32(%rsi)
847	pxor	%xmm4,%xmm4
848	movups	%xmm5,48(%rsi)
849	pxor	%xmm5,%xmm5
850	movups	%xmm6,64(%rsi)
851	pxor	%xmm6,%xmm6
852	pxor	%xmm7,%xmm7
853	jmp	L$ecb_ret
854.p2align	4
// Six blocks; this case falls through into L$ecb_ret.
855L$ecb_dec_six:
856	call	_aesni_decrypt6
857	movups	%xmm2,(%rsi)
858	pxor	%xmm2,%xmm2
859	movups	%xmm3,16(%rsi)
860	pxor	%xmm3,%xmm3
861	movups	%xmm4,32(%rsi)
862	pxor	%xmm4,%xmm4
863	movups	%xmm5,48(%rsi)
864	pxor	%xmm5,%xmm5
865	movups	%xmm6,64(%rsi)
866	pxor	%xmm6,%xmm6
867	movups	%xmm7,80(%rsi)
868	pxor	%xmm7,%xmm7
869
// Common exit: clear the round-key registers before returning.
870L$ecb_ret:
871	xorps	%xmm0,%xmm0
872	pxor	%xmm1,%xmm1
873	ret
874
875
// _aes_hw_ctr32_encrypt_blocks: CTR-mode keystream XOR with a 32-bit
// big-endian counter held in the last four bytes of the counter block.
//   In:  %rdi = input pointer
//        %rsi = output pointer
//        %rdx = number of 16-byte blocks (compared against 1 and 8 and
//               decremented by 8 per main-loop iteration)
//        %rcx = key schedule (32-bit round count read from offset 240)
//        %r8  = pointer to the 16-byte initial counter block
//   Scratch: %rax, %r9, %r10, %r11, %xmm0-%xmm15, flags. %rbp is saved on
//   entry to the bulk path and restored at exit.
//   Structure: a single-block fast path; otherwise a bulk path that keeps
//   eight counter blocks (already XORed with round key 0) in a 128-byte
//   16-byte-aligned stack area, an 8-blocks-per-iteration main loop, and a
//   tail for the remaining 1..7 blocks.
876.globl	_aes_hw_ctr32_encrypt_blocks
877.private_extern _aes_hw_ctr32_encrypt_blocks
878
879.p2align	4
880_aes_hw_ctr32_encrypt_blocks:
881
882_CET_ENDBR
883#ifdef BORINGSSL_DISPATCH_TEST
// Dispatch-test builds only: set byte 0 of _BORINGSSL_function_hit.
884	movb	$1,_BORINGSSL_function_hit(%rip)
885#endif
886	cmpq	$1,%rdx
887	jne	L$ctr32_bulk
888
889
890
// ---- Single-block fast path ----
// Encrypt the counter block (%xmm2) with the one-block loop, XOR it with
// the input block (%xmm3), store, and clear every register touched.
891	movups	(%r8),%xmm2
892	movups	(%rdi),%xmm3
893	movl	240(%rcx),%edx
894	movups	(%rcx),%xmm0
895	movups	16(%rcx),%xmm1
896	leaq	32(%rcx),%rcx
897	xorps	%xmm0,%xmm2
898L$oop_enc1_5:
899.byte	102,15,56,220,209
900	decl	%edx
901	movups	(%rcx),%xmm1
902	leaq	16(%rcx),%rcx
903	jnz	L$oop_enc1_5
904.byte	102,15,56,221,209
905	pxor	%xmm0,%xmm0
906	pxor	%xmm1,%xmm1
907	xorps	%xmm3,%xmm2
908	pxor	%xmm3,%xmm3
909	movups	%xmm2,(%rsi)
910	xorps	%xmm2,%xmm2
911	jmp	L$ctr32_epilogue
912
913.p2align	4
// ---- Bulk path ----
// %r11 remembers the incoming %rsp so the epilogue can restore both %rbp
// (from -8(%r11)) and %rsp after the stack has been realigned.
914L$ctr32_bulk:
915	leaq	(%rsp),%r11
916
917	pushq	%rbp
918
919	subq	$128,%rsp
920	andq	$-16,%rsp
921
922
923
924
// %xmm2 = counter block XOR round key 0; it seeds stack slots 0 and
// 64..112 and registers %xmm3-%xmm5. %r8d = counter word converted to host
// order; %ebp = the dword of round key 0 that overlaps the counter word,
// so each new counter can be whitened with a scalar XOR before it is
// written into dword 3 of its slot.
925	movdqu	(%r8),%xmm2
926	movdqu	(%rcx),%xmm0
927	movl	12(%r8),%r8d
928	pxor	%xmm0,%xmm2
929	movl	12(%rcx),%ebp
930	movdqa	%xmm2,0(%rsp)
931	bswapl	%r8d
932	movdqa	%xmm2,%xmm3
933	movdqa	%xmm2,%xmm4
934	movdqa	%xmm2,%xmm5
935	movdqa	%xmm2,64(%rsp)
936	movdqa	%xmm2,80(%rsp)
937	movdqa	%xmm2,96(%rsp)
// The block count is parked in %r10 while %rdx is used as scratch.
938	movq	%rdx,%r10
939	movdqa	%xmm2,112(%rsp)
940
// Counters +1..+3 go into %xmm3-%xmm5 (the 102,15,58,34 .byte sequences
// are "pinsrd $3,<reg32>,<xmm>") and are mirrored to slots 16..48;
// counters +4..+7 are written straight into slots 64..112. bswapl works
// on the low 32 bits only, so the counter wraps modulo 2^32.
941	leaq	1(%r8),%rax
942	leaq	2(%r8),%rdx
943	bswapl	%eax
944	bswapl	%edx
945	xorl	%ebp,%eax
946	xorl	%ebp,%edx
947.byte	102,15,58,34,216,3
948	leaq	3(%r8),%rax
949	movdqa	%xmm3,16(%rsp)
950.byte	102,15,58,34,226,3
951	bswapl	%eax
952	movq	%r10,%rdx
953	leaq	4(%r8),%r10
954	movdqa	%xmm4,32(%rsp)
955	xorl	%ebp,%eax
956	bswapl	%r10d
957.byte	102,15,58,34,232,3
958	xorl	%ebp,%r10d
959	movdqa	%xmm5,48(%rsp)
960	leaq	5(%r8),%r9
961	movl	%r10d,64+12(%rsp)
962	bswapl	%r9d
963	leaq	6(%r8),%r10
964	movl	240(%rcx),%eax
965	xorl	%ebp,%r9d
966	bswapl	%r10d
967	movl	%r9d,80+12(%rsp)
968	xorl	%ebp,%r10d
969	leaq	7(%r8),%r9
970	movl	%r10d,96+12(%rsp)
971	bswapl	%r9d
972	xorl	%ebp,%r9d
973	movl	%r9d,112+12(%rsp)
974
// %xmm1 = round key 1; %xmm6/%xmm7 = counter blocks +4 and +5.
975	movups	16(%rcx),%xmm1
976
977	movdqa	64(%rsp),%xmm6
978	movdqa	80(%rsp),%xmm7
979
980	cmpq	$8,%rdx
981	jb	L$ctr32_tail
982
// Bias %rcx by 128 so round keys are addressed as N-128(%rcx) in the loop.
983	leaq	128(%rcx),%rcx
984	subq	$8,%rdx
985	jmp	L$ctr32_loop8
986
987.p2align	5
// ---- Main loop: eight blocks per iteration ----
// While the current eight counter blocks (%xmm2-%xmm9) go through the
// rounds, the scalar unit computes the next eight counters (%r8d+0..+7
// after the add), byte-swaps and whitens them, and writes them into
// dword 3 of the eight stack slots. Round keys alternate between %xmm1
// and %xmm0. The 0x66,0x90 sequences are 2-byte NOPs.
988L$ctr32_loop8:
989	addl	$8,%r8d
990	movdqa	96(%rsp),%xmm8
991.byte	102,15,56,220,209
992	movl	%r8d,%r9d
993	movdqa	112(%rsp),%xmm9
994.byte	102,15,56,220,217
995	bswapl	%r9d
996	movups	32-128(%rcx),%xmm0
997.byte	102,15,56,220,225
998	xorl	%ebp,%r9d
999	nop
1000.byte	102,15,56,220,233
1001	movl	%r9d,0+12(%rsp)
1002	leaq	1(%r8),%r9
1003.byte	102,15,56,220,241
1004.byte	102,15,56,220,249
1005.byte	102,68,15,56,220,193
1006.byte	102,68,15,56,220,201
1007	movups	48-128(%rcx),%xmm1
1008	bswapl	%r9d
1009.byte	102,15,56,220,208
1010.byte	102,15,56,220,216
1011	xorl	%ebp,%r9d
1012.byte	0x66,0x90
1013.byte	102,15,56,220,224
1014.byte	102,15,56,220,232
1015	movl	%r9d,16+12(%rsp)
1016	leaq	2(%r8),%r9
1017.byte	102,15,56,220,240
1018.byte	102,15,56,220,248
1019.byte	102,68,15,56,220,192
1020.byte	102,68,15,56,220,200
1021	movups	64-128(%rcx),%xmm0
1022	bswapl	%r9d
1023.byte	102,15,56,220,209
1024.byte	102,15,56,220,217
1025	xorl	%ebp,%r9d
1026.byte	0x66,0x90
1027.byte	102,15,56,220,225
1028.byte	102,15,56,220,233
1029	movl	%r9d,32+12(%rsp)
1030	leaq	3(%r8),%r9
1031.byte	102,15,56,220,241
1032.byte	102,15,56,220,249
1033.byte	102,68,15,56,220,193
1034.byte	102,68,15,56,220,201
1035	movups	80-128(%rcx),%xmm1
1036	bswapl	%r9d
1037.byte	102,15,56,220,208
1038.byte	102,15,56,220,216
1039	xorl	%ebp,%r9d
1040.byte	0x66,0x90
1041.byte	102,15,56,220,224
1042.byte	102,15,56,220,232
1043	movl	%r9d,48+12(%rsp)
1044	leaq	4(%r8),%r9
1045.byte	102,15,56,220,240
1046.byte	102,15,56,220,248
1047.byte	102,68,15,56,220,192
1048.byte	102,68,15,56,220,200
1049	movups	96-128(%rcx),%xmm0
1050	bswapl	%r9d
1051.byte	102,15,56,220,209
1052.byte	102,15,56,220,217
1053	xorl	%ebp,%r9d
1054.byte	0x66,0x90
1055.byte	102,15,56,220,225
1056.byte	102,15,56,220,233
1057	movl	%r9d,64+12(%rsp)
1058	leaq	5(%r8),%r9
1059.byte	102,15,56,220,241
1060.byte	102,15,56,220,249
1061.byte	102,68,15,56,220,193
1062.byte	102,68,15,56,220,201
1063	movups	112-128(%rcx),%xmm1
1064	bswapl	%r9d
1065.byte	102,15,56,220,208
1066.byte	102,15,56,220,216
1067	xorl	%ebp,%r9d
1068.byte	0x66,0x90
1069.byte	102,15,56,220,224
1070.byte	102,15,56,220,232
1071	movl	%r9d,80+12(%rsp)
1072	leaq	6(%r8),%r9
1073.byte	102,15,56,220,240
1074.byte	102,15,56,220,248
1075.byte	102,68,15,56,220,192
1076.byte	102,68,15,56,220,200
1077	movups	128-128(%rcx),%xmm0
1078	bswapl	%r9d
1079.byte	102,15,56,220,209
1080.byte	102,15,56,220,217
1081	xorl	%ebp,%r9d
1082.byte	0x66,0x90
1083.byte	102,15,56,220,225
1084.byte	102,15,56,220,233
1085	movl	%r9d,96+12(%rsp)
1086	leaq	7(%r8),%r9
1087.byte	102,15,56,220,241
1088.byte	102,15,56,220,249
1089.byte	102,68,15,56,220,193
1090.byte	102,68,15,56,220,201
1091	movups	144-128(%rcx),%xmm1
1092	bswapl	%r9d
1093.byte	102,15,56,220,208
1094.byte	102,15,56,220,216
1095.byte	102,15,56,220,224
1096	xorl	%ebp,%r9d
1097	movdqu	0(%rdi),%xmm10
1098.byte	102,15,56,220,232
1099	movl	%r9d,112+12(%rsp)
// Key-size dispatch on the round count in %eax. The flags set here stay
// live across the following AES and movups instructions (which do not
// modify flags) and are consumed by the "jb" below and the later "je":
// below 11 skips both extra round pairs, equal to 11 runs one pair,
// above 11 runs two.
1100	cmpl	$11,%eax
1101.byte	102,15,56,220,240
1102.byte	102,15,56,220,248
1103.byte	102,68,15,56,220,192
1104.byte	102,68,15,56,220,200
1105	movups	160-128(%rcx),%xmm0
1106
1107	jb	L$ctr32_enc_done
1108
1109.byte	102,15,56,220,209
1110.byte	102,15,56,220,217
1111.byte	102,15,56,220,225
1112.byte	102,15,56,220,233
1113.byte	102,15,56,220,241
1114.byte	102,15,56,220,249
1115.byte	102,68,15,56,220,193
1116.byte	102,68,15,56,220,201
1117	movups	176-128(%rcx),%xmm1
1118
1119.byte	102,15,56,220,208
1120.byte	102,15,56,220,216
1121.byte	102,15,56,220,224
1122.byte	102,15,56,220,232
1123.byte	102,15,56,220,240
1124.byte	102,15,56,220,248
1125.byte	102,68,15,56,220,192
1126.byte	102,68,15,56,220,200
1127	movups	192-128(%rcx),%xmm0
1128	je	L$ctr32_enc_done
1129
1130.byte	102,15,56,220,209
1131.byte	102,15,56,220,217
1132.byte	102,15,56,220,225
1133.byte	102,15,56,220,233
1134.byte	102,15,56,220,241
1135.byte	102,15,56,220,249
1136.byte	102,68,15,56,220,193
1137.byte	102,68,15,56,220,201
1138	movups	208-128(%rcx),%xmm1
1139
1140.byte	102,15,56,220,208
1141.byte	102,15,56,220,216
1142.byte	102,15,56,220,224
1143.byte	102,15,56,220,232
1144.byte	102,15,56,220,240
1145.byte	102,15,56,220,248
1146.byte	102,68,15,56,220,192
1147.byte	102,68,15,56,220,200
1148	movups	224-128(%rcx),%xmm0
1149	jmp	L$ctr32_enc_done
1150
1151.p2align	4
// Finish the batch. %xmm0 holds the last round key and %xmm1 the one
// before it. Each input block is XORed with the last round key and the
// result is used as the aesenclast operand, so the final key addition and
// the keystream XOR happen in one instruction. The 102,65,... sequences
// are aesenclast with source %xmm10-%xmm15; 102,68,...,193 is
// "aesenclast %xmm1,%xmm8" and 102,69,...,202 is "aesenclast %xmm10,%xmm9".
1152L$ctr32_enc_done:
1153	movdqu	16(%rdi),%xmm11
1154	pxor	%xmm0,%xmm10
1155	movdqu	32(%rdi),%xmm12
1156	pxor	%xmm0,%xmm11
1157	movdqu	48(%rdi),%xmm13
1158	pxor	%xmm0,%xmm12
1159	movdqu	64(%rdi),%xmm14
1160	pxor	%xmm0,%xmm13
1161	movdqu	80(%rdi),%xmm15
1162	pxor	%xmm0,%xmm14
1163	prefetcht0	448(%rdi)
1164	prefetcht0	512(%rdi)
1165	pxor	%xmm0,%xmm15
1166.byte	102,15,56,220,209
1167.byte	102,15,56,220,217
1168.byte	102,15,56,220,225
1169.byte	102,15,56,220,233
1170.byte	102,15,56,220,241
1171.byte	102,15,56,220,249
1172.byte	102,68,15,56,220,193
1173.byte	102,68,15,56,220,201
1174	movdqu	96(%rdi),%xmm1
1175	leaq	128(%rdi),%rdi
1176
// Interleaved with the aesenclast instructions, the next batch of counter
// blocks is pulled from the stack slots into %xmm11-%xmm15 and %xmm0, and
// %xmm1 is reloaded with round key 1 for the next iteration.
1177.byte	102,65,15,56,221,210
1178	pxor	%xmm0,%xmm1
1179	movdqu	112-128(%rdi),%xmm10
1180.byte	102,65,15,56,221,219
1181	pxor	%xmm0,%xmm10
1182	movdqa	0(%rsp),%xmm11
1183.byte	102,65,15,56,221,228
1184.byte	102,65,15,56,221,237
1185	movdqa	16(%rsp),%xmm12
1186	movdqa	32(%rsp),%xmm13
1187.byte	102,65,15,56,221,246
1188.byte	102,65,15,56,221,255
1189	movdqa	48(%rsp),%xmm14
1190	movdqa	64(%rsp),%xmm15
1191.byte	102,68,15,56,221,193
1192	movdqa	80(%rsp),%xmm0
1193	movups	16-128(%rcx),%xmm1
1194.byte	102,69,15,56,221,202
1195
// Store the eight output blocks and move the next counters into place.
1196	movups	%xmm2,(%rsi)
1197	movdqa	%xmm11,%xmm2
1198	movups	%xmm3,16(%rsi)
1199	movdqa	%xmm12,%xmm3
1200	movups	%xmm4,32(%rsi)
1201	movdqa	%xmm13,%xmm4
1202	movups	%xmm5,48(%rsi)
1203	movdqa	%xmm14,%xmm5
1204	movups	%xmm6,64(%rsi)
1205	movdqa	%xmm15,%xmm6
1206	movups	%xmm7,80(%rsi)
1207	movdqa	%xmm0,%xmm7
1208	movups	%xmm8,96(%rsi)
1209	movups	%xmm9,112(%rsi)
1210	leaq	128(%rsi),%rsi
1211
1212	subq	$8,%rdx
1213	jnc	L$ctr32_loop8
1214
// Undo the last subtraction: %rdx = leftover blocks (0..7). Also undo the
// +128 bias on %rcx before entering the tail.
1215	addq	$8,%rdx
1216	jz	L$ctr32_done
1217	leaq	-128(%rcx),%rcx
1218
// ---- Tail: 1..7 remaining blocks ----
// On entry %xmm2-%xmm7 hold whitened counter blocks and %xmm1 holds round
// key 1. Fewer than 4 blocks use L$ctr32_loop3, exactly 4 use
// L$ctr32_loop4, and 5..7 fall through to the 8-way loop body.
1219L$ctr32_tail:
1220
1221
1222	leaq	16(%rcx),%rcx
1223	cmpq	$4,%rdx
1224	jb	L$ctr32_loop3
1225	je	L$ctr32_loop4
1226
1227
// 5..7 blocks: set up %rcx/%rax the way _aesni_encrypt8 does, apply the
// first round to %xmm2-%xmm8 by hand, and call into the middle of that
// helper at L$enc_loop8_enter. %xmm9 is a zeroed dummy lane.
1228	shll	$4,%eax
1229	movdqa	96(%rsp),%xmm8
1230	pxor	%xmm9,%xmm9
1231
1232	movups	16(%rcx),%xmm0
1233.byte	102,15,56,220,209
1234.byte	102,15,56,220,217
1235	leaq	32-16(%rcx,%rax,1),%rcx
1236	negq	%rax
1237.byte	102,15,56,220,225
1238	addq	$16,%rax
1239	movups	(%rdi),%xmm10
1240.byte	102,15,56,220,233
1241.byte	102,15,56,220,241
1242	movups	16(%rdi),%xmm11
1243	movups	32(%rdi),%xmm12
1244.byte	102,15,56,220,249
1245.byte	102,68,15,56,220,193
1246
1247	call	L$enc_loop8_enter
1248
// XOR keystream with input. Five blocks are always written; the sixth
// and seventh only when %rdx says they exist (the "je" reuses the flags
// from the cmpq, which the intervening SSE moves/XORs do not modify).
1249	movdqu	48(%rdi),%xmm13
1250	pxor	%xmm10,%xmm2
1251	movdqu	64(%rdi),%xmm10
1252	pxor	%xmm11,%xmm3
1253	movdqu	%xmm2,(%rsi)
1254	pxor	%xmm12,%xmm4
1255	movdqu	%xmm3,16(%rsi)
1256	pxor	%xmm13,%xmm5
1257	movdqu	%xmm4,32(%rsi)
1258	pxor	%xmm10,%xmm6
1259	movdqu	%xmm5,48(%rsi)
1260	movdqu	%xmm6,64(%rsi)
1261	cmpq	$6,%rdx
1262	jb	L$ctr32_done
1263
1264	movups	80(%rdi),%xmm11
1265	xorps	%xmm11,%xmm7
1266	movups	%xmm7,80(%rsi)
1267	je	L$ctr32_done
1268
1269	movups	96(%rdi),%xmm12
1270	xorps	%xmm12,%xmm8
1271	movups	%xmm8,96(%rsi)
1272	jmp	L$ctr32_done
1273
1274.p2align	5
// Exactly four blocks: one aesenc round per iteration on %xmm2-%xmm5,
// counting %eax down, then aesenclast and XOR with four input blocks.
1275L$ctr32_loop4:
1276.byte	102,15,56,220,209
1277	leaq	16(%rcx),%rcx
1278	decl	%eax
1279.byte	102,15,56,220,217
1280.byte	102,15,56,220,225
1281.byte	102,15,56,220,233
1282	movups	(%rcx),%xmm1
1283	jnz	L$ctr32_loop4
1284.byte	102,15,56,221,209
1285.byte	102,15,56,221,217
1286	movups	(%rdi),%xmm10
1287	movups	16(%rdi),%xmm11
1288.byte	102,15,56,221,225
1289.byte	102,15,56,221,233
1290	movups	32(%rdi),%xmm12
1291	movups	48(%rdi),%xmm13
1292
1293	xorps	%xmm10,%xmm2
1294	movups	%xmm2,(%rsi)
1295	xorps	%xmm11,%xmm3
1296	movups	%xmm3,16(%rsi)
1297	pxor	%xmm12,%xmm4
1298	movdqu	%xmm4,32(%rsi)
1299	pxor	%xmm13,%xmm5
1300	movdqu	%xmm5,48(%rsi)
1301	jmp	L$ctr32_done
1302
1303.p2align	5
// One to three blocks: three lanes are always encrypted, but only as
// many outputs as %rdx indicates are read from input and stored.
1304L$ctr32_loop3:
1305.byte	102,15,56,220,209
1306	leaq	16(%rcx),%rcx
1307	decl	%eax
1308.byte	102,15,56,220,217
1309.byte	102,15,56,220,225
1310	movups	(%rcx),%xmm1
1311	jnz	L$ctr32_loop3
1312.byte	102,15,56,221,209
1313.byte	102,15,56,221,217
1314.byte	102,15,56,221,225
1315
1316	movups	(%rdi),%xmm10
1317	xorps	%xmm10,%xmm2
1318	movups	%xmm2,(%rsi)
1319	cmpq	$2,%rdx
1320	jb	L$ctr32_done
1321
1322	movups	16(%rdi),%xmm11
1323	xorps	%xmm11,%xmm3
1324	movups	%xmm3,16(%rsi)
1325	je	L$ctr32_done
1326
1327	movups	32(%rdi),%xmm12
1328	xorps	%xmm12,%xmm4
1329	movups	%xmm4,32(%rsi)
1330
// ---- Cleanup ----
// Zero %ebp, all sixteen xmm registers and the 128-byte stack scratch
// area, then restore %rbp and %rsp from the frame remembered in %r11.
1331L$ctr32_done:
1332	xorps	%xmm0,%xmm0
1333	xorl	%ebp,%ebp
1334	pxor	%xmm1,%xmm1
1335	pxor	%xmm2,%xmm2
1336	pxor	%xmm3,%xmm3
1337	pxor	%xmm4,%xmm4
1338	pxor	%xmm5,%xmm5
1339	pxor	%xmm6,%xmm6
1340	pxor	%xmm7,%xmm7
1341	movaps	%xmm0,0(%rsp)
1342	pxor	%xmm8,%xmm8
1343	movaps	%xmm0,16(%rsp)
1344	pxor	%xmm9,%xmm9
1345	movaps	%xmm0,32(%rsp)
1346	pxor	%xmm10,%xmm10
1347	movaps	%xmm0,48(%rsp)
1348	pxor	%xmm11,%xmm11
1349	movaps	%xmm0,64(%rsp)
1350	pxor	%xmm12,%xmm12
1351	movaps	%xmm0,80(%rsp)
1352	pxor	%xmm13,%xmm13
1353	movaps	%xmm0,96(%rsp)
1354	pxor	%xmm14,%xmm14
1355	movaps	%xmm0,112(%rsp)
1356	pxor	%xmm15,%xmm15
1357	movq	-8(%r11),%rbp
1358
1359	leaq	(%r11),%rsp
1360
// The single-block fast path joins here; it never touched the stack.
1361L$ctr32_epilogue:
1362	ret
1363
1364
1365.globl	_aes_hw_cbc_encrypt
1366.private_extern _aes_hw_cbc_encrypt
1367
1368.p2align	4
1369_aes_hw_cbc_encrypt:
1370
1371_CET_ENDBR
1372	testq	%rdx,%rdx
1373	jz	L$cbc_ret
1374
1375	movl	240(%rcx),%r10d
1376	movq	%rcx,%r11
1377	testl	%r9d,%r9d
1378	jz	L$cbc_decrypt
1379
1380	movups	(%r8),%xmm2
1381	movl	%r10d,%eax
1382	cmpq	$16,%rdx
1383	jb	L$cbc_enc_tail
1384	subq	$16,%rdx
1385	jmp	L$cbc_enc_loop
1386.p2align	4
1387L$cbc_enc_loop:
1388	movups	(%rdi),%xmm3
1389	leaq	16(%rdi),%rdi
1390
1391	movups	(%rcx),%xmm0
1392	movups	16(%rcx),%xmm1
1393	xorps	%xmm0,%xmm3
1394	leaq	32(%rcx),%rcx
1395	xorps	%xmm3,%xmm2
1396L$oop_enc1_6:
1397.byte	102,15,56,220,209
1398	decl	%eax
1399	movups	(%rcx),%xmm1
1400	leaq	16(%rcx),%rcx
1401	jnz	L$oop_enc1_6
1402.byte	102,15,56,221,209
1403	movl	%r10d,%eax
1404	movq	%r11,%rcx
1405	movups	%xmm2,0(%rsi)
1406	leaq	16(%rsi),%rsi
1407	subq	$16,%rdx
1408	jnc	L$cbc_enc_loop
1409	addq	$16,%rdx
1410	jnz	L$cbc_enc_tail
1411	pxor	%xmm0,%xmm0
1412	pxor	%xmm1,%xmm1
1413	movups	%xmm2,(%r8)
1414	pxor	%xmm2,%xmm2
1415	pxor	%xmm3,%xmm3
1416	jmp	L$cbc_ret
1417
1418L$cbc_enc_tail:
1419	movq	%rdx,%rcx
1420	xchgq	%rdi,%rsi
1421.long	0x9066A4F3
1422	movl	$16,%ecx
1423	subq	%rdx,%rcx
1424	xorl	%eax,%eax
1425.long	0x9066AAF3
1426	leaq	-16(%rdi),%rdi
1427	movl	%r10d,%eax
1428	movq	%rdi,%rsi
1429	movq	%r11,%rcx
1430	xorq	%rdx,%rdx
1431	jmp	L$cbc_enc_loop
1432
// L$cbc_decrypt: decrypt entry of _aes_hw_cbc_encrypt. Exactly 16 bytes are
// handled by the short path below; any other length goes to
// L$cbc_decrypt_bulk.
1433.p2align	4
1434L$cbc_decrypt:
1435	cmpq	$16,%rdx
1436	jne	L$cbc_decrypt_bulk
1437
1438
1439
// xmm2 = ciphertext block, xmm3 = chaining value from (%r8),
// xmm4 = copy of the ciphertext, stored below as the next chaining value.
1440	movdqu	(%rdi),%xmm2
1441	movdqu	(%r8),%xmm3
1442	movdqa	%xmm2,%xmm4
1443	movups	(%rcx),%xmm0
1444	movups	16(%rcx),%xmm1
1445	leaq	32(%rcx),%rcx
1446	xorps	%xmm0,%xmm2
1447L$oop_dec1_7:
// .byte 102,15,56,222,209 encodes: aesdec %xmm1,%xmm2
1448.byte	102,15,56,222,209
// %r10d (round count loaded at function entry) is the loop counter here.
1449	decl	%r10d
1450	movups	(%rcx),%xmm1
1451	leaq	16(%rcx),%rcx
1452	jnz	L$oop_dec1_7
// .byte 102,15,56,223,209 encodes: aesdeclast %xmm1,%xmm2
1453.byte	102,15,56,223,209
// Clear the round-key registers, update the chaining value, apply the CBC
// XOR, store the plaintext and clear the data registers.
1454	pxor	%xmm0,%xmm0
1455	pxor	%xmm1,%xmm1
1456	movdqu	%xmm4,(%r8)
1457	xorps	%xmm3,%xmm2
1458	pxor	%xmm3,%xmm3
1459	movups	%xmm2,(%rsi)
1460	pxor	%xmm2,%xmm2
1461	jmp	L$cbc_ret
// L$cbc_decrypt_bulk: multi-block CBC decrypt. Sets up a small aligned stack
// scratch area, loads the chaining value and the first input blocks, and
// dispatches by length.
1462.p2align	4
1463L$cbc_decrypt_bulk:
// r11 = stack pointer at entry; L$cbc_dec_ret restores %rbp and %rsp from it.
1464	leaq	(%rsp),%r11
1465
1466	pushq	%rbp
1467
// Reserve 16 bytes and align %rsp to 16 so (%rsp) can be accessed with
// aligned moves (used by L$cbc_dec_tail_partial).
1468	subq	$16,%rsp
1469	andq	$-16,%rsp
1470	movq	%rcx,%rbp
// xmm10 = chaining value, eax = round count.
1471	movups	(%r8),%xmm10
1472	movl	%r10d,%eax
// 0x50 bytes (five blocks) or fewer are handled by L$cbc_dec_tail.
1473	cmpq	$0x50,%rdx
1474	jbe	L$cbc_dec_tail
1475
// Load six ciphertext blocks into xmm2..xmm7 and keep copies of the first
// five in xmm11..xmm15; the copies are XORed into the results later.
1476	movups	(%rcx),%xmm0
1477	movdqu	0(%rdi),%xmm2
1478	movdqu	16(%rdi),%xmm3
1479	movdqa	%xmm2,%xmm11
1480	movdqu	32(%rdi),%xmm4
1481	movdqa	%xmm3,%xmm12
1482	movdqu	48(%rdi),%xmm5
1483	movdqa	%xmm4,%xmm13
1484	movdqu	64(%rdi),%xmm6
1485	movdqa	%xmm5,%xmm14
1486	movdqu	80(%rdi),%xmm7
1487	movdqa	%xmm6,%xmm15
1488	cmpq	$0x70,%rdx
1489	jbe	L$cbc_dec_six_or_seven
1490
// More than 0x70 bytes: enter the eight-block loop. The count is biased by
// 0x70 and %rcx by 112, so the loop addresses round keys as N-112(%rcx).
1491	subq	$0x70,%rdx
1492	leaq	112(%rcx),%rcx
1493	jmp	L$cbc_dec_loop8_enter
// L$cbc_dec_loop8: eight-block interleaved AES decryption rounds.
// On entry xmm2..xmm7 hold six ciphertext blocks and xmm0 holds round key 0;
// blocks seven and eight are loaded here into xmm8/xmm9. Round keys are read
// alternately into xmm1 and xmm0 at N-112(%rcx).
// In the .byte sequences below, 102,15,56,222,<modrm> (with a 68 prefix for
// xmm8/xmm9) encodes aesdec: modrm 209..249 and 193/201 use %xmm1 as the
// round key, modrm 208..248 and 192/200 use %xmm0; each group of eight applies
// one round to xmm2..xmm9 in order.
1494.p2align	4
1495L$cbc_dec_loop8:
// Loop re-entry: store %xmm9 and advance the output pointer. (The branch back
// to this label follows the seven stores after L$cbc_dec_done.)
1496	movups	%xmm9,(%rsi)
1497	leaq	16(%rsi),%rsi
1498L$cbc_dec_loop8_enter:
1499	movdqu	96(%rdi),%xmm8
1500	pxor	%xmm0,%xmm2
1501	movdqu	112(%rdi),%xmm9
1502	pxor	%xmm0,%xmm3
1503	movups	16-112(%rcx),%xmm1
1504	pxor	%xmm0,%xmm4
// Branch-free pointer select: the carry from the cmpq below survives the
// SSE/AES instructions until the adcq, giving rbp = -1 + CF. After the andq
// and addq, rbp = %rdi when rdx < 0x70 (unsigned) and %rdi+128 otherwise.
// %rbp is dereferenced after L$cbc_dec_done to preload the next inputs.
1505	movq	$-1,%rbp
1506	cmpq	$0x70,%rdx
1507	pxor	%xmm0,%xmm5
1508	pxor	%xmm0,%xmm6
1509	pxor	%xmm0,%xmm7
1510	pxor	%xmm0,%xmm8
1511
1512.byte	102,15,56,222,209
1513	pxor	%xmm0,%xmm9
1514	movups	32-112(%rcx),%xmm0
1515.byte	102,15,56,222,217
1516.byte	102,15,56,222,225
1517.byte	102,15,56,222,233
1518.byte	102,15,56,222,241
1519.byte	102,15,56,222,249
1520.byte	102,68,15,56,222,193
1521	adcq	$0,%rbp
1522	andq	$128,%rbp
1523.byte	102,68,15,56,222,201
1524	addq	%rdi,%rbp
1525	movups	48-112(%rcx),%xmm1
1526.byte	102,15,56,222,208
1527.byte	102,15,56,222,216
1528.byte	102,15,56,222,224
1529.byte	102,15,56,222,232
1530.byte	102,15,56,222,240
1531.byte	102,15,56,222,248
1532.byte	102,68,15,56,222,192
1533.byte	102,68,15,56,222,200
1534	movups	64-112(%rcx),%xmm0
1535	nop
1536.byte	102,15,56,222,209
1537.byte	102,15,56,222,217
1538.byte	102,15,56,222,225
1539.byte	102,15,56,222,233
1540.byte	102,15,56,222,241
1541.byte	102,15,56,222,249
1542.byte	102,68,15,56,222,193
1543.byte	102,68,15,56,222,201
1544	movups	80-112(%rcx),%xmm1
1545	nop
1546.byte	102,15,56,222,208
1547.byte	102,15,56,222,216
1548.byte	102,15,56,222,224
1549.byte	102,15,56,222,232
1550.byte	102,15,56,222,240
1551.byte	102,15,56,222,248
1552.byte	102,68,15,56,222,192
1553.byte	102,68,15,56,222,200
1554	movups	96-112(%rcx),%xmm0
1555	nop
1556.byte	102,15,56,222,209
1557.byte	102,15,56,222,217
1558.byte	102,15,56,222,225
1559.byte	102,15,56,222,233
1560.byte	102,15,56,222,241
1561.byte	102,15,56,222,249
1562.byte	102,68,15,56,222,193
1563.byte	102,68,15,56,222,201
1564	movups	112-112(%rcx),%xmm1
1565	nop
1566.byte	102,15,56,222,208
1567.byte	102,15,56,222,216
1568.byte	102,15,56,222,224
1569.byte	102,15,56,222,232
1570.byte	102,15,56,222,240
1571.byte	102,15,56,222,248
1572.byte	102,68,15,56,222,192
1573.byte	102,68,15,56,222,200
1574	movups	128-112(%rcx),%xmm0
1575	nop
1576.byte	102,15,56,222,209
1577.byte	102,15,56,222,217
1578.byte	102,15,56,222,225
1579.byte	102,15,56,222,233
1580.byte	102,15,56,222,241
1581.byte	102,15,56,222,249
1582.byte	102,68,15,56,222,193
1583.byte	102,68,15,56,222,201
1584	movups	144-112(%rcx),%xmm1
// Compare the round count with 11 once; the flags stay live across the AES
// and move instructions and are consumed by the jb and je further down, which
// leave the round sequence early for the smaller round counts.
1585	cmpl	$11,%eax
1586.byte	102,15,56,222,208
1587.byte	102,15,56,222,216
1588.byte	102,15,56,222,224
1589.byte	102,15,56,222,232
1590.byte	102,15,56,222,240
1591.byte	102,15,56,222,248
1592.byte	102,68,15,56,222,192
1593.byte	102,68,15,56,222,200
1594	movups	160-112(%rcx),%xmm0
// Round count below 11: the remaining rounds are done at L$cbc_dec_done.
1595	jb	L$cbc_dec_done
1596.byte	102,15,56,222,209
1597.byte	102,15,56,222,217
1598.byte	102,15,56,222,225
1599.byte	102,15,56,222,233
1600.byte	102,15,56,222,241
1601.byte	102,15,56,222,249
1602.byte	102,68,15,56,222,193
1603.byte	102,68,15,56,222,201
1604	movups	176-112(%rcx),%xmm1
1605	nop
1606.byte	102,15,56,222,208
1607.byte	102,15,56,222,216
1608.byte	102,15,56,222,224
1609.byte	102,15,56,222,232
1610.byte	102,15,56,222,240
1611.byte	102,15,56,222,248
1612.byte	102,68,15,56,222,192
1613.byte	102,68,15,56,222,200
1614	movups	192-112(%rcx),%xmm0
// Round count equal to 11.
1615	je	L$cbc_dec_done
1616.byte	102,15,56,222,209
1617.byte	102,15,56,222,217
1618.byte	102,15,56,222,225
1619.byte	102,15,56,222,233
1620.byte	102,15,56,222,241
1621.byte	102,15,56,222,249
1622.byte	102,68,15,56,222,193
1623.byte	102,68,15,56,222,201
1624	movups	208-112(%rcx),%xmm1
1625	nop
1626.byte	102,15,56,222,208
1627.byte	102,15,56,222,216
1628.byte	102,15,56,222,224
1629.byte	102,15,56,222,232
1630.byte	102,15,56,222,240
1631.byte	102,15,56,222,248
1632.byte	102,68,15,56,222,192
1633.byte	102,68,15,56,222,200
1634	movups	224-112(%rcx),%xmm0
1635	jmp	L$cbc_dec_done
// L$cbc_dec_done: last two rounds of the eight-block batch, CBC XOR, stores
// and loop control.
// On entry xmm1 holds the second-to-last round key and xmm0 the last one.
// The last round key is XORed into the chaining value and the saved
// ciphertext copies (xmm10..xmm15, plus blocks re-read from 80/96(%rdi)), and
// those sums are passed to aesdeclast as its key operand. That one
// instruction therefore applies both the final round key and the CBC XOR.
// Encodings: 102,15,56,222,* / 102,68,15,56,222,* = aesdec %xmm1,%xmmN;
// 102,65,15,56,223,{210,219,228,237,246,255} = aesdeclast %xmm10..%xmm15 into
// %xmm2..%xmm7; 102,68,15,56,223,193 = aesdeclast %xmm1,%xmm8;
// 102,69,15,56,223,202 = aesdeclast %xmm10,%xmm9.
1636.p2align	4
1637L$cbc_dec_done:
1638.byte	102,15,56,222,209
1639.byte	102,15,56,222,217
1640	pxor	%xmm0,%xmm10
1641	pxor	%xmm0,%xmm11
1642.byte	102,15,56,222,225
1643.byte	102,15,56,222,233
1644	pxor	%xmm0,%xmm12
1645	pxor	%xmm0,%xmm13
1646.byte	102,15,56,222,241
1647.byte	102,15,56,222,249
1648	pxor	%xmm0,%xmm14
1649	pxor	%xmm0,%xmm15
1650.byte	102,68,15,56,222,193
1651.byte	102,68,15,56,222,201
// xmm1 is free after the last aesdec; reuse it for ciphertext block six.
1652	movdqu	80(%rdi),%xmm1
1653
1654.byte	102,65,15,56,223,210
// xmm10 was consumed by the aesdeclast above; reuse it for block seven.
1655	movdqu	96(%rdi),%xmm10
1656	pxor	%xmm0,%xmm1
1657.byte	102,65,15,56,223,219
1658	pxor	%xmm0,%xmm10
// xmm0 = ciphertext block eight, which becomes the next chaining value.
1659	movdqu	112(%rdi),%xmm0
1660.byte	102,65,15,56,223,228
1661	leaq	128(%rdi),%rdi
// Preload six blocks from %rbp (selected in the loop body) for the next pass.
1662	movdqu	0(%rbp),%xmm11
1663.byte	102,65,15,56,223,237
1664.byte	102,65,15,56,223,246
1665	movdqu	16(%rbp),%xmm12
1666	movdqu	32(%rbp),%xmm13
1667.byte	102,65,15,56,223,255
1668.byte	102,68,15,56,223,193
1669	movdqu	48(%rbp),%xmm14
1670	movdqu	64(%rbp),%xmm15
1671.byte	102,69,15,56,223,202
1672	movdqa	%xmm0,%xmm10
1673	movdqu	80(%rbp),%xmm1
// Reload round key 0 for the next pass.
1674	movups	-112(%rcx),%xmm0
1675
// Store seven plaintext blocks and move the preloaded inputs into xmm2..xmm7.
// The eighth result stays in xmm9.
1676	movups	%xmm2,(%rsi)
1677	movdqa	%xmm11,%xmm2
1678	movups	%xmm3,16(%rsi)
1679	movdqa	%xmm12,%xmm3
1680	movups	%xmm4,32(%rsi)
1681	movdqa	%xmm13,%xmm4
1682	movups	%xmm5,48(%rsi)
1683	movdqa	%xmm14,%xmm5
1684	movups	%xmm6,64(%rsi)
1685	movdqa	%xmm15,%xmm6
1686	movups	%xmm7,80(%rsi)
1687	movdqa	%xmm1,%xmm7
1688	movups	%xmm8,96(%rsi)
1689	leaq	112(%rsi),%rsi
1690
1691	subq	$0x80,%rdx
1692	ja	L$cbc_dec_loop8
1693
// Loop exit: undo the key-pointer and count biases. If nothing (signed <= 0)
// remains, the eighth result in xmm2 is finished by the tail-collected code.
1694	movaps	%xmm9,%xmm2
1695	leaq	-112(%rcx),%rcx
1696	addq	$0x70,%rdx
1697	jle	L$cbc_dec_clear_tail_collected
1698	movups	%xmm9,(%rsi)
1699	leaq	16(%rsi),%rsi
1700	cmpq	$0x50,%rdx
1701	jbe	L$cbc_dec_tail
1702
// xmm2 was overwritten with xmm9 above; restore the first preloaded block
// before falling through to L$cbc_dec_six_or_seven.
1703	movaps	%xmm11,%xmm2
// L$cbc_dec_six_or_seven: more than 0x50 and at most 0x70 bytes remain.
// xmm2..xmm7 already hold six ciphertext blocks, xmm11..xmm15 hold copies of
// the first five, and xmm10 holds the chaining value. More than 0x60 bytes
// goes to L$cbc_dec_seven.
1704L$cbc_dec_six_or_seven:
1705	cmpq	$0x60,%rdx
1706	ja	L$cbc_dec_seven
1707
// Six blocks: keep ciphertext block six in xmm8 as the next chaining value.
1708	movaps	%xmm7,%xmm8
// _aesni_decrypt6 is defined elsewhere; the code below relies on it leaving
// xmm8 and xmm10..xmm15 intact.
1709	call	_aesni_decrypt6
// CBC XOR with the previous ciphertext blocks; store five results and clear
// their registers. The sixth is left in xmm2 for L$cbc_dec_tail_collected.
1710	pxor	%xmm10,%xmm2
1711	movaps	%xmm8,%xmm10
1712	pxor	%xmm11,%xmm3
1713	movdqu	%xmm2,(%rsi)
1714	pxor	%xmm12,%xmm4
1715	movdqu	%xmm3,16(%rsi)
1716	pxor	%xmm3,%xmm3
1717	pxor	%xmm13,%xmm5
1718	movdqu	%xmm4,32(%rsi)
1719	pxor	%xmm4,%xmm4
1720	pxor	%xmm14,%xmm6
1721	movdqu	%xmm5,48(%rsi)
1722	pxor	%xmm5,%xmm5
1723	pxor	%xmm15,%xmm7
1724	movdqu	%xmm6,64(%rsi)
1725	pxor	%xmm6,%xmm6
1726	leaq	80(%rsi),%rsi
1727	movdqa	%xmm7,%xmm2
1728	pxor	%xmm7,%xmm7
1729	jmp	L$cbc_dec_tail_collected
1730
1731.p2align	4
1732L$cbc_dec_seven:
// Seven blocks: load block seven into xmm8, zero the unused eighth lane, and
// run the eight-block routine (_aesni_decrypt8, defined elsewhere).
1733	movups	96(%rdi),%xmm8
1734	xorps	%xmm9,%xmm9
1735	call	_aesni_decrypt8
// Re-read ciphertext block six (XOR mask for the seventh result) and block
// seven (next chaining value) from the input.
1736	movups	80(%rdi),%xmm9
1737	pxor	%xmm10,%xmm2
1738	movups	96(%rdi),%xmm10
1739	pxor	%xmm11,%xmm3
1740	movdqu	%xmm2,(%rsi)
1741	pxor	%xmm12,%xmm4
1742	movdqu	%xmm3,16(%rsi)
1743	pxor	%xmm3,%xmm3
1744	pxor	%xmm13,%xmm5
1745	movdqu	%xmm4,32(%rsi)
1746	pxor	%xmm4,%xmm4
1747	pxor	%xmm14,%xmm6
1748	movdqu	%xmm5,48(%rsi)
1749	pxor	%xmm5,%xmm5
1750	pxor	%xmm15,%xmm7
1751	movdqu	%xmm6,64(%rsi)
1752	pxor	%xmm6,%xmm6
1753	pxor	%xmm9,%xmm8
1754	movdqu	%xmm7,80(%rsi)
1755	pxor	%xmm7,%xmm7
1756	leaq	96(%rsi),%rsi
// The seventh result is left in xmm2 for L$cbc_dec_tail_collected.
1757	movdqa	%xmm8,%xmm2
1758	pxor	%xmm8,%xmm8
1759	pxor	%xmm9,%xmm9
1760	jmp	L$cbc_dec_tail_collected
1761
// L$cbc_dec_tail: at most 0x50 bytes remain. Load blocks one at a time,
// subtracting 16 from %rdx per block, and branch to the one/two/three/four
// handler as soon as the count reaches zero or borrows. A copy of each
// ciphertext block is kept in xmm11.. for the CBC XOR. Falling through
// handles five blocks.
1762L$cbc_dec_tail:
1763	movups	(%rdi),%xmm2
1764	subq	$0x10,%rdx
1765	jbe	L$cbc_dec_one
1766
1767	movups	16(%rdi),%xmm3
1768	movaps	%xmm2,%xmm11
1769	subq	$0x10,%rdx
1770	jbe	L$cbc_dec_two
1771
1772	movups	32(%rdi),%xmm4
1773	movaps	%xmm3,%xmm12
1774	subq	$0x10,%rdx
1775	jbe	L$cbc_dec_three
1776
1777	movups	48(%rdi),%xmm5
1778	movaps	%xmm4,%xmm13
1779	subq	$0x10,%rdx
1780	jbe	L$cbc_dec_four
1781
// Five blocks: xmm15 keeps ciphertext block five (next chaining value) and
// the unused sixth lane xmm7 is zeroed before calling the six-block routine.
1782	movups	64(%rdi),%xmm6
1783	movaps	%xmm5,%xmm14
1784	movaps	%xmm6,%xmm15
1785	xorps	%xmm7,%xmm7
1786	call	_aesni_decrypt6
1787	pxor	%xmm10,%xmm2
1788	movaps	%xmm15,%xmm10
1789	pxor	%xmm11,%xmm3
1790	movdqu	%xmm2,(%rsi)
1791	pxor	%xmm12,%xmm4
1792	movdqu	%xmm3,16(%rsi)
1793	pxor	%xmm3,%xmm3
1794	pxor	%xmm13,%xmm5
1795	movdqu	%xmm4,32(%rsi)
1796	pxor	%xmm4,%xmm4
1797	pxor	%xmm14,%xmm6
1798	movdqu	%xmm5,48(%rsi)
1799	pxor	%xmm5,%xmm5
1800	leaq	64(%rsi),%rsi
// The fifth result is left in xmm2 for L$cbc_dec_tail_collected.
1801	movdqa	%xmm6,%xmm2
1802	pxor	%xmm6,%xmm6
1803	pxor	%xmm7,%xmm7
// Account for the fifth block, which was loaded without its own subtraction.
1804	subq	$0x10,%rdx
1805	jmp	L$cbc_dec_tail_collected
1806
// Tail handlers for one to four remaining blocks, reached from
// L$cbc_dec_tail. Each one saves the last ciphertext block as the next
// chaining value (moved into xmm10), decrypts, applies the CBC XOR, stores
// all but the last result, and leaves the last result in xmm2 for
// L$cbc_dec_tail_collected.
1807.p2align	4
1808L$cbc_dec_one:
1809	movaps	%xmm2,%xmm11
1810	movups	(%rcx),%xmm0
1811	movups	16(%rcx),%xmm1
1812	leaq	32(%rcx),%rcx
1813	xorps	%xmm0,%xmm2
1814L$oop_dec1_8:
// .byte 102,15,56,222,209 encodes: aesdec %xmm1,%xmm2
1815.byte	102,15,56,222,209
1816	decl	%eax
1817	movups	(%rcx),%xmm1
1818	leaq	16(%rcx),%rcx
1819	jnz	L$oop_dec1_8
// .byte 102,15,56,223,209 encodes: aesdeclast %xmm1,%xmm2
1820.byte	102,15,56,223,209
1821	xorps	%xmm10,%xmm2
1822	movaps	%xmm11,%xmm10
1823	jmp	L$cbc_dec_tail_collected
1824.p2align	4
1825L$cbc_dec_two:
1826	movaps	%xmm3,%xmm12
// _aesni_decrypt2/3/4 are defined elsewhere in the file.
1827	call	_aesni_decrypt2
1828	pxor	%xmm10,%xmm2
1829	movaps	%xmm12,%xmm10
1830	pxor	%xmm11,%xmm3
1831	movdqu	%xmm2,(%rsi)
1832	movdqa	%xmm3,%xmm2
1833	pxor	%xmm3,%xmm3
1834	leaq	16(%rsi),%rsi
1835	jmp	L$cbc_dec_tail_collected
1836.p2align	4
1837L$cbc_dec_three:
1838	movaps	%xmm4,%xmm13
1839	call	_aesni_decrypt3
1840	pxor	%xmm10,%xmm2
1841	movaps	%xmm13,%xmm10
1842	pxor	%xmm11,%xmm3
1843	movdqu	%xmm2,(%rsi)
1844	pxor	%xmm12,%xmm4
1845	movdqu	%xmm3,16(%rsi)
1846	pxor	%xmm3,%xmm3
1847	movdqa	%xmm4,%xmm2
1848	pxor	%xmm4,%xmm4
1849	leaq	32(%rsi),%rsi
1850	jmp	L$cbc_dec_tail_collected
1851.p2align	4
1852L$cbc_dec_four:
1853	movaps	%xmm5,%xmm14
1854	call	_aesni_decrypt4
1855	pxor	%xmm10,%xmm2
1856	movaps	%xmm14,%xmm10
1857	pxor	%xmm11,%xmm3
1858	movdqu	%xmm2,(%rsi)
1859	pxor	%xmm12,%xmm4
1860	movdqu	%xmm3,16(%rsi)
1861	pxor	%xmm3,%xmm3
1862	pxor	%xmm13,%xmm5
1863	movdqu	%xmm4,32(%rsi)
1864	pxor	%xmm4,%xmm4
1865	movdqa	%xmm5,%xmm2
1866	pxor	%xmm5,%xmm5
1867	leaq	48(%rsi),%rsi
1868	jmp	L$cbc_dec_tail_collected
1869
// Common exit of the bulk CBC decrypt path. On arrival xmm10 holds the new
// chaining value and xmm2 the last plaintext block, not yet stored.
// The "clear" entry first wipes xmm3..xmm9, which may still hold data after
// the eight-block loop.
1870.p2align	4
1871L$cbc_dec_clear_tail_collected:
1872	pxor	%xmm3,%xmm3
1873	pxor	%xmm4,%xmm4
1874	pxor	%xmm5,%xmm5
1875	pxor	%xmm6,%xmm6
1876	pxor	%xmm7,%xmm7
1877	pxor	%xmm8,%xmm8
1878	pxor	%xmm9,%xmm9
1879L$cbc_dec_tail_collected:
// Write the chaining value back through %r8.
1880	movups	%xmm10,(%r8)
// Low four bits of the remaining count: zero means the last block is stored
// whole; otherwise take the partial-store path.
1881	andq	$15,%rdx
1882	jnz	L$cbc_dec_tail_partial
1883	movups	%xmm2,(%rsi)
1884	pxor	%xmm2,%xmm2
1885	jmp	L$cbc_dec_ret
1886.p2align	4
1887L$cbc_dec_tail_partial:
// Spill the last block to the aligned stack scratch slot, clear xmm2, then
// copy %rcx = 16 - %rdx bytes from the slot to the destination.
1888	movaps	%xmm2,(%rsp)
1889	pxor	%xmm2,%xmm2
1890	movq	$16,%rcx
1891	movq	%rsi,%rdi
1892	subq	%rdx,%rcx
1893	leaq	(%rsp),%rsi
// .long 0x9066A4F3 is the byte sequence F3 A4 66 90: rep movsb, 2-byte nop.
1894.long	0x9066A4F3
// Overwrite the scratch slot with the zeroed xmm2 so no plaintext stays on
// the stack.
1895	movdqa	%xmm2,(%rsp)
1896
1897L$cbc_dec_ret:
// Clear the round-key registers and restore %rbp and %rsp from the frame
// anchor kept in %r11 (set at L$cbc_decrypt_bulk).
1898	xorps	%xmm0,%xmm0
1899	pxor	%xmm1,%xmm1
1900	movq	-8(%r11),%rbp
1901
1902	leaq	(%r11),%rsp
1903
1904L$cbc_ret:
1905	ret
1906
1907
// _aes_hw_set_decrypt_key: build a decryption key schedule.
// Calls __aesni_set_encrypt_key with the caller's arguments unchanged, then
// converts the resulting schedule in place: the round keys are reversed in
// order and aesimc is applied to every key except the first and last.
// %eax carries the return value of __aesni_set_encrypt_key through
// unmodified; a non-zero value skips the conversion.
1908.globl	_aes_hw_set_decrypt_key
1909.private_extern _aes_hw_set_decrypt_key
1910
1911.p2align	4
1912_aes_hw_set_decrypt_key:
1913
1914_CET_ENDBR
// .byte 0x48,0x83,0xEC,0x08 encodes: subq $8,%rsp
1915.byte	0x48,0x83,0xEC,0x08
1916
1917	call	__aesni_set_encrypt_key
// The callee leaves its round-count value in %esi; scale it to a byte offset.
1918	shll	$4,%esi
1919	testl	%eax,%eax
1920	jnz	L$dec_key_ret
// rdi = address of the last round key, rdx = address of the first.
1921	leaq	16(%rdx,%rsi,1),%rdi
1922
// Swap the first and last round keys as they are (no aesimc).
1923	movups	(%rdx),%xmm0
1924	movups	(%rdi),%xmm1
1925	movups	%xmm0,(%rdi)
1926	movups	%xmm1,(%rdx)
1927	leaq	16(%rdx),%rdx
1928	leaq	-16(%rdi),%rdi
1929
// Walk inward from both ends, applying aesimc to each pair and swapping them.
1930L$dec_key_inverse:
1931	movups	(%rdx),%xmm0
1932	movups	(%rdi),%xmm1
// .byte 102,15,56,219,192 encodes: aesimc %xmm0,%xmm0
1933.byte	102,15,56,219,192
// .byte 102,15,56,219,201 encodes: aesimc %xmm1,%xmm1
1934.byte	102,15,56,219,201
1935	leaq	16(%rdx),%rdx
1936	leaq	-16(%rdi),%rdi
1937	movups	%xmm0,16(%rdi)
1938	movups	%xmm1,-16(%rdx)
1939	cmpq	%rdx,%rdi
1940	ja	L$dec_key_inverse
1941
// The pointers have met: transform the middle round key in place, then clear
// the registers that held key material.
1942	movups	(%rdx),%xmm0
1943.byte	102,15,56,219,192
1944	pxor	%xmm1,%xmm1
1945	movups	%xmm0,(%rdi)
1946	pxor	%xmm0,%xmm0
1947L$dec_key_ret:
1948	addq	$8,%rsp
1949
1950	ret
1951
1952L$SEH_end_set_decrypt_key:
1953
// _aes_hw_set_encrypt_key / __aesni_set_encrypt_key: expand a user key into
// an AES round-key schedule.
// Register roles, as used by the code below:
//   %rdi = user key bytes, %esi = key length in bits (128, 192 or 256),
//   %rdx = output key schedule.
// Return value in %rax: 0 on success, -1 if %rdi or %rdx is NULL, -2 for an
// unsupported bit length. On success %esi holds the stored round-count value
// (9, 11 or 13), which _aes_hw_set_decrypt_key relies on.
1954.globl	_aes_hw_set_encrypt_key
1955.private_extern _aes_hw_set_encrypt_key
1956
1957.p2align	4
1958_aes_hw_set_encrypt_key:
1959__aesni_set_encrypt_key:
1960
1961_CET_ENDBR
1962#ifdef BORINGSSL_DISPATCH_TEST
1963	movb	$1,_BORINGSSL_function_hit+3(%rip)
1964#endif
// .byte 0x48,0x83,0xEC,0x08 encodes: subq $8,%rsp
1965.byte	0x48,0x83,0xEC,0x08
1966
1967	movq	$-1,%rax
1968	testq	%rdi,%rdi
1969	jz	L$enc_key_ret
1970	testq	%rdx,%rdx
1971	jz	L$enc_key_ret
1972
// xmm0 = first 16 key bytes; xmm4 = 0, used as scratch by the
// L$key_expansion_* helpers.
1973	movups	(%rdi),%xmm0
1974	xorps	%xmm4,%xmm4
// r10d = second 32-bit word of _OPENSSL_ia32cap_P masked with 0x10000800.
// The size-specific paths compare it with 0x10000000 to pick the *_alt
// variants. NOTE(review): bits 28 and 11 presumably mean AVX and XOP; confirm
// against the OPENSSL_ia32cap_P layout.
1975	leaq	_OPENSSL_ia32cap_P(%rip),%r10
1976	movl	4(%r10),%r10d
1977	andl	$268437504,%r10d
// rax = write cursor, starting at the second round-key slot.
1978	leaq	16(%rdx),%rax
1979	cmpl	$256,%esi
1980	je	L$14rounds
1981	cmpl	$192,%esi
1982	je	L$12rounds
1983	cmpl	$128,%esi
1984	jne	L$bad_keybits
1985
// 128-bit key. Each .byte 102,15,58,223,200,N below encodes
// aeskeygenassist $N,%xmm0,%xmm1 with round constants
// 1,2,4,8,16,32,64,128,27,54. The helper called after each one produces the
// next round key in xmm0 and, except for the _cold entry, first stores the
// previous one.
1986L$10rounds:
1987	movl	$9,%esi
1988	cmpl	$268435456,%r10d
1989	je	L$10rounds_alt
1990
1991	movups	%xmm0,(%rdx)
1992.byte	102,15,58,223,200,1
1993	call	L$key_expansion_128_cold
1994.byte	102,15,58,223,200,2
1995	call	L$key_expansion_128
1996.byte	102,15,58,223,200,4
1997	call	L$key_expansion_128
1998.byte	102,15,58,223,200,8
1999	call	L$key_expansion_128
2000.byte	102,15,58,223,200,16
2001	call	L$key_expansion_128
2002.byte	102,15,58,223,200,32
2003	call	L$key_expansion_128
2004.byte	102,15,58,223,200,64
2005	call	L$key_expansion_128
2006.byte	102,15,58,223,200,128
2007	call	L$key_expansion_128
2008.byte	102,15,58,223,200,27
2009	call	L$key_expansion_128
2010.byte	102,15,58,223,200,54
2011	call	L$key_expansion_128
// Store the last round key, then the round-count value; this lands at offset
// 240 of the schedule, where the cipher routines read it.
2012	movups	%xmm0,(%rax)
2013	movl	%esi,80(%rax)
2014	xorl	%eax,%eax
2015	jmp	L$enc_key_ret
2016
// L$10rounds_alt: 128-bit key expansion without aeskeygenassist.
// Each step uses pshufb with the L$key_rotate mask (xmm5), then aesenclast
// with the current round constant (xmm4) as the key operand, and then XORs in
// a prefix-XOR of the previous round key built from shifts by 4 bytes.
// Encodings: 102,15,56,0,197 = pshufb %xmm5,%xmm0;
//            102,15,56,221,196 = aesenclast %xmm4,%xmm0.
2017.p2align	4
2018L$10rounds_alt:
2019	movdqa	L$key_rotate(%rip),%xmm5
// Eight loop iterations with the round constant starting at 1 and doubling.
2020	movl	$8,%r10d
2021	movdqa	L$key_rcon1(%rip),%xmm4
2022	movdqa	%xmm0,%xmm2
2023	movdqu	%xmm0,(%rdx)
2024	jmp	L$oop_key128
2025
2026.p2align	4
2027L$oop_key128:
2028.byte	102,15,56,0,197
2029.byte	102,15,56,221,196
// Double the round constant for the next iteration.
2030	pslld	$1,%xmm4
2031	leaq	16(%rax),%rax
2032
// xmm2 = previous round key; fold in its copies shifted left by 4, 8 and 12
// bytes.
2033	movdqa	%xmm2,%xmm3
2034	pslldq	$4,%xmm2
2035	pxor	%xmm2,%xmm3
2036	pslldq	$4,%xmm2
2037	pxor	%xmm2,%xmm3
2038	pslldq	$4,%xmm2
2039	pxor	%xmm3,%xmm2
2040
// xmm0 = new round key; store it and keep a copy in xmm2 for the next step.
2041	pxor	%xmm2,%xmm0
2042	movdqu	%xmm0,-16(%rax)
2043	movdqa	%xmm0,%xmm2
2044
2045	decl	%r10d
2046	jnz	L$oop_key128
2047
// The last two round keys use constants 0x1b and then 0x1b<<1, loaded from
// L$key_rcon1b, and are written out of line.
2048	movdqa	L$key_rcon1b(%rip),%xmm4
2049
2050.byte	102,15,56,0,197
2051.byte	102,15,56,221,196
2052	pslld	$1,%xmm4
2053
2054	movdqa	%xmm2,%xmm3
2055	pslldq	$4,%xmm2
2056	pxor	%xmm2,%xmm3
2057	pslldq	$4,%xmm2
2058	pxor	%xmm2,%xmm3
2059	pslldq	$4,%xmm2
2060	pxor	%xmm3,%xmm2
2061
2062	pxor	%xmm2,%xmm0
2063	movdqu	%xmm0,(%rax)
2064
2065	movdqa	%xmm0,%xmm2
2066.byte	102,15,56,0,197
2067.byte	102,15,56,221,196
2068
2069	movdqa	%xmm2,%xmm3
2070	pslldq	$4,%xmm2
2071	pxor	%xmm2,%xmm3
2072	pslldq	$4,%xmm2
2073	pxor	%xmm2,%xmm3
2074	pslldq	$4,%xmm2
2075	pxor	%xmm3,%xmm2
2076
2077	pxor	%xmm2,%xmm0
2078	movdqu	%xmm0,16(%rax)
2079
// Store the round-count value (%esi, set to 9 at L$10rounds) and return 0.
2080	movl	%esi,96(%rax)
2081	xorl	%eax,%eax
2082	jmp	L$enc_key_ret
2083
// L$12rounds: 192-bit key expansion. xmm0 holds key bytes 0..15 and the low
// half of xmm2 holds key bytes 16..23. The round-count value stored is 11.
// Each .byte 102,15,58,223,202,N encodes aeskeygenassist $N,%xmm2,%xmm1; the
// 192a/192b helpers alternate because the 192-bit state does not line up with
// 128-bit round-key slots.
2084.p2align	4
2085L$12rounds:
2086	movq	16(%rdi),%xmm2
2087	movl	$11,%esi
2088	cmpl	$268435456,%r10d
2089	je	L$12rounds_alt
2090
2091	movups	%xmm0,(%rdx)
2092.byte	102,15,58,223,202,1
2093	call	L$key_expansion_192a_cold
2094.byte	102,15,58,223,202,2
2095	call	L$key_expansion_192b
2096.byte	102,15,58,223,202,4
2097	call	L$key_expansion_192a
2098.byte	102,15,58,223,202,8
2099	call	L$key_expansion_192b
2100.byte	102,15,58,223,202,16
2101	call	L$key_expansion_192a
2102.byte	102,15,58,223,202,32
2103	call	L$key_expansion_192b
2104.byte	102,15,58,223,202,64
2105	call	L$key_expansion_192a
2106.byte	102,15,58,223,202,128
2107	call	L$key_expansion_192b
// Store the last round key and the round-count value, then return 0.
2108	movups	%xmm0,(%rax)
2109	movl	%esi,48(%rax)
2110	xorq	%rax,%rax
2111	jmp	L$enc_key_ret
2112
// L$12rounds_alt: 192-bit expansion without aeskeygenassist, using pshufb
// with the L$key_rotate192 mask plus aesenclast with the round constant.
// Encodings: 102,15,56,0,213 = pshufb %xmm5,%xmm2;
//            102,15,56,221,212 = aesenclast %xmm4,%xmm2.
2113.p2align	4
2114L$12rounds_alt:
2115	movdqa	L$key_rotate192(%rip),%xmm5
2116	movdqa	L$key_rcon1(%rip),%xmm4
2117	movl	$8,%r10d
2118	movdqu	%xmm0,(%rdx)
2119	jmp	L$oop_key192
2120
2121.p2align	4
2122L$oop_key192:
// Each iteration emits 24 bytes: the low 8 bytes of xmm2 here, and the new
// 16-byte xmm0 at the end, written at -16 after the cursor advances by 24.
2123	movq	%xmm2,0(%rax)
2124	movdqa	%xmm2,%xmm1
2125.byte	102,15,56,0,213
2126.byte	102,15,56,221,212
// Double the round constant for the next iteration.
2127	pslld	$1,%xmm4
2128	leaq	24(%rax),%rax
2129
// Fold copies of xmm0 shifted left by 4, 8 and 12 bytes into it.
2130	movdqa	%xmm0,%xmm3
2131	pslldq	$4,%xmm0
2132	pxor	%xmm0,%xmm3
2133	pslldq	$4,%xmm0
2134	pxor	%xmm0,%xmm3
2135	pslldq	$4,%xmm0
2136	pxor	%xmm3,%xmm0
2137
// xmm3 = broadcast of dword 3 of xmm0, combined with the saved xmm1 and its
// 4-byte shift; it is XORed into xmm2 to form the next 8 key bytes.
2138	pshufd	$0xff,%xmm0,%xmm3
2139	pxor	%xmm1,%xmm3
2140	pslldq	$4,%xmm1
2141	pxor	%xmm1,%xmm3
2142
2143	pxor	%xmm2,%xmm0
2144	pxor	%xmm3,%xmm2
2145	movdqu	%xmm0,-16(%rax)
2146
2147	decl	%r10d
2148	jnz	L$oop_key192
2149
// Store the round-count value (%esi = 11) and return 0.
2150	movl	%esi,32(%rax)
2151	xorl	%eax,%eax
2152	jmp	L$enc_key_ret
2153
// L$14rounds: 256-bit key expansion. xmm0 holds key bytes 0..15 and xmm2
// holds key bytes 16..31; both are stored as the first two round keys. The
// round-count value stored is 13.
// .byte 102,15,58,223,202,N encodes aeskeygenassist $N,%xmm2,%xmm1 (followed
// by the 256a helper, which updates xmm0); .byte 102,15,58,223,200,N encodes
// aeskeygenassist $N,%xmm0,%xmm1 (followed by the 256b helper, which updates
// xmm2).
2154.p2align	4
2155L$14rounds:
2156	movups	16(%rdi),%xmm2
2157	movl	$13,%esi
// Advance the write cursor past the second round-key slot.
2158	leaq	16(%rax),%rax
2159	cmpl	$268435456,%r10d
2160	je	L$14rounds_alt
2161
2162	movups	%xmm0,(%rdx)
2163	movups	%xmm2,16(%rdx)
2164.byte	102,15,58,223,202,1
2165	call	L$key_expansion_256a_cold
2166.byte	102,15,58,223,200,1
2167	call	L$key_expansion_256b
2168.byte	102,15,58,223,202,2
2169	call	L$key_expansion_256a
2170.byte	102,15,58,223,200,2
2171	call	L$key_expansion_256b
2172.byte	102,15,58,223,202,4
2173	call	L$key_expansion_256a
2174.byte	102,15,58,223,200,4
2175	call	L$key_expansion_256b
2176.byte	102,15,58,223,202,8
2177	call	L$key_expansion_256a
2178.byte	102,15,58,223,200,8
2179	call	L$key_expansion_256b
2180.byte	102,15,58,223,202,16
2181	call	L$key_expansion_256a
2182.byte	102,15,58,223,200,16
2183	call	L$key_expansion_256b
2184.byte	102,15,58,223,202,32
2185	call	L$key_expansion_256a
2186.byte	102,15,58,223,200,32
2187	call	L$key_expansion_256b
2188.byte	102,15,58,223,202,64
2189	call	L$key_expansion_256a
// Store the last round key and the round-count value, then return 0.
2190	movups	%xmm0,(%rax)
2191	movl	%esi,16(%rax)
2192	xorq	%rax,%rax
2193	jmp	L$enc_key_ret
2194
// L$14rounds_alt: 256-bit expansion without aeskeygenassist.
// Encodings: 102,15,56,0,213 = pshufb %xmm5,%xmm2;
//            102,15,56,221,212 = aesenclast %xmm4,%xmm2;
//            102,15,56,221,211 = aesenclast %xmm3,%xmm2.
2195.p2align	4
2196L$14rounds_alt:
2197	movdqa	L$key_rotate(%rip),%xmm5
2198	movdqa	L$key_rcon1(%rip),%xmm4
// Seven passes; the last one exits at L$done_key256 after its first half.
2199	movl	$7,%r10d
2200	movdqu	%xmm0,0(%rdx)
2201	movdqa	%xmm2,%xmm1
2202	movdqu	%xmm2,16(%rdx)
2203	jmp	L$oop_key256
2204
2205.p2align	4
2206L$oop_key256:
// First half: pshufb with the rotate mask and aesenclast with the round
// constant on xmm2, combined with the prefix-XOR of xmm0.
2207.byte	102,15,56,0,213
2208.byte	102,15,56,221,212
2209
2210	movdqa	%xmm0,%xmm3
2211	pslldq	$4,%xmm0
2212	pxor	%xmm0,%xmm3
2213	pslldq	$4,%xmm0
2214	pxor	%xmm0,%xmm3
2215	pslldq	$4,%xmm0
2216	pxor	%xmm3,%xmm0
// Double the round constant for the next pass.
2217	pslld	$1,%xmm4
2218
2219	pxor	%xmm2,%xmm0
2220	movdqu	%xmm0,(%rax)
2221
2222	decl	%r10d
2223	jz	L$done_key256
2224
// Second half: broadcast dword 3 of the new xmm0, then aesenclast with an
// all-zero key operand (xmm3 is cleared first, so no round constant is
// added), combined with the prefix-XOR of xmm1.
2225	pshufd	$0xff,%xmm0,%xmm2
2226	pxor	%xmm3,%xmm3
2227.byte	102,15,56,221,211
2228
2229	movdqa	%xmm1,%xmm3
2230	pslldq	$4,%xmm1
2231	pxor	%xmm1,%xmm3
2232	pslldq	$4,%xmm1
2233	pxor	%xmm1,%xmm3
2234	pslldq	$4,%xmm1
2235	pxor	%xmm3,%xmm1
2236
2237	pxor	%xmm1,%xmm2
2238	movdqu	%xmm2,16(%rax)
2239	leaq	32(%rax),%rax
2240	movdqa	%xmm2,%xmm1
2241
2242	jmp	L$oop_key256
2243
2244L$done_key256:
// Store the round-count value (%esi = 13) and return 0.
2245	movl	%esi,16(%rax)
2246	xorl	%eax,%eax
2247	jmp	L$enc_key_ret
2248
// Common exit of the key-setup routine. L$bad_keybits sets the return value
// to -2 for an unsupported key length. L$enc_key_ret clears xmm0..xmm5,
// which held key material, undoes the 8-byte stack adjustment made at entry,
// and returns with the result in %rax.
2249.p2align	4
2250L$bad_keybits:
2251	movq	$-2,%rax
2252L$enc_key_ret:
2253	pxor	%xmm0,%xmm0
2254	pxor	%xmm1,%xmm1
2255	pxor	%xmm2,%xmm2
2256	pxor	%xmm3,%xmm3
2257	pxor	%xmm4,%xmm4
2258	pxor	%xmm5,%xmm5
2259	addq	$8,%rsp
2260
2261	ret
2262
2263L$SEH_end_set_encrypt_key:
2264
// L$key_expansion_128: one step of 128-bit key expansion.
// In:  xmm0 = previous round key, xmm1 = aeskeygenassist result,
//      xmm4 = scratch (zeroed once at function entry), rax = write cursor.
// Out: xmm0 = next round key. The main entry first stores the previous key
//      at (%rax) and advances %rax by 16; the _cold entry skips that store.
2265.p2align	4
2266L$key_expansion_128:
2267	movups	%xmm0,(%rax)
2268	leaq	16(%rax),%rax
2269L$key_expansion_128_cold:
// Two shufps/xorps pairs mix shifted dwords of xmm0 into itself through xmm4.
2270	shufps	$16,%xmm0,%xmm4
2271	xorps	%xmm4,%xmm0
2272	shufps	$140,%xmm0,%xmm4
2273	xorps	%xmm4,%xmm0
// Broadcast dword 3 of the keygen-assist result and XOR it in.
2274	shufps	$255,%xmm1,%xmm1
2275	xorps	%xmm1,%xmm0
2276	ret
2277
// 192-bit key expansion helpers.
// In:  xmm0 = low 128 bits of the key state, xmm2 = the remaining 64 bits in
//      its low half, xmm1 = aeskeygenassist result, xmm4 = scratch,
//      rax = write cursor.
// L$key_expansion_192a stores xmm0 as one round key (the _cold entry skips
// the store), saves xmm2 in xmm5, and advances the state.
// L$key_expansion_192b combines the saved xmm5 with the current xmm0/xmm2
// into two round keys, stores both, and then advances the state through the
// shared L$key_expansion_192b_warm code.
2278.p2align	4
2279L$key_expansion_192a:
2280	movups	%xmm0,(%rax)
2281	leaq	16(%rax),%rax
2282L$key_expansion_192a_cold:
2283	movaps	%xmm2,%xmm5
2284L$key_expansion_192b_warm:
2285	shufps	$16,%xmm0,%xmm4
2286	movdqa	%xmm2,%xmm3
2287	xorps	%xmm4,%xmm0
2288	shufps	$140,%xmm0,%xmm4
2289	pslldq	$4,%xmm3
2290	xorps	%xmm4,%xmm0
// Broadcast dword 1 of the keygen-assist result.
2291	pshufd	$85,%xmm1,%xmm1
2292	pxor	%xmm3,%xmm2
2293	pxor	%xmm1,%xmm0
// Propagate dword 3 of the updated xmm0 into xmm2.
2294	pshufd	$255,%xmm0,%xmm3
2295	pxor	%xmm3,%xmm2
2296	ret
2297
2298.p2align	4
2299L$key_expansion_192b:
2300	movaps	%xmm0,%xmm3
// First slot: low two dwords of xmm5 followed by low two dwords of xmm0.
2301	shufps	$68,%xmm0,%xmm5
2302	movups	%xmm5,(%rax)
// Second slot: high two dwords of xmm0 followed by low two dwords of xmm2.
2303	shufps	$78,%xmm2,%xmm3
2304	movups	%xmm3,16(%rax)
2305	leaq	32(%rax),%rax
2306	jmp	L$key_expansion_192b_warm
2307
// 256-bit key expansion helpers. The key state is split across xmm0 and
// xmm2; xmm1 holds the aeskeygenassist result, xmm4 is scratch, and rax is
// the write cursor.
// L$key_expansion_256a stores xmm2 (the _cold entry skips the store) and
// updates xmm0, using dword 3 of xmm1.
// L$key_expansion_256b stores xmm0 and updates xmm2, using dword 2 of xmm1.
2308.p2align	4
2309L$key_expansion_256a:
2310	movups	%xmm2,(%rax)
2311	leaq	16(%rax),%rax
2312L$key_expansion_256a_cold:
2313	shufps	$16,%xmm0,%xmm4
2314	xorps	%xmm4,%xmm0
2315	shufps	$140,%xmm0,%xmm4
2316	xorps	%xmm4,%xmm0
// Immediate 255 broadcasts dword 3 of xmm1.
2317	shufps	$255,%xmm1,%xmm1
2318	xorps	%xmm1,%xmm0
2319	ret
2320
2321.p2align	4
2322L$key_expansion_256b:
2323	movups	%xmm0,(%rax)
2324	leaq	16(%rax),%rax
2325
2326	shufps	$16,%xmm2,%xmm4
2327	xorps	%xmm4,%xmm2
2328	shufps	$140,%xmm2,%xmm4
2329	xorps	%xmm4,%xmm2
// Immediate 170 broadcasts dword 2 of xmm1.
2330	shufps	$170,%xmm1,%xmm1
2331	xorps	%xmm1,%xmm2
2332	ret
2333
2334
// Read-only constants, 64-byte aligned.
// L$key_rotate, L$key_rotate192, L$key_rcon1 and L$key_rcon1b are loaded by
// the *_alt key-expansion paths of the key-setup routine. The other tables
// are not referenced by the key-setup or CBC code; presumably other routines
// in this file use them.
2335.section	__DATA,__const
2336.p2align	6
// Byte indices 15..0.
2337L$bswap_mask:
2338.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
2339L$increment32:
2340.long	6,6,6,0
2341L$increment64:
2342.long	1,0,0,0
2343L$xts_magic:
2344.long	0x87,0,1,0
2345L$increment1:
2346.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
// pshufb masks: every dword selects source bytes 13,14,15,12 (L$key_rotate)
// or 5,6,7,4 (L$key_rotate192).
2347L$key_rotate:
2348.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
2349L$key_rotate192:
2350.long	0x04070605,0x04070605,0x04070605,0x04070605
// Round constants, replicated in all four dwords: 1 to start, and 0x1b for
// the last two 128-bit rounds.
2351L$key_rcon1:
2352.long	1,1,1,1
2353L$key_rcon1b:
2354.long	0x1b,0x1b,0x1b,0x1b
2355
// NUL-terminated ASCII string:
// "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
2356.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2357.p2align	6
2358.text
2359#endif
2360