xref: /aosp_15_r20/external/boringssl/src/gen/bcm/aesni-x86_64-apple.S (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
7.text
// _aes_hw_encrypt: encrypt one 16-byte block with AES-NI.
//
// Registers on entry (System V AMD64 integer-argument order):
//   %rdi  pointer to the 16-byte input block (read with movups)
//   %rsi  pointer to the 16-byte output block (written with movups)
//   %rdx  pointer to the key schedule; a 32-bit loop count is read from
//         offset 240 (presumably the AES_KEY `rounds` field -- confirm
//         against the C declaration)
// Clobbers: %eax, %rdx, %xmm0-%xmm2, flags. %xmm0-%xmm2 are zeroed before
// returning.
//
// Opcode note: `.byte 102,15,56,220,209` encodes aesenc %xmm1,%xmm2 and
// `.byte 102,15,56,221,209` encodes aesenclast %xmm1,%xmm2.
8.globl	_aes_hw_encrypt
9.private_extern _aes_hw_encrypt
10
11.p2align	4
12_aes_hw_encrypt:
13
// _CET_ENDBR is a macro defined outside this file (presumably an
// indirect-branch landing pad when CET is enabled -- confirm in asm_base.h).
14_CET_ENDBR
15#ifdef BORINGSSL_DISPATCH_TEST
16
// Test builds only: set byte 1 of BORINGSSL_function_hit to 1.
17	movb	$1,_BORINGSSL_function_hit+1(%rip)
18#endif
// xmm2 = input block, eax = loop count, xmm0/xmm1 = first two 16-byte
// key-schedule entries; %rdx is advanced to the third entry.
19	movups	(%rdi),%xmm2
20	movl	240(%rdx),%eax
21	movups	(%rdx),%xmm0
22	movups	16(%rdx),%xmm1
23	leaq	32(%rdx),%rdx
// Initial whitening: XOR the block with the first key-schedule entry.
24	xorps	%xmm0,%xmm2
// One aesenc per iteration, %eax iterations. The jnz tests the flags set
// by decl; the movups/leaq in between do not modify flags.
25L$oop_enc1_1:
26.byte	102,15,56,220,209
27	decl	%eax
28	movups	(%rdx),%xmm1
29	leaq	16(%rdx),%rdx
30	jnz	L$oop_enc1_1
// Final round (aesenclast) with the entry loaded by the last iteration.
31.byte	102,15,56,221,209
// Zero the key registers, store the result, then zero the data register.
32	pxor	%xmm0,%xmm0
33	pxor	%xmm1,%xmm1
34	movups	%xmm2,(%rsi)
35	pxor	%xmm2,%xmm2
36	ret
37
38
39
// _aes_hw_decrypt: decrypt one 16-byte block with AES-NI.
//
// Registers on entry (System V AMD64 integer-argument order):
//   %rdi  pointer to the 16-byte input block
//   %rsi  pointer to the 16-byte output block
//   %rdx  pointer to the key schedule; a 32-bit loop count is read from
//         offset 240 (presumably the AES_KEY `rounds` field -- confirm)
// Clobbers: %eax, %rdx, %xmm0-%xmm2, flags. %xmm0-%xmm2 are zeroed before
// returning.
//
// Opcode note: `.byte 102,15,56,222,209` encodes aesdec %xmm1,%xmm2 and
// `.byte 102,15,56,223,209` encodes aesdeclast %xmm1,%xmm2.
40.globl	_aes_hw_decrypt
41.private_extern _aes_hw_decrypt
42
43.p2align	4
44_aes_hw_decrypt:
45
// _CET_ENDBR is a macro defined outside this file.
46_CET_ENDBR
// xmm2 = input block, eax = loop count, xmm0/xmm1 = first two 16-byte
// key-schedule entries; %rdx is advanced to the third entry.
47	movups	(%rdi),%xmm2
48	movl	240(%rdx),%eax
49	movups	(%rdx),%xmm0
50	movups	16(%rdx),%xmm1
51	leaq	32(%rdx),%rdx
// Initial whitening with the first key-schedule entry.
52	xorps	%xmm0,%xmm2
// One aesdec per iteration, %eax iterations. The jnz consumes the flags
// from decl; movups/leaq leave flags untouched.
53L$oop_dec1_2:
54.byte	102,15,56,222,209
55	decl	%eax
56	movups	(%rdx),%xmm1
57	leaq	16(%rdx),%rdx
58	jnz	L$oop_dec1_2
// Final round (aesdeclast) with the entry loaded by the last iteration.
59.byte	102,15,56,223,209
// Zero the key registers, store the result, then zero the data register.
60	pxor	%xmm0,%xmm0
61	pxor	%xmm1,%xmm1
62	movups	%xmm2,(%rsi)
63	pxor	%xmm2,%xmm2
64	ret
65
66
67
// _aesni_encrypt2: file-local helper that encrypts two blocks in parallel.
//
// In:   %xmm2, %xmm3  the two blocks
//       %rcx          pointer to the key schedule
//       %eax          loop count (same value the callers read from
//                     offset 240 of the key schedule)
// Out:  %xmm2, %xmm3  the encrypted blocks
// Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
//
// Opcode note: `.byte 102,15,56,220,MODRM` is aesenc and
// `.byte 102,15,56,221,MODRM` is aesenclast. MODRM 209/217 use %xmm1 as the
// round key for %xmm2/%xmm3; MODRM 208/216 use %xmm0.
68.p2align	4
69_aesni_encrypt2:
70
// xmm0 = entry 0 (whitening key), xmm1 = entry 1; eax becomes 16*count.
71	movups	(%rcx),%xmm0
72	shll	$4,%eax
73	movups	16(%rcx),%xmm1
74	xorps	%xmm0,%xmm2
75	xorps	%xmm0,%xmm3
76	movups	32(%rcx),%xmm0
// rcx = key + 32 + 16*count, rax = 16 - 16*count. Round keys are then
// addressed as (%rcx,%rax) with a negative index that climbs toward zero.
77	leaq	32(%rcx,%rax,1),%rcx
78	negq	%rax
79	addq	$16,%rax
80
// Two rounds per iteration, alternating %xmm1 and %xmm0 as the round key.
// addq $32 sets ZF when the index reaches zero; the following aesenc and
// movups do not touch flags, so jnz still sees that result.
81L$enc_loop2:
82.byte	102,15,56,220,209
83.byte	102,15,56,220,217
84	movups	(%rcx,%rax,1),%xmm1
85	addq	$32,%rax
86.byte	102,15,56,220,208
87.byte	102,15,56,220,216
88	movups	-16(%rcx,%rax,1),%xmm0
89	jnz	L$enc_loop2
90
// Last aesenc with %xmm1, then aesenclast with %xmm0.
91.byte	102,15,56,220,209
92.byte	102,15,56,220,217
93.byte	102,15,56,221,208
94.byte	102,15,56,221,216
95	ret
96
97
98
// _aesni_decrypt2: file-local helper that decrypts two blocks in parallel.
//
// In:   %xmm2, %xmm3  the two blocks
//       %rcx          pointer to the key schedule
//       %eax          loop count (same value the callers read from
//                     offset 240 of the key schedule)
// Out:  %xmm2, %xmm3  the decrypted blocks
// Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
//
// Opcode note: `.byte 102,15,56,222,MODRM` is aesdec and
// `.byte 102,15,56,223,MODRM` is aesdeclast. MODRM 209/217 use %xmm1 as the
// round key for %xmm2/%xmm3; MODRM 208/216 use %xmm0.
99.p2align	4
100_aesni_decrypt2:
101
// xmm0 = entry 0 (whitening key), xmm1 = entry 1; eax becomes 16*count.
102	movups	(%rcx),%xmm0
103	shll	$4,%eax
104	movups	16(%rcx),%xmm1
105	xorps	%xmm0,%xmm2
106	xorps	%xmm0,%xmm3
107	movups	32(%rcx),%xmm0
// rcx = key + 32 + 16*count, rax = 16 - 16*count (negative index that
// climbs toward zero in steps of 32).
108	leaq	32(%rcx,%rax,1),%rcx
109	negq	%rax
110	addq	$16,%rax
111
// Two rounds per iteration; jnz consumes ZF from addq $32 (the aesdec and
// movups in between do not modify flags).
112L$dec_loop2:
113.byte	102,15,56,222,209
114.byte	102,15,56,222,217
115	movups	(%rcx,%rax,1),%xmm1
116	addq	$32,%rax
117.byte	102,15,56,222,208
118.byte	102,15,56,222,216
119	movups	-16(%rcx,%rax,1),%xmm0
120	jnz	L$dec_loop2
121
// Last aesdec with %xmm1, then aesdeclast with %xmm0.
122.byte	102,15,56,222,209
123.byte	102,15,56,222,217
124.byte	102,15,56,223,208
125.byte	102,15,56,223,216
126	ret
127
128
129
// _aesni_encrypt3: file-local helper that encrypts three blocks in parallel.
//
// In:   %xmm2-%xmm4   the three blocks
//       %rcx          pointer to the key schedule
//       %eax          loop count (value read from offset 240 by callers)
// Out:  %xmm2-%xmm4   the encrypted blocks
// Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
//
// Opcode note: `.byte 102,15,56,220,MODRM` is aesenc, `...,221,MODRM` is
// aesenclast. MODRM 209/217/225 apply %xmm1 to %xmm2/%xmm3/%xmm4;
// MODRM 208/216/224 apply %xmm0.
130.p2align	4
131_aesni_encrypt3:
132
// Whitening with entry 0; xmm1 = entry 1, xmm0 reloaded with entry 2.
133	movups	(%rcx),%xmm0
134	shll	$4,%eax
135	movups	16(%rcx),%xmm1
136	xorps	%xmm0,%xmm2
137	xorps	%xmm0,%xmm3
138	xorps	%xmm0,%xmm4
139	movups	32(%rcx),%xmm0
// rcx = key + 32 + 16*count, rax = 16 - 16*count.
140	leaq	32(%rcx,%rax,1),%rcx
141	negq	%rax
142	addq	$16,%rax
143
// Two rounds per iteration; loop ends when the index in %rax reaches zero.
144L$enc_loop3:
145.byte	102,15,56,220,209
146.byte	102,15,56,220,217
147.byte	102,15,56,220,225
148	movups	(%rcx,%rax,1),%xmm1
149	addq	$32,%rax
150.byte	102,15,56,220,208
151.byte	102,15,56,220,216
152.byte	102,15,56,220,224
153	movups	-16(%rcx,%rax,1),%xmm0
154	jnz	L$enc_loop3
155
// Last aesenc with %xmm1, then aesenclast with %xmm0.
156.byte	102,15,56,220,209
157.byte	102,15,56,220,217
158.byte	102,15,56,220,225
159.byte	102,15,56,221,208
160.byte	102,15,56,221,216
161.byte	102,15,56,221,224
162	ret
163
164
165
// _aesni_decrypt3: file-local helper that decrypts three blocks in parallel.
//
// In:   %xmm2-%xmm4   the three blocks
//       %rcx          pointer to the key schedule
//       %eax          loop count (value read from offset 240 by callers)
// Out:  %xmm2-%xmm4   the decrypted blocks
// Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
//
// Opcode note: `.byte 102,15,56,222,MODRM` is aesdec, `...,223,MODRM` is
// aesdeclast. MODRM 209/217/225 apply %xmm1 to %xmm2/%xmm3/%xmm4;
// MODRM 208/216/224 apply %xmm0.
166.p2align	4
167_aesni_decrypt3:
168
// Whitening with entry 0; xmm1 = entry 1, xmm0 reloaded with entry 2.
169	movups	(%rcx),%xmm0
170	shll	$4,%eax
171	movups	16(%rcx),%xmm1
172	xorps	%xmm0,%xmm2
173	xorps	%xmm0,%xmm3
174	xorps	%xmm0,%xmm4
175	movups	32(%rcx),%xmm0
// rcx = key + 32 + 16*count, rax = 16 - 16*count.
176	leaq	32(%rcx,%rax,1),%rcx
177	negq	%rax
178	addq	$16,%rax
179
// Two rounds per iteration; loop ends when the index in %rax reaches zero.
180L$dec_loop3:
181.byte	102,15,56,222,209
182.byte	102,15,56,222,217
183.byte	102,15,56,222,225
184	movups	(%rcx,%rax,1),%xmm1
185	addq	$32,%rax
186.byte	102,15,56,222,208
187.byte	102,15,56,222,216
188.byte	102,15,56,222,224
189	movups	-16(%rcx,%rax,1),%xmm0
190	jnz	L$dec_loop3
191
// Last aesdec with %xmm1, then aesdeclast with %xmm0.
192.byte	102,15,56,222,209
193.byte	102,15,56,222,217
194.byte	102,15,56,222,225
195.byte	102,15,56,223,208
196.byte	102,15,56,223,216
197.byte	102,15,56,223,224
198	ret
199
200
201
// _aesni_encrypt4: file-local helper that encrypts four blocks in parallel.
//
// In:   %xmm2-%xmm5   the four blocks
//       %rcx          pointer to the key schedule
//       %eax          loop count (value read from offset 240 by callers)
// Out:  %xmm2-%xmm5   the encrypted blocks
// Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
//
// Opcode note: `.byte 102,15,56,220,MODRM` is aesenc, `...,221,MODRM` is
// aesenclast. MODRM 209/217/225/233 apply %xmm1 to %xmm2..%xmm5;
// MODRM 208/216/224/232 apply %xmm0.
202.p2align	4
203_aesni_encrypt4:
204
// Whitening with entry 0; xmm1 = entry 1, xmm0 reloaded with entry 2.
205	movups	(%rcx),%xmm0
206	shll	$4,%eax
207	movups	16(%rcx),%xmm1
208	xorps	%xmm0,%xmm2
209	xorps	%xmm0,%xmm3
210	xorps	%xmm0,%xmm4
211	xorps	%xmm0,%xmm5
212	movups	32(%rcx),%xmm0
// rcx = key + 32 + 16*count, rax = 16 - 16*count.
213	leaq	32(%rcx,%rax,1),%rcx
214	negq	%rax
// 0x0f,0x1f,0x00 is a 3-byte NOP (nopl (%rax)); presumably padding so the
// loop label below lands on a favourable address.
215.byte	0x0f,0x1f,0x00
216	addq	$16,%rax
217
// Two rounds per iteration; loop ends when the index in %rax reaches zero.
218L$enc_loop4:
219.byte	102,15,56,220,209
220.byte	102,15,56,220,217
221.byte	102,15,56,220,225
222.byte	102,15,56,220,233
223	movups	(%rcx,%rax,1),%xmm1
224	addq	$32,%rax
225.byte	102,15,56,220,208
226.byte	102,15,56,220,216
227.byte	102,15,56,220,224
228.byte	102,15,56,220,232
229	movups	-16(%rcx,%rax,1),%xmm0
230	jnz	L$enc_loop4
231
// Last aesenc with %xmm1, then aesenclast with %xmm0.
232.byte	102,15,56,220,209
233.byte	102,15,56,220,217
234.byte	102,15,56,220,225
235.byte	102,15,56,220,233
236.byte	102,15,56,221,208
237.byte	102,15,56,221,216
238.byte	102,15,56,221,224
239.byte	102,15,56,221,232
240	ret
241
242
243
// _aesni_decrypt4: file-local helper that decrypts four blocks in parallel.
//
// In:   %xmm2-%xmm5   the four blocks
//       %rcx          pointer to the key schedule
//       %eax          loop count (value read from offset 240 by callers)
// Out:  %xmm2-%xmm5   the decrypted blocks
// Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
//
// Opcode note: `.byte 102,15,56,222,MODRM` is aesdec, `...,223,MODRM` is
// aesdeclast. MODRM 209/217/225/233 apply %xmm1 to %xmm2..%xmm5;
// MODRM 208/216/224/232 apply %xmm0.
244.p2align	4
245_aesni_decrypt4:
246
// Whitening with entry 0; xmm1 = entry 1, xmm0 reloaded with entry 2.
247	movups	(%rcx),%xmm0
248	shll	$4,%eax
249	movups	16(%rcx),%xmm1
250	xorps	%xmm0,%xmm2
251	xorps	%xmm0,%xmm3
252	xorps	%xmm0,%xmm4
253	xorps	%xmm0,%xmm5
254	movups	32(%rcx),%xmm0
// rcx = key + 32 + 16*count, rax = 16 - 16*count.
255	leaq	32(%rcx,%rax,1),%rcx
256	negq	%rax
// 0x0f,0x1f,0x00 is a 3-byte NOP (nopl (%rax)); presumably padding so the
// loop label below lands on a favourable address.
257.byte	0x0f,0x1f,0x00
258	addq	$16,%rax
259
// Two rounds per iteration; loop ends when the index in %rax reaches zero.
260L$dec_loop4:
261.byte	102,15,56,222,209
262.byte	102,15,56,222,217
263.byte	102,15,56,222,225
264.byte	102,15,56,222,233
265	movups	(%rcx,%rax,1),%xmm1
266	addq	$32,%rax
267.byte	102,15,56,222,208
268.byte	102,15,56,222,216
269.byte	102,15,56,222,224
270.byte	102,15,56,222,232
271	movups	-16(%rcx,%rax,1),%xmm0
272	jnz	L$dec_loop4
273
// Last aesdec with %xmm1, then aesdeclast with %xmm0.
274.byte	102,15,56,222,209
275.byte	102,15,56,222,217
276.byte	102,15,56,222,225
277.byte	102,15,56,222,233
278.byte	102,15,56,223,208
279.byte	102,15,56,223,216
280.byte	102,15,56,223,224
281.byte	102,15,56,223,232
282	ret
283
284
285
// _aesni_encrypt6: file-local helper that encrypts six blocks in parallel.
//
// In:   %xmm2-%xmm7   the six blocks
//       %rcx          pointer to the key schedule
//       %eax          loop count (value read from offset 240 by callers)
// Out:  %xmm2-%xmm7   the encrypted blocks
// Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
//
// Opcode note: `.byte 102,15,56,220,MODRM` is aesenc, `...,221,MODRM` is
// aesenclast. MODRM 209/217/225/233/241/249 apply %xmm1 to %xmm2..%xmm7;
// MODRM 208/216/224/232/240/248 apply %xmm0.
286.p2align	4
287_aesni_encrypt6:
288
// The whitening XORs (entry 0 in %xmm0) are interleaved with the first
// aesenc (entry 1 in %xmm1) on the blocks that are already whitened.
289	movups	(%rcx),%xmm0
290	shll	$4,%eax
291	movups	16(%rcx),%xmm1
292	xorps	%xmm0,%xmm2
293	pxor	%xmm0,%xmm3
294	pxor	%xmm0,%xmm4
295.byte	102,15,56,220,209
// rcx = key + 32 + 16*count, rax = -16*count.
296	leaq	32(%rcx,%rax,1),%rcx
297	negq	%rax
298.byte	102,15,56,220,217
299	pxor	%xmm0,%xmm5
300	pxor	%xmm0,%xmm6
301.byte	102,15,56,220,225
302	pxor	%xmm0,%xmm7
// (%rcx,%rax) is key+32 here, i.e. entry 2; then rax = 16 - 16*count.
303	movups	(%rcx,%rax,1),%xmm0
304	addq	$16,%rax
// %xmm2-%xmm4 already had their first round, so enter the loop midway to
// finish that round on %xmm5-%xmm7.
305	jmp	L$enc_loop6_enter
306.p2align	4
307L$enc_loop6:
308.byte	102,15,56,220,209
309.byte	102,15,56,220,217
310.byte	102,15,56,220,225
311L$enc_loop6_enter:
312.byte	102,15,56,220,233
313.byte	102,15,56,220,241
314.byte	102,15,56,220,249
315	movups	(%rcx,%rax,1),%xmm1
// ZF from this addq is what the jnz below tests; aesenc/movups in between
// do not modify flags.
316	addq	$32,%rax
317.byte	102,15,56,220,208
318.byte	102,15,56,220,216
319.byte	102,15,56,220,224
320.byte	102,15,56,220,232
321.byte	102,15,56,220,240
322.byte	102,15,56,220,248
323	movups	-16(%rcx,%rax,1),%xmm0
324	jnz	L$enc_loop6
325
// Last aesenc with %xmm1, then aesenclast with %xmm0.
326.byte	102,15,56,220,209
327.byte	102,15,56,220,217
328.byte	102,15,56,220,225
329.byte	102,15,56,220,233
330.byte	102,15,56,220,241
331.byte	102,15,56,220,249
332.byte	102,15,56,221,208
333.byte	102,15,56,221,216
334.byte	102,15,56,221,224
335.byte	102,15,56,221,232
336.byte	102,15,56,221,240
337.byte	102,15,56,221,248
338	ret
339
340
341
// _aesni_decrypt6: file-local helper that decrypts six blocks in parallel.
//
// In:   %xmm2-%xmm7   the six blocks
//       %rcx          pointer to the key schedule
//       %eax          loop count (value read from offset 240 by callers)
// Out:  %xmm2-%xmm7   the decrypted blocks
// Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
//
// Opcode note: `.byte 102,15,56,222,MODRM` is aesdec, `...,223,MODRM` is
// aesdeclast. MODRM 209/217/225/233/241/249 apply %xmm1 to %xmm2..%xmm7;
// MODRM 208/216/224/232/240/248 apply %xmm0.
342.p2align	4
343_aesni_decrypt6:
344
// The whitening XORs (entry 0 in %xmm0) are interleaved with the first
// aesdec (entry 1 in %xmm1) on the blocks that are already whitened.
345	movups	(%rcx),%xmm0
346	shll	$4,%eax
347	movups	16(%rcx),%xmm1
348	xorps	%xmm0,%xmm2
349	pxor	%xmm0,%xmm3
350	pxor	%xmm0,%xmm4
351.byte	102,15,56,222,209
// rcx = key + 32 + 16*count, rax = -16*count.
352	leaq	32(%rcx,%rax,1),%rcx
353	negq	%rax
354.byte	102,15,56,222,217
355	pxor	%xmm0,%xmm5
356	pxor	%xmm0,%xmm6
357.byte	102,15,56,222,225
358	pxor	%xmm0,%xmm7
// (%rcx,%rax) is key+32 here, i.e. entry 2; then rax = 16 - 16*count.
359	movups	(%rcx,%rax,1),%xmm0
360	addq	$16,%rax
// %xmm2-%xmm4 already had their first round, so enter the loop midway to
// finish that round on %xmm5-%xmm7.
361	jmp	L$dec_loop6_enter
362.p2align	4
363L$dec_loop6:
364.byte	102,15,56,222,209
365.byte	102,15,56,222,217
366.byte	102,15,56,222,225
367L$dec_loop6_enter:
368.byte	102,15,56,222,233
369.byte	102,15,56,222,241
370.byte	102,15,56,222,249
371	movups	(%rcx,%rax,1),%xmm1
// ZF from this addq is what the jnz below tests; aesdec/movups in between
// do not modify flags.
372	addq	$32,%rax
373.byte	102,15,56,222,208
374.byte	102,15,56,222,216
375.byte	102,15,56,222,224
376.byte	102,15,56,222,232
377.byte	102,15,56,222,240
378.byte	102,15,56,222,248
379	movups	-16(%rcx,%rax,1),%xmm0
380	jnz	L$dec_loop6
381
// Last aesdec with %xmm1, then aesdeclast with %xmm0.
382.byte	102,15,56,222,209
383.byte	102,15,56,222,217
384.byte	102,15,56,222,225
385.byte	102,15,56,222,233
386.byte	102,15,56,222,241
387.byte	102,15,56,222,249
388.byte	102,15,56,223,208
389.byte	102,15,56,223,216
390.byte	102,15,56,223,224
391.byte	102,15,56,223,232
392.byte	102,15,56,223,240
393.byte	102,15,56,223,248
394	ret
395
396
397
// _aesni_encrypt8: file-local helper that encrypts eight blocks in parallel.
//
// In:   %xmm2-%xmm9   the eight blocks
//       %rcx          pointer to the key schedule
//       %eax          loop count (value read from offset 240 by callers)
// Out:  %xmm2-%xmm9   the encrypted blocks
// Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
//
// L$enc_loop8_enter is also used as a `call` target by the CTR tail code
// in _aes_hw_ctr32_encrypt_blocks, which sets up %rcx/%rax/%xmm0 itself.
//
// Opcode note: `.byte 102,15,56,220,MODRM` is aesenc, `...,221,MODRM` is
// aesenclast. MODRM 209..249 apply %xmm1 to %xmm2..%xmm7 and 208..248 apply
// %xmm0. The forms with the extra 68 (REX.R) byte target %xmm8 (MODRM
// 193/192) and %xmm9 (MODRM 201/200).
398.p2align	4
399_aesni_encrypt8:
400
// Whitening with entry 0 (in %xmm0), interleaved with the first aesenc
// (entry 1 in %xmm1) on %xmm2 and %xmm3.
401	movups	(%rcx),%xmm0
402	shll	$4,%eax
403	movups	16(%rcx),%xmm1
404	xorps	%xmm0,%xmm2
405	xorps	%xmm0,%xmm3
406	pxor	%xmm0,%xmm4
407	pxor	%xmm0,%xmm5
408	pxor	%xmm0,%xmm6
// rcx = key + 32 + 16*count, rax = -16*count.
409	leaq	32(%rcx,%rax,1),%rcx
410	negq	%rax
411.byte	102,15,56,220,209
412	pxor	%xmm0,%xmm7
413	pxor	%xmm0,%xmm8
414.byte	102,15,56,220,217
415	pxor	%xmm0,%xmm9
// (%rcx,%rax) is key+32 here, i.e. entry 2; then rax = 16 - 16*count.
416	movups	(%rcx,%rax,1),%xmm0
417	addq	$16,%rax
// %xmm2/%xmm3 already had their first round; join the loop at the point
// where the remaining six blocks get theirs.
418	jmp	L$enc_loop8_inner
419.p2align	4
420L$enc_loop8:
421.byte	102,15,56,220,209
422.byte	102,15,56,220,217
423L$enc_loop8_inner:
424.byte	102,15,56,220,225
425.byte	102,15,56,220,233
426.byte	102,15,56,220,241
427.byte	102,15,56,220,249
428.byte	102,68,15,56,220,193
429.byte	102,68,15,56,220,201
// Entry state expected here: all eight blocks have had the %xmm1 round,
// %xmm0 holds the next round key, %rcx/%rax are biased as computed above.
430L$enc_loop8_enter:
431	movups	(%rcx,%rax,1),%xmm1
// ZF from this addq is what the jnz below tests; aesenc/movups in between
// do not modify flags.
432	addq	$32,%rax
433.byte	102,15,56,220,208
434.byte	102,15,56,220,216
435.byte	102,15,56,220,224
436.byte	102,15,56,220,232
437.byte	102,15,56,220,240
438.byte	102,15,56,220,248
439.byte	102,68,15,56,220,192
440.byte	102,68,15,56,220,200
441	movups	-16(%rcx,%rax,1),%xmm0
442	jnz	L$enc_loop8
443
// Last aesenc with %xmm1, then aesenclast with %xmm0.
444.byte	102,15,56,220,209
445.byte	102,15,56,220,217
446.byte	102,15,56,220,225
447.byte	102,15,56,220,233
448.byte	102,15,56,220,241
449.byte	102,15,56,220,249
450.byte	102,68,15,56,220,193
451.byte	102,68,15,56,220,201
452.byte	102,15,56,221,208
453.byte	102,15,56,221,216
454.byte	102,15,56,221,224
455.byte	102,15,56,221,232
456.byte	102,15,56,221,240
457.byte	102,15,56,221,248
458.byte	102,68,15,56,221,192
459.byte	102,68,15,56,221,200
460	ret
461
462
463
// _aesni_decrypt8: file-local helper that decrypts eight blocks in parallel.
//
// In:   %xmm2-%xmm9   the eight blocks
//       %rcx          pointer to the key schedule
//       %eax          loop count (value read from offset 240 by callers)
// Out:  %xmm2-%xmm9   the decrypted blocks
// Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
//
// Opcode note: `.byte 102,15,56,222,MODRM` is aesdec, `...,223,MODRM` is
// aesdeclast. MODRM 209..249 apply %xmm1 to %xmm2..%xmm7 and 208..248 apply
// %xmm0. The forms with the extra 68 (REX.R) byte target %xmm8 (MODRM
// 193/192) and %xmm9 (MODRM 201/200).
464.p2align	4
465_aesni_decrypt8:
466
// Whitening with entry 0 (in %xmm0), interleaved with the first aesdec
// (entry 1 in %xmm1) on %xmm2 and %xmm3.
467	movups	(%rcx),%xmm0
468	shll	$4,%eax
469	movups	16(%rcx),%xmm1
470	xorps	%xmm0,%xmm2
471	xorps	%xmm0,%xmm3
472	pxor	%xmm0,%xmm4
473	pxor	%xmm0,%xmm5
474	pxor	%xmm0,%xmm6
// rcx = key + 32 + 16*count, rax = -16*count.
475	leaq	32(%rcx,%rax,1),%rcx
476	negq	%rax
477.byte	102,15,56,222,209
478	pxor	%xmm0,%xmm7
479	pxor	%xmm0,%xmm8
480.byte	102,15,56,222,217
481	pxor	%xmm0,%xmm9
// (%rcx,%rax) is key+32 here, i.e. entry 2; then rax = 16 - 16*count.
482	movups	(%rcx,%rax,1),%xmm0
483	addq	$16,%rax
// %xmm2/%xmm3 already had their first round; join the loop at the point
// where the remaining six blocks get theirs.
484	jmp	L$dec_loop8_inner
485.p2align	4
486L$dec_loop8:
487.byte	102,15,56,222,209
488.byte	102,15,56,222,217
489L$dec_loop8_inner:
490.byte	102,15,56,222,225
491.byte	102,15,56,222,233
492.byte	102,15,56,222,241
493.byte	102,15,56,222,249
494.byte	102,68,15,56,222,193
495.byte	102,68,15,56,222,201
496L$dec_loop8_enter:
497	movups	(%rcx,%rax,1),%xmm1
// ZF from this addq is what the jnz below tests; aesdec/movups in between
// do not modify flags.
498	addq	$32,%rax
499.byte	102,15,56,222,208
500.byte	102,15,56,222,216
501.byte	102,15,56,222,224
502.byte	102,15,56,222,232
503.byte	102,15,56,222,240
504.byte	102,15,56,222,248
505.byte	102,68,15,56,222,192
506.byte	102,68,15,56,222,200
507	movups	-16(%rcx,%rax,1),%xmm0
508	jnz	L$dec_loop8
509
// Last aesdec with %xmm1, then aesdeclast with %xmm0.
510.byte	102,15,56,222,209
511.byte	102,15,56,222,217
512.byte	102,15,56,222,225
513.byte	102,15,56,222,233
514.byte	102,15,56,222,241
515.byte	102,15,56,222,249
516.byte	102,68,15,56,222,193
517.byte	102,68,15,56,222,201
518.byte	102,15,56,223,208
519.byte	102,15,56,223,216
520.byte	102,15,56,223,224
521.byte	102,15,56,223,232
522.byte	102,15,56,223,240
523.byte	102,15,56,223,248
524.byte	102,68,15,56,223,192
525.byte	102,68,15,56,223,200
526	ret
527
528
// _aes_hw_ecb_encrypt: ECB-mode encryption or decryption of whole 16-byte
// blocks, eight blocks per main-loop iteration.
//
// Registers on entry (System V AMD64 integer-argument order):
//   %rdi  input pointer
//   %rsi  output pointer
//   %rdx  length in bytes; rounded DOWN to a multiple of 16, and the
//         function returns immediately if the result is zero
//   %rcx  pointer to the key schedule (loop count read from offset 240)
//   %r8d  direction: non-zero = encrypt, zero = decrypt
// %r11 / %r10d keep copies of the key pointer / loop count because the
// _aesni_* helpers overwrite %rcx and %rax.
// Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm9, flags.
// The decrypt paths zero each data register after storing it; the encrypt
// paths do not. %xmm0/%xmm1 are zeroed on every exit path.
529.globl	_aes_hw_ecb_encrypt
530.private_extern _aes_hw_ecb_encrypt
531
532.p2align	4
533_aes_hw_ecb_encrypt:
534
// _CET_ENDBR is a macro defined outside this file.
535_CET_ENDBR
// Drop any trailing partial block; nothing to do if no full block remains.
536	andq	$-16,%rdx
537	jz	L$ecb_ret
538
539	movl	240(%rcx),%eax
540	movups	(%rcx),%xmm0
541	movq	%rcx,%r11
542	movl	%eax,%r10d
543	testl	%r8d,%r8d
544	jz	L$ecb_decrypt
545
// ---- Encrypt ----
// Fewer than 8 blocks (0x80 bytes): go straight to the tail dispatcher.
546	cmpq	$0x80,%rdx
547	jb	L$ecb_enc_tail
548
// Load the first 8 blocks and pre-subtract them from the remaining length.
549	movdqu	(%rdi),%xmm2
550	movdqu	16(%rdi),%xmm3
551	movdqu	32(%rdi),%xmm4
552	movdqu	48(%rdi),%xmm5
553	movdqu	64(%rdi),%xmm6
554	movdqu	80(%rdi),%xmm7
555	movdqu	96(%rdi),%xmm8
556	movdqu	112(%rdi),%xmm9
557	leaq	128(%rdi),%rdi
558	subq	$0x80,%rdx
559	jmp	L$ecb_enc_loop8_enter
560.p2align	4
// Loop body: store the 8 results of the previous call while loading the
// next 8 inputs, and restore %rcx/%eax for the helper.
561L$ecb_enc_loop8:
562	movups	%xmm2,(%rsi)
563	movq	%r11,%rcx
564	movdqu	(%rdi),%xmm2
565	movl	%r10d,%eax
566	movups	%xmm3,16(%rsi)
567	movdqu	16(%rdi),%xmm3
568	movups	%xmm4,32(%rsi)
569	movdqu	32(%rdi),%xmm4
570	movups	%xmm5,48(%rsi)
571	movdqu	48(%rdi),%xmm5
572	movups	%xmm6,64(%rsi)
573	movdqu	64(%rdi),%xmm6
574	movups	%xmm7,80(%rsi)
575	movdqu	80(%rdi),%xmm7
576	movups	%xmm8,96(%rsi)
577	movdqu	96(%rdi),%xmm8
578	movups	%xmm9,112(%rsi)
579	leaq	128(%rsi),%rsi
580	movdqu	112(%rdi),%xmm9
581	leaq	128(%rdi),%rdi
582L$ecb_enc_loop8_enter:
583
584	call	_aesni_encrypt8
585
// Continue while another full 8-block batch remains (no borrow).
586	subq	$0x80,%rdx
587	jnc	L$ecb_enc_loop8
588
// Store the final full batch, restore %rcx/%eax for the tail, and undo the
// last subtraction so %rdx is the leftover byte count (0..0x70).
589	movups	%xmm2,(%rsi)
590	movq	%r11,%rcx
591	movups	%xmm3,16(%rsi)
592	movl	%r10d,%eax
593	movups	%xmm4,32(%rsi)
594	movups	%xmm5,48(%rsi)
595	movups	%xmm6,64(%rsi)
596	movups	%xmm7,80(%rsi)
597	movups	%xmm8,96(%rsi)
598	movups	%xmm9,112(%rsi)
599	leaq	128(%rsi),%rsi
600	addq	$0x80,%rdx
601	jz	L$ecb_ret
602
// Tail dispatcher for 1..7 remaining blocks. Each cmpq feeds both the jb
// and the following je; the movups between them does not modify flags.
// Inputs are loaded only once it is known that the block exists.
603L$ecb_enc_tail:
604	movups	(%rdi),%xmm2
605	cmpq	$0x20,%rdx
606	jb	L$ecb_enc_one
607	movups	16(%rdi),%xmm3
608	je	L$ecb_enc_two
609	movups	32(%rdi),%xmm4
610	cmpq	$0x40,%rdx
611	jb	L$ecb_enc_three
612	movups	48(%rdi),%xmm5
613	je	L$ecb_enc_four
614	movups	64(%rdi),%xmm6
615	cmpq	$0x60,%rdx
616	jb	L$ecb_enc_five
617	movups	80(%rdi),%xmm7
618	je	L$ecb_enc_six
// Seven blocks: run the 8-block helper with a zero dummy in %xmm9 and
// store only the first seven results.
619	movdqu	96(%rdi),%xmm8
620	xorps	%xmm9,%xmm9
621	call	_aesni_encrypt8
622	movups	%xmm2,(%rsi)
623	movups	%xmm3,16(%rsi)
624	movups	%xmm4,32(%rsi)
625	movups	%xmm5,48(%rsi)
626	movups	%xmm6,64(%rsi)
627	movups	%xmm7,80(%rsi)
628	movups	%xmm8,96(%rsi)
629	jmp	L$ecb_ret
630.p2align	4
// One block: inline single-block loop (aesenc %xmm1,%xmm2 repeated %eax
// times, then aesenclast).
631L$ecb_enc_one:
632	movups	(%rcx),%xmm0
633	movups	16(%rcx),%xmm1
634	leaq	32(%rcx),%rcx
635	xorps	%xmm0,%xmm2
636L$oop_enc1_3:
637.byte	102,15,56,220,209
638	decl	%eax
639	movups	(%rcx),%xmm1
640	leaq	16(%rcx),%rcx
641	jnz	L$oop_enc1_3
642.byte	102,15,56,221,209
643	movups	%xmm2,(%rsi)
644	jmp	L$ecb_ret
645.p2align	4
646L$ecb_enc_two:
647	call	_aesni_encrypt2
648	movups	%xmm2,(%rsi)
649	movups	%xmm3,16(%rsi)
650	jmp	L$ecb_ret
651.p2align	4
652L$ecb_enc_three:
653	call	_aesni_encrypt3
654	movups	%xmm2,(%rsi)
655	movups	%xmm3,16(%rsi)
656	movups	%xmm4,32(%rsi)
657	jmp	L$ecb_ret
658.p2align	4
659L$ecb_enc_four:
660	call	_aesni_encrypt4
661	movups	%xmm2,(%rsi)
662	movups	%xmm3,16(%rsi)
663	movups	%xmm4,32(%rsi)
664	movups	%xmm5,48(%rsi)
665	jmp	L$ecb_ret
666.p2align	4
// Five blocks: run the 6-block helper with a zero dummy in %xmm7 and store
// only the first five results.
667L$ecb_enc_five:
668	xorps	%xmm7,%xmm7
669	call	_aesni_encrypt6
670	movups	%xmm2,(%rsi)
671	movups	%xmm3,16(%rsi)
672	movups	%xmm4,32(%rsi)
673	movups	%xmm5,48(%rsi)
674	movups	%xmm6,64(%rsi)
675	jmp	L$ecb_ret
676.p2align	4
677L$ecb_enc_six:
678	call	_aesni_encrypt6
679	movups	%xmm2,(%rsi)
680	movups	%xmm3,16(%rsi)
681	movups	%xmm4,32(%rsi)
682	movups	%xmm5,48(%rsi)
683	movups	%xmm6,64(%rsi)
684	movups	%xmm7,80(%rsi)
685	jmp	L$ecb_ret
686
// ---- Decrypt ----
// Same structure as the encrypt side, using the _aesni_decrypt* helpers
// and zeroing each data register after it has been stored.
687.p2align	4
688L$ecb_decrypt:
689	cmpq	$0x80,%rdx
690	jb	L$ecb_dec_tail
691
692	movdqu	(%rdi),%xmm2
693	movdqu	16(%rdi),%xmm3
694	movdqu	32(%rdi),%xmm4
695	movdqu	48(%rdi),%xmm5
696	movdqu	64(%rdi),%xmm6
697	movdqu	80(%rdi),%xmm7
698	movdqu	96(%rdi),%xmm8
699	movdqu	112(%rdi),%xmm9
700	leaq	128(%rdi),%rdi
701	subq	$0x80,%rdx
702	jmp	L$ecb_dec_loop8_enter
703.p2align	4
// Loop body: store previous results while loading the next 8 inputs.
704L$ecb_dec_loop8:
705	movups	%xmm2,(%rsi)
706	movq	%r11,%rcx
707	movdqu	(%rdi),%xmm2
708	movl	%r10d,%eax
709	movups	%xmm3,16(%rsi)
710	movdqu	16(%rdi),%xmm3
711	movups	%xmm4,32(%rsi)
712	movdqu	32(%rdi),%xmm4
713	movups	%xmm5,48(%rsi)
714	movdqu	48(%rdi),%xmm5
715	movups	%xmm6,64(%rsi)
716	movdqu	64(%rdi),%xmm6
717	movups	%xmm7,80(%rsi)
718	movdqu	80(%rdi),%xmm7
719	movups	%xmm8,96(%rsi)
720	movdqu	96(%rdi),%xmm8
721	movups	%xmm9,112(%rsi)
722	leaq	128(%rsi),%rsi
723	movdqu	112(%rdi),%xmm9
724	leaq	128(%rdi),%rdi
725L$ecb_dec_loop8_enter:
726
727	call	_aesni_decrypt8
728
// Reload the first key-schedule entry into %xmm0 via the saved pointer.
729	movups	(%r11),%xmm0
730	subq	$0x80,%rdx
731	jnc	L$ecb_dec_loop8
732
// Store the final full batch, zeroing each register right after its store.
733	movups	%xmm2,(%rsi)
734	pxor	%xmm2,%xmm2
735	movq	%r11,%rcx
736	movups	%xmm3,16(%rsi)
737	pxor	%xmm3,%xmm3
738	movl	%r10d,%eax
739	movups	%xmm4,32(%rsi)
740	pxor	%xmm4,%xmm4
741	movups	%xmm5,48(%rsi)
742	pxor	%xmm5,%xmm5
743	movups	%xmm6,64(%rsi)
744	pxor	%xmm6,%xmm6
745	movups	%xmm7,80(%rsi)
746	pxor	%xmm7,%xmm7
747	movups	%xmm8,96(%rsi)
748	pxor	%xmm8,%xmm8
749	movups	%xmm9,112(%rsi)
750	pxor	%xmm9,%xmm9
751	leaq	128(%rsi),%rsi
752	addq	$0x80,%rdx
753	jz	L$ecb_ret
754
// Tail dispatcher for 1..7 remaining blocks (same cmpq/jb/je pattern as
// the encrypt tail).
755L$ecb_dec_tail:
756	movups	(%rdi),%xmm2
757	cmpq	$0x20,%rdx
758	jb	L$ecb_dec_one
759	movups	16(%rdi),%xmm3
760	je	L$ecb_dec_two
761	movups	32(%rdi),%xmm4
762	cmpq	$0x40,%rdx
763	jb	L$ecb_dec_three
764	movups	48(%rdi),%xmm5
765	je	L$ecb_dec_four
766	movups	64(%rdi),%xmm6
767	cmpq	$0x60,%rdx
768	jb	L$ecb_dec_five
769	movups	80(%rdi),%xmm7
770	je	L$ecb_dec_six
// Seven blocks: 8-block helper with a zero dummy in %xmm9.
771	movups	96(%rdi),%xmm8
772	movups	(%rcx),%xmm0
773	xorps	%xmm9,%xmm9
774	call	_aesni_decrypt8
775	movups	%xmm2,(%rsi)
776	pxor	%xmm2,%xmm2
777	movups	%xmm3,16(%rsi)
778	pxor	%xmm3,%xmm3
779	movups	%xmm4,32(%rsi)
780	pxor	%xmm4,%xmm4
781	movups	%xmm5,48(%rsi)
782	pxor	%xmm5,%xmm5
783	movups	%xmm6,64(%rsi)
784	pxor	%xmm6,%xmm6
785	movups	%xmm7,80(%rsi)
786	pxor	%xmm7,%xmm7
787	movups	%xmm8,96(%rsi)
788	pxor	%xmm8,%xmm8
789	pxor	%xmm9,%xmm9
790	jmp	L$ecb_ret
791.p2align	4
// One block: inline single-block loop (aesdec %xmm1,%xmm2 repeated %eax
// times, then aesdeclast).
792L$ecb_dec_one:
793	movups	(%rcx),%xmm0
794	movups	16(%rcx),%xmm1
795	leaq	32(%rcx),%rcx
796	xorps	%xmm0,%xmm2
797L$oop_dec1_4:
798.byte	102,15,56,222,209
799	decl	%eax
800	movups	(%rcx),%xmm1
801	leaq	16(%rcx),%rcx
802	jnz	L$oop_dec1_4
803.byte	102,15,56,223,209
804	movups	%xmm2,(%rsi)
805	pxor	%xmm2,%xmm2
806	jmp	L$ecb_ret
807.p2align	4
808L$ecb_dec_two:
809	call	_aesni_decrypt2
810	movups	%xmm2,(%rsi)
811	pxor	%xmm2,%xmm2
812	movups	%xmm3,16(%rsi)
813	pxor	%xmm3,%xmm3
814	jmp	L$ecb_ret
815.p2align	4
816L$ecb_dec_three:
817	call	_aesni_decrypt3
818	movups	%xmm2,(%rsi)
819	pxor	%xmm2,%xmm2
820	movups	%xmm3,16(%rsi)
821	pxor	%xmm3,%xmm3
822	movups	%xmm4,32(%rsi)
823	pxor	%xmm4,%xmm4
824	jmp	L$ecb_ret
825.p2align	4
826L$ecb_dec_four:
827	call	_aesni_decrypt4
828	movups	%xmm2,(%rsi)
829	pxor	%xmm2,%xmm2
830	movups	%xmm3,16(%rsi)
831	pxor	%xmm3,%xmm3
832	movups	%xmm4,32(%rsi)
833	pxor	%xmm4,%xmm4
834	movups	%xmm5,48(%rsi)
835	pxor	%xmm5,%xmm5
836	jmp	L$ecb_ret
837.p2align	4
// Five blocks: 6-block helper with a zero dummy in %xmm7.
838L$ecb_dec_five:
839	xorps	%xmm7,%xmm7
840	call	_aesni_decrypt6
841	movups	%xmm2,(%rsi)
842	pxor	%xmm2,%xmm2
843	movups	%xmm3,16(%rsi)
844	pxor	%xmm3,%xmm3
845	movups	%xmm4,32(%rsi)
846	pxor	%xmm4,%xmm4
847	movups	%xmm5,48(%rsi)
848	pxor	%xmm5,%xmm5
849	movups	%xmm6,64(%rsi)
850	pxor	%xmm6,%xmm6
851	pxor	%xmm7,%xmm7
852	jmp	L$ecb_ret
853.p2align	4
// Six blocks: falls through into the common exit below.
854L$ecb_dec_six:
855	call	_aesni_decrypt6
856	movups	%xmm2,(%rsi)
857	pxor	%xmm2,%xmm2
858	movups	%xmm3,16(%rsi)
859	pxor	%xmm3,%xmm3
860	movups	%xmm4,32(%rsi)
861	pxor	%xmm4,%xmm4
862	movups	%xmm5,48(%rsi)
863	pxor	%xmm5,%xmm5
864	movups	%xmm6,64(%rsi)
865	pxor	%xmm6,%xmm6
866	movups	%xmm7,80(%rsi)
867	pxor	%xmm7,%xmm7
868
// Common exit: zero the two round-key registers and return.
869L$ecb_ret:
870	xorps	%xmm0,%xmm0
871	pxor	%xmm1,%xmm1
872	ret
873
874
// _aes_hw_ctr32_encrypt_blocks: CTR-mode keystream XOR over whole 16-byte
// blocks. The counter is the last 4 bytes of the counter block, treated as
// a big-endian 32-bit integer (it is bswap'ed before being incremented and
// bswap'ed back before use).
//
// Registers on entry (System V AMD64 integer-argument order):
//   %rdi  input pointer
//   %rsi  output pointer
//   %rdx  number of 16-byte blocks
//   %rcx  pointer to the key schedule (loop count read from offset 240)
//   %r8   pointer to the 16-byte initial counter block; this code only
//         reads it, the incremented counter is never written back
// Bulk path: saves %rbp on the stack, reserves a 16-byte-aligned 128-byte
// scratch area holding eight counter blocks, and keeps the entry %rsp in
// %r11. The scratch area and %xmm0-%xmm15 are zeroed before returning.
// NOTE(review): a block count of 0 is not special-cased; it would reach
// L$ctr32_loop3, which unconditionally processes one block. Callers
// presumably never pass 0 -- confirm against the C wrapper.
//
// Opcode notes:
//   .byte 102,15,56,220,MODRM      aesenc     (209..249: %xmm1 -> %xmm2..7,
//                                              208..248: %xmm0 -> %xmm2..7)
//   .byte 102,68,15,56,220,MODRM   aesenc into %xmm8 (193/192) or %xmm9
//                                  (201/200) from %xmm1/%xmm0
//   .byte 102,15,56,221,209..233   aesenclast %xmm1 -> %xmm2..%xmm5
//   .byte 102,65,15,56,221,MODRM   aesenclast %xmm10..%xmm15 -> %xmm2..%xmm7
//   .byte 102,15,58,34,MODRM,3     pinsrd $3 (insert into the top dword)
//   .byte 0x66,0x90                2-byte NOP
875.globl	_aes_hw_ctr32_encrypt_blocks
876.private_extern _aes_hw_ctr32_encrypt_blocks
877
878.p2align	4
879_aes_hw_ctr32_encrypt_blocks:
880
// _CET_ENDBR is a macro defined outside this file.
881_CET_ENDBR
882#ifdef BORINGSSL_DISPATCH_TEST
// Test builds only: set byte 0 of BORINGSSL_function_hit to 1.
883	movb	$1,_BORINGSSL_function_hit(%rip)
884#endif
885	cmpq	$1,%rdx
886	jne	L$ctr32_bulk
887
888
889
// ---- Single-block fast path (no stack frame) ----
// Encrypt the counter block in %xmm2, XOR with the input block in %xmm3.
// %edx is reused as the loop counter.
890	movups	(%r8),%xmm2
891	movups	(%rdi),%xmm3
892	movl	240(%rcx),%edx
893	movups	(%rcx),%xmm0
894	movups	16(%rcx),%xmm1
895	leaq	32(%rcx),%rcx
896	xorps	%xmm0,%xmm2
897L$oop_enc1_5:
898.byte	102,15,56,220,209
899	decl	%edx
900	movups	(%rcx),%xmm1
901	leaq	16(%rcx),%rcx
902	jnz	L$oop_enc1_5
903.byte	102,15,56,221,209
904	pxor	%xmm0,%xmm0
905	pxor	%xmm1,%xmm1
906	xorps	%xmm3,%xmm2
907	pxor	%xmm3,%xmm3
908	movups	%xmm2,(%rsi)
909	xorps	%xmm2,%xmm2
910	jmp	L$ctr32_epilogue
911
// ---- Bulk path ----
912.p2align	4
913L$ctr32_bulk:
// r11 = %rsp at entry; the pushed %rbp therefore lives at -8(%r11).
914	leaq	(%rsp),%r11
915
916	pushq	%rbp
917
// 128-byte scratch area, aligned to 16 so movdqa/movaps can be used on it.
918	subq	$128,%rsp
919	andq	$-16,%rsp
920
921
922
923
// xmm2 = counter block XOR key-schedule entry 0 (whitening is folded into
// the stored counter blocks). r8d = counter value in host byte order,
// ebp = last dword of entry 0, used to re-apply the whitening whenever a
// new counter dword is written.
924	movdqu	(%r8),%xmm2
925	movdqu	(%rcx),%xmm0
926	movl	12(%r8),%r8d
927	pxor	%xmm0,%xmm2
928	movl	12(%rcx),%ebp
929	movdqa	%xmm2,0(%rsp)
930	bswapl	%r8d
931	movdqa	%xmm2,%xmm3
932	movdqa	%xmm2,%xmm4
933	movdqa	%xmm2,%xmm5
934	movdqa	%xmm2,64(%rsp)
935	movdqa	%xmm2,80(%rsp)
936	movdqa	%xmm2,96(%rsp)
// The block count is parked in %r10 while %rdx is used as scratch.
937	movq	%rdx,%r10
938	movdqa	%xmm2,112(%rsp)
939
// Build counter blocks +1..+7: for each, compute bswap(ctr+i) XOR ebp and
// place it in the top dword (pinsrd for %xmm3-%xmm5, movl to offset
// i*16+12 of the scratch area for blocks 4..7).
940	leaq	1(%r8),%rax
941	leaq	2(%r8),%rdx
942	bswapl	%eax
943	bswapl	%edx
944	xorl	%ebp,%eax
945	xorl	%ebp,%edx
// pinsrd $3,%eax,%xmm3
946.byte	102,15,58,34,216,3
947	leaq	3(%r8),%rax
948	movdqa	%xmm3,16(%rsp)
// pinsrd $3,%edx,%xmm4
949.byte	102,15,58,34,226,3
950	bswapl	%eax
// Block count back into %rdx.
951	movq	%r10,%rdx
952	leaq	4(%r8),%r10
953	movdqa	%xmm4,32(%rsp)
954	xorl	%ebp,%eax
955	bswapl	%r10d
// pinsrd $3,%eax,%xmm5
956.byte	102,15,58,34,232,3
957	xorl	%ebp,%r10d
958	movdqa	%xmm5,48(%rsp)
959	leaq	5(%r8),%r9
960	movl	%r10d,64+12(%rsp)
961	bswapl	%r9d
962	leaq	6(%r8),%r10
// eax = loop count from the key schedule.
963	movl	240(%rcx),%eax
964	xorl	%ebp,%r9d
965	bswapl	%r10d
966	movl	%r9d,80+12(%rsp)
967	xorl	%ebp,%r10d
968	leaq	7(%r8),%r9
969	movl	%r10d,96+12(%rsp)
970	bswapl	%r9d
971	xorl	%ebp,%r9d
972	movl	%r9d,112+12(%rsp)
973
// xmm1 = key-schedule entry 1; xmm6/xmm7 = counter blocks +4/+5.
974	movups	16(%rcx),%xmm1
975
976	movdqa	64(%rsp),%xmm6
977	movdqa	80(%rsp),%xmm7
978
979	cmpq	$8,%rdx
980	jb	L$ctr32_tail
981
// Bias %rcx by +128 for the main loop: key-schedule offsets below are
// written as N-128(%rcx).
982	leaq	128(%rcx),%rcx
983	subq	$8,%rdx
984	jmp	L$ctr32_loop8
985
// ---- Main loop: 8 blocks per iteration ----
// The AES rounds on %xmm2-%xmm9 are interleaved with generating the NEXT
// eight counter dwords (ctr+8 .. ctr+15) into the scratch area.
986.p2align	5
987L$ctr32_loop8:
988	addl	$8,%r8d
989	movdqa	96(%rsp),%xmm8
990.byte	102,15,56,220,209
991	movl	%r8d,%r9d
992	movdqa	112(%rsp),%xmm9
993.byte	102,15,56,220,217
994	bswapl	%r9d
995	movups	32-128(%rcx),%xmm0
996.byte	102,15,56,220,225
997	xorl	%ebp,%r9d
998	nop
999.byte	102,15,56,220,233
1000	movl	%r9d,0+12(%rsp)
1001	leaq	1(%r8),%r9
1002.byte	102,15,56,220,241
1003.byte	102,15,56,220,249
1004.byte	102,68,15,56,220,193
1005.byte	102,68,15,56,220,201
1006	movups	48-128(%rcx),%xmm1
1007	bswapl	%r9d
1008.byte	102,15,56,220,208
1009.byte	102,15,56,220,216
1010	xorl	%ebp,%r9d
1011.byte	0x66,0x90
1012.byte	102,15,56,220,224
1013.byte	102,15,56,220,232
1014	movl	%r9d,16+12(%rsp)
1015	leaq	2(%r8),%r9
1016.byte	102,15,56,220,240
1017.byte	102,15,56,220,248
1018.byte	102,68,15,56,220,192
1019.byte	102,68,15,56,220,200
1020	movups	64-128(%rcx),%xmm0
1021	bswapl	%r9d
1022.byte	102,15,56,220,209
1023.byte	102,15,56,220,217
1024	xorl	%ebp,%r9d
1025.byte	0x66,0x90
1026.byte	102,15,56,220,225
1027.byte	102,15,56,220,233
1028	movl	%r9d,32+12(%rsp)
1029	leaq	3(%r8),%r9
1030.byte	102,15,56,220,241
1031.byte	102,15,56,220,249
1032.byte	102,68,15,56,220,193
1033.byte	102,68,15,56,220,201
1034	movups	80-128(%rcx),%xmm1
1035	bswapl	%r9d
1036.byte	102,15,56,220,208
1037.byte	102,15,56,220,216
1038	xorl	%ebp,%r9d
1039.byte	0x66,0x90
1040.byte	102,15,56,220,224
1041.byte	102,15,56,220,232
1042	movl	%r9d,48+12(%rsp)
1043	leaq	4(%r8),%r9
1044.byte	102,15,56,220,240
1045.byte	102,15,56,220,248
1046.byte	102,68,15,56,220,192
1047.byte	102,68,15,56,220,200
1048	movups	96-128(%rcx),%xmm0
1049	bswapl	%r9d
1050.byte	102,15,56,220,209
1051.byte	102,15,56,220,217
1052	xorl	%ebp,%r9d
1053.byte	0x66,0x90
1054.byte	102,15,56,220,225
1055.byte	102,15,56,220,233
1056	movl	%r9d,64+12(%rsp)
1057	leaq	5(%r8),%r9
1058.byte	102,15,56,220,241
1059.byte	102,15,56,220,249
1060.byte	102,68,15,56,220,193
1061.byte	102,68,15,56,220,201
1062	movups	112-128(%rcx),%xmm1
1063	bswapl	%r9d
1064.byte	102,15,56,220,208
1065.byte	102,15,56,220,216
1066	xorl	%ebp,%r9d
1067.byte	0x66,0x90
1068.byte	102,15,56,220,224
1069.byte	102,15,56,220,232
1070	movl	%r9d,80+12(%rsp)
1071	leaq	6(%r8),%r9
1072.byte	102,15,56,220,240
1073.byte	102,15,56,220,248
1074.byte	102,68,15,56,220,192
1075.byte	102,68,15,56,220,200
1076	movups	128-128(%rcx),%xmm0
1077	bswapl	%r9d
1078.byte	102,15,56,220,209
1079.byte	102,15,56,220,217
1080	xorl	%ebp,%r9d
1081.byte	0x66,0x90
1082.byte	102,15,56,220,225
1083.byte	102,15,56,220,233
1084	movl	%r9d,96+12(%rsp)
1085	leaq	7(%r8),%r9
1086.byte	102,15,56,220,241
1087.byte	102,15,56,220,249
1088.byte	102,68,15,56,220,193
1089.byte	102,68,15,56,220,201
1090	movups	144-128(%rcx),%xmm1
1091	bswapl	%r9d
1092.byte	102,15,56,220,208
1093.byte	102,15,56,220,216
1094.byte	102,15,56,220,224
1095	xorl	%ebp,%r9d
// Start fetching input: xmm10 = input block 0.
1096	movdqu	0(%rdi),%xmm10
1097.byte	102,15,56,220,232
1098	movl	%r9d,112+12(%rsp)
// Key-size dispatch on the loop count. The flags from this cmpl are
// consumed by the jb below AND by the later je; only aesenc/movups (which
// do not modify flags) execute in between. Counts below 11 finish now,
// 11 runs two more rounds, anything larger runs four more (presumably
// 9/11/13 for 128/192/256-bit keys -- confirm against the key setup code).
1099	cmpl	$11,%eax
1100.byte	102,15,56,220,240
1101.byte	102,15,56,220,248
1102.byte	102,68,15,56,220,192
1103.byte	102,68,15,56,220,200
1104	movups	160-128(%rcx),%xmm0
1105
1106	jb	L$ctr32_enc_done
1107
1108.byte	102,15,56,220,209
1109.byte	102,15,56,220,217
1110.byte	102,15,56,220,225
1111.byte	102,15,56,220,233
1112.byte	102,15,56,220,241
1113.byte	102,15,56,220,249
1114.byte	102,68,15,56,220,193
1115.byte	102,68,15,56,220,201
1116	movups	176-128(%rcx),%xmm1
1117
1118.byte	102,15,56,220,208
1119.byte	102,15,56,220,216
1120.byte	102,15,56,220,224
1121.byte	102,15,56,220,232
1122.byte	102,15,56,220,240
1123.byte	102,15,56,220,248
1124.byte	102,68,15,56,220,192
1125.byte	102,68,15,56,220,200
1126	movups	192-128(%rcx),%xmm0
1127	je	L$ctr32_enc_done
1128
1129.byte	102,15,56,220,209
1130.byte	102,15,56,220,217
1131.byte	102,15,56,220,225
1132.byte	102,15,56,220,233
1133.byte	102,15,56,220,241
1134.byte	102,15,56,220,249
1135.byte	102,68,15,56,220,193
1136.byte	102,68,15,56,220,201
1137	movups	208-128(%rcx),%xmm1
1138
1139.byte	102,15,56,220,208
1140.byte	102,15,56,220,216
1141.byte	102,15,56,220,224
1142.byte	102,15,56,220,232
1143.byte	102,15,56,220,240
1144.byte	102,15,56,220,248
1145.byte	102,68,15,56,220,192
1146.byte	102,68,15,56,220,200
1147	movups	224-128(%rcx),%xmm0
1148	jmp	L$ctr32_enc_done
1149
// Here %xmm1 holds the second-to-last round key and %xmm0 the last one.
// Each input block is XORed with the last round key and the result is used
// as the aesenclast operand, so aesenclast yields keystream XOR input
// directly, without a separate XOR pass.
1150.p2align	4
1151L$ctr32_enc_done:
1152	movdqu	16(%rdi),%xmm11
1153	pxor	%xmm0,%xmm10
1154	movdqu	32(%rdi),%xmm12
1155	pxor	%xmm0,%xmm11
1156	movdqu	48(%rdi),%xmm13
1157	pxor	%xmm0,%xmm12
1158	movdqu	64(%rdi),%xmm14
1159	pxor	%xmm0,%xmm13
1160	movdqu	80(%rdi),%xmm15
1161	pxor	%xmm0,%xmm14
1162	prefetcht0	448(%rdi)
1163	prefetcht0	512(%rdi)
1164	pxor	%xmm0,%xmm15
// Second-to-last round (aesenc with %xmm1) on all eight blocks.
1165.byte	102,15,56,220,209
1166.byte	102,15,56,220,217
1167.byte	102,15,56,220,225
1168.byte	102,15,56,220,233
1169.byte	102,15,56,220,241
1170.byte	102,15,56,220,249
1171.byte	102,68,15,56,220,193
1172.byte	102,68,15,56,220,201
// %xmm1 is reused for input block 6; %rdi advances past this batch, so
// block 7 is addressed as 112-128(%rdi) below.
1173	movdqu	96(%rdi),%xmm1
1174	leaq	128(%rdi),%rdi
1175
// aesenclast on each block, interleaved with reloading the next batch of
// counter blocks (0..5) from the scratch area into %xmm11-%xmm15 and %xmm0
// once those registers are free.
1176.byte	102,65,15,56,221,210
1177	pxor	%xmm0,%xmm1
1178	movdqu	112-128(%rdi),%xmm10
1179.byte	102,65,15,56,221,219
1180	pxor	%xmm0,%xmm10
1181	movdqa	0(%rsp),%xmm11
1182.byte	102,65,15,56,221,228
1183.byte	102,65,15,56,221,237
1184	movdqa	16(%rsp),%xmm12
1185	movdqa	32(%rsp),%xmm13
1186.byte	102,65,15,56,221,246
1187.byte	102,65,15,56,221,255
1188	movdqa	48(%rsp),%xmm14
1189	movdqa	64(%rsp),%xmm15
// aesenclast %xmm1,%xmm8
1190.byte	102,68,15,56,221,193
1191	movdqa	80(%rsp),%xmm0
// xmm1 = key-schedule entry 1 again, ready for the next iteration.
1192	movups	16-128(%rcx),%xmm1
// aesenclast %xmm10,%xmm9
1193.byte	102,69,15,56,221,202
1194
// Store the eight output blocks and move the next counter blocks into
// %xmm2-%xmm7 (%xmm8/%xmm9 are reloaded at the top of the loop).
1195	movups	%xmm2,(%rsi)
1196	movdqa	%xmm11,%xmm2
1197	movups	%xmm3,16(%rsi)
1198	movdqa	%xmm12,%xmm3
1199	movups	%xmm4,32(%rsi)
1200	movdqa	%xmm13,%xmm4
1201	movups	%xmm5,48(%rsi)
1202	movdqa	%xmm14,%xmm5
1203	movups	%xmm6,64(%rsi)
1204	movdqa	%xmm15,%xmm6
1205	movups	%xmm7,80(%rsi)
1206	movdqa	%xmm0,%xmm7
1207	movups	%xmm8,96(%rsi)
1208	movups	%xmm9,112(%rsi)
1209	leaq	128(%rsi),%rsi
1210
// Continue while another full 8-block batch remains (no borrow).
1211	subq	$8,%rdx
1212	jnc	L$ctr32_loop8
1213
// Undo the last subtraction: %rdx = leftover blocks (0..7); remove the
// +128 bias from %rcx before entering the tail.
1214	addq	$8,%rdx
1215	jz	L$ctr32_done
1216	leaq	-128(%rcx),%rcx
1217
// ---- Tail: 1..7 blocks ----
// On entry %xmm2-%xmm7 hold whitened counter blocks, %xmm1 holds key
// entry 1, %eax the loop count. %rcx is advanced to entry 1.
1218L$ctr32_tail:
1219
1220
1221	leaq	16(%rcx),%rcx
1222	cmpq	$4,%rdx
1223	jb	L$ctr32_loop3
1224	je	L$ctr32_loop4
1225
1226
// 5..7 blocks: perform the first round inline on %xmm2-%xmm8, set up
// %rcx/%rax/%xmm0 the way _aesni_encrypt8 does, then `call` into the
// middle of that helper at L$enc_loop8_enter. %xmm9 is a zero dummy whose
// result is never stored.
1227	shll	$4,%eax
1228	movdqa	96(%rsp),%xmm8
1229	pxor	%xmm9,%xmm9
1230
1231	movups	16(%rcx),%xmm0
1232.byte	102,15,56,220,209
1233.byte	102,15,56,220,217
1234	leaq	32-16(%rcx,%rax,1),%rcx
1235	negq	%rax
1236.byte	102,15,56,220,225
1237	addq	$16,%rax
1238	movups	(%rdi),%xmm10
1239.byte	102,15,56,220,233
1240.byte	102,15,56,220,241
1241	movups	16(%rdi),%xmm11
1242	movups	32(%rdi),%xmm12
1243.byte	102,15,56,220,249
1244.byte	102,68,15,56,220,193
1245
1246	call	L$enc_loop8_enter
1247
// XOR keystream with input and store blocks 0..4 unconditionally; blocks
// 5 and 6 depend on the cmpq $6 below (its flags feed both jb and je; the
// movups/xorps in between do not modify flags).
1248	movdqu	48(%rdi),%xmm13
1249	pxor	%xmm10,%xmm2
1250	movdqu	64(%rdi),%xmm10
1251	pxor	%xmm11,%xmm3
1252	movdqu	%xmm2,(%rsi)
1253	pxor	%xmm12,%xmm4
1254	movdqu	%xmm3,16(%rsi)
1255	pxor	%xmm13,%xmm5
1256	movdqu	%xmm4,32(%rsi)
1257	pxor	%xmm10,%xmm6
1258	movdqu	%xmm5,48(%rsi)
1259	movdqu	%xmm6,64(%rsi)
1260	cmpq	$6,%rdx
1261	jb	L$ctr32_done
1262
1263	movups	80(%rdi),%xmm11
1264	xorps	%xmm11,%xmm7
1265	movups	%xmm7,80(%rsi)
1266	je	L$ctr32_done
1267
1268	movups	96(%rdi),%xmm12
1269	xorps	%xmm12,%xmm8
1270	movups	%xmm8,96(%rsi)
1271	jmp	L$ctr32_done
1272
// Exactly 4 blocks: one aesenc per iteration on %xmm2-%xmm5, %eax
// iterations (jnz tests the flags from decl), then aesenclast.
1273.p2align	5
1274L$ctr32_loop4:
1275.byte	102,15,56,220,209
1276	leaq	16(%rcx),%rcx
1277	decl	%eax
1278.byte	102,15,56,220,217
1279.byte	102,15,56,220,225
1280.byte	102,15,56,220,233
1281	movups	(%rcx),%xmm1
1282	jnz	L$ctr32_loop4
1283.byte	102,15,56,221,209
1284.byte	102,15,56,221,217
1285	movups	(%rdi),%xmm10
1286	movups	16(%rdi),%xmm11
1287.byte	102,15,56,221,225
1288.byte	102,15,56,221,233
1289	movups	32(%rdi),%xmm12
1290	movups	48(%rdi),%xmm13
1291
1292	xorps	%xmm10,%xmm2
1293	movups	%xmm2,(%rsi)
1294	xorps	%xmm11,%xmm3
1295	movups	%xmm3,16(%rsi)
1296	pxor	%xmm12,%xmm4
1297	movdqu	%xmm4,32(%rsi)
1298	pxor	%xmm13,%xmm5
1299	movdqu	%xmm5,48(%rsi)
1300	jmp	L$ctr32_done
1301
// Fewer than 4 blocks: always computes three keystream blocks, then stores
// one, two or three depending on %rdx.
1302.p2align	5
1303L$ctr32_loop3:
1304.byte	102,15,56,220,209
1305	leaq	16(%rcx),%rcx
1306	decl	%eax
1307.byte	102,15,56,220,217
1308.byte	102,15,56,220,225
1309	movups	(%rcx),%xmm1
1310	jnz	L$ctr32_loop3
1311.byte	102,15,56,221,209
1312.byte	102,15,56,221,217
1313.byte	102,15,56,221,225
1314
// First block is read and written unconditionally.
1315	movups	(%rdi),%xmm10
1316	xorps	%xmm10,%xmm2
1317	movups	%xmm2,(%rsi)
1318	cmpq	$2,%rdx
1319	jb	L$ctr32_done
1320
1321	movups	16(%rdi),%xmm11
1322	xorps	%xmm11,%xmm3
1323	movups	%xmm3,16(%rsi)
1324	je	L$ctr32_done
1325
1326	movups	32(%rdi),%xmm12
1327	xorps	%xmm12,%xmm4
1328	movups	%xmm4,32(%rsi)
1329
// ---- Bulk-path exit ----
// Zero %ebp, %xmm0-%xmm15 and the 128-byte scratch area, then restore
// %rbp from its save slot and %rsp from %r11.
1330L$ctr32_done:
1331	xorps	%xmm0,%xmm0
1332	xorl	%ebp,%ebp
1333	pxor	%xmm1,%xmm1
1334	pxor	%xmm2,%xmm2
1335	pxor	%xmm3,%xmm3
1336	pxor	%xmm4,%xmm4
1337	pxor	%xmm5,%xmm5
1338	pxor	%xmm6,%xmm6
1339	pxor	%xmm7,%xmm7
1340	movaps	%xmm0,0(%rsp)
1341	pxor	%xmm8,%xmm8
1342	movaps	%xmm0,16(%rsp)
1343	pxor	%xmm9,%xmm9
1344	movaps	%xmm0,32(%rsp)
1345	pxor	%xmm10,%xmm10
1346	movaps	%xmm0,48(%rsp)
1347	pxor	%xmm11,%xmm11
1348	movaps	%xmm0,64(%rsp)
1349	pxor	%xmm12,%xmm12
1350	movaps	%xmm0,80(%rsp)
1351	pxor	%xmm13,%xmm13
1352	movaps	%xmm0,96(%rsp)
1353	pxor	%xmm14,%xmm14
1354	movaps	%xmm0,112(%rsp)
1355	pxor	%xmm15,%xmm15
1356	movq	-8(%r11),%rbp
1357
1358	leaq	(%r11),%rsp
1359
// Shared return; the single-block fast path jumps straight here because it
// never created a stack frame.
1360L$ctr32_epilogue:
1361	ret
1362
1363
1364.globl	_aes_hw_cbc_encrypt
1365.private_extern _aes_hw_cbc_encrypt
1366
1367.p2align	4
1368_aes_hw_cbc_encrypt:
1369
1370_CET_ENDBR
1371	testq	%rdx,%rdx
1372	jz	L$cbc_ret
1373
1374	movl	240(%rcx),%r10d
1375	movq	%rcx,%r11
1376	testl	%r9d,%r9d
1377	jz	L$cbc_decrypt
1378
1379	movups	(%r8),%xmm2
1380	movl	%r10d,%eax
1381	cmpq	$16,%rdx
1382	jb	L$cbc_enc_tail
1383	subq	$16,%rdx
1384	jmp	L$cbc_enc_loop
1385.p2align	4
1386L$cbc_enc_loop:
1387	movups	(%rdi),%xmm3
1388	leaq	16(%rdi),%rdi
1389
1390	movups	(%rcx),%xmm0
1391	movups	16(%rcx),%xmm1
1392	xorps	%xmm0,%xmm3
1393	leaq	32(%rcx),%rcx
1394	xorps	%xmm3,%xmm2
1395L$oop_enc1_6:
1396.byte	102,15,56,220,209
1397	decl	%eax
1398	movups	(%rcx),%xmm1
1399	leaq	16(%rcx),%rcx
1400	jnz	L$oop_enc1_6
1401.byte	102,15,56,221,209
1402	movl	%r10d,%eax
1403	movq	%r11,%rcx
1404	movups	%xmm2,0(%rsi)
1405	leaq	16(%rsi),%rsi
1406	subq	$16,%rdx
1407	jnc	L$cbc_enc_loop
1408	addq	$16,%rdx
1409	jnz	L$cbc_enc_tail
1410	pxor	%xmm0,%xmm0
1411	pxor	%xmm1,%xmm1
1412	movups	%xmm2,(%r8)
1413	pxor	%xmm2,%xmm2
1414	pxor	%xmm3,%xmm3
1415	jmp	L$cbc_ret
1416
1417L$cbc_enc_tail:
1418	movq	%rdx,%rcx
1419	xchgq	%rdi,%rsi
1420.long	0x9066A4F3
1421	movl	$16,%ecx
1422	subq	%rdx,%rcx
1423	xorl	%eax,%eax
1424.long	0x9066AAF3
1425	leaq	-16(%rdi),%rdi
1426	movl	%r10d,%eax
1427	movq	%rdi,%rsi
1428	movq	%r11,%rcx
1429	xorq	%rdx,%rdx
1430	jmp	L$cbc_enc_loop
1431
// CBC decrypt entry. A length of exactly 16 bytes is handled here without
// setting up a stack frame; any other length goes to L$cbc_decrypt_bulk.
1432.p2align	4
1433L$cbc_decrypt:
1434	cmpq	$16,%rdx
1435	jne	L$cbc_decrypt_bulk
1436
1437
1438
// %xmm2 = ciphertext block, %xmm3 = IV, %xmm4 = copy of the ciphertext (next IV).
1439	movdqu	(%rdi),%xmm2
1440	movdqu	(%r8),%xmm3
1441	movdqa	%xmm2,%xmm4
1442	movups	(%rcx),%xmm0
1443	movups	16(%rcx),%xmm1
1444	leaq	32(%rcx),%rcx
1445	xorps	%xmm0,%xmm2
// %r10d (round count) iterations of aesdec %xmm1,%xmm2.
1446L$oop_dec1_7:
1447.byte	102,15,56,222,209
1448	decl	%r10d
1449	movups	(%rcx),%xmm1
1450	leaq	16(%rcx),%rcx
1451	jnz	L$oop_dec1_7
// aesdeclast %xmm1,%xmm2
1452.byte	102,15,56,223,209
// Wipe key material, write the new IV, XOR with the old IV and store the plaintext.
1453	pxor	%xmm0,%xmm0
1454	pxor	%xmm1,%xmm1
1455	movdqu	%xmm4,(%r8)
1456	xorps	%xmm3,%xmm2
1457	pxor	%xmm3,%xmm3
1458	movups	%xmm2,(%rsi)
1459	pxor	%xmm2,%xmm2
1460	jmp	L$cbc_ret
// Multi-block CBC decrypt setup.
// %r11 keeps the entry %rsp; %rbp is saved at -8(%r11); at least 16 bytes of
// 16-byte-aligned scratch are reserved at (%rsp) for the partial-block tail.
1461.p2align	4
1462L$cbc_decrypt_bulk:
1463	leaq	(%rsp),%r11
1464
1465	pushq	%rbp
1466
1467	subq	$16,%rsp
1468	andq	$-16,%rsp
1469	movq	%rcx,%rbp
// %xmm10 = IV (chaining value), %eax = round count.
1470	movups	(%r8),%xmm10
1471	movl	%r10d,%eax
// 0x50 = 80 bytes: five blocks or fewer go straight to the tail code.
1472	cmpq	$0x50,%rdx
1473	jbe	L$cbc_dec_tail
1474
// Load six ciphertext blocks into %xmm2-%xmm7 and keep copies of the first five
// in %xmm11-%xmm15 for the CBC XOR; %xmm0 = round key 0.
1475	movups	(%rcx),%xmm0
1476	movdqu	0(%rdi),%xmm2
1477	movdqu	16(%rdi),%xmm3
1478	movdqa	%xmm2,%xmm11
1479	movdqu	32(%rdi),%xmm4
1480	movdqa	%xmm3,%xmm12
1481	movdqu	48(%rdi),%xmm5
1482	movdqa	%xmm4,%xmm13
1483	movdqu	64(%rdi),%xmm6
1484	movdqa	%xmm5,%xmm14
1485	movdqu	80(%rdi),%xmm7
1486	movdqa	%xmm6,%xmm15
// 0x70 = 112 bytes: at most seven blocks are handled outside the eight-block loop.
1487	cmpq	$0x70,%rdx
1488	jbe	L$cbc_dec_six_or_seven
1489
// Bias the length by -0x70 and the key pointer by +112 for the eight-block loop,
// which addresses round keys as N-112(%rcx).
1490	subq	$0x70,%rdx
1491	leaq	112(%rcx),%rcx
1492	jmp	L$cbc_dec_loop8_enter
// Eight-block CBC decrypt loop: AES rounds for %xmm2-%xmm9 in parallel.
// On entry %xmm2-%xmm7 hold six ciphertext blocks, %xmm0 holds round key 0 and
// %rcx is the key schedule pointer plus 112. Round keys alternate between
// %xmm1 and %xmm0; each group of eight .byte lines is one aesdec round applied
// to %xmm2..%xmm9 (the 102,68,... forms target %xmm8/%xmm9).
1493.p2align	4
1494L$cbc_dec_loop8:
// Store the eighth plaintext block left pending by the previous iteration.
1495	movups	%xmm9,(%rsi)
1496	leaq	16(%rsi),%rsi
1497L$cbc_dec_loop8_enter:
// Load blocks seven and eight, and XOR round key 0 into all eight blocks.
1498	movdqu	96(%rdi),%xmm8
1499	pxor	%xmm0,%xmm2
1500	movdqu	112(%rdi),%xmm9
1501	pxor	%xmm0,%xmm3
1502	movups	16-112(%rcx),%xmm1
1503	pxor	%xmm0,%xmm4
// %rbp = -1 and CF = (%rdx < 0x70); the flags survive the SSE/AES instructions
// below until the adcq consumes them.
1504	movq	$-1,%rbp
1505	cmpq	$0x70,%rdx
1506	pxor	%xmm0,%xmm5
1507	pxor	%xmm0,%xmm6
1508	pxor	%xmm0,%xmm7
1509	pxor	%xmm0,%xmm8
1510
// Round 1 (key in %xmm1).
1511.byte	102,15,56,222,209
1512	pxor	%xmm0,%xmm9
1513	movups	32-112(%rcx),%xmm0
1514.byte	102,15,56,222,217
1515.byte	102,15,56,222,225
1516.byte	102,15,56,222,233
1517.byte	102,15,56,222,241
1518.byte	102,15,56,222,249
1519.byte	102,68,15,56,222,193
// %rbp = (%rdx >= 0x70) ? %rdi + 128 : %rdi -- the address from which
// L$cbc_dec_done preloads the next six ciphertext blocks.
1520	adcq	$0,%rbp
1521	andq	$128,%rbp
1522.byte	102,68,15,56,222,201
1523	addq	%rdi,%rbp
1524	movups	48-112(%rcx),%xmm1
// Round 2 (key in %xmm0).
1525.byte	102,15,56,222,208
1526.byte	102,15,56,222,216
1527.byte	102,15,56,222,224
1528.byte	102,15,56,222,232
1529.byte	102,15,56,222,240
1530.byte	102,15,56,222,248
1531.byte	102,68,15,56,222,192
1532.byte	102,68,15,56,222,200
1533	movups	64-112(%rcx),%xmm0
1534	nop
// Round 3 (key in %xmm1).
1535.byte	102,15,56,222,209
1536.byte	102,15,56,222,217
1537.byte	102,15,56,222,225
1538.byte	102,15,56,222,233
1539.byte	102,15,56,222,241
1540.byte	102,15,56,222,249
1541.byte	102,68,15,56,222,193
1542.byte	102,68,15,56,222,201
1543	movups	80-112(%rcx),%xmm1
1544	nop
// Round 4 (key in %xmm0).
1545.byte	102,15,56,222,208
1546.byte	102,15,56,222,216
1547.byte	102,15,56,222,224
1548.byte	102,15,56,222,232
1549.byte	102,15,56,222,240
1550.byte	102,15,56,222,248
1551.byte	102,68,15,56,222,192
1552.byte	102,68,15,56,222,200
1553	movups	96-112(%rcx),%xmm0
1554	nop
// Round 5 (key in %xmm1).
1555.byte	102,15,56,222,209
1556.byte	102,15,56,222,217
1557.byte	102,15,56,222,225
1558.byte	102,15,56,222,233
1559.byte	102,15,56,222,241
1560.byte	102,15,56,222,249
1561.byte	102,68,15,56,222,193
1562.byte	102,68,15,56,222,201
1563	movups	112-112(%rcx),%xmm1
1564	nop
// Round 6 (key in %xmm0).
1565.byte	102,15,56,222,208
1566.byte	102,15,56,222,216
1567.byte	102,15,56,222,224
1568.byte	102,15,56,222,232
1569.byte	102,15,56,222,240
1570.byte	102,15,56,222,248
1571.byte	102,68,15,56,222,192
1572.byte	102,68,15,56,222,200
1573	movups	128-112(%rcx),%xmm0
1574	nop
// Round 7 (key in %xmm1).
1575.byte	102,15,56,222,209
1576.byte	102,15,56,222,217
1577.byte	102,15,56,222,225
1578.byte	102,15,56,222,233
1579.byte	102,15,56,222,241
1580.byte	102,15,56,222,249
1581.byte	102,68,15,56,222,193
1582.byte	102,68,15,56,222,201
1583	movups	144-112(%rcx),%xmm1
// Compare the stored round count with 11; the flags are consumed by the
// jb / je below (the key-setup routines store 9, 11 or 13).
1584	cmpl	$11,%eax
// Round 8 (key in %xmm0).
1585.byte	102,15,56,222,208
1586.byte	102,15,56,222,216
1587.byte	102,15,56,222,224
1588.byte	102,15,56,222,232
1589.byte	102,15,56,222,240
1590.byte	102,15,56,222,248
1591.byte	102,68,15,56,222,192
1592.byte	102,68,15,56,222,200
1593	movups	160-112(%rcx),%xmm0
// Round count below 11: finish now; %xmm1 / %xmm0 hold the last two round keys.
1594	jb	L$cbc_dec_done
// Two extra rounds for round counts of 11 and above.
1595.byte	102,15,56,222,209
1596.byte	102,15,56,222,217
1597.byte	102,15,56,222,225
1598.byte	102,15,56,222,233
1599.byte	102,15,56,222,241
1600.byte	102,15,56,222,249
1601.byte	102,68,15,56,222,193
1602.byte	102,68,15,56,222,201
1603	movups	176-112(%rcx),%xmm1
1604	nop
1605.byte	102,15,56,222,208
1606.byte	102,15,56,222,216
1607.byte	102,15,56,222,224
1608.byte	102,15,56,222,232
1609.byte	102,15,56,222,240
1610.byte	102,15,56,222,248
1611.byte	102,68,15,56,222,192
1612.byte	102,68,15,56,222,200
1613	movups	192-112(%rcx),%xmm0
// Round count equal to 11: finish now.
1614	je	L$cbc_dec_done
// Two more rounds for round counts above 11.
1615.byte	102,15,56,222,209
1616.byte	102,15,56,222,217
1617.byte	102,15,56,222,225
1618.byte	102,15,56,222,233
1619.byte	102,15,56,222,241
1620.byte	102,15,56,222,249
1621.byte	102,68,15,56,222,193
1622.byte	102,68,15,56,222,201
1623	movups	208-112(%rcx),%xmm1
1624	nop
1625.byte	102,15,56,222,208
1626.byte	102,15,56,222,216
1627.byte	102,15,56,222,224
1628.byte	102,15,56,222,232
1629.byte	102,15,56,222,240
1630.byte	102,15,56,222,248
1631.byte	102,68,15,56,222,192
1632.byte	102,68,15,56,222,200
1633	movups	224-112(%rcx),%xmm0
1634	jmp	L$cbc_dec_done
// Finish an eight-block batch.
// %xmm1 holds the second-to-last round key and %xmm0 the last one. The last
// round key is XORed into the chaining values (%xmm10 = IV / previous
// ciphertext, %xmm11-%xmm15 = ciphertext copies), so each aesdeclast applies
// the final round key and the CBC XOR in a single instruction.
1635.p2align	4
1636L$cbc_dec_done:
// Final aesdec round (key in %xmm1) for %xmm2..%xmm9, interleaved with the
// last-round-key XOR into %xmm10..%xmm15.
1637.byte	102,15,56,222,209
1638.byte	102,15,56,222,217
1639	pxor	%xmm0,%xmm10
1640	pxor	%xmm0,%xmm11
1641.byte	102,15,56,222,225
1642.byte	102,15,56,222,233
1643	pxor	%xmm0,%xmm12
1644	pxor	%xmm0,%xmm13
1645.byte	102,15,56,222,241
1646.byte	102,15,56,222,249
1647	pxor	%xmm0,%xmm14
1648	pxor	%xmm0,%xmm15
1649.byte	102,68,15,56,222,193
1650.byte	102,68,15,56,222,201
// %xmm1 = ciphertext block 6 (offset 80), to become the chaining value for block 7.
1651	movdqu	80(%rdi),%xmm1
1652
// aesdeclast %xmm10,%xmm2 ... %xmm15,%xmm7, then %xmm1,%xmm8 and %xmm10,%xmm9.
// Each chaining register is reloaded only after the aesdeclast that consumed it.
1653.byte	102,65,15,56,223,210
1654	movdqu	96(%rdi),%xmm10
1655	pxor	%xmm0,%xmm1
1656.byte	102,65,15,56,223,219
1657	pxor	%xmm0,%xmm10
// %xmm0 = last ciphertext block of this batch (becomes the next chaining value).
1658	movdqu	112(%rdi),%xmm0
1659.byte	102,65,15,56,223,228
1660	leaq	128(%rdi),%rdi
// Preload the next batch's ciphertext from %rbp (computed in the loop body).
1661	movdqu	0(%rbp),%xmm11
1662.byte	102,65,15,56,223,237
1663.byte	102,65,15,56,223,246
1664	movdqu	16(%rbp),%xmm12
1665	movdqu	32(%rbp),%xmm13
1666.byte	102,65,15,56,223,255
1667.byte	102,68,15,56,223,193
1668	movdqu	48(%rbp),%xmm14
1669	movdqu	64(%rbp),%xmm15
1670.byte	102,69,15,56,223,202
1671	movdqa	%xmm0,%xmm10
1672	movdqu	80(%rbp),%xmm1
// Reload round key 0 (%rcx is still biased by +112).
1673	movups	-112(%rcx),%xmm0
1674
// Store seven plaintext blocks while moving the preloaded ciphertext into
// %xmm2-%xmm7; the eighth block (%xmm9) is stored by whichever path runs next.
1675	movups	%xmm2,(%rsi)
1676	movdqa	%xmm11,%xmm2
1677	movups	%xmm3,16(%rsi)
1678	movdqa	%xmm12,%xmm3
1679	movups	%xmm4,32(%rsi)
1680	movdqa	%xmm13,%xmm4
1681	movups	%xmm5,48(%rsi)
1682	movdqa	%xmm14,%xmm5
1683	movups	%xmm6,64(%rsi)
1684	movdqa	%xmm15,%xmm6
1685	movups	%xmm7,80(%rsi)
1686	movdqa	%xmm1,%xmm7
1687	movups	%xmm8,96(%rsi)
1688	leaq	112(%rsi),%rsi
1689
// More than another full batch remaining: loop.
1690	subq	$0x80,%rdx
1691	ja	L$cbc_dec_loop8
1692
// Loop exit: %xmm2 = pending eighth plaintext block; undo the key pointer and
// length biases. Nothing left (signed <= 0) -> wipe registers and finish.
1693	movaps	%xmm9,%xmm2
1694	leaq	-112(%rcx),%rcx
1695	addq	$0x70,%rdx
1696	jle	L$cbc_dec_clear_tail_collected
1697	movups	%xmm9,(%rsi)
1698	leaq	16(%rsi),%rsi
1699	cmpq	$0x50,%rdx
1700	jbe	L$cbc_dec_tail
1701
// More than five blocks remain: restore the first preloaded ciphertext block
// into %xmm2 (it was overwritten above) and fall into the six/seven-block code.
1702	movaps	%xmm11,%xmm2
// Six or seven blocks remain, with ciphertext already in %xmm2-%xmm7 and copies
// of the first five in %xmm11-%xmm15. More than 0x60 (96) bytes means seven.
1703L$cbc_dec_six_or_seven:
1704	cmpq	$0x60,%rdx
1705	ja	L$cbc_dec_seven
1706
// Six blocks: keep the sixth ciphertext block in %xmm8 as the next IV.
// _aesni_decrypt6 is defined outside this view; presumably it decrypts
// %xmm2-%xmm7 using the key at %rcx and round count in %eax -- confirm there.
1707	movaps	%xmm7,%xmm8
1708	call	_aesni_decrypt6
// CBC XOR with the IV and preceding ciphertext blocks; store five blocks and
// wipe each register after its store. The sixth block is left in %xmm2 for
// L$cbc_dec_tail_collected.
1709	pxor	%xmm10,%xmm2
1710	movaps	%xmm8,%xmm10
1711	pxor	%xmm11,%xmm3
1712	movdqu	%xmm2,(%rsi)
1713	pxor	%xmm12,%xmm4
1714	movdqu	%xmm3,16(%rsi)
1715	pxor	%xmm3,%xmm3
1716	pxor	%xmm13,%xmm5
1717	movdqu	%xmm4,32(%rsi)
1718	pxor	%xmm4,%xmm4
1719	pxor	%xmm14,%xmm6
1720	movdqu	%xmm5,48(%rsi)
1721	pxor	%xmm5,%xmm5
1722	pxor	%xmm15,%xmm7
1723	movdqu	%xmm6,64(%rsi)
1724	pxor	%xmm6,%xmm6
1725	leaq	80(%rsi),%rsi
1726	movdqa	%xmm7,%xmm2
1727	pxor	%xmm7,%xmm7
1728	jmp	L$cbc_dec_tail_collected
1729
// Seven blocks remain: load the seventh into %xmm8, zero %xmm9 as an unused
// eighth lane, and call _aesni_decrypt8 (defined outside this view).
1730.p2align	4
1731L$cbc_dec_seven:
1732	movups	96(%rdi),%xmm8
1733	xorps	%xmm9,%xmm9
1734	call	_aesni_decrypt8
// Reload ciphertext block 6 (chaining value for block 7) and block 7 (new IV);
// the old IV in %xmm10 is consumed by the first pxor before being replaced.
1735	movups	80(%rdi),%xmm9
1736	pxor	%xmm10,%xmm2
1737	movups	96(%rdi),%xmm10
1738	pxor	%xmm11,%xmm3
1739	movdqu	%xmm2,(%rsi)
1740	pxor	%xmm12,%xmm4
1741	movdqu	%xmm3,16(%rsi)
1742	pxor	%xmm3,%xmm3
1743	pxor	%xmm13,%xmm5
1744	movdqu	%xmm4,32(%rsi)
1745	pxor	%xmm4,%xmm4
1746	pxor	%xmm14,%xmm6
1747	movdqu	%xmm5,48(%rsi)
1748	pxor	%xmm5,%xmm5
1749	pxor	%xmm15,%xmm7
1750	movdqu	%xmm6,64(%rsi)
1751	pxor	%xmm6,%xmm6
1752	pxor	%xmm9,%xmm8
1753	movdqu	%xmm7,80(%rsi)
1754	pxor	%xmm7,%xmm7
// Six blocks stored; the seventh is left in %xmm2 for L$cbc_dec_tail_collected.
1755	leaq	96(%rsi),%rsi
1756	movdqa	%xmm8,%xmm2
1757	pxor	%xmm8,%xmm8
1758	pxor	%xmm9,%xmm9
1759	jmp	L$cbc_dec_tail_collected
1760
// Tail: at most 0x50 (80) bytes remain. Blocks are loaded one at a time while
// %rdx is reduced by 16 per block; the first subtraction that reaches zero or
// borrows (jbe) selects the one/two/three/four-block handler. Each loaded
// ciphertext block is also copied to %xmm11.. as the chaining value for the
// following block.
1761L$cbc_dec_tail:
1762	movups	(%rdi),%xmm2
1763	subq	$0x10,%rdx
1764	jbe	L$cbc_dec_one
1765
1766	movups	16(%rdi),%xmm3
1767	movaps	%xmm2,%xmm11
1768	subq	$0x10,%rdx
1769	jbe	L$cbc_dec_two
1770
1771	movups	32(%rdi),%xmm4
1772	movaps	%xmm3,%xmm12
1773	subq	$0x10,%rdx
1774	jbe	L$cbc_dec_three
1775
1776	movups	48(%rdi),%xmm5
1777	movaps	%xmm4,%xmm13
1778	subq	$0x10,%rdx
1779	jbe	L$cbc_dec_four
1780
// Five blocks: use _aesni_decrypt6 (defined outside this view) with the sixth
// lane (%xmm7) zeroed; %xmm15 keeps the fifth ciphertext block as the new IV.
1781	movups	64(%rdi),%xmm6
1782	movaps	%xmm5,%xmm14
1783	movaps	%xmm6,%xmm15
1784	xorps	%xmm7,%xmm7
1785	call	_aesni_decrypt6
1786	pxor	%xmm10,%xmm2
1787	movaps	%xmm15,%xmm10
1788	pxor	%xmm11,%xmm3
1789	movdqu	%xmm2,(%rsi)
1790	pxor	%xmm12,%xmm4
1791	movdqu	%xmm3,16(%rsi)
1792	pxor	%xmm3,%xmm3
1793	pxor	%xmm13,%xmm5
1794	movdqu	%xmm4,32(%rsi)
1795	pxor	%xmm4,%xmm4
1796	pxor	%xmm14,%xmm6
1797	movdqu	%xmm5,48(%rsi)
1798	pxor	%xmm5,%xmm5
// Four blocks stored; the fifth is left in %xmm2 for L$cbc_dec_tail_collected.
1799	leaq	64(%rsi),%rsi
1800	movdqa	%xmm6,%xmm2
1801	pxor	%xmm6,%xmm6
1802	pxor	%xmm7,%xmm7
// Account for the fifth block; L$cbc_dec_tail_collected only uses %rdx & 15.
1803	subq	$0x10,%rdx
1804	jmp	L$cbc_dec_tail_collected
1805
// Tail handlers for one to four remaining blocks. Each saves the last
// ciphertext block as the new IV (%xmm10), XORs the decrypted blocks with the
// IV / preceding ciphertext, stores all but the last block, and leaves the last
// plaintext block in %xmm2 for L$cbc_dec_tail_collected.
// _aesni_decrypt2/3/4 are defined outside this view.
1806.p2align	4
1807L$cbc_dec_one:
// One block: inline single-block decryption (%eax rounds of aesdec %xmm1,%xmm2,
// then aesdeclast %xmm1,%xmm2).
1808	movaps	%xmm2,%xmm11
1809	movups	(%rcx),%xmm0
1810	movups	16(%rcx),%xmm1
1811	leaq	32(%rcx),%rcx
1812	xorps	%xmm0,%xmm2
1813L$oop_dec1_8:
1814.byte	102,15,56,222,209
1815	decl	%eax
1816	movups	(%rcx),%xmm1
1817	leaq	16(%rcx),%rcx
1818	jnz	L$oop_dec1_8
1819.byte	102,15,56,223,209
1820	xorps	%xmm10,%xmm2
1821	movaps	%xmm11,%xmm10
1822	jmp	L$cbc_dec_tail_collected
1823.p2align	4
// Two blocks.
1824L$cbc_dec_two:
1825	movaps	%xmm3,%xmm12
1826	call	_aesni_decrypt2
1827	pxor	%xmm10,%xmm2
1828	movaps	%xmm12,%xmm10
1829	pxor	%xmm11,%xmm3
1830	movdqu	%xmm2,(%rsi)
1831	movdqa	%xmm3,%xmm2
1832	pxor	%xmm3,%xmm3
1833	leaq	16(%rsi),%rsi
1834	jmp	L$cbc_dec_tail_collected
1835.p2align	4
// Three blocks.
1836L$cbc_dec_three:
1837	movaps	%xmm4,%xmm13
1838	call	_aesni_decrypt3
1839	pxor	%xmm10,%xmm2
1840	movaps	%xmm13,%xmm10
1841	pxor	%xmm11,%xmm3
1842	movdqu	%xmm2,(%rsi)
1843	pxor	%xmm12,%xmm4
1844	movdqu	%xmm3,16(%rsi)
1845	pxor	%xmm3,%xmm3
1846	movdqa	%xmm4,%xmm2
1847	pxor	%xmm4,%xmm4
1848	leaq	32(%rsi),%rsi
1849	jmp	L$cbc_dec_tail_collected
1850.p2align	4
// Four blocks.
1851L$cbc_dec_four:
1852	movaps	%xmm5,%xmm14
1853	call	_aesni_decrypt4
1854	pxor	%xmm10,%xmm2
1855	movaps	%xmm14,%xmm10
1856	pxor	%xmm11,%xmm3
1857	movdqu	%xmm2,(%rsi)
1858	pxor	%xmm12,%xmm4
1859	movdqu	%xmm3,16(%rsi)
1860	pxor	%xmm3,%xmm3
1861	pxor	%xmm13,%xmm5
1862	movdqu	%xmm4,32(%rsi)
1863	pxor	%xmm4,%xmm4
1864	movdqa	%xmm5,%xmm2
1865	pxor	%xmm5,%xmm5
1866	leaq	48(%rsi),%rsi
1867	jmp	L$cbc_dec_tail_collected
1868
// Common decrypt exit. The "clear" entry (reached from the eight-block loop)
// first wipes %xmm3-%xmm9; the other paths wipe their registers as they go.
1869.p2align	4
1870L$cbc_dec_clear_tail_collected:
1871	pxor	%xmm3,%xmm3
1872	pxor	%xmm4,%xmm4
1873	pxor	%xmm5,%xmm5
1874	pxor	%xmm6,%xmm6
1875	pxor	%xmm7,%xmm7
1876	pxor	%xmm8,%xmm8
1877	pxor	%xmm9,%xmm9
// %xmm2 = last plaintext block (not yet stored), %xmm10 = new IV.
1878L$cbc_dec_tail_collected:
1879	movups	%xmm10,(%r8)
// A non-zero (%rdx & 15) means the final block is partial.
1880	andq	$15,%rdx
1881	jnz	L$cbc_dec_tail_partial
1882	movups	%xmm2,(%rsi)
1883	pxor	%xmm2,%xmm2
1884	jmp	L$cbc_dec_ret
// Partial final block: spill the last plaintext block to the aligned stack
// scratch, byte-copy from it to the output, then overwrite the scratch with
// zeros (%xmm2 has been cleared by then).
1885.p2align	4
1886L$cbc_dec_tail_partial:
1887	movaps	%xmm2,(%rsp)
1888	pxor	%xmm2,%xmm2
// Byte count for the copy: %rcx = 16 - %rdx, where %rdx = length & 15.
1889	movq	$16,%rcx
1890	movq	%rsi,%rdi
1891	subq	%rdx,%rcx
1892	leaq	(%rsp),%rsi
// Bytes F3 A4 66 90: rep movsb followed by a two-byte nop.
1893.long	0x9066A4F3
1894	movdqa	%xmm2,(%rsp)
1895
// Epilogue for the bulk decrypt path: wipe %xmm0/%xmm1, restore %rbp from
// -8(%r11) and restore the entry stack pointer saved in %r11.
1896L$cbc_dec_ret:
1897	xorps	%xmm0,%xmm0
1898	pxor	%xmm1,%xmm1
1899	movq	-8(%r11),%rbp
1900
1901	leaq	(%r11),%rsp
1902
// Shared return for every path of _aes_hw_cbc_encrypt.
1903L$cbc_ret:
1904	ret
1905
1906
// _aes_hw_encrypt_key_to_decrypt_key: convert the key schedule at %rdi in place.
// The round keys are reversed end-for-end; every key except the first and last
// is additionally passed through aesimc. The round count is read from offset
// 240. Clobbers %esi, %rdx, %xmm0, %xmm1 and flags; %rax is not written.
1907.globl	_aes_hw_encrypt_key_to_decrypt_key
1908.private_extern _aes_hw_encrypt_key_to_decrypt_key
1909
1910.p2align	4
1911_aes_hw_encrypt_key_to_decrypt_key:
1912
1913_CET_ENDBR
1914
// %rdx = %rdi + 16 + 16 * rounds: address of the last round key.
1915	movl	240(%rdi),%esi
1916	shll	$4,%esi
1917
1918	leaq	16(%rdi,%rsi,1),%rdx
1919
// Swap the first and last round keys unchanged.
1920	movups	(%rdi),%xmm0
1921	movups	(%rdx),%xmm1
1922	movups	%xmm0,(%rdx)
1923	movups	%xmm1,(%rdi)
1924	leaq	16(%rdi),%rdi
1925	leaq	-16(%rdx),%rdx
1926
// Walk inward from both ends: aesimc %xmm0,%xmm0 / aesimc %xmm1,%xmm1, then
// store each transformed key at the other pointer's previous position.
1927L$dec_key_inverse:
1928	movups	(%rdi),%xmm0
1929	movups	(%rdx),%xmm1
1930.byte	102,15,56,219,192
1931.byte	102,15,56,219,201
1932	leaq	16(%rdi),%rdi
1933	leaq	-16(%rdx),%rdx
1934	movups	%xmm0,16(%rdx)
1935	movups	%xmm1,-16(%rdi)
1936	cmpq	%rdi,%rdx
1937	ja	L$dec_key_inverse
1938
// Remaining middle key (the pointers meet for the round counts 9/11/13 written
// by the key-setup routines): aesimc %xmm0,%xmm0, then wipe both registers.
1939	movups	(%rdi),%xmm0
1940.byte	102,15,56,219,192
1941	pxor	%xmm1,%xmm1
1942	movups	%xmm0,(%rdx)
1943	pxor	%xmm0,%xmm0
1944	ret
1945
1946
// _aes_hw_set_encrypt_key_base: expand a user key into an encryption key
// schedule using aeskeygenassist.
// Register roles, as used by the code below:
//   %rdi = user key bytes, %esi = key size in bits (128, 192 or 256),
//   %rdx = key schedule to fill (round keys from offset 0, round count at 240).
// Returns %rax = 0 on success, -2 for any other key size.
1947.globl	_aes_hw_set_encrypt_key_base
1948.private_extern _aes_hw_set_encrypt_key_base
1949
1950.p2align	4
1951_aes_hw_set_encrypt_key_base:
1952
1953
1954_CET_ENDBR
1955#ifdef BORINGSSL_DISPATCH_TEST
1956	movb	$1,_BORINGSSL_function_hit+3(%rip)
1957#endif
// NOTE(review): presumably keeps the stack 16-byte aligned for the internal
// calls below -- confirm against the generating script.
1958	subq	$8,%rsp
1959
1960
1961
// %xmm0 = first 16 key bytes, %xmm4 = 0 (scratch for the expansion helpers),
// %rax = address of the next round-key slot.
1962	movups	(%rdi),%xmm0
1963	xorps	%xmm4,%xmm4
1964	leaq	16(%rdx),%rax
1965	cmpl	$256,%esi
1966	je	L$14rounds
1967	cmpl	$192,%esi
1968	je	L$12rounds
1969	cmpl	$128,%esi
1970	jne	L$bad_keybits
1971
// 128-bit key: round count 9 is stored. Each .byte line below is
// aeskeygenassist $imm,%xmm0,%xmm1 with imm = 1,2,4,8,16,32,64,128,27,54;
// each call derives the next round key in %xmm0 (the non-cold entry first
// stores the previous one at (%rax)).
1972L$10rounds:
1973	movl	$9,%esi
1974
1975	movups	%xmm0,(%rdx)
1976.byte	102,15,58,223,200,1
1977	call	L$key_expansion_128_cold
1978.byte	102,15,58,223,200,2
1979	call	L$key_expansion_128
1980.byte	102,15,58,223,200,4
1981	call	L$key_expansion_128
1982.byte	102,15,58,223,200,8
1983	call	L$key_expansion_128
1984.byte	102,15,58,223,200,16
1985	call	L$key_expansion_128
1986.byte	102,15,58,223,200,32
1987	call	L$key_expansion_128
1988.byte	102,15,58,223,200,64
1989	call	L$key_expansion_128
1990.byte	102,15,58,223,200,128
1991	call	L$key_expansion_128
1992.byte	102,15,58,223,200,27
1993	call	L$key_expansion_128
1994.byte	102,15,58,223,200,54
1995	call	L$key_expansion_128
// Store the final round key; 80(%rax) is offset 240 of the schedule here.
1996	movups	%xmm0,(%rax)
1997	movl	%esi,80(%rax)
1998	xorl	%eax,%eax
1999	jmp	L$enc_key_ret
2000
// 192-bit key path of _aes_hw_set_encrypt_key_base: round count 11 is stored.
// %xmm0 = key bytes 0..15, %xmm2 = key bytes 16..23 (movq zeroes the upper half).
// Each .byte line is aeskeygenassist $imm,%xmm2,%xmm1 with imm = 1,2,4,...,128.
// The helpers alternate: 192a stores 16 bytes, 192b stores 32 bytes.
2001.p2align	4
2002L$12rounds:
2003	movq	16(%rdi),%xmm2
2004	movl	$11,%esi
2005
2006	movups	%xmm0,(%rdx)
2007.byte	102,15,58,223,202,1
2008	call	L$key_expansion_192a_cold
2009.byte	102,15,58,223,202,2
2010	call	L$key_expansion_192b
2011.byte	102,15,58,223,202,4
2012	call	L$key_expansion_192a
2013.byte	102,15,58,223,202,8
2014	call	L$key_expansion_192b
2015.byte	102,15,58,223,202,16
2016	call	L$key_expansion_192a
2017.byte	102,15,58,223,202,32
2018	call	L$key_expansion_192b
2019.byte	102,15,58,223,202,64
2020	call	L$key_expansion_192a
2021.byte	102,15,58,223,202,128
2022	call	L$key_expansion_192b
// Store the final round key; 48(%rax) is offset 240 of the schedule here.
2023	movups	%xmm0,(%rax)
2024	movl	%esi,48(%rax)
2025	xorq	%rax,%rax
2026	jmp	L$enc_key_ret
2027
// 256-bit key path of _aes_hw_set_encrypt_key_base: round count 13 is stored.
// %xmm0 = key bytes 0..15, %xmm2 = key bytes 16..31; both are stored as the
// first two round keys and %rax is advanced to the third slot.
// The .byte lines alternate aeskeygenassist $imm,%xmm2,%xmm1 (before 256a,
// which updates %xmm0) and aeskeygenassist $imm,%xmm0,%xmm1 (before 256b,
// which updates %xmm2), with imm = 1,2,4,...,64.
2028.p2align	4
2029L$14rounds:
2030	movups	16(%rdi),%xmm2
2031	movl	$13,%esi
2032	leaq	16(%rax),%rax
2033
2034	movups	%xmm0,(%rdx)
2035	movups	%xmm2,16(%rdx)
2036.byte	102,15,58,223,202,1
2037	call	L$key_expansion_256a_cold
2038.byte	102,15,58,223,200,1
2039	call	L$key_expansion_256b
2040.byte	102,15,58,223,202,2
2041	call	L$key_expansion_256a
2042.byte	102,15,58,223,200,2
2043	call	L$key_expansion_256b
2044.byte	102,15,58,223,202,4
2045	call	L$key_expansion_256a
2046.byte	102,15,58,223,200,4
2047	call	L$key_expansion_256b
2048.byte	102,15,58,223,202,8
2049	call	L$key_expansion_256a
2050.byte	102,15,58,223,200,8
2051	call	L$key_expansion_256b
2052.byte	102,15,58,223,202,16
2053	call	L$key_expansion_256a
2054.byte	102,15,58,223,200,16
2055	call	L$key_expansion_256b
2056.byte	102,15,58,223,202,32
2057	call	L$key_expansion_256a
2058.byte	102,15,58,223,200,32
2059	call	L$key_expansion_256b
2060.byte	102,15,58,223,202,64
2061	call	L$key_expansion_256a
// Store the final round key; 16(%rax) is offset 240 of the schedule here.
2062	movups	%xmm0,(%rax)
2063	movl	%esi,16(%rax)
2064	xorq	%rax,%rax
2065	jmp	L$enc_key_ret
2066
// Exit paths of _aes_hw_set_encrypt_key_base.
// Unsupported key size: return -2 without writing the schedule.
2067.p2align	4
2068L$bad_keybits:
2069	movq	$-2,%rax
// Common return: wipe %xmm0-%xmm5 and release the 8 bytes reserved at entry.
2070L$enc_key_ret:
2071	pxor	%xmm0,%xmm0
2072	pxor	%xmm1,%xmm1
2073	pxor	%xmm2,%xmm2
2074	pxor	%xmm3,%xmm3
2075	pxor	%xmm4,%xmm4
2076	pxor	%xmm5,%xmm5
2077	addq	$8,%rsp
2078
2079	ret
2080
2081
2082
// Local helper (reached by `call`) for the 128-bit key schedule.
// In:  %xmm0 = previous round key, %xmm1 = aeskeygenassist result,
//      %xmm4 = scratch carried between calls, %rax = next round-key slot.
// Out: %xmm0 = next round key. The normal entry first stores the previous key
//      at (%rax) and advances %rax by 16; the _cold entry skips that store.
2083.p2align	4
2084L$key_expansion_128:
2085
2086	movups	%xmm0,(%rax)
2087	leaq	16(%rax),%rax
2088L$key_expansion_128_cold:
// Two shufps/xorps steps fold shifted copies of the previous key into %xmm0.
2089	shufps	$16,%xmm0,%xmm4
2090	xorps	%xmm4,%xmm0
2091	shufps	$140,%xmm0,%xmm4
2092	xorps	%xmm4,%xmm0
// Broadcast dword 3 of the aeskeygenassist result and XOR it in.
2093	shufps	$255,%xmm1,%xmm1
2094	xorps	%xmm1,%xmm0
2095	ret
2096
2097
// Local helper (reached by `call`) for the 192-bit key schedule.
// In:  %xmm0 / %xmm2 = current key state, %xmm1 = aeskeygenassist result,
//      %xmm4 = scratch carried between calls, %rax = next round-key slot.
// The normal entry stores %xmm0 at (%rax) and advances %rax by 16; the _cold
// entry skips the store. Both save %xmm2 in %xmm5 before updating the state;
// L$key_expansion_192b consumes %xmm5 on the following call and then jumps to
// the _warm label, which skips the %xmm5 save.
2098.p2align	4
2099L$key_expansion_192a:
2100
2101	movups	%xmm0,(%rax)
2102	leaq	16(%rax),%rax
2103L$key_expansion_192a_cold:
2104	movaps	%xmm2,%xmm5
2105L$key_expansion_192b_warm:
2106	shufps	$16,%xmm0,%xmm4
2107	movdqa	%xmm2,%xmm3
2108	xorps	%xmm4,%xmm0
2109	shufps	$140,%xmm0,%xmm4
2110	pslldq	$4,%xmm3
2111	xorps	%xmm4,%xmm0
// Broadcast dword 1 of the aeskeygenassist result and XOR it into %xmm0.
2112	pshufd	$85,%xmm1,%xmm1
2113	pxor	%xmm3,%xmm2
2114	pxor	%xmm1,%xmm0
// Fold the top dword of the updated %xmm0 into %xmm2.
2115	pshufd	$255,%xmm0,%xmm3
2116	pxor	%xmm3,%xmm2
2117	ret
2118
2119
// Local helper (reached by `call`) for the 192-bit key schedule.
// Combines %xmm5 (saved by the preceding 192a call), %xmm0 and %xmm2 into two
// 16-byte round keys, stores them at (%rax) and 16(%rax), advances %rax by 32,
// then continues at L$key_expansion_192b_warm to update the key state.
2120.p2align	4
2121L$key_expansion_192b:
2122
2123	movaps	%xmm0,%xmm3
2124	shufps	$68,%xmm0,%xmm5
2125	movups	%xmm5,(%rax)
2126	shufps	$78,%xmm2,%xmm3
2127	movups	%xmm3,16(%rax)
2128	leaq	32(%rax),%rax
2129	jmp	L$key_expansion_192b_warm
2130
2131
// Local helper (reached by `call`) for the 256-bit key schedule: updates %xmm0.
// The normal entry first stores %xmm2 at (%rax) and advances %rax by 16; the
// _cold entry skips that store. %xmm1 = aeskeygenassist result, %xmm4 = scratch
// carried between calls.
2132.p2align	4
2133L$key_expansion_256a:
2134
2135	movups	%xmm2,(%rax)
2136	leaq	16(%rax),%rax
2137L$key_expansion_256a_cold:
2138	shufps	$16,%xmm0,%xmm4
2139	xorps	%xmm4,%xmm0
2140	shufps	$140,%xmm0,%xmm4
2141	xorps	%xmm4,%xmm0
// Broadcast dword 3 of the aeskeygenassist result and XOR it in.
2142	shufps	$255,%xmm1,%xmm1
2143	xorps	%xmm1,%xmm0
2144	ret
2145
2146
// Local helper (reached by `call`) for the 256-bit key schedule: stores %xmm0
// at (%rax), advances %rax by 16, then updates %xmm2.
// %xmm1 = aeskeygenassist result, %xmm4 = scratch carried between calls.
2147.p2align	4
2148L$key_expansion_256b:
2149
2150	movups	%xmm0,(%rax)
2151	leaq	16(%rax),%rax
2152
2153	shufps	$16,%xmm2,%xmm4
2154	xorps	%xmm4,%xmm2
2155	shufps	$140,%xmm2,%xmm4
2156	xorps	%xmm4,%xmm2
// Broadcast dword 2 of the aeskeygenassist result and XOR it in.
2157	shufps	$170,%xmm1,%xmm1
2158	xorps	%xmm1,%xmm2
2159	ret
2160
2161
2162
// _aes_hw_set_encrypt_key_alt: alternative key expansion that uses pshufb +
// aesenclast instead of aeskeygenassist.
// Register roles, as used by the code below:
//   %rdi = user key bytes, %esi = key size in bits (128, 192 or 256),
//   %rdx = key schedule to fill (round keys from offset 0, round count at 240).
// Returns %rax = 0 on success, -2 for any other key size.
2163.globl	_aes_hw_set_encrypt_key_alt
2164.private_extern _aes_hw_set_encrypt_key_alt
2165
2166.p2align	4
2167_aes_hw_set_encrypt_key_alt:
2168
2169
2170_CET_ENDBR
2171#ifdef BORINGSSL_DISPATCH_TEST
2172	movb	$1,_BORINGSSL_function_hit+3(%rip)
2173#endif
2174	subq	$8,%rsp
2175
2176
2177
2178	movups	(%rdi),%xmm0
2179	xorps	%xmm4,%xmm4
2180	leaq	16(%rdx),%rax
2181	cmpl	$256,%esi
2182	je	L$14rounds_alt
2183	cmpl	$192,%esi
2184	je	L$12rounds_alt
2185	cmpl	$128,%esi
2186	jne	L$bad_keybits_alt
2187
// 128-bit key: round count 9 is stored.
// %xmm5 = byte-shuffle mask (L$key_rotate), %xmm4 = round constant (starts at
// 1), %xmm2 = copy of the previous round key, %r10d = 8 loop iterations.
2188	movl	$9,%esi
2189	movdqa	L$key_rotate(%rip),%xmm5
2190	movl	$8,%r10d
2191	movdqa	L$key_rcon1(%rip),%xmm4
2192	movdqa	%xmm0,%xmm2
2193	movdqu	%xmm0,(%rdx)
2194	jmp	L$oop_key128
2195
2196.p2align	4
2197L$oop_key128:
// pshufb %xmm5,%xmm0 replicates the rotated last dword into every lane;
// aesenclast %xmm4,%xmm0 then applies the S-box and XORs in the round constant.
2198.byte	102,15,56,0,197
2199.byte	102,15,56,221,196
// Double the round constant for the next iteration.
2200	pslld	$1,%xmm4
2201	leaq	16(%rax),%rax
2202
// %xmm2 = previous key XORed with itself shifted left by 4, 8 and 12 bytes.
2203	movdqa	%xmm2,%xmm3
2204	pslldq	$4,%xmm2
2205	pxor	%xmm2,%xmm3
2206	pslldq	$4,%xmm2
2207	pxor	%xmm2,%xmm3
2208	pslldq	$4,%xmm2
2209	pxor	%xmm3,%xmm2
2210
// New round key: store it and keep a copy in %xmm2 for the next iteration.
2211	pxor	%xmm2,%xmm0
2212	movdqu	%xmm0,-16(%rax)
2213	movdqa	%xmm0,%xmm2
2214
2215	decl	%r10d
2216	jnz	L$oop_key128
2217
// The last two round keys use round constants 0x1b and (after pslld) 0x36.
2218	movdqa	L$key_rcon1b(%rip),%xmm4
2219
2220.byte	102,15,56,0,197
2221.byte	102,15,56,221,196
2222	pslld	$1,%xmm4
2223
2224	movdqa	%xmm2,%xmm3
2225	pslldq	$4,%xmm2
2226	pxor	%xmm2,%xmm3
2227	pslldq	$4,%xmm2
2228	pxor	%xmm2,%xmm3
2229	pslldq	$4,%xmm2
2230	pxor	%xmm3,%xmm2
2231
2232	pxor	%xmm2,%xmm0
2233	movdqu	%xmm0,(%rax)
2234
2235	movdqa	%xmm0,%xmm2
2236.byte	102,15,56,0,197
2237.byte	102,15,56,221,196
2238
2239	movdqa	%xmm2,%xmm3
2240	pslldq	$4,%xmm2
2241	pxor	%xmm2,%xmm3
2242	pslldq	$4,%xmm2
2243	pxor	%xmm2,%xmm3
2244	pslldq	$4,%xmm2
2245	pxor	%xmm3,%xmm2
2246
2247	pxor	%xmm2,%xmm0
2248	movdqu	%xmm0,16(%rax)
2249
// 96(%rax) is offset 240 of the schedule here.
2250	movl	%esi,96(%rax)
2251	xorl	%eax,%eax
2252	jmp	L$enc_key_ret_alt
2253
// 192-bit key path of _aes_hw_set_encrypt_key_alt: round count 11 is stored.
// %xmm0 = key bytes 0..15, %xmm2 = key bytes 16..23 (movq zeroes the upper
// half), %xmm5 = L$key_rotate192 shuffle mask, %xmm4 = round constant (starts
// at 1). Each of the 8 iterations writes 24 bytes of key schedule.
2254.p2align	4
2255L$12rounds_alt:
2256	movq	16(%rdi),%xmm2
2257	movl	$11,%esi
2258	movdqa	L$key_rotate192(%rip),%xmm5
2259	movdqa	L$key_rcon1(%rip),%xmm4
2260	movl	$8,%r10d
2261	movdqu	%xmm0,(%rdx)
2262	jmp	L$oop_key192
2263
2264.p2align	4
2265L$oop_key192:
// Store the low 8 bytes of %xmm2 and keep a copy of %xmm2 in %xmm1.
2266	movq	%xmm2,0(%rax)
2267	movdqa	%xmm2,%xmm1
// pshufb %xmm5,%xmm2 ; aesenclast %xmm4,%xmm2 ; then double the round constant.
2268.byte	102,15,56,0,213
2269.byte	102,15,56,221,212
2270	pslld	$1,%xmm4
2271	leaq	24(%rax),%rax
2272
// %xmm0 = %xmm0 XORed with itself shifted left by 4, 8 and 12 bytes.
2273	movdqa	%xmm0,%xmm3
2274	pslldq	$4,%xmm0
2275	pxor	%xmm0,%xmm3
2276	pslldq	$4,%xmm0
2277	pxor	%xmm0,%xmm3
2278	pslldq	$4,%xmm0
2279	pxor	%xmm3,%xmm0
2280
2281	pshufd	$0xff,%xmm0,%xmm3
2282	pxor	%xmm1,%xmm3
2283	pslldq	$4,%xmm1
2284	pxor	%xmm1,%xmm3
2285
// Update the key state in %xmm0 / %xmm2 and store %xmm0.
2286	pxor	%xmm2,%xmm0
2287	pxor	%xmm3,%xmm2
2288	movdqu	%xmm0,-16(%rax)
2289
2290	decl	%r10d
2291	jnz	L$oop_key192
2292
// 32(%rax) is offset 240 of the schedule here.
2293	movl	%esi,32(%rax)
2294	xorl	%eax,%eax
2295	jmp	L$enc_key_ret_alt
2296
// 256-bit key path of _aes_hw_set_encrypt_key_alt: round count 13 is stored.
// %xmm0 = key bytes 0..15, %xmm2 and %xmm1 = key bytes 16..31; both halves are
// stored as the first two round keys and %rax points at the third slot.
// %xmm5 = L$key_rotate shuffle mask, %xmm4 = round constant (starts at 1),
// %r10d = 7 iterations; the loop exits from the middle on the last one.
2297.p2align	4
2298L$14rounds_alt:
2299	movups	16(%rdi),%xmm2
2300	movl	$13,%esi
2301	leaq	16(%rax),%rax
2302	movdqa	L$key_rotate(%rip),%xmm5
2303	movdqa	L$key_rcon1(%rip),%xmm4
2304	movl	$7,%r10d
2305	movdqu	%xmm0,0(%rdx)
2306	movdqa	%xmm2,%xmm1
2307	movdqu	%xmm2,16(%rdx)
2308	jmp	L$oop_key256
2309
2310.p2align	4
2311L$oop_key256:
// First half: pshufb %xmm5,%xmm2 ; aesenclast %xmm4,%xmm2.
2312.byte	102,15,56,0,213
2313.byte	102,15,56,221,212
2314
// %xmm0 = %xmm0 XORed with itself shifted left by 4, 8 and 12 bytes.
2315	movdqa	%xmm0,%xmm3
2316	pslldq	$4,%xmm0
2317	pxor	%xmm0,%xmm3
2318	pslldq	$4,%xmm0
2319	pxor	%xmm0,%xmm3
2320	pslldq	$4,%xmm0
2321	pxor	%xmm3,%xmm0
// Double the round constant for the next iteration.
2322	pslld	$1,%xmm4
2323
2324	pxor	%xmm2,%xmm0
2325	movdqu	%xmm0,(%rax)
2326
2327	decl	%r10d
2328	jz	L$done_key256
2329
// Second half: broadcast the top dword of the key just produced, then
// aesenclast %xmm3,%xmm2 with %xmm3 = 0 (S-box only, no round constant).
2330	pshufd	$0xff,%xmm0,%xmm2
2331	pxor	%xmm3,%xmm3
2332.byte	102,15,56,221,211
2333
// %xmm1 = %xmm1 XORed with itself shifted left by 4, 8 and 12 bytes.
2334	movdqa	%xmm1,%xmm3
2335	pslldq	$4,%xmm1
2336	pxor	%xmm1,%xmm3
2337	pslldq	$4,%xmm1
2338	pxor	%xmm1,%xmm3
2339	pslldq	$4,%xmm1
2340	pxor	%xmm3,%xmm1
2341
2342	pxor	%xmm1,%xmm2
2343	movdqu	%xmm2,16(%rax)
2344	leaq	32(%rax),%rax
2345	movdqa	%xmm2,%xmm1
2346
2347	jmp	L$oop_key256
2348
// 16(%rax) is offset 240 of the schedule here.
2349L$done_key256:
2350	movl	%esi,16(%rax)
2351	xorl	%eax,%eax
2352	jmp	L$enc_key_ret_alt
2353
// Exit paths of _aes_hw_set_encrypt_key_alt.
// Unsupported key size: return -2 without writing the schedule.
2354.p2align	4
2355L$bad_keybits_alt:
2356	movq	$-2,%rax
// Common return: wipe %xmm0-%xmm5 and release the 8 bytes reserved at entry.
2357L$enc_key_ret_alt:
2358	pxor	%xmm0,%xmm0
2359	pxor	%xmm1,%xmm1
2360	pxor	%xmm2,%xmm2
2361	pxor	%xmm3,%xmm3
2362	pxor	%xmm4,%xmm4
2363	pxor	%xmm5,%xmm5
2364	addq	$8,%rsp
2365
2366	ret
2367
2368
2369
// Read-only constants, 64-byte aligned. L$key_rotate, L$key_rotate192,
// L$key_rcon1 and L$key_rcon1b are loaded by _aes_hw_set_encrypt_key_alt; the
// remaining labels are not referenced in this part of the file (presumably
// used by routines earlier in the file -- verify there).
2370.section	__DATA,__const
2371.p2align	6
// Byte indices 15..0: a pshufb-style mask that reverses a 16-byte vector.
2372L$bswap_mask:
2373.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
2374L$increment32:
2375.long	6,6,6,0
2376L$increment64:
2377.long	1,0,0,0
2378L$xts_magic:
2379.long	0x87,0,1,0
2380L$increment1:
2381.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
// pshufb mask selecting bytes 13,14,15,12 into every dword (128/256-bit paths).
2382L$key_rotate:
2383.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
// pshufb mask selecting bytes 5,6,7,4 into every dword (192-bit path).
2384L$key_rotate192:
2385.long	0x04070605,0x04070605,0x04070605,0x04070605
// Initial round constant (1 in every dword); doubled with pslld each round.
2386L$key_rcon1:
2387.long	1,1,1,1
// Round constant 0x1b, loaded for the last two rounds of the 128-bit schedule.
2388L$key_rcon1b:
2389.long	0x1b,0x1b,0x1b,0x1b
2390
// NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
2391.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2392.p2align	6
2393.text
2394#endif
2395