xref: /aosp_15_r20/external/boringssl/src/gen/bcm/aesni-x86-win.asm (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Build preamble: optional symbol prefixing, then output-format-specific
; section selection.
; NOTE(review): the "%ifidn __OUTPUT_FORMAT__, win32" opened here is not
; closed inside this span; its %endif is presumably at the end of the file.
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
7%ifidn __OUTPUT_FORMAT__, win32
8%ifidn __OUTPUT_FORMAT__,obj
9section	code	use32 class=code align=64
10%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): presumably the COFF "@feat.00" SafeSEH marker symbol -- confirm.
11$@feat.00 equ 1
12section	.text	code align=64
13%else
14section	.text	code
15%endif
; The extern is only needed by the BORINGSSL_DISPATCH_TEST prologues, which
; write a byte into _BORINGSSL_function_hit.
16%ifdef BORINGSSL_DISPATCH_TEST
17extern	_BORINGSSL_function_hit
18%endif
; _aes_hw_encrypt: encrypts a single 16-byte block.
; Stack arguments (offsets as read on entry, return address at [esp]):
;   [4+esp]  = pointer to the input block
;   [8+esp]  = pointer to the output block
;   [12+esp] = pointer to the key schedule; the dword at offset 240 is the
;              loop count (number of aesenc steps before the aesenclast)
; Uses eax, ecx, edx and xmm0-xmm2; xmm0-xmm2 are zeroed before returning.
19global	_aes_hw_encrypt
20align	16
21_aes_hw_encrypt:
22L$_aes_hw_encrypt_begin:
; Test hook: store 1 into byte 1 of _BORINGSSL_function_hit. The call/pop pair
; fetches the current address so the symbol is addressed relative to it;
; ebx and edx are saved and restored around the hook.
23%ifdef BORINGSSL_DISPATCH_TEST
24	push	ebx
25	push	edx
26	call	L$000pic_for_function_hit
27L$000pic_for_function_hit:
28	pop	ebx
29	lea	ebx,[(_BORINGSSL_function_hit+1-L$000pic_for_function_hit)+ebx]
30	mov	edx,1
31	mov	BYTE [ebx],dl
32	pop	edx
33	pop	ebx
34%endif
35	mov	eax,DWORD [4+esp]	; eax = input pointer
36	mov	edx,DWORD [12+esp]	; edx = key schedule
37	movups	xmm2,[eax]	; xmm2 = input block
38	mov	ecx,DWORD [240+edx]	; ecx = loop count
39	mov	eax,DWORD [8+esp]	; eax = output pointer
40	movups	xmm0,[edx]	; round key 0
41	movups	xmm1,[16+edx]	; round key 1
42	lea	edx,[32+edx]	; edx -> round key 2
43	xorps	xmm2,xmm0	; initial whitening
44L$001enc1_loop_1:
45db	102,15,56,220,209	; aesenc xmm2,xmm1
46	dec	ecx
47	movups	xmm1,[edx]	; next round key
48	lea	edx,[16+edx]
49	jnz	NEAR L$001enc1_loop_1	; tests ZF from dec; movups/lea leave flags intact
50db	102,15,56,221,209	; aesenclast xmm2,xmm1
51	pxor	xmm0,xmm0	; wipe round keys from registers
52	pxor	xmm1,xmm1
53	movups	[eax],xmm2	; store result
54	pxor	xmm2,xmm2	; wipe the block register
55	ret
; _aes_hw_decrypt: decrypts a single 16-byte block.
; Stack arguments (offsets as read on entry, return address at [esp]):
;   [4+esp]  = pointer to the input block
;   [8+esp]  = pointer to the output block
;   [12+esp] = pointer to the key schedule; the dword at offset 240 is the
;              loop count (number of aesdec steps before the aesdeclast)
; Uses eax, ecx, edx and xmm0-xmm2; xmm0-xmm2 are zeroed before returning.
56global	_aes_hw_decrypt
57align	16
58_aes_hw_decrypt:
59L$_aes_hw_decrypt_begin:
60	mov	eax,DWORD [4+esp]	; eax = input pointer
61	mov	edx,DWORD [12+esp]	; edx = key schedule
62	movups	xmm2,[eax]	; xmm2 = input block
63	mov	ecx,DWORD [240+edx]	; ecx = loop count
64	mov	eax,DWORD [8+esp]	; eax = output pointer
65	movups	xmm0,[edx]	; round key 0
66	movups	xmm1,[16+edx]	; round key 1
67	lea	edx,[32+edx]	; edx -> round key 2
68	xorps	xmm2,xmm0	; initial whitening
69L$002dec1_loop_2:
70db	102,15,56,222,209	; aesdec xmm2,xmm1
71	dec	ecx
72	movups	xmm1,[edx]	; next round key
73	lea	edx,[16+edx]
74	jnz	NEAR L$002dec1_loop_2	; tests ZF from dec; movups/lea leave flags intact
75db	102,15,56,223,209	; aesdeclast xmm2,xmm1
76	pxor	xmm0,xmm0	; wipe round keys from registers
77	pxor	xmm1,xmm1
78	movups	[eax],xmm2	; store result
79	pxor	xmm2,xmm2	; wipe the block register
80	ret
; __aesni_encrypt2: encrypts the two blocks held in xmm2 and xmm3 in parallel.
; Register interface (no stack arguments are read):
;   In:       edx = key schedule pointer, ecx = round count, xmm2/xmm3 = blocks
;   Out:      xmm2/xmm3 = encrypted blocks
;   Clobbers: ecx, edx, xmm0, xmm1, flags
; Round keys are addressed as [ecx+edx] with ecx counting up to zero; each loop
; iteration performs two rounds, alternating xmm1 and xmm0 as the round key.
81align	16
82__aesni_encrypt2:
83	movups	xmm0,[edx]	; round key 0
84	shl	ecx,4	; ecx = 16*rounds
85	movups	xmm1,[16+edx]	; round key 1
86	xorps	xmm2,xmm0	; whitening of both blocks
87	pxor	xmm3,xmm0
88	movups	xmm0,[32+edx]	; round key 2
89	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*rounds
90	neg	ecx
91	add	ecx,16	; ecx = 16 - 16*rounds, index relative to edx
92L$003enc2_loop:
93db	102,15,56,220,209	; aesenc xmm2,xmm1
94db	102,15,56,220,217	; aesenc xmm3,xmm1
95	movups	xmm1,[ecx*1+edx]
96	add	ecx,32	; ZF from this add drives the jnz below
97db	102,15,56,220,208	; aesenc xmm2,xmm0
98db	102,15,56,220,216	; aesenc xmm3,xmm0
99	movups	xmm0,[ecx*1+edx-16]
100	jnz	NEAR L$003enc2_loop
101db	102,15,56,220,209	; aesenc xmm2,xmm1
102db	102,15,56,220,217	; aesenc xmm3,xmm1
103db	102,15,56,221,208	; aesenclast xmm2,xmm0
104db	102,15,56,221,216	; aesenclast xmm3,xmm0
105	ret
; __aesni_decrypt2: decrypts the two blocks held in xmm2 and xmm3 in parallel.
; Register interface (no stack arguments are read):
;   In:       edx = key schedule pointer, ecx = round count, xmm2/xmm3 = blocks
;   Out:      xmm2/xmm3 = decrypted blocks
;   Clobbers: ecx, edx, xmm0, xmm1, flags
; Same key-walking scheme as the encrypt variant: [ecx+edx] with ecx counting
; up to zero, two rounds per loop iteration.
106align	16
107__aesni_decrypt2:
108	movups	xmm0,[edx]	; round key 0
109	shl	ecx,4	; ecx = 16*rounds
110	movups	xmm1,[16+edx]	; round key 1
111	xorps	xmm2,xmm0	; whitening of both blocks
112	pxor	xmm3,xmm0
113	movups	xmm0,[32+edx]	; round key 2
114	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*rounds
115	neg	ecx
116	add	ecx,16	; ecx = 16 - 16*rounds, index relative to edx
117L$004dec2_loop:
118db	102,15,56,222,209	; aesdec xmm2,xmm1
119db	102,15,56,222,217	; aesdec xmm3,xmm1
120	movups	xmm1,[ecx*1+edx]
121	add	ecx,32	; ZF from this add drives the jnz below
122db	102,15,56,222,208	; aesdec xmm2,xmm0
123db	102,15,56,222,216	; aesdec xmm3,xmm0
124	movups	xmm0,[ecx*1+edx-16]
125	jnz	NEAR L$004dec2_loop
126db	102,15,56,222,209	; aesdec xmm2,xmm1
127db	102,15,56,222,217	; aesdec xmm3,xmm1
128db	102,15,56,223,208	; aesdeclast xmm2,xmm0
129db	102,15,56,223,216	; aesdeclast xmm3,xmm0
130	ret
; __aesni_encrypt3: encrypts the three blocks held in xmm2-xmm4 in parallel.
; Register interface (no stack arguments are read):
;   In:       edx = key schedule pointer, ecx = round count, xmm2-xmm4 = blocks
;   Out:      xmm2-xmm4 = encrypted blocks
;   Clobbers: ecx, edx, xmm0, xmm1, flags
131align	16
132__aesni_encrypt3:
133	movups	xmm0,[edx]	; round key 0
134	shl	ecx,4	; ecx = 16*rounds
135	movups	xmm1,[16+edx]	; round key 1
136	xorps	xmm2,xmm0	; whitening of the three blocks
137	pxor	xmm3,xmm0
138	pxor	xmm4,xmm0
139	movups	xmm0,[32+edx]	; round key 2
140	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*rounds
141	neg	ecx
142	add	ecx,16	; ecx = 16 - 16*rounds, index relative to edx
143L$005enc3_loop:
144db	102,15,56,220,209	; aesenc xmm2,xmm1
145db	102,15,56,220,217	; aesenc xmm3,xmm1
146db	102,15,56,220,225	; aesenc xmm4,xmm1
147	movups	xmm1,[ecx*1+edx]
148	add	ecx,32	; ZF from this add drives the jnz below
149db	102,15,56,220,208	; aesenc xmm2,xmm0
150db	102,15,56,220,216	; aesenc xmm3,xmm0
151db	102,15,56,220,224	; aesenc xmm4,xmm0
152	movups	xmm0,[ecx*1+edx-16]
153	jnz	NEAR L$005enc3_loop
154db	102,15,56,220,209	; aesenc xmm2,xmm1
155db	102,15,56,220,217	; aesenc xmm3,xmm1
156db	102,15,56,220,225	; aesenc xmm4,xmm1
157db	102,15,56,221,208	; aesenclast xmm2,xmm0
158db	102,15,56,221,216	; aesenclast xmm3,xmm0
159db	102,15,56,221,224	; aesenclast xmm4,xmm0
160	ret
; __aesni_decrypt3: decrypts the three blocks held in xmm2-xmm4 in parallel.
; Register interface (no stack arguments are read):
;   In:       edx = key schedule pointer, ecx = round count, xmm2-xmm4 = blocks
;   Out:      xmm2-xmm4 = decrypted blocks
;   Clobbers: ecx, edx, xmm0, xmm1, flags
161align	16
162__aesni_decrypt3:
163	movups	xmm0,[edx]	; round key 0
164	shl	ecx,4	; ecx = 16*rounds
165	movups	xmm1,[16+edx]	; round key 1
166	xorps	xmm2,xmm0	; whitening of the three blocks
167	pxor	xmm3,xmm0
168	pxor	xmm4,xmm0
169	movups	xmm0,[32+edx]	; round key 2
170	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*rounds
171	neg	ecx
172	add	ecx,16	; ecx = 16 - 16*rounds, index relative to edx
173L$006dec3_loop:
174db	102,15,56,222,209	; aesdec xmm2,xmm1
175db	102,15,56,222,217	; aesdec xmm3,xmm1
176db	102,15,56,222,225	; aesdec xmm4,xmm1
177	movups	xmm1,[ecx*1+edx]
178	add	ecx,32	; ZF from this add drives the jnz below
179db	102,15,56,222,208	; aesdec xmm2,xmm0
180db	102,15,56,222,216	; aesdec xmm3,xmm0
181db	102,15,56,222,224	; aesdec xmm4,xmm0
182	movups	xmm0,[ecx*1+edx-16]
183	jnz	NEAR L$006dec3_loop
184db	102,15,56,222,209	; aesdec xmm2,xmm1
185db	102,15,56,222,217	; aesdec xmm3,xmm1
186db	102,15,56,222,225	; aesdec xmm4,xmm1
187db	102,15,56,223,208	; aesdeclast xmm2,xmm0
188db	102,15,56,223,216	; aesdeclast xmm3,xmm0
189db	102,15,56,223,224	; aesdeclast xmm4,xmm0
190	ret
; __aesni_encrypt4: encrypts the four blocks held in xmm2-xmm5 in parallel.
; Register interface (no stack arguments are read):
;   In:       edx = key schedule pointer, ecx = round count, xmm2-xmm5 = blocks
;   Out:      xmm2-xmm5 = encrypted blocks
;   Clobbers: ecx, edx, xmm0, xmm1, flags
191align	16
192__aesni_encrypt4:
193	movups	xmm0,[edx]	; round key 0
194	movups	xmm1,[16+edx]	; round key 1
195	shl	ecx,4	; ecx = 16*rounds
196	xorps	xmm2,xmm0	; whitening of the four blocks
197	pxor	xmm3,xmm0
198	pxor	xmm4,xmm0
199	pxor	xmm5,xmm0
200	movups	xmm0,[32+edx]	; round key 2
201	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*rounds
202	neg	ecx
203db	15,31,64,0	; 4-byte nop (0F 1F 40 00); padding only
204	add	ecx,16	; ecx = 16 - 16*rounds, index relative to edx
205L$007enc4_loop:
206db	102,15,56,220,209	; aesenc xmm2,xmm1
207db	102,15,56,220,217	; aesenc xmm3,xmm1
208db	102,15,56,220,225	; aesenc xmm4,xmm1
209db	102,15,56,220,233	; aesenc xmm5,xmm1
210	movups	xmm1,[ecx*1+edx]
211	add	ecx,32	; ZF from this add drives the jnz below
212db	102,15,56,220,208	; aesenc xmm2,xmm0
213db	102,15,56,220,216	; aesenc xmm3,xmm0
214db	102,15,56,220,224	; aesenc xmm4,xmm0
215db	102,15,56,220,232	; aesenc xmm5,xmm0
216	movups	xmm0,[ecx*1+edx-16]
217	jnz	NEAR L$007enc4_loop
218db	102,15,56,220,209	; aesenc xmm2,xmm1
219db	102,15,56,220,217	; aesenc xmm3,xmm1
220db	102,15,56,220,225	; aesenc xmm4,xmm1
221db	102,15,56,220,233	; aesenc xmm5,xmm1
222db	102,15,56,221,208	; aesenclast xmm2,xmm0
223db	102,15,56,221,216	; aesenclast xmm3,xmm0
224db	102,15,56,221,224	; aesenclast xmm4,xmm0
225db	102,15,56,221,232	; aesenclast xmm5,xmm0
226	ret
; __aesni_decrypt4: decrypts the four blocks held in xmm2-xmm5 in parallel.
; Register interface (no stack arguments are read):
;   In:       edx = key schedule pointer, ecx = round count, xmm2-xmm5 = blocks
;   Out:      xmm2-xmm5 = decrypted blocks
;   Clobbers: ecx, edx, xmm0, xmm1, flags
227align	16
228__aesni_decrypt4:
229	movups	xmm0,[edx]	; round key 0
230	movups	xmm1,[16+edx]	; round key 1
231	shl	ecx,4	; ecx = 16*rounds
232	xorps	xmm2,xmm0	; whitening of the four blocks
233	pxor	xmm3,xmm0
234	pxor	xmm4,xmm0
235	pxor	xmm5,xmm0
236	movups	xmm0,[32+edx]	; round key 2
237	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*rounds
238	neg	ecx
239db	15,31,64,0	; 4-byte nop (0F 1F 40 00); padding only
240	add	ecx,16	; ecx = 16 - 16*rounds, index relative to edx
241L$008dec4_loop:
242db	102,15,56,222,209	; aesdec xmm2,xmm1
243db	102,15,56,222,217	; aesdec xmm3,xmm1
244db	102,15,56,222,225	; aesdec xmm4,xmm1
245db	102,15,56,222,233	; aesdec xmm5,xmm1
246	movups	xmm1,[ecx*1+edx]
247	add	ecx,32	; ZF from this add drives the jnz below
248db	102,15,56,222,208	; aesdec xmm2,xmm0
249db	102,15,56,222,216	; aesdec xmm3,xmm0
250db	102,15,56,222,224	; aesdec xmm4,xmm0
251db	102,15,56,222,232	; aesdec xmm5,xmm0
252	movups	xmm0,[ecx*1+edx-16]
253	jnz	NEAR L$008dec4_loop
254db	102,15,56,222,209	; aesdec xmm2,xmm1
255db	102,15,56,222,217	; aesdec xmm3,xmm1
256db	102,15,56,222,225	; aesdec xmm4,xmm1
257db	102,15,56,222,233	; aesdec xmm5,xmm1
258db	102,15,56,223,208	; aesdeclast xmm2,xmm0
259db	102,15,56,223,216	; aesdeclast xmm3,xmm0
260db	102,15,56,223,224	; aesdeclast xmm4,xmm0
261db	102,15,56,223,232	; aesdeclast xmm5,xmm0
262	ret
; __aesni_encrypt6: encrypts the six blocks held in xmm2-xmm7 in parallel.
; Register interface (no stack arguments are read):
;   In:       edx = key schedule pointer, ecx = round count, xmm2-xmm7 = blocks
;   Out:      xmm2-xmm7 = encrypted blocks
;   Clobbers: ecx, edx, xmm0, xmm1, flags
; The whitening XORs are interleaved with the first aesenc round, so the code
; jumps into the middle of the loop body (L$009_aesni_encrypt6_inner).
;
; Alternate entry L$_aesni_encrypt6_enter is called directly by other routines
; in this file that have already applied whitening and the first round to all
; six blocks. At that entry the code expects:
;   edx = key + 32 + 16*rounds, ecx = 16 - 16*rounds, xmm0 = [key+32].
263align	16
264__aesni_encrypt6:
265	movups	xmm0,[edx]	; round key 0
266	shl	ecx,4	; ecx = 16*rounds
267	movups	xmm1,[16+edx]	; round key 1
268	xorps	xmm2,xmm0
269	pxor	xmm3,xmm0
270	pxor	xmm4,xmm0
271db	102,15,56,220,209	; aesenc xmm2,xmm1
272	pxor	xmm5,xmm0
273	pxor	xmm6,xmm0
274db	102,15,56,220,217	; aesenc xmm3,xmm1
275	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*rounds
276	neg	ecx	; ecx = -16*rounds
277db	102,15,56,220,225	; aesenc xmm4,xmm1
278	pxor	xmm7,xmm0
279	movups	xmm0,[ecx*1+edx]	; = [key+32], round key 2
280	add	ecx,16	; ecx = 16 - 16*rounds
281	jmp	NEAR L$009_aesni_encrypt6_inner
282align	16
283L$010enc6_loop:
284db	102,15,56,220,209	; aesenc xmm2,xmm1
285db	102,15,56,220,217	; aesenc xmm3,xmm1
286db	102,15,56,220,225	; aesenc xmm4,xmm1
287L$009_aesni_encrypt6_inner:
288db	102,15,56,220,233	; aesenc xmm5,xmm1
289db	102,15,56,220,241	; aesenc xmm6,xmm1
290db	102,15,56,220,249	; aesenc xmm7,xmm1
291L$_aesni_encrypt6_enter:
292	movups	xmm1,[ecx*1+edx]
293	add	ecx,32	; ZF from this add drives the jnz below
294db	102,15,56,220,208	; aesenc xmm2,xmm0
295db	102,15,56,220,216	; aesenc xmm3,xmm0
296db	102,15,56,220,224	; aesenc xmm4,xmm0
297db	102,15,56,220,232	; aesenc xmm5,xmm0
298db	102,15,56,220,240	; aesenc xmm6,xmm0
299db	102,15,56,220,248	; aesenc xmm7,xmm0
300	movups	xmm0,[ecx*1+edx-16]
301	jnz	NEAR L$010enc6_loop
302db	102,15,56,220,209	; aesenc xmm2,xmm1
303db	102,15,56,220,217	; aesenc xmm3,xmm1
304db	102,15,56,220,225	; aesenc xmm4,xmm1
305db	102,15,56,220,233	; aesenc xmm5,xmm1
306db	102,15,56,220,241	; aesenc xmm6,xmm1
307db	102,15,56,220,249	; aesenc xmm7,xmm1
308db	102,15,56,221,208	; aesenclast xmm2,xmm0
309db	102,15,56,221,216	; aesenclast xmm3,xmm0
310db	102,15,56,221,224	; aesenclast xmm4,xmm0
311db	102,15,56,221,232	; aesenclast xmm5,xmm0
312db	102,15,56,221,240	; aesenclast xmm6,xmm0
313db	102,15,56,221,248	; aesenclast xmm7,xmm0
314	ret
; __aesni_decrypt6: decrypts the six blocks held in xmm2-xmm7 in parallel.
; Register interface (no stack arguments are read):
;   In:       edx = key schedule pointer, ecx = round count, xmm2-xmm7 = blocks
;   Out:      xmm2-xmm7 = decrypted blocks
;   Clobbers: ecx, edx, xmm0, xmm1, flags
; The whitening XORs are interleaved with the first aesdec round, so the code
; jumps into the middle of the loop body (L$011_aesni_decrypt6_inner).
; L$_aesni_decrypt6_enter is a mid-loop label that expects
;   edx = key + 32 + 16*rounds, ecx = 16 - 16*rounds, xmm0 = [key+32]
; and the first round already applied to all six blocks.
315align	16
316__aesni_decrypt6:
317	movups	xmm0,[edx]	; round key 0
318	shl	ecx,4	; ecx = 16*rounds
319	movups	xmm1,[16+edx]	; round key 1
320	xorps	xmm2,xmm0
321	pxor	xmm3,xmm0
322	pxor	xmm4,xmm0
323db	102,15,56,222,209	; aesdec xmm2,xmm1
324	pxor	xmm5,xmm0
325	pxor	xmm6,xmm0
326db	102,15,56,222,217	; aesdec xmm3,xmm1
327	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*rounds
328	neg	ecx	; ecx = -16*rounds
329db	102,15,56,222,225	; aesdec xmm4,xmm1
330	pxor	xmm7,xmm0
331	movups	xmm0,[ecx*1+edx]	; = [key+32], round key 2
332	add	ecx,16	; ecx = 16 - 16*rounds
333	jmp	NEAR L$011_aesni_decrypt6_inner
334align	16
335L$012dec6_loop:
336db	102,15,56,222,209	; aesdec xmm2,xmm1
337db	102,15,56,222,217	; aesdec xmm3,xmm1
338db	102,15,56,222,225	; aesdec xmm4,xmm1
339L$011_aesni_decrypt6_inner:
340db	102,15,56,222,233	; aesdec xmm5,xmm1
341db	102,15,56,222,241	; aesdec xmm6,xmm1
342db	102,15,56,222,249	; aesdec xmm7,xmm1
343L$_aesni_decrypt6_enter:
344	movups	xmm1,[ecx*1+edx]
345	add	ecx,32	; ZF from this add drives the jnz below
346db	102,15,56,222,208	; aesdec xmm2,xmm0
347db	102,15,56,222,216	; aesdec xmm3,xmm0
348db	102,15,56,222,224	; aesdec xmm4,xmm0
349db	102,15,56,222,232	; aesdec xmm5,xmm0
350db	102,15,56,222,240	; aesdec xmm6,xmm0
351db	102,15,56,222,248	; aesdec xmm7,xmm0
352	movups	xmm0,[ecx*1+edx-16]
353	jnz	NEAR L$012dec6_loop
354db	102,15,56,222,209	; aesdec xmm2,xmm1
355db	102,15,56,222,217	; aesdec xmm3,xmm1
356db	102,15,56,222,225	; aesdec xmm4,xmm1
357db	102,15,56,222,233	; aesdec xmm5,xmm1
358db	102,15,56,222,241	; aesdec xmm6,xmm1
359db	102,15,56,222,249	; aesdec xmm7,xmm1
360db	102,15,56,223,208	; aesdeclast xmm2,xmm0
361db	102,15,56,223,216	; aesdeclast xmm3,xmm0
362db	102,15,56,223,224	; aesdeclast xmm4,xmm0
363db	102,15,56,223,232	; aesdeclast xmm5,xmm0
364db	102,15,56,223,240	; aesdeclast xmm6,xmm0
365db	102,15,56,223,248	; aesdeclast xmm7,xmm0
366	ret
; _aes_hw_ecb_encrypt: encrypts or decrypts whole 16-byte blocks, each block
; independently of the others.
; Stack arguments (offsets as read after the four register pushes):
;   [20+esp] = input pointer    -> esi
;   [24+esp] = output pointer   -> edi
;   [28+esp] = length in bytes  -> eax (rounded down to a multiple of 16;
;                                  returns immediately if that leaves 0)
;   [32+esp] = key schedule     -> edx (round count read from offset 240)
;   [36+esp] = direction flag   -> ebx (zero selects the decrypt path)
; ebp and ebx hold the key pointer and round count across helper calls, because
; the __aesni_* helpers clobber edx and ecx.
; Blocks are processed six at a time while at least 96 bytes remain; a tail of
; 1-5 blocks is dispatched to the 1/2/3/4/6-way code (the 5-block case runs the
; 6-way helper with xmm7 zeroed and discards that lane).
; ebp, ebx, esi, edi are restored and xmm0-xmm7 are zeroed on exit.
367global	_aes_hw_ecb_encrypt
368align	16
369_aes_hw_ecb_encrypt:
370L$_aes_hw_ecb_encrypt_begin:
371	push	ebp
372	push	ebx
373	push	esi
374	push	edi
375	mov	esi,DWORD [20+esp]
376	mov	edi,DWORD [24+esp]
377	mov	eax,DWORD [28+esp]
378	mov	edx,DWORD [32+esp]
379	mov	ebx,DWORD [36+esp]
380	and	eax,-16	; drop any partial trailing block
381	jz	NEAR L$013ecb_ret
382	mov	ecx,DWORD [240+edx]	; ecx = round count
383	test	ebx,ebx
384	jz	NEAR L$014ecb_decrypt
385	mov	ebp,edx	; ebp = key pointer (survives helper calls)
386	mov	ebx,ecx	; ebx = round count (survives helper calls)
387	cmp	eax,96
388	jb	NEAR L$015ecb_enc_tail
389	movdqu	xmm2,[esi]
390	movdqu	xmm3,[16+esi]
391	movdqu	xmm4,[32+esi]
392	movdqu	xmm5,[48+esi]
393	movdqu	xmm6,[64+esi]
394	movdqu	xmm7,[80+esi]
395	lea	esi,[96+esi]
396	sub	eax,96
397	jmp	NEAR L$016ecb_enc_loop6_enter
398align	16
; Loop body: store the six results of the previous iteration while loading the
; next six inputs.
399L$017ecb_enc_loop6:
400	movups	[edi],xmm2
401	movdqu	xmm2,[esi]
402	movups	[16+edi],xmm3
403	movdqu	xmm3,[16+esi]
404	movups	[32+edi],xmm4
405	movdqu	xmm4,[32+esi]
406	movups	[48+edi],xmm5
407	movdqu	xmm5,[48+esi]
408	movups	[64+edi],xmm6
409	movdqu	xmm6,[64+esi]
410	movups	[80+edi],xmm7
411	lea	edi,[96+edi]
412	movdqu	xmm7,[80+esi]
413	lea	esi,[96+esi]
414L$016ecb_enc_loop6_enter:
415	call	__aesni_encrypt6
416	mov	edx,ebp	; restore key pointer clobbered by the helper
417	mov	ecx,ebx	; restore round count clobbered by the helper
418	sub	eax,96
419	jnc	NEAR L$017ecb_enc_loop6	; continue while another 96 bytes were available
420	movups	[edi],xmm2
421	movups	[16+edi],xmm3
422	movups	[32+edi],xmm4
423	movups	[48+edi],xmm5
424	movups	[64+edi],xmm6
425	movups	[80+edi],xmm7
426	lea	edi,[96+edi]
427	add	eax,96	; undo the last subtraction: eax = remaining bytes
428	jz	NEAR L$013ecb_ret
; Tail: eax is 16, 32, 48, 64 or 80 here. The flags of each cmp are consumed by
; both the jb and the following je (the movups in between leaves flags intact).
429L$015ecb_enc_tail:
430	movups	xmm2,[esi]
431	cmp	eax,32
432	jb	NEAR L$018ecb_enc_one
433	movups	xmm3,[16+esi]
434	je	NEAR L$019ecb_enc_two
435	movups	xmm4,[32+esi]
436	cmp	eax,64
437	jb	NEAR L$020ecb_enc_three
438	movups	xmm5,[48+esi]
439	je	NEAR L$021ecb_enc_four
440	movups	xmm6,[64+esi]
441	xorps	xmm7,xmm7	; sixth lane is unused for the 5-block tail
442	call	__aesni_encrypt6
443	movups	[edi],xmm2
444	movups	[16+edi],xmm3
445	movups	[32+edi],xmm4
446	movups	[48+edi],xmm5
447	movups	[64+edi],xmm6
448	jmp	NEAR L$013ecb_ret
449align	16
450L$018ecb_enc_one:
451	movups	xmm0,[edx]	; round key 0
452	movups	xmm1,[16+edx]	; round key 1
453	lea	edx,[32+edx]
454	xorps	xmm2,xmm0
455L$022enc1_loop_3:
456db	102,15,56,220,209	; aesenc xmm2,xmm1
457	dec	ecx
458	movups	xmm1,[edx]
459	lea	edx,[16+edx]
460	jnz	NEAR L$022enc1_loop_3	; ZF from dec ecx
461db	102,15,56,221,209	; aesenclast xmm2,xmm1
462	movups	[edi],xmm2
463	jmp	NEAR L$013ecb_ret
464align	16
465L$019ecb_enc_two:
466	call	__aesni_encrypt2
467	movups	[edi],xmm2
468	movups	[16+edi],xmm3
469	jmp	NEAR L$013ecb_ret
470align	16
471L$020ecb_enc_three:
472	call	__aesni_encrypt3
473	movups	[edi],xmm2
474	movups	[16+edi],xmm3
475	movups	[32+edi],xmm4
476	jmp	NEAR L$013ecb_ret
477align	16
478L$021ecb_enc_four:
479	call	__aesni_encrypt4
480	movups	[edi],xmm2
481	movups	[16+edi],xmm3
482	movups	[32+edi],xmm4
483	movups	[48+edi],xmm5
484	jmp	NEAR L$013ecb_ret
485align	16
; Decrypt path: same structure as the encrypt path, using the decrypt helpers.
486L$014ecb_decrypt:
487	mov	ebp,edx	; ebp = key pointer (survives helper calls)
488	mov	ebx,ecx	; ebx = round count (survives helper calls)
489	cmp	eax,96
490	jb	NEAR L$023ecb_dec_tail
491	movdqu	xmm2,[esi]
492	movdqu	xmm3,[16+esi]
493	movdqu	xmm4,[32+esi]
494	movdqu	xmm5,[48+esi]
495	movdqu	xmm6,[64+esi]
496	movdqu	xmm7,[80+esi]
497	lea	esi,[96+esi]
498	sub	eax,96
499	jmp	NEAR L$024ecb_dec_loop6_enter
500align	16
501L$025ecb_dec_loop6:
502	movups	[edi],xmm2
503	movdqu	xmm2,[esi]
504	movups	[16+edi],xmm3
505	movdqu	xmm3,[16+esi]
506	movups	[32+edi],xmm4
507	movdqu	xmm4,[32+esi]
508	movups	[48+edi],xmm5
509	movdqu	xmm5,[48+esi]
510	movups	[64+edi],xmm6
511	movdqu	xmm6,[64+esi]
512	movups	[80+edi],xmm7
513	lea	edi,[96+edi]
514	movdqu	xmm7,[80+esi]
515	lea	esi,[96+esi]
516L$024ecb_dec_loop6_enter:
517	call	__aesni_decrypt6
518	mov	edx,ebp	; restore key pointer clobbered by the helper
519	mov	ecx,ebx	; restore round count clobbered by the helper
520	sub	eax,96
521	jnc	NEAR L$025ecb_dec_loop6
522	movups	[edi],xmm2
523	movups	[16+edi],xmm3
524	movups	[32+edi],xmm4
525	movups	[48+edi],xmm5
526	movups	[64+edi],xmm6
527	movups	[80+edi],xmm7
528	lea	edi,[96+edi]
529	add	eax,96	; eax = remaining bytes
530	jz	NEAR L$013ecb_ret
531L$023ecb_dec_tail:
532	movups	xmm2,[esi]
533	cmp	eax,32
534	jb	NEAR L$026ecb_dec_one
535	movups	xmm3,[16+esi]
536	je	NEAR L$027ecb_dec_two
537	movups	xmm4,[32+esi]
538	cmp	eax,64
539	jb	NEAR L$028ecb_dec_three
540	movups	xmm5,[48+esi]
541	je	NEAR L$029ecb_dec_four
542	movups	xmm6,[64+esi]
543	xorps	xmm7,xmm7	; sixth lane is unused for the 5-block tail
544	call	__aesni_decrypt6
545	movups	[edi],xmm2
546	movups	[16+edi],xmm3
547	movups	[32+edi],xmm4
548	movups	[48+edi],xmm5
549	movups	[64+edi],xmm6
550	jmp	NEAR L$013ecb_ret
551align	16
552L$026ecb_dec_one:
553	movups	xmm0,[edx]	; round key 0
554	movups	xmm1,[16+edx]	; round key 1
555	lea	edx,[32+edx]
556	xorps	xmm2,xmm0
557L$030dec1_loop_4:
558db	102,15,56,222,209	; aesdec xmm2,xmm1
559	dec	ecx
560	movups	xmm1,[edx]
561	lea	edx,[16+edx]
562	jnz	NEAR L$030dec1_loop_4	; ZF from dec ecx
563db	102,15,56,223,209	; aesdeclast xmm2,xmm1
564	movups	[edi],xmm2
565	jmp	NEAR L$013ecb_ret
566align	16
567L$027ecb_dec_two:
568	call	__aesni_decrypt2
569	movups	[edi],xmm2
570	movups	[16+edi],xmm3
571	jmp	NEAR L$013ecb_ret
572align	16
573L$028ecb_dec_three:
574	call	__aesni_decrypt3
575	movups	[edi],xmm2
576	movups	[16+edi],xmm3
577	movups	[32+edi],xmm4
578	jmp	NEAR L$013ecb_ret
579align	16
580L$029ecb_dec_four:
581	call	__aesni_decrypt4
582	movups	[edi],xmm2
583	movups	[16+edi],xmm3
584	movups	[32+edi],xmm4
585	movups	[48+edi],xmm5
; Common exit: wipe all xmm registers used, restore saved registers.
586L$013ecb_ret:
587	pxor	xmm0,xmm0
588	pxor	xmm1,xmm1
589	pxor	xmm2,xmm2
590	pxor	xmm3,xmm3
591	pxor	xmm4,xmm4
592	pxor	xmm5,xmm5
593	pxor	xmm6,xmm6
594	pxor	xmm7,xmm7
595	pop	edi
596	pop	esi
597	pop	ebx
598	pop	ebp
599	ret
; _aes_hw_ccm64_encrypt_blocks: for each 16-byte input block, encrypts a
; counter block and a running MAC block side by side, XORs the encrypted
; counter into the input to produce the output, and advances the counter.
; Stack arguments (offsets as read after the four register pushes):
;   [20+esp] = input pointer        -> esi
;   [24+esp] = output pointer       -> edi
;   [28+esp] = number of blocks     -> eax (loop counter, tested after each block)
;   [32+esp] = key schedule         -> edx (round count read from offset 240)
;   [36+esp] = 16-byte counter block pointer -> ebx (loaded into xmm7)
;   [40+esp] = 16-byte MAC block pointer     -> ecx (loaded into xmm3; the
;              final xmm3 is written back through this pointer)
; NOTE(review): the loop is do-while shaped, so a block count of 0 is not
; handled here -- presumably callers never pass 0; confirm.
; Stack frame (16-byte aligned): [0..15] byte-reversal shuffle mask,
; [16..31] the 128-bit constant 1, [48] caller's esp.
600global	_aes_hw_ccm64_encrypt_blocks
601align	16
602_aes_hw_ccm64_encrypt_blocks:
603L$_aes_hw_ccm64_encrypt_blocks_begin:
604	push	ebp
605	push	ebx
606	push	esi
607	push	edi
608	mov	esi,DWORD [20+esp]
609	mov	edi,DWORD [24+esp]
610	mov	eax,DWORD [28+esp]
611	mov	edx,DWORD [32+esp]
612	mov	ebx,DWORD [36+esp]
613	mov	ecx,DWORD [40+esp]
614	mov	ebp,esp	; remember the caller's esp
615	sub	esp,60
616	and	esp,-16	; align the frame so movdqa/paddq can use it
617	mov	DWORD [48+esp],ebp
618	movdqu	xmm7,[ebx]	; xmm7 = counter block
619	movdqu	xmm3,[ecx]	; xmm3 = MAC block
620	mov	ecx,DWORD [240+edx]	; ecx = round count
621	mov	DWORD [esp],202182159	; 0x0C0D0E0F \
622	mov	DWORD [4+esp],134810123	; 0x08090A0B  | pshufb mask: bytes 15..0,
623	mov	DWORD [8+esp],67438087	; 0x04050607  | i.e. full 16-byte reversal
624	mov	DWORD [12+esp],66051	; 0x00010203 /
625	mov	ebx,1
626	xor	ebp,ebp
627	mov	DWORD [16+esp],ebx	; [16..31+esp] = 128-bit value 1
628	mov	DWORD [20+esp],ebp
629	mov	DWORD [24+esp],ebp
630	mov	DWORD [28+esp],ebp
631	shl	ecx,4	; ecx = 16*rounds
632	mov	ebx,16
633	lea	ebp,[edx]	; ebp = key pointer
634	movdqa	xmm5,[esp]	; xmm5 = byte-reversal mask
635	movdqa	xmm2,xmm7	; xmm2 = counter block in its original byte order
636	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*rounds
637	sub	ebx,ecx	; ebx = 16 - 16*rounds (initial round-key index)
638db	102,15,56,0,253	; pshufb xmm7,xmm5: byte-reverse so paddq can increment it
639L$031ccm64_enc_outer:
640	movups	xmm0,[ebp]	; round key 0
641	mov	ecx,ebx
642	movups	xmm6,[esi]	; xmm6 = input block
643	xorps	xmm2,xmm0	; whiten the counter block
644	movups	xmm1,[16+ebp]	; round key 1
645	xorps	xmm0,xmm6	; xmm0 = round key 0 ^ input
646	xorps	xmm3,xmm0	; MAC ^= input, whitening folded in
647	movups	xmm0,[32+ebp]	; round key 2
648L$032ccm64_enc2_loop:
649db	102,15,56,220,209	; aesenc xmm2,xmm1
650db	102,15,56,220,217	; aesenc xmm3,xmm1
651	movups	xmm1,[ecx*1+edx]
652	add	ecx,32	; ZF from this add drives the jnz below
653db	102,15,56,220,208	; aesenc xmm2,xmm0
654db	102,15,56,220,216	; aesenc xmm3,xmm0
655	movups	xmm0,[ecx*1+edx-16]
656	jnz	NEAR L$032ccm64_enc2_loop
657db	102,15,56,220,209	; aesenc xmm2,xmm1
658db	102,15,56,220,217	; aesenc xmm3,xmm1
659	paddq	xmm7,[16+esp]	; counter += 1 (on the byte-reversed copy)
660	dec	eax	; block counter; its ZF is consumed by the jnz at the end
661db	102,15,56,221,208	; aesenclast xmm2,xmm0
662db	102,15,56,221,216	; aesenclast xmm3,xmm0
663	lea	esi,[16+esi]
664	xorps	xmm6,xmm2	; output = input ^ encrypted counter
665	movdqa	xmm2,xmm7
666	movups	[edi],xmm6
667db	102,15,56,0,213	; pshufb xmm2,xmm5: next counter back in original byte order
668	lea	edi,[16+edi]
669	jnz	NEAR L$031ccm64_enc_outer	; nothing since dec eax has modified flags
670	mov	esp,DWORD [48+esp]	; restore the caller's esp
671	mov	edi,DWORD [40+esp]	; MAC block pointer argument
672	movups	[edi],xmm3	; write back the updated MAC block
673	pxor	xmm0,xmm0
674	pxor	xmm1,xmm1
675	pxor	xmm2,xmm2
676	pxor	xmm3,xmm3
677	pxor	xmm4,xmm4
678	pxor	xmm5,xmm5
679	pxor	xmm6,xmm6
680	pxor	xmm7,xmm7
681	pop	edi
682	pop	esi
683	pop	ebx
684	pop	ebp
685	ret
; _aes_hw_ccm64_decrypt_blocks: for each 16-byte input block, XORs it with the
; encrypted counter block to produce the output, folds that output into the
; running MAC block, and advances the counter. Encryption of the next counter
; and of the MAC block run side by side; the last MAC update is done alone at
; L$035ccm64_dec_break.
; Stack arguments (offsets as read after the four register pushes):
;   [20+esp] = input pointer        -> esi
;   [24+esp] = output pointer       -> edi
;   [28+esp] = number of blocks     -> eax
;   [32+esp] = key schedule         -> edx (round count read from offset 240)
;   [36+esp] = 16-byte counter block pointer -> ebx (loaded into xmm7)
;   [40+esp] = 16-byte MAC block pointer     -> ecx (loaded into xmm3; the
;              final xmm3 is written back through this pointer)
; Stack frame (16-byte aligned): [0..15] byte-reversal shuffle mask,
; [16..31] the 128-bit constant 1, [48] caller's esp.
; Only the encrypt direction of the cipher is used (aesenc/aesenclast).
686global	_aes_hw_ccm64_decrypt_blocks
687align	16
688_aes_hw_ccm64_decrypt_blocks:
689L$_aes_hw_ccm64_decrypt_blocks_begin:
690	push	ebp
691	push	ebx
692	push	esi
693	push	edi
694	mov	esi,DWORD [20+esp]
695	mov	edi,DWORD [24+esp]
696	mov	eax,DWORD [28+esp]
697	mov	edx,DWORD [32+esp]
698	mov	ebx,DWORD [36+esp]
699	mov	ecx,DWORD [40+esp]
700	mov	ebp,esp	; remember the caller's esp
701	sub	esp,60
702	and	esp,-16	; align the frame so movdqa/paddq can use it
703	mov	DWORD [48+esp],ebp
704	movdqu	xmm7,[ebx]	; xmm7 = counter block
705	movdqu	xmm3,[ecx]	; xmm3 = MAC block
706	mov	ecx,DWORD [240+edx]	; ecx = round count
707	mov	DWORD [esp],202182159	; 0x0C0D0E0F \
708	mov	DWORD [4+esp],134810123	; 0x08090A0B  | pshufb mask: bytes 15..0,
709	mov	DWORD [8+esp],67438087	; 0x04050607  | i.e. full 16-byte reversal
710	mov	DWORD [12+esp],66051	; 0x00010203 /
711	mov	ebx,1
712	xor	ebp,ebp
713	mov	DWORD [16+esp],ebx	; [16..31+esp] = 128-bit value 1
714	mov	DWORD [20+esp],ebp
715	mov	DWORD [24+esp],ebp
716	mov	DWORD [28+esp],ebp
717	movdqa	xmm5,[esp]	; xmm5 = byte-reversal mask
718	movdqa	xmm2,xmm7	; xmm2 = counter block in its original byte order
719	mov	ebp,edx	; ebp = key pointer
720	mov	ebx,ecx	; ebx = round count
721db	102,15,56,0,253	; pshufb xmm7,xmm5: byte-reverse so paddq can increment it
; Encrypt the first counter block on its own.
722	movups	xmm0,[edx]
723	movups	xmm1,[16+edx]
724	lea	edx,[32+edx]
725	xorps	xmm2,xmm0
726L$033enc1_loop_5:
727db	102,15,56,220,209	; aesenc xmm2,xmm1
728	dec	ecx
729	movups	xmm1,[edx]
730	lea	edx,[16+edx]
731	jnz	NEAR L$033enc1_loop_5	; ZF from dec ecx
732db	102,15,56,221,209	; aesenclast xmm2,xmm1
733	shl	ebx,4	; ebx = 16*rounds
734	mov	ecx,16
735	movups	xmm6,[esi]	; xmm6 = first input block
736	paddq	xmm7,[16+esp]	; counter += 1 (on the byte-reversed copy)
737	lea	esi,[16+esi]
738	sub	ecx,ebx	; ecx = 16 - 16*rounds
739	lea	edx,[32+ebx*1+ebp]	; edx = key + 32 + 16*rounds
740	mov	ebx,ecx	; ebx = initial round-key index for the 2-way loop
741	jmp	NEAR L$034ccm64_dec_outer	; skips the alignment padding
742align	16
743L$034ccm64_dec_outer:
744	xorps	xmm6,xmm2	; output = input ^ encrypted counter
745	movdqa	xmm2,xmm7
746	movups	[edi],xmm6
747	lea	edi,[16+edi]
748db	102,15,56,0,213	; pshufb xmm2,xmm5: next counter back in original byte order
749	sub	eax,1
750	jz	NEAR L$035ccm64_dec_break	; last block: only the MAC update remains
751	movups	xmm0,[ebp]	; round key 0
752	mov	ecx,ebx
753	movups	xmm1,[16+ebp]	; round key 1
754	xorps	xmm6,xmm0	; xmm6 = output ^ round key 0
755	xorps	xmm2,xmm0	; whiten the counter block
756	xorps	xmm3,xmm6	; MAC ^= output, whitening folded in
757	movups	xmm0,[32+ebp]	; round key 2
758L$036ccm64_dec2_loop:
759db	102,15,56,220,209	; aesenc xmm2,xmm1
760db	102,15,56,220,217	; aesenc xmm3,xmm1
761	movups	xmm1,[ecx*1+edx]
762	add	ecx,32	; ZF from this add drives the jnz below
763db	102,15,56,220,208	; aesenc xmm2,xmm0
764db	102,15,56,220,216	; aesenc xmm3,xmm0
765	movups	xmm0,[ecx*1+edx-16]
766	jnz	NEAR L$036ccm64_dec2_loop
767	movups	xmm6,[esi]	; next input block
768	paddq	xmm7,[16+esp]	; counter += 1
769db	102,15,56,220,209	; aesenc xmm2,xmm1
770db	102,15,56,220,217	; aesenc xmm3,xmm1
771db	102,15,56,221,208	; aesenclast xmm2,xmm0
772db	102,15,56,221,216	; aesenclast xmm3,xmm0
773	lea	esi,[16+esi]
774	jmp	NEAR L$034ccm64_dec_outer
775align	16
; Final MAC update: fold the last output block into xmm3 and encrypt xmm3 alone.
776L$035ccm64_dec_break:
777	mov	ecx,DWORD [240+ebp]	; reload the round count
778	mov	edx,ebp
779	movups	xmm0,[edx]
780	movups	xmm1,[16+edx]
781	xorps	xmm6,xmm0
782	lea	edx,[32+edx]
783	xorps	xmm3,xmm6
784L$037enc1_loop_6:
785db	102,15,56,220,217	; aesenc xmm3,xmm1
786	dec	ecx
787	movups	xmm1,[edx]
788	lea	edx,[16+edx]
789	jnz	NEAR L$037enc1_loop_6	; ZF from dec ecx
790db	102,15,56,221,217	; aesenclast xmm3,xmm1
791	mov	esp,DWORD [48+esp]	; restore the caller's esp
792	mov	edi,DWORD [40+esp]	; MAC block pointer argument
793	movups	[edi],xmm3	; write back the updated MAC block
794	pxor	xmm0,xmm0
795	pxor	xmm1,xmm1
796	pxor	xmm2,xmm2
797	pxor	xmm3,xmm3
798	pxor	xmm4,xmm4
799	pxor	xmm5,xmm5
800	pxor	xmm6,xmm6
801	pxor	xmm7,xmm7
802	pop	edi
803	pop	esi
804	pop	ebx
805	pop	ebp
806	ret
; _aes_hw_ctr32_encrypt_blocks: XORs each 16-byte input block with the
; encryption of a counter block whose last 32-bit word is a big-endian counter
; incremented per block (only that dword changes; paddd/inc wrap at 32 bits).
; Stack arguments (offsets as read after the four register pushes):
;   [20+esp] = input pointer       -> esi
;   [24+esp] = output pointer      -> edi
;   [28+esp] = number of blocks    -> eax
;   [32+esp] = key schedule        -> edx (round count read from offset 240)
;   [36+esp] = 16-byte initial counter block pointer -> ebx
; Stack frame (16-byte aligned):
;   [0..15]  byte-reversal shuffle mask
;   [16..31] dwords {6,6,6,0}: per-iteration counter increment
;   [32..47] counter block with its counter dword zeroed, XORed with round key 0
;   [48..63] host-order counters {n, n+1, n+2, 0}
;   [64..79] host-order counters {n+3, n+4, n+5, 0}
;   [80]     caller's esp
; Six blocks are processed per main-loop iteration; 1-5 remaining blocks are
; dispatched to the 1/2/3/4/6-way code. [32..79+esp] and xmm0-xmm7 are wiped
; on exit.
807global	_aes_hw_ctr32_encrypt_blocks
808align	16
809_aes_hw_ctr32_encrypt_blocks:
810L$_aes_hw_ctr32_encrypt_blocks_begin:
811	push	ebp
812	push	ebx
813	push	esi
814	push	edi
; Test hook: store 1 into byte 0 of _BORINGSSL_function_hit; ebx and edx are
; saved and restored around it.
815%ifdef BORINGSSL_DISPATCH_TEST
816	push	ebx
817	push	edx
818	call	L$038pic_for_function_hit
819L$038pic_for_function_hit:
820	pop	ebx
821	lea	ebx,[(_BORINGSSL_function_hit+0-L$038pic_for_function_hit)+ebx]
822	mov	edx,1
823	mov	BYTE [ebx],dl
824	pop	edx
825	pop	ebx
826%endif
827	mov	esi,DWORD [20+esp]
828	mov	edi,DWORD [24+esp]
829	mov	eax,DWORD [28+esp]
830	mov	edx,DWORD [32+esp]
831	mov	ebx,DWORD [36+esp]
832	mov	ebp,esp	; remember the caller's esp
833	sub	esp,88
834	and	esp,-16	; align the frame for movdqa
835	mov	DWORD [80+esp],ebp
836	cmp	eax,1
837	je	NEAR L$039ctr32_one_shortcut	; single block: skip all counter setup
838	movdqu	xmm7,[ebx]	; xmm7 = initial counter block
839	mov	DWORD [esp],202182159	; 0x0C0D0E0F \
840	mov	DWORD [4+esp],134810123	; 0x08090A0B  | pshufb mask: bytes 15..0,
841	mov	DWORD [8+esp],67438087	; 0x04050607  | i.e. full 16-byte reversal
842	mov	DWORD [12+esp],66051	; 0x00010203 /
843	mov	ecx,6
844	xor	ebp,ebp
845	mov	DWORD [16+esp],ecx	; [16..31+esp] = {6,6,6,0}
846	mov	DWORD [20+esp],ecx
847	mov	DWORD [24+esp],ecx
848	mov	DWORD [28+esp],ebp
849db	102,15,58,22,251,3	; pextrd ebx,xmm7,3: ebx = counter dword as stored
850db	102,15,58,34,253,3	; pinsrd xmm7,ebp,3: zero the counter dword (ebp = 0)
851	mov	ecx,DWORD [240+edx]	; ecx = round count
852	bswap	ebx	; counter in host byte order
853	pxor	xmm0,xmm0
854	pxor	xmm1,xmm1
855	movdqa	xmm2,[esp]	; xmm2 = byte-reversal mask
; Build xmm0 = {n, n+1, n+2, 0} and xmm1 = {n+3, n+4, n+5, 0} (host order).
856db	102,15,58,34,195,0	; pinsrd xmm0,ebx,0
857	lea	ebp,[3+ebx]
858db	102,15,58,34,205,0	; pinsrd xmm1,ebp,0
859	inc	ebx
860db	102,15,58,34,195,1	; pinsrd xmm0,ebx,1
861	inc	ebp
862db	102,15,58,34,205,1	; pinsrd xmm1,ebp,1
863	inc	ebx
864db	102,15,58,34,195,2	; pinsrd xmm0,ebx,2
865	inc	ebp
866db	102,15,58,34,205,2	; pinsrd xmm1,ebp,2
867	movdqa	[48+esp],xmm0	; keep host-order copies for the next increment
868db	102,15,56,0,194	; pshufb xmm0,xmm2: reverse all 16 bytes
869	movdqu	xmm6,[edx]	; xmm6 = round key 0
870	movdqa	[64+esp],xmm1
871db	102,15,56,0,202	; pshufb xmm1,xmm2: reverse all 16 bytes
; After the reversal each counter is big-endian and sits in a higher dword;
; pshufd with 192/128/64 copies dword 3/2/1 of the source into dword 3 of the
; destination (the other dwords receive source dword 0, which is zero).
872	pshufd	xmm2,xmm0,192	; counter n in dword 3
873	pshufd	xmm3,xmm0,128	; counter n+1 in dword 3
874	cmp	eax,6
875	jb	NEAR L$040ctr32_tail
876	pxor	xmm7,xmm6	; fold round key 0 into the constant part
877	shl	ecx,4	; ecx = 16*rounds
878	mov	ebx,16
879	movdqa	[32+esp],xmm7
880	mov	ebp,edx	; ebp = key pointer
881	sub	ebx,ecx	; ebx = 16 - 16*rounds (initial round-key index)
882	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*rounds
883	sub	eax,6
884	jmp	NEAR L$041ctr32_loop6	; skips the alignment padding
885align	16
886L$041ctr32_loop6:
887	pshufd	xmm4,xmm0,64	; counter n+2 in dword 3
888	movdqa	xmm0,[32+esp]	; constant part ^ round key 0
889	pshufd	xmm5,xmm1,192	; counter n+3
890	pxor	xmm2,xmm0	; merge counter with constant part (includes whitening)
891	pshufd	xmm6,xmm1,128	; counter n+4
892	pxor	xmm3,xmm0
893	pshufd	xmm7,xmm1,64	; counter n+5
894	movups	xmm1,[16+ebp]	; round key 1
895	pxor	xmm4,xmm0
896	pxor	xmm5,xmm0
897db	102,15,56,220,209	; aesenc xmm2,xmm1
898	pxor	xmm6,xmm0
899	pxor	xmm7,xmm0
900db	102,15,56,220,217	; aesenc xmm3,xmm1
901	movups	xmm0,[32+ebp]	; round key 2
902	mov	ecx,ebx
903db	102,15,56,220,225	; aesenc xmm4,xmm1
904db	102,15,56,220,233	; aesenc xmm5,xmm1
905db	102,15,56,220,241	; aesenc xmm6,xmm1
906db	102,15,56,220,249	; aesenc xmm7,xmm1
907	call	L$_aesni_encrypt6_enter	; remaining rounds for all six blocks
908	movups	xmm1,[esi]
909	movups	xmm0,[16+esi]
910	xorps	xmm2,xmm1	; output = input ^ encrypted counter
911	movups	xmm1,[32+esi]
912	xorps	xmm3,xmm0
913	movups	[edi],xmm2
914	movdqa	xmm0,[16+esp]	; {6,6,6,0}
915	xorps	xmm4,xmm1
916	movdqa	xmm1,[64+esp]
917	movups	[16+edi],xmm3
918	movups	[32+edi],xmm4
919	paddd	xmm1,xmm0	; advance host-order counters by 6
920	paddd	xmm0,[48+esp]
921	movdqa	xmm2,[esp]	; byte-reversal mask
922	movups	xmm3,[48+esi]
923	movups	xmm4,[64+esi]
924	xorps	xmm5,xmm3
925	movups	xmm3,[80+esi]
926	lea	esi,[96+esi]
927	movdqa	[48+esp],xmm0
928db	102,15,56,0,194	; pshufb xmm0,xmm2
929	xorps	xmm6,xmm4
930	movups	[48+edi],xmm5
931	xorps	xmm7,xmm3
932	movdqa	[64+esp],xmm1
933db	102,15,56,0,202	; pshufb xmm1,xmm2
934	movups	[64+edi],xmm6
935	pshufd	xmm2,xmm0,192
936	movups	[80+edi],xmm7
937	lea	edi,[96+edi]
938	pshufd	xmm3,xmm0,128
939	sub	eax,6
940	jnc	NEAR L$041ctr32_loop6	; continue while six more blocks were available
941	add	eax,6	; eax = remaining blocks (0..5)
942	jz	NEAR L$042ctr32_ret
943	movdqu	xmm7,[ebp]	; round key 0
944	mov	edx,ebp	; restore key pointer for the helpers
945	pxor	xmm7,[32+esp]	; undo the folded whitening: xmm7 = constant part
946	mov	ecx,DWORD [240+ebp]	; restore round count
; Tail: eax is 1..5. por merges each counter with the constant part in xmm7.
; Each cmp feeds both the jb and the following je (pshufd/por leave flags intact).
947L$040ctr32_tail:
948	por	xmm2,xmm7
949	cmp	eax,2
950	jb	NEAR L$043ctr32_one
951	pshufd	xmm4,xmm0,64
952	por	xmm3,xmm7
953	je	NEAR L$044ctr32_two
954	pshufd	xmm5,xmm1,192
955	por	xmm4,xmm7
956	cmp	eax,4
957	jb	NEAR L$045ctr32_three
958	pshufd	xmm6,xmm1,128
959	por	xmm5,xmm7
960	je	NEAR L$046ctr32_four
961	por	xmm6,xmm7
962	call	__aesni_encrypt6	; five blocks; the xmm7 lane result is not stored
963	movups	xmm1,[esi]
964	movups	xmm0,[16+esi]
965	xorps	xmm2,xmm1
966	movups	xmm1,[32+esi]
967	xorps	xmm3,xmm0
968	movups	xmm0,[48+esi]
969	xorps	xmm4,xmm1
970	movups	xmm1,[64+esi]
971	xorps	xmm5,xmm0
972	movups	[edi],xmm2
973	xorps	xmm6,xmm1
974	movups	[16+edi],xmm3
975	movups	[32+edi],xmm4
976	movups	[48+edi],xmm5
977	movups	[64+edi],xmm6
978	jmp	NEAR L$042ctr32_ret
979align	16
980L$039ctr32_one_shortcut:
981	movups	xmm2,[ebx]	; use the caller's counter block as-is
982	mov	ecx,DWORD [240+edx]
983L$043ctr32_one:
984	movups	xmm0,[edx]
985	movups	xmm1,[16+edx]
986	lea	edx,[32+edx]
987	xorps	xmm2,xmm0
988L$047enc1_loop_7:
989db	102,15,56,220,209	; aesenc xmm2,xmm1
990	dec	ecx
991	movups	xmm1,[edx]
992	lea	edx,[16+edx]
993	jnz	NEAR L$047enc1_loop_7	; ZF from dec ecx
994db	102,15,56,221,209	; aesenclast xmm2,xmm1
995	movups	xmm6,[esi]
996	xorps	xmm6,xmm2
997	movups	[edi],xmm6
998	jmp	NEAR L$042ctr32_ret
999align	16
1000L$044ctr32_two:
1001	call	__aesni_encrypt2
1002	movups	xmm5,[esi]
1003	movups	xmm6,[16+esi]
1004	xorps	xmm2,xmm5
1005	xorps	xmm3,xmm6
1006	movups	[edi],xmm2
1007	movups	[16+edi],xmm3
1008	jmp	NEAR L$042ctr32_ret
1009align	16
1010L$045ctr32_three:
1011	call	__aesni_encrypt3
1012	movups	xmm5,[esi]
1013	movups	xmm6,[16+esi]
1014	xorps	xmm2,xmm5
1015	movups	xmm7,[32+esi]
1016	xorps	xmm3,xmm6
1017	movups	[edi],xmm2
1018	xorps	xmm4,xmm7
1019	movups	[16+edi],xmm3
1020	movups	[32+edi],xmm4
1021	jmp	NEAR L$042ctr32_ret
1022align	16
1023L$046ctr32_four:
1024	call	__aesni_encrypt4
1025	movups	xmm6,[esi]
1026	movups	xmm7,[16+esi]
1027	movups	xmm1,[32+esi]
1028	xorps	xmm2,xmm6
1029	movups	xmm0,[48+esi]
1030	xorps	xmm3,xmm7
1031	movups	[edi],xmm2
1032	xorps	xmm4,xmm1
1033	movups	[16+edi],xmm3
1034	xorps	xmm5,xmm0
1035	movups	[32+edi],xmm4
1036	movups	[48+edi],xmm5
; Common exit: wipe xmm0-xmm7 and the counter scratch at [32..79+esp], then
; restore the caller's esp and saved registers.
1037L$042ctr32_ret:
1038	pxor	xmm0,xmm0
1039	pxor	xmm1,xmm1
1040	pxor	xmm2,xmm2
1041	pxor	xmm3,xmm3
1042	pxor	xmm4,xmm4
1043	movdqa	[32+esp],xmm0
1044	pxor	xmm5,xmm5
1045	movdqa	[48+esp],xmm0
1046	pxor	xmm6,xmm6
1047	movdqa	[64+esp],xmm0
1048	pxor	xmm7,xmm7
1049	mov	esp,DWORD [80+esp]
1050	pop	edi
1051	pop	esi
1052	pop	ebx
1053	pop	ebp
1054	ret
; _aes_hw_xts_encrypt: tweaked block encryption. Each 16-byte block is XORed
; with a per-block tweak, encrypted, and XORed with the same tweak again.
; Stack arguments (offsets as read after the four register pushes):
;   [20+esp] = input pointer      -> esi
;   [24+esp] = output pointer     -> edi
;   [28+esp] = length in bytes    -> eax
;   [32+esp] = data key schedule  -> edx (round count at offset 240)
;   [36+esp] = tweak key schedule (round count at offset 240)
;   [40+esp] = pointer to the 16-byte block encrypted with the tweak key to
;              form the first tweak
; Stack frame (16-byte aligned):
;   [0..95]    up to six saved tweaks
;   [96..111]  dwords {0x87, 0, 1, 0} used by the tweak update
;   [112]      byte length (later reduced to length mod 16)
;   [116]      caller's esp
; Tweak update idiom (appears many times below): xmm0 holds the per-dword sign
; mask of the current tweak (pcmpgtd against zero); pshufd ...,19 + pand with
; [96+esp] turns it into 0x87 in dword 0 if bit 127 was set and 1 in dword 2 if
; bit 63 was set; paddq doubles both 64-bit halves; pxor applies the carry and
; the 0x87 feedback. Net effect: the 128-bit tweak is shifted left by one bit
; with 0x87 XORed in on overflow.
; A trailing partial block (length mod 16 != 0) is handled at
; L$059xts_enc_steal by swapping bytes with the last full output block and
; re-encrypting that block.
; NOTE(review): the stealing path reads [edi-16], so it presumably requires
; length >= 16 -- confirm against callers.
; [0..95+esp] and xmm0-xmm7 are wiped on exit.
1055global	_aes_hw_xts_encrypt
1056align	16
1057_aes_hw_xts_encrypt:
1058L$_aes_hw_xts_encrypt_begin:
1059	push	ebp
1060	push	ebx
1061	push	esi
1062	push	edi
; Compute the initial tweak: encrypt the block at [40+esp] with the key at [36+esp].
1063	mov	edx,DWORD [36+esp]
1064	mov	esi,DWORD [40+esp]
1065	mov	ecx,DWORD [240+edx]
1066	movups	xmm2,[esi]
1067	movups	xmm0,[edx]
1068	movups	xmm1,[16+edx]
1069	lea	edx,[32+edx]
1070	xorps	xmm2,xmm0
1071L$048enc1_loop_8:
1072db	102,15,56,220,209	; aesenc xmm2,xmm1
1073	dec	ecx
1074	movups	xmm1,[edx]
1075	lea	edx,[16+edx]
1076	jnz	NEAR L$048enc1_loop_8	; ZF from dec ecx
1077db	102,15,56,221,209	; aesenclast xmm2,xmm1
1078	mov	esi,DWORD [20+esp]
1079	mov	edi,DWORD [24+esp]
1080	mov	eax,DWORD [28+esp]
1081	mov	edx,DWORD [32+esp]
1082	mov	ebp,esp	; remember the caller's esp
1083	sub	esp,120
1084	mov	ecx,DWORD [240+edx]	; ecx = round count of the data key
1085	and	esp,-16	; align the frame for movdqa
1086	mov	DWORD [96+esp],135	; 135 = 0x87
1087	mov	DWORD [100+esp],0
1088	mov	DWORD [104+esp],1
1089	mov	DWORD [108+esp],0
1090	mov	DWORD [112+esp],eax	; keep the full byte length
1091	mov	DWORD [116+esp],ebp
1092	movdqa	xmm1,xmm2	; xmm1 = current tweak
1093	pxor	xmm0,xmm0
1094	movdqa	xmm3,[96+esp]	; xmm3 = {0x87,0,1,0}
1095	pcmpgtd	xmm0,xmm1	; xmm0 = sign mask of each tweak dword
1096	and	eax,-16	; whole blocks only
1097	mov	ebp,edx	; ebp = data key pointer
1098	mov	ebx,ecx	; ebx = round count
1099	sub	eax,96
1100	jc	NEAR L$049xts_enc_short	; fewer than six whole blocks
1101	shl	ecx,4	; ecx = 16*rounds
1102	mov	ebx,16
1103	sub	ebx,ecx	; ebx = 16 - 16*rounds (initial round-key index)
1104	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*rounds
1105	jmp	NEAR L$050xts_enc_loop6	; skips the alignment padding
1106align	16
; Main loop: derive six consecutive tweaks ([0..64+esp] and xmm7 -> [80+esp]),
; encrypt six blocks, then advance the tweak once more for the next iteration.
1107L$050xts_enc_loop6:
1108	pshufd	xmm2,xmm0,19
1109	pxor	xmm0,xmm0
1110	movdqa	[esp],xmm1	; tweak 0
1111	paddq	xmm1,xmm1
1112	pand	xmm2,xmm3
1113	pcmpgtd	xmm0,xmm1
1114	pxor	xmm1,xmm2
1115	pshufd	xmm2,xmm0,19
1116	pxor	xmm0,xmm0
1117	movdqa	[16+esp],xmm1	; tweak 1
1118	paddq	xmm1,xmm1
1119	pand	xmm2,xmm3
1120	pcmpgtd	xmm0,xmm1
1121	pxor	xmm1,xmm2
1122	pshufd	xmm2,xmm0,19
1123	pxor	xmm0,xmm0
1124	movdqa	[32+esp],xmm1	; tweak 2
1125	paddq	xmm1,xmm1
1126	pand	xmm2,xmm3
1127	pcmpgtd	xmm0,xmm1
1128	pxor	xmm1,xmm2
1129	pshufd	xmm2,xmm0,19
1130	pxor	xmm0,xmm0
1131	movdqa	[48+esp],xmm1	; tweak 3
1132	paddq	xmm1,xmm1
1133	pand	xmm2,xmm3
1134	pcmpgtd	xmm0,xmm1
1135	pxor	xmm1,xmm2
1136	pshufd	xmm7,xmm0,19
1137	movdqa	[64+esp],xmm1	; tweak 4
1138	paddq	xmm1,xmm1
1139	movups	xmm0,[ebp]	; round key 0
1140	pand	xmm7,xmm3
1141	movups	xmm2,[esi]
1142	pxor	xmm7,xmm1	; xmm7 = tweak 5
1143	mov	ecx,ebx
1144	movdqu	xmm3,[16+esi]
1145	xorps	xmm2,xmm0	; whitening of each input with round key 0
1146	movdqu	xmm4,[32+esi]
1147	pxor	xmm3,xmm0
1148	movdqu	xmm5,[48+esi]
1149	pxor	xmm4,xmm0
1150	movdqu	xmm6,[64+esi]
1151	pxor	xmm5,xmm0
1152	movdqu	xmm1,[80+esi]
1153	pxor	xmm6,xmm0
1154	lea	esi,[96+esi]
1155	pxor	xmm2,[esp]	; apply tweaks to the inputs
1156	movdqa	[80+esp],xmm7	; save tweak 5
1157	pxor	xmm7,xmm1	; xmm7 = sixth input ^ tweak 5
1158	movups	xmm1,[16+ebp]	; round key 1
1159	pxor	xmm3,[16+esp]
1160	pxor	xmm4,[32+esp]
1161db	102,15,56,220,209	; aesenc xmm2,xmm1
1162	pxor	xmm5,[48+esp]
1163	pxor	xmm6,[64+esp]
1164db	102,15,56,220,217	; aesenc xmm3,xmm1
1165	pxor	xmm7,xmm0
1166	movups	xmm0,[32+ebp]	; round key 2
1167db	102,15,56,220,225	; aesenc xmm4,xmm1
1168db	102,15,56,220,233	; aesenc xmm5,xmm1
1169db	102,15,56,220,241	; aesenc xmm6,xmm1
1170db	102,15,56,220,249	; aesenc xmm7,xmm1
1171	call	L$_aesni_encrypt6_enter	; remaining rounds for all six blocks
1172	movdqa	xmm1,[80+esp]	; tweak 5
1173	pxor	xmm0,xmm0
1174	xorps	xmm2,[esp]	; apply tweaks to the outputs
1175	pcmpgtd	xmm0,xmm1
1176	xorps	xmm3,[16+esp]
1177	movups	[edi],xmm2
1178	xorps	xmm4,[32+esp]
1179	movups	[16+edi],xmm3
1180	xorps	xmm5,[48+esp]
1181	movups	[32+edi],xmm4
1182	xorps	xmm6,[64+esp]
1183	movups	[48+edi],xmm5
1184	xorps	xmm7,xmm1
1185	movups	[64+edi],xmm6
1186	pshufd	xmm2,xmm0,19
1187	movups	[80+edi],xmm7
1188	lea	edi,[96+edi]
1189	movdqa	xmm3,[96+esp]	; reload {0x87,0,1,0}; xmm3 was used for data
1190	pxor	xmm0,xmm0
1191	paddq	xmm1,xmm1
1192	pand	xmm2,xmm3
1193	pcmpgtd	xmm0,xmm1
1194	pxor	xmm1,xmm2	; xmm1 = first tweak of the next group
1195	sub	eax,96
1196	jnc	NEAR L$050xts_enc_loop6
1197	mov	ecx,DWORD [240+ebp]	; restore round count and key pointer
1198	mov	edx,ebp
1199	mov	ebx,ecx
; Fewer than six whole blocks remain: eax+96 is the remaining whole-block bytes.
1200L$049xts_enc_short:
1201	add	eax,96
1202	jz	NEAR L$051xts_enc_done6x
1203	movdqa	xmm5,xmm1	; xmm5 = tweak for the first remaining block
1204	cmp	eax,32
1205	jb	NEAR L$052xts_enc_one
1206	pshufd	xmm2,xmm0,19
1207	pxor	xmm0,xmm0
1208	paddq	xmm1,xmm1
1209	pand	xmm2,xmm3
1210	pcmpgtd	xmm0,xmm1
1211	pxor	xmm1,xmm2
1212	je	NEAR L$053xts_enc_two	; flags still from cmp eax,32 (SSE ops leave flags intact)
1213	pshufd	xmm2,xmm0,19
1214	pxor	xmm0,xmm0
1215	movdqa	xmm6,xmm1	; xmm6 = second tweak
1216	paddq	xmm1,xmm1
1217	pand	xmm2,xmm3
1218	pcmpgtd	xmm0,xmm1
1219	pxor	xmm1,xmm2
1220	cmp	eax,64
1221	jb	NEAR L$054xts_enc_three
1222	pshufd	xmm2,xmm0,19
1223	pxor	xmm0,xmm0
1224	movdqa	xmm7,xmm1	; xmm7 = third tweak
1225	paddq	xmm1,xmm1
1226	pand	xmm2,xmm3
1227	pcmpgtd	xmm0,xmm1
1228	pxor	xmm1,xmm2
1229	movdqa	[esp],xmm5
1230	movdqa	[16+esp],xmm6
1231	je	NEAR L$055xts_enc_four	; flags still from cmp eax,64
; Five remaining blocks: run the 6-way helper; the xmm7 lane is not stored.
1232	movdqa	[32+esp],xmm7
1233	pshufd	xmm7,xmm0,19
1234	movdqa	[48+esp],xmm1
1235	paddq	xmm1,xmm1
1236	pand	xmm7,xmm3
1237	pxor	xmm7,xmm1	; xmm7 = fifth tweak
1238	movdqu	xmm2,[esi]
1239	movdqu	xmm3,[16+esi]
1240	movdqu	xmm4,[32+esi]
1241	pxor	xmm2,[esp]
1242	movdqu	xmm5,[48+esi]
1243	pxor	xmm3,[16+esp]
1244	movdqu	xmm6,[64+esi]
1245	pxor	xmm4,[32+esp]
1246	lea	esi,[80+esi]
1247	pxor	xmm5,[48+esp]
1248	movdqa	[64+esp],xmm7
1249	pxor	xmm6,xmm7
1250	call	__aesni_encrypt6
1251	movaps	xmm1,[64+esp]	; xmm1 = last tweak used
1252	xorps	xmm2,[esp]
1253	xorps	xmm3,[16+esp]
1254	xorps	xmm4,[32+esp]
1255	movups	[edi],xmm2
1256	xorps	xmm5,[48+esp]
1257	movups	[16+edi],xmm3
1258	xorps	xmm6,xmm1
1259	movups	[32+edi],xmm4
1260	movups	[48+edi],xmm5
1261	movups	[64+edi],xmm6
1262	lea	edi,[80+edi]
1263	jmp	NEAR L$056xts_enc_done
1264align	16
1265L$052xts_enc_one:
1266	movups	xmm2,[esi]
1267	lea	esi,[16+esi]
1268	xorps	xmm2,xmm5	; apply tweak to the input
1269	movups	xmm0,[edx]
1270	movups	xmm1,[16+edx]
1271	lea	edx,[32+edx]
1272	xorps	xmm2,xmm0
1273L$057enc1_loop_9:
1274db	102,15,56,220,209	; aesenc xmm2,xmm1
1275	dec	ecx
1276	movups	xmm1,[edx]
1277	lea	edx,[16+edx]
1278	jnz	NEAR L$057enc1_loop_9	; ZF from dec ecx
1279db	102,15,56,221,209	; aesenclast xmm2,xmm1
1280	xorps	xmm2,xmm5	; apply tweak to the output
1281	movups	[edi],xmm2
1282	lea	edi,[16+edi]
1283	movdqa	xmm1,xmm5	; xmm1 = last tweak used
1284	jmp	NEAR L$056xts_enc_done
1285align	16
1286L$053xts_enc_two:
1287	movaps	xmm6,xmm1	; xmm6 = second tweak
1288	movups	xmm2,[esi]
1289	movups	xmm3,[16+esi]
1290	lea	esi,[32+esi]
1291	xorps	xmm2,xmm5
1292	xorps	xmm3,xmm6
1293	call	__aesni_encrypt2
1294	xorps	xmm2,xmm5
1295	xorps	xmm3,xmm6
1296	movups	[edi],xmm2
1297	movups	[16+edi],xmm3
1298	lea	edi,[32+edi]
1299	movdqa	xmm1,xmm6	; xmm1 = last tweak used
1300	jmp	NEAR L$056xts_enc_done
1301align	16
1302L$054xts_enc_three:
1303	movaps	xmm7,xmm1	; xmm7 = third tweak
1304	movups	xmm2,[esi]
1305	movups	xmm3,[16+esi]
1306	movups	xmm4,[32+esi]
1307	lea	esi,[48+esi]
1308	xorps	xmm2,xmm5
1309	xorps	xmm3,xmm6
1310	xorps	xmm4,xmm7
1311	call	__aesni_encrypt3
1312	xorps	xmm2,xmm5
1313	xorps	xmm3,xmm6
1314	xorps	xmm4,xmm7
1315	movups	[edi],xmm2
1316	movups	[16+edi],xmm3
1317	movups	[32+edi],xmm4
1318	lea	edi,[48+edi]
1319	movdqa	xmm1,xmm7	; xmm1 = last tweak used
1320	jmp	NEAR L$056xts_enc_done
1321align	16
; Four blocks: tweaks 0/1 were saved at [esp]/[16+esp], tweak 2 is xmm7, and
; tweak 3 is the freshly advanced xmm1 (copied to xmm6; xmm5 is needed for data).
1322L$055xts_enc_four:
1323	movaps	xmm6,xmm1
1324	movups	xmm2,[esi]
1325	movups	xmm3,[16+esi]
1326	movups	xmm4,[32+esi]
1327	xorps	xmm2,[esp]
1328	movups	xmm5,[48+esi]
1329	lea	esi,[64+esi]
1330	xorps	xmm3,[16+esp]
1331	xorps	xmm4,xmm7
1332	xorps	xmm5,xmm6
1333	call	__aesni_encrypt4
1334	xorps	xmm2,[esp]
1335	xorps	xmm3,[16+esp]
1336	xorps	xmm4,xmm7
1337	movups	[edi],xmm2
1338	xorps	xmm5,xmm6
1339	movups	[16+edi],xmm3
1340	movups	[32+edi],xmm4
1341	movups	[48+edi],xmm5
1342	lea	edi,[64+edi]
1343	movdqa	xmm1,xmm6	; xmm1 = last tweak used
1344	jmp	NEAR L$056xts_enc_done
1345align	16
; No whole blocks left after the 6-way loop: xmm1 already holds the next
; unused tweak, so it is used directly for the partial-block step.
1346L$051xts_enc_done6x:
1347	mov	eax,DWORD [112+esp]
1348	and	eax,15	; eax = number of trailing bytes
1349	jz	NEAR L$058xts_enc_ret
1350	movdqa	xmm5,xmm1
1351	mov	DWORD [112+esp],eax
1352	jmp	NEAR L$059xts_enc_steal
1353align	16
; After a short tail: xmm1 holds the last tweak used, so advance it once to get
; the tweak for the partial-block step.
1354L$056xts_enc_done:
1355	mov	eax,DWORD [112+esp]
1356	pxor	xmm0,xmm0
1357	and	eax,15	; eax = number of trailing bytes
1358	jz	NEAR L$058xts_enc_ret
1359	pcmpgtd	xmm0,xmm1
1360	mov	DWORD [112+esp],eax
1361	pshufd	xmm5,xmm0,19
1362	paddq	xmm1,xmm1
1363	pand	xmm5,[96+esp]
1364	pxor	xmm5,xmm1	; xmm5 = next tweak
; Byte-swap loop: for each trailing input byte, move the corresponding byte of
; the last full output block to the partial output position and put the input
; byte in its place.
1365L$059xts_enc_steal:
1366	movzx	ecx,BYTE [esi]
1367	movzx	edx,BYTE [edi-16]
1368	lea	esi,[1+esi]
1369	mov	BYTE [edi-16],cl
1370	mov	BYTE [edi],dl
1371	lea	edi,[1+edi]
1372	sub	eax,1
1373	jnz	NEAR L$059xts_enc_steal
1374	sub	edi,DWORD [112+esp]	; rewind edi by the number of trailing bytes
1375	mov	edx,ebp	; key pointer
1376	mov	ecx,ebx	; round count
1377	movups	xmm2,[edi-16]	; the patched-up last full block
1378	xorps	xmm2,xmm5
1379	movups	xmm0,[edx]
1380	movups	xmm1,[16+edx]
1381	lea	edx,[32+edx]
1382	xorps	xmm2,xmm0
1383L$060enc1_loop_10:
1384db	102,15,56,220,209	; aesenc xmm2,xmm1
1385	dec	ecx
1386	movups	xmm1,[edx]
1387	lea	edx,[16+edx]
1388	jnz	NEAR L$060enc1_loop_10	; ZF from dec ecx
1389db	102,15,56,221,209	; aesenclast xmm2,xmm1
1390	xorps	xmm2,xmm5
1391	movups	[edi-16],xmm2
; Common exit: wipe xmm0-xmm7 and the saved tweaks at [0..95+esp], then restore
; the caller's esp and saved registers.
1392L$058xts_enc_ret:
1393	pxor	xmm0,xmm0
1394	pxor	xmm1,xmm1
1395	pxor	xmm2,xmm2
1396	movdqa	[esp],xmm0
1397	pxor	xmm3,xmm3
1398	movdqa	[16+esp],xmm0
1399	pxor	xmm4,xmm4
1400	movdqa	[32+esp],xmm0
1401	pxor	xmm5,xmm5
1402	movdqa	[48+esp],xmm0
1403	pxor	xmm6,xmm6
1404	movdqa	[64+esp],xmm0
1405	pxor	xmm7,xmm7
1406	movdqa	[80+esp],xmm0
1407	mov	esp,DWORD [116+esp]
1408	pop	edi
1409	pop	esi
1410	pop	ebx
1411	pop	ebp
1412	ret
; ---------------------------------------------------------------------------
; _aes_hw_xts_decrypt -- XTS-style AES decryption using AES-NI (32-bit code,
; all arguments on the stack).
;
; After the four register pushes the stack arguments are read as:
;   [20+esp] -> esi   input pointer
;   [24+esp] -> edi   output pointer
;   [28+esp] -> eax   length in bytes
;   [32+esp] -> edx   key schedule used to decrypt the data blocks
;   [36+esp]          key schedule used only to ENCRYPT one 16-byte block ...
;   [40+esp]          ... read from this pointer; the result is the first
;                     tweak (presumably the XTS tweak key and IV -- confirm
;                     against the C prototype)
; Both key schedules keep their loop/round count at byte offset 240.
;
; Local frame (120 bytes, 16-byte aligned):
;   [esp .. 95+esp]   per-block tweak values
;   [96+esp]          the 128-bit constant {0x87, 0, 1, 0} (dwords)
;   [112+esp]         byte count (see below), later the tail length
;   [116+esp]         caller's esp
;
; ebp/ebx/esi/edi are restored on exit; xmm0-xmm7 and the tweak area on the
; stack are zeroed before returning.
; ---------------------------------------------------------------------------
1413global	_aes_hw_xts_decrypt
1414align	16
1415_aes_hw_xts_decrypt:
1416L$_aes_hw_xts_decrypt_begin:
1417	push	ebp
1418	push	ebx
1419	push	esi
1420	push	edi
1421	mov	edx,DWORD [36+esp]	; key schedule for the tweak computation
1422	mov	esi,DWORD [40+esp]	; pointer to the 16-byte block that seeds the tweak
1423	mov	ecx,DWORD [240+edx]	; loop count stored in the key schedule
1424	movups	xmm2,[esi]
1425	movups	xmm0,[edx]
1426	movups	xmm1,[16+edx]
1427	lea	edx,[32+edx]
1428	xorps	xmm2,xmm0	; initial round-key whitening
; Single-block encryption of xmm2 with the tweak key schedule.
1429L$061enc1_loop_11:
1430db	102,15,56,220,209	; aesenc xmm2,xmm1
1431	dec	ecx
1432	movups	xmm1,[edx]
1433	lea	edx,[16+edx]
1434	jnz	NEAR L$061enc1_loop_11
1435db	102,15,56,221,209	; aesenclast xmm2,xmm1 -> xmm2 = initial tweak
1436	mov	esi,DWORD [20+esp]	; input
1437	mov	edi,DWORD [24+esp]	; output
1438	mov	eax,DWORD [28+esp]	; length in bytes
1439	mov	edx,DWORD [32+esp]	; data key schedule
1440	mov	ebp,esp	; remember caller's esp
1441	sub	esp,120
1442	and	esp,-16	; movdqa below needs a 16-byte aligned frame
; If the length is not a multiple of 16, hold back one full block (16 bytes)
; so that it can be processed together with the partial tail.
1443	xor	ebx,ebx
1444	test	eax,15
1445	setnz	bl
1446	shl	ebx,4	; ebx = 16 if (len & 15) != 0, else 0
1447	sub	eax,ebx
; 128-bit constant {0x87,0,1,0}: carry/reduction mask for the tweak doubling.
1448	mov	DWORD [96+esp],135
1449	mov	DWORD [100+esp],0
1450	mov	DWORD [104+esp],1
1451	mov	DWORD [108+esp],0
1452	mov	DWORD [112+esp],eax	; adjusted length (low 4 bits still = len & 15)
1453	mov	DWORD [116+esp],ebp	; saved caller esp
1454	mov	ecx,DWORD [240+edx]
1455	mov	ebp,edx	; ebp = data key schedule from here on
1456	mov	ebx,ecx	; ebx = loop count copy
1457	movdqa	xmm1,xmm2	; xmm1 = current tweak
1458	pxor	xmm0,xmm0
1459	movdqa	xmm3,[96+esp]	; xmm3 = {0x87,0,1,0}
1460	pcmpgtd	xmm0,xmm1	; xmm0 = per-dword sign mask of the tweak
1461	and	eax,-16
1462	sub	eax,96
1463	jc	NEAR L$062xts_dec_short	; fewer than 96 bytes: skip the 6-block loop
; Pre-scaled values for the shared 6-block core: ecx/ebx = 16 - 16*count and
; edx = key + 32 + 16*count (presumably the negative-index form expected by
; L$_aesni_decrypt6_enter, which is defined outside this block).
1464	shl	ecx,4
1465	mov	ebx,16
1466	sub	ebx,ecx
1467	lea	edx,[32+ecx*1+edx]
1468	jmp	NEAR L$063xts_dec_loop6
1469align	16
; Main loop: six blocks per iteration.
; Each 7-instruction group below saves the current tweak and advances it:
; pshufd 0x13 of the sign mask ANDed with {0x87,0,1,0} yields the carry from
; bit 63 into bit 64 and the 0x87 reduction for bit 127; paddq doubles both
; 64-bit halves; pxor merges the carries (multiplication by x in GF(2^128)).
1470L$063xts_dec_loop6:
1471	pshufd	xmm2,xmm0,19
1472	pxor	xmm0,xmm0
1473	movdqa	[esp],xmm1	; tweak for block 0
1474	paddq	xmm1,xmm1
1475	pand	xmm2,xmm3
1476	pcmpgtd	xmm0,xmm1
1477	pxor	xmm1,xmm2
1478	pshufd	xmm2,xmm0,19
1479	pxor	xmm0,xmm0
1480	movdqa	[16+esp],xmm1	; tweak for block 1
1481	paddq	xmm1,xmm1
1482	pand	xmm2,xmm3
1483	pcmpgtd	xmm0,xmm1
1484	pxor	xmm1,xmm2
1485	pshufd	xmm2,xmm0,19
1486	pxor	xmm0,xmm0
1487	movdqa	[32+esp],xmm1	; tweak for block 2
1488	paddq	xmm1,xmm1
1489	pand	xmm2,xmm3
1490	pcmpgtd	xmm0,xmm1
1491	pxor	xmm1,xmm2
1492	pshufd	xmm2,xmm0,19
1493	pxor	xmm0,xmm0
1494	movdqa	[48+esp],xmm1	; tweak for block 3
1495	paddq	xmm1,xmm1
1496	pand	xmm2,xmm3
1497	pcmpgtd	xmm0,xmm1
1498	pxor	xmm1,xmm2
1499	pshufd	xmm7,xmm0,19
1500	movdqa	[64+esp],xmm1	; tweak for block 4
1501	paddq	xmm1,xmm1
1502	movups	xmm0,[ebp]	; round key 0
1503	pand	xmm7,xmm3
1504	movups	xmm2,[esi]
1505	pxor	xmm7,xmm1	; xmm7 = tweak for block 5
1506	mov	ecx,ebx
; Load six input blocks, XOR each with round key 0 and its tweak.
1507	movdqu	xmm3,[16+esi]
1508	xorps	xmm2,xmm0
1509	movdqu	xmm4,[32+esi]
1510	pxor	xmm3,xmm0
1511	movdqu	xmm5,[48+esi]
1512	pxor	xmm4,xmm0
1513	movdqu	xmm6,[64+esi]
1514	pxor	xmm5,xmm0
1515	movdqu	xmm1,[80+esi]
1516	pxor	xmm6,xmm0
1517	lea	esi,[96+esi]
1518	pxor	xmm2,[esp]
1519	movdqa	[80+esp],xmm7	; save tweak for block 5
1520	pxor	xmm7,xmm1
1521	movups	xmm1,[16+ebp]	; round key 1
1522	pxor	xmm3,[16+esp]
1523	pxor	xmm4,[32+esp]
; First AES round is issued here, interleaved with the remaining XORs; the
; rest of the rounds run inside L$_aesni_decrypt6_enter.
1524db	102,15,56,222,209	; aesdec xmm2,xmm1
1525	pxor	xmm5,[48+esp]
1526	pxor	xmm6,[64+esp]
1527db	102,15,56,222,217	; aesdec xmm3,xmm1
1528	pxor	xmm7,xmm0
1529	movups	xmm0,[32+ebp]	; round key 2
1530db	102,15,56,222,225	; aesdec xmm4,xmm1
1531db	102,15,56,222,233	; aesdec xmm5,xmm1
1532db	102,15,56,222,241	; aesdec xmm6,xmm1
1533db	102,15,56,222,249	; aesdec xmm7,xmm1
1534	call	L$_aesni_decrypt6_enter
; XOR the results with their tweaks again and store six output blocks.
1535	movdqa	xmm1,[80+esp]	; tweak of block 5 = newest tweak
1536	pxor	xmm0,xmm0
1537	xorps	xmm2,[esp]
1538	pcmpgtd	xmm0,xmm1
1539	xorps	xmm3,[16+esp]
1540	movups	[edi],xmm2
1541	xorps	xmm4,[32+esp]
1542	movups	[16+edi],xmm3
1543	xorps	xmm5,[48+esp]
1544	movups	[32+edi],xmm4
1545	xorps	xmm6,[64+esp]
1546	movups	[48+edi],xmm5
1547	xorps	xmm7,xmm1
1548	movups	[64+edi],xmm6
1549	pshufd	xmm2,xmm0,19
1550	movups	[80+edi],xmm7
1551	lea	edi,[96+edi]
1552	movdqa	xmm3,[96+esp]	; reload {0x87,0,1,0} (xmm3 was used for data)
1553	pxor	xmm0,xmm0
1554	paddq	xmm1,xmm1	; advance the tweak for the next iteration
1555	pand	xmm2,xmm3
1556	pcmpgtd	xmm0,xmm1
1557	pxor	xmm1,xmm2
1558	sub	eax,96
1559	jnc	NEAR L$063xts_dec_loop6
; Restore the unscaled key pointer / loop count for the short paths.
1560	mov	ecx,DWORD [240+ebp]
1561	mov	edx,ebp
1562	mov	ebx,ecx
; 0..5 whole blocks remain (eax + 96 bytes). xmm1 = next tweak,
; xmm0 = its sign mask, xmm3 = {0x87,0,1,0}.
1563L$062xts_dec_short:
1564	add	eax,96
1565	jz	NEAR L$064xts_dec_done6x
1566	movdqa	xmm5,xmm1	; tweak for block 0
1567	cmp	eax,32
1568	jb	NEAR L$065xts_dec_one
1569	pshufd	xmm2,xmm0,19
1570	pxor	xmm0,xmm0
1571	paddq	xmm1,xmm1
1572	pand	xmm2,xmm3
1573	pcmpgtd	xmm0,xmm1
1574	pxor	xmm1,xmm2
1575	je	NEAR L$066xts_dec_two	; flags are still those of cmp eax,32 (SSE ops leave EFLAGS alone)
1576	pshufd	xmm2,xmm0,19
1577	pxor	xmm0,xmm0
1578	movdqa	xmm6,xmm1	; tweak for block 1
1579	paddq	xmm1,xmm1
1580	pand	xmm2,xmm3
1581	pcmpgtd	xmm0,xmm1
1582	pxor	xmm1,xmm2
1583	cmp	eax,64
1584	jb	NEAR L$067xts_dec_three
1585	pshufd	xmm2,xmm0,19
1586	pxor	xmm0,xmm0
1587	movdqa	xmm7,xmm1	; tweak for block 2
1588	paddq	xmm1,xmm1
1589	pand	xmm2,xmm3
1590	pcmpgtd	xmm0,xmm1
1591	pxor	xmm1,xmm2
1592	movdqa	[esp],xmm5
1593	movdqa	[16+esp],xmm6
1594	je	NEAR L$068xts_dec_four	; flags are still those of cmp eax,64
; Five blocks: tweaks 0..3 live on the stack, tweak 4 in xmm7 / [64+esp].
1595	movdqa	[32+esp],xmm7
1596	pshufd	xmm7,xmm0,19
1597	movdqa	[48+esp],xmm1
1598	paddq	xmm1,xmm1
1599	pand	xmm7,xmm3
1600	pxor	xmm7,xmm1
1601	movdqu	xmm2,[esi]
1602	movdqu	xmm3,[16+esi]
1603	movdqu	xmm4,[32+esi]
1604	pxor	xmm2,[esp]
1605	movdqu	xmm5,[48+esi]
1606	pxor	xmm3,[16+esp]
1607	movdqu	xmm6,[64+esi]
1608	pxor	xmm4,[32+esp]
1609	lea	esi,[80+esi]
1610	pxor	xmm5,[48+esp]
1611	movdqa	[64+esp],xmm7
1612	pxor	xmm6,xmm7
1613	call	__aesni_decrypt6
1614	movaps	xmm1,[64+esp]	; last tweak used
1615	xorps	xmm2,[esp]
1616	xorps	xmm3,[16+esp]
1617	xorps	xmm4,[32+esp]
1618	movups	[edi],xmm2
1619	xorps	xmm5,[48+esp]
1620	movups	[16+edi],xmm3
1621	xorps	xmm6,xmm1
1622	movups	[32+edi],xmm4
1623	movups	[48+edi],xmm5
1624	movups	[64+edi],xmm6
1625	lea	edi,[80+edi]
1626	jmp	NEAR L$069xts_dec_done
1627align	16
; One block: tweak in xmm5.
1628L$065xts_dec_one:
1629	movups	xmm2,[esi]
1630	lea	esi,[16+esi]
1631	xorps	xmm2,xmm5
1632	movups	xmm0,[edx]
1633	movups	xmm1,[16+edx]
1634	lea	edx,[32+edx]
1635	xorps	xmm2,xmm0
1636L$070dec1_loop_12:
1637db	102,15,56,222,209	; aesdec xmm2,xmm1
1638	dec	ecx
1639	movups	xmm1,[edx]
1640	lea	edx,[16+edx]
1641	jnz	NEAR L$070dec1_loop_12
1642db	102,15,56,223,209	; aesdeclast xmm2,xmm1
1643	xorps	xmm2,xmm5
1644	movups	[edi],xmm2
1645	lea	edi,[16+edi]
1646	movdqa	xmm1,xmm5	; xmm1 = last tweak used
1647	jmp	NEAR L$069xts_dec_done
1648align	16
; Two blocks: tweaks in xmm5, xmm6.
1649L$066xts_dec_two:
1650	movaps	xmm6,xmm1
1651	movups	xmm2,[esi]
1652	movups	xmm3,[16+esi]
1653	lea	esi,[32+esi]
1654	xorps	xmm2,xmm5
1655	xorps	xmm3,xmm6
1656	call	__aesni_decrypt2
1657	xorps	xmm2,xmm5
1658	xorps	xmm3,xmm6
1659	movups	[edi],xmm2
1660	movups	[16+edi],xmm3
1661	lea	edi,[32+edi]
1662	movdqa	xmm1,xmm6	; xmm1 = last tweak used
1663	jmp	NEAR L$069xts_dec_done
1664align	16
; Three blocks: tweaks in xmm5, xmm6, xmm7.
1665L$067xts_dec_three:
1666	movaps	xmm7,xmm1
1667	movups	xmm2,[esi]
1668	movups	xmm3,[16+esi]
1669	movups	xmm4,[32+esi]
1670	lea	esi,[48+esi]
1671	xorps	xmm2,xmm5
1672	xorps	xmm3,xmm6
1673	xorps	xmm4,xmm7
1674	call	__aesni_decrypt3
1675	xorps	xmm2,xmm5
1676	xorps	xmm3,xmm6
1677	xorps	xmm4,xmm7
1678	movups	[edi],xmm2
1679	movups	[16+edi],xmm3
1680	movups	[32+edi],xmm4
1681	lea	edi,[48+edi]
1682	movdqa	xmm1,xmm7	; xmm1 = last tweak used
1683	jmp	NEAR L$069xts_dec_done
1684align	16
; Four blocks: tweaks at [esp], [16+esp], in xmm7 and xmm6 (in that order).
1685L$068xts_dec_four:
1686	movaps	xmm6,xmm1
1687	movups	xmm2,[esi]
1688	movups	xmm3,[16+esi]
1689	movups	xmm4,[32+esi]
1690	xorps	xmm2,[esp]
1691	movups	xmm5,[48+esi]
1692	lea	esi,[64+esi]
1693	xorps	xmm3,[16+esp]
1694	xorps	xmm4,xmm7
1695	xorps	xmm5,xmm6
1696	call	__aesni_decrypt4
1697	xorps	xmm2,[esp]
1698	xorps	xmm3,[16+esp]
1699	xorps	xmm4,xmm7
1700	movups	[edi],xmm2
1701	xorps	xmm5,xmm6
1702	movups	[16+edi],xmm3
1703	movups	[32+edi],xmm4
1704	movups	[48+edi],xmm5
1705	lea	edi,[64+edi]
1706	movdqa	xmm1,xmm6	; xmm1 = last tweak used
1707	jmp	NEAR L$069xts_dec_done
1708align	16
; Whole-block part ended exactly on the 6-block loop: xmm1/xmm0/xmm3 already
; hold the next tweak, its sign mask and the constant.
1709L$064xts_dec_done6x:
1710	mov	eax,DWORD [112+esp]
1711	and	eax,15	; eax = number of trailing bytes
1712	jz	NEAR L$071xts_dec_ret
1713	mov	DWORD [112+esp],eax
1714	jmp	NEAR L$072xts_dec_only_one_more
1715align	16
; Whole-block part ended in a short path: xmm1 = last tweak used.
1716L$069xts_dec_done:
1717	mov	eax,DWORD [112+esp]
1718	pxor	xmm0,xmm0
1719	and	eax,15	; eax = number of trailing bytes
1720	jz	NEAR L$071xts_dec_ret
1721	pcmpgtd	xmm0,xmm1
1722	mov	DWORD [112+esp],eax	; keep the tail length for the rewind below
1723	pshufd	xmm2,xmm0,19
1724	pxor	xmm0,xmm0
1725	movdqa	xmm3,[96+esp]
1726	paddq	xmm1,xmm1	; advance to the next unused tweak
1727	pand	xmm2,xmm3
1728	pcmpgtd	xmm0,xmm1
1729	pxor	xmm1,xmm2
; Tail handling: the held-back full block is decrypted with the tweak AFTER
; the current one (xmm5); the current tweak is kept in xmm6 for the final
; block that is assembled by the byte-swapping loop below.
1730L$072xts_dec_only_one_more:
1731	pshufd	xmm5,xmm0,19
1732	movdqa	xmm6,xmm1	; xmm6 = current tweak
1733	paddq	xmm1,xmm1
1734	pand	xmm5,xmm3
1735	pxor	xmm5,xmm1	; xmm5 = following tweak
1736	mov	edx,ebp
1737	mov	ecx,ebx
1738	movups	xmm2,[esi]
1739	xorps	xmm2,xmm5
1740	movups	xmm0,[edx]
1741	movups	xmm1,[16+edx]
1742	lea	edx,[32+edx]
1743	xorps	xmm2,xmm0
1744L$073dec1_loop_13:
1745db	102,15,56,222,209	; aesdec xmm2,xmm1
1746	dec	ecx
1747	movups	xmm1,[edx]
1748	lea	edx,[16+edx]
1749	jnz	NEAR L$073dec1_loop_13
1750db	102,15,56,223,209	; aesdeclast xmm2,xmm1
1751	xorps	xmm2,xmm5
1752	movups	[edi],xmm2
; For each of the eax trailing bytes: move the byte already written at
; [edi] to [16+edi] and replace it with the input byte at [16+esi].
1753L$074xts_dec_steal:
1754	movzx	ecx,BYTE [16+esi]
1755	movzx	edx,BYTE [edi]
1756	lea	esi,[1+esi]
1757	mov	BYTE [edi],cl
1758	mov	BYTE [16+edi],dl
1759	lea	edi,[1+edi]
1760	sub	eax,1
1761	jnz	NEAR L$074xts_dec_steal
1762	sub	edi,DWORD [112+esp]	; rewind edi by the tail length
1763	mov	edx,ebp
1764	mov	ecx,ebx
; Decrypt the re-assembled block in place with the tweak saved in xmm6.
1765	movups	xmm2,[edi]
1766	xorps	xmm2,xmm6
1767	movups	xmm0,[edx]
1768	movups	xmm1,[16+edx]
1769	lea	edx,[32+edx]
1770	xorps	xmm2,xmm0
1771L$075dec1_loop_14:
1772db	102,15,56,222,209	; aesdec xmm2,xmm1
1773	dec	ecx
1774	movups	xmm1,[edx]
1775	lea	edx,[16+edx]
1776	jnz	NEAR L$075dec1_loop_14
1777db	102,15,56,223,209	; aesdeclast xmm2,xmm1
1778	xorps	xmm2,xmm6
1779	movups	[edi],xmm2
; Common exit: zero xmm0-xmm7 and the six tweak slots on the stack, then
; restore the caller's stack pointer and the saved registers.
1780L$071xts_dec_ret:
1781	pxor	xmm0,xmm0
1782	pxor	xmm1,xmm1
1783	pxor	xmm2,xmm2
1784	movdqa	[esp],xmm0
1785	pxor	xmm3,xmm3
1786	movdqa	[16+esp],xmm0
1787	pxor	xmm4,xmm4
1788	movdqa	[32+esp],xmm0
1789	pxor	xmm5,xmm5
1790	movdqa	[48+esp],xmm0
1791	pxor	xmm6,xmm6
1792	movdqa	[64+esp],xmm0
1793	pxor	xmm7,xmm7
1794	movdqa	[80+esp],xmm0
1795	mov	esp,DWORD [116+esp]
1796	pop	edi
1797	pop	esi
1798	pop	ebx
1799	pop	ebp
1800	ret
; ---------------------------------------------------------------------------
; _aes_hw_cbc_encrypt -- AES-CBC encryption / decryption using AES-NI
; (32-bit code, all arguments on the stack).
;
; After the four register pushes the stack arguments are read as:
;   [20+esp] -> esi   input pointer
;   [24+esp] -> edi   output pointer
;   [28+esp] -> eax   length in bytes (0 -> return immediately)
;   [32+esp] -> edx   key schedule (loop count at byte offset 240)
;   [36+esp] -> ebp   pointer to the 16-byte IV; updated on return
;   [40+esp]          direction flag: 0 selects decryption, non-zero encryption
;
; Local frame: 16-byte aligned scratch block at [esp], caller's esp saved at
; [16+esp]. During processing ebp = key schedule, ebx = loop count copy,
; xmm7 = chaining value that is written back to the IV.
; ebp/ebx/esi/edi are restored on exit.
;
; Change versus the generated original: the partial-tail decrypt path now
; stores the zeroed xmm0 (instead of xmm2) into the stack scratch block after
; the copy, so the decrypted block is not left behind on the stack. This file
; is generated; the same change has to be mirrored in the generating script.
; ---------------------------------------------------------------------------
1801global	_aes_hw_cbc_encrypt
1802align	16
1803_aes_hw_cbc_encrypt:
1804L$_aes_hw_cbc_encrypt_begin:
1805	push	ebp
1806	push	ebx
1807	push	esi
1808	push	edi
1809	mov	esi,DWORD [20+esp]	; input
1810	mov	ebx,esp
1811	mov	edi,DWORD [24+esp]	; output
1812	sub	ebx,24
1813	mov	eax,DWORD [28+esp]	; length
1814	and	ebx,-16	; ebx = aligned frame address (not yet active)
1815	mov	edx,DWORD [32+esp]	; key schedule
1816	mov	ebp,DWORD [36+esp]	; IV pointer
1817	test	eax,eax
1818	jz	NEAR L$076cbc_abort	; nothing to do for length 0
1819	cmp	DWORD [40+esp],0	; direction flag; result consumed by the je below
1820	xchg	ebx,esp	; switch to the aligned frame, ebx = caller esp
1821	movups	xmm7,[ebp]	; xmm7 = IV
1822	mov	ecx,DWORD [240+edx]
1823	mov	ebp,edx	; ebp = key schedule from here on
1824	mov	DWORD [16+esp],ebx	; save caller esp
1825	mov	ebx,ecx	; ebx = loop count copy
1826	je	NEAR L$077cbc_decrypt	; mov/xchg/movups above do not touch EFLAGS
; ---- Encryption -----------------------------------------------------------
1827	movaps	xmm2,xmm7	; xmm2 = chaining value
1828	cmp	eax,16
1829	jb	NEAR L$078cbc_enc_tail
1830	sub	eax,16
1831	jmp	NEAR L$079cbc_enc_loop
1832align	16
; One block per iteration: xmm2 = E(K, xmm2 XOR input block).
1833L$079cbc_enc_loop:
1834	movups	xmm7,[esi]
1835	lea	esi,[16+esi]
1836	movups	xmm0,[edx]
1837	movups	xmm1,[16+edx]
1838	xorps	xmm7,xmm0	; input XOR round key 0
1839	lea	edx,[32+edx]
1840	xorps	xmm2,xmm7	; ... XOR previous ciphertext / IV
1841L$080enc1_loop_15:
1842db	102,15,56,220,209	; aesenc xmm2,xmm1
1843	dec	ecx
1844	movups	xmm1,[edx]
1845	lea	edx,[16+edx]
1846	jnz	NEAR L$080enc1_loop_15
1847db	102,15,56,221,209	; aesenclast xmm2,xmm1
1848	mov	ecx,ebx	; reset loop count and key pointer
1849	mov	edx,ebp
1850	movups	[edi],xmm2
1851	lea	edi,[16+edi]
1852	sub	eax,16
1853	jnc	NEAR L$079cbc_enc_loop
1854	add	eax,16
1855	jnz	NEAR L$078cbc_enc_tail	; 1..15 bytes left
1856	movaps	xmm7,xmm2	; last ciphertext block becomes the new IV
1857	pxor	xmm2,xmm2
1858	jmp	NEAR L$081cbc_ret
; Partial final block: copy the eax remaining bytes to the output, pad the
; rest of the 16-byte block with zero bytes, then run one more loop iteration
; with input = output so the padded block is encrypted in place.
1859L$078cbc_enc_tail:
1860	mov	ecx,eax
1861dd	2767451785	; bytes 89 F6 F3 A4: mov esi,esi / rep movsb
1862	mov	ecx,16
1863	sub	ecx,eax
1864	xor	eax,eax	; al = 0 fill byte; also leaves eax = 0 for the loop exit test
1865dd	2868115081	; bytes 89 F6 F3 AA: mov esi,esi / rep stosb
1866	lea	edi,[edi-16]
1867	mov	ecx,ebx
1868	mov	esi,edi
1869	mov	edx,ebp
1870	jmp	NEAR L$079cbc_enc_loop
1871align	16
; ---- Decryption -----------------------------------------------------------
1872L$077cbc_decrypt:
1873	cmp	eax,80
1874	jbe	NEAR L$082cbc_dec_tail	; at most five blocks: tail code only
1875	movaps	[esp],xmm7	; [esp] = chaining value for the first block
1876	sub	eax,80
1877	jmp	NEAR L$083cbc_dec_loop6_enter
1878align	16
; Six blocks per iteration. The sixth result of the previous iteration (xmm7)
; is stored at the top of the next one; xmm0 holds the last ciphertext block,
; which becomes the next chaining value.
1879L$084cbc_dec_loop6:
1880	movaps	[esp],xmm0
1881	movups	[edi],xmm7
1882	lea	edi,[16+edi]
1883L$083cbc_dec_loop6_enter:
1884	movdqu	xmm2,[esi]
1885	movdqu	xmm3,[16+esi]
1886	movdqu	xmm4,[32+esi]
1887	movdqu	xmm5,[48+esi]
1888	movdqu	xmm6,[64+esi]
1889	movdqu	xmm7,[80+esi]
1890	call	__aesni_decrypt6
; XOR each decrypted block with the preceding ciphertext block (re-read from
; the input; the first one uses the value saved at [esp]).
1891	movups	xmm1,[esi]
1892	movups	xmm0,[16+esi]
1893	xorps	xmm2,[esp]
1894	xorps	xmm3,xmm1
1895	movups	xmm1,[32+esi]
1896	xorps	xmm4,xmm0
1897	movups	xmm0,[48+esi]
1898	xorps	xmm5,xmm1
1899	movups	xmm1,[64+esi]
1900	xorps	xmm6,xmm0
1901	movups	xmm0,[80+esi]	; last ciphertext block of this group
1902	xorps	xmm7,xmm1
1903	movups	[edi],xmm2
1904	movups	[16+edi],xmm3
1905	lea	esi,[96+esi]
1906	movups	[32+edi],xmm4
1907	mov	ecx,ebx
1908	movups	[48+edi],xmm5
1909	mov	edx,ebp
1910	movups	[64+edi],xmm6
1911	lea	edi,[80+edi]	; sixth block (xmm7) is stored later
1912	sub	eax,96
1913	ja	NEAR L$084cbc_dec_loop6
1914	movaps	xmm2,xmm7	; xmm2 = pending (not yet stored) plaintext block
1915	movaps	xmm7,xmm0	; xmm7 = new chaining value
1916	add	eax,80
1917	jle	NEAR L$085cbc_dec_clear_tail_collected
1918	movups	[edi],xmm2
1919	lea	edi,[16+edi]
; 1..80 bytes remain. Dispatch on the number of (possibly partial) blocks;
; copies of ciphertext blocks are kept for chaining.
1920L$082cbc_dec_tail:
1921	movups	xmm2,[esi]
1922	movaps	xmm6,xmm2
1923	cmp	eax,16
1924	jbe	NEAR L$086cbc_dec_one
1925	movups	xmm3,[16+esi]
1926	movaps	xmm5,xmm3
1927	cmp	eax,32
1928	jbe	NEAR L$087cbc_dec_two
1929	movups	xmm4,[32+esi]
1930	cmp	eax,48
1931	jbe	NEAR L$088cbc_dec_three
1932	movups	xmm5,[48+esi]
1933	cmp	eax,64
1934	jbe	NEAR L$089cbc_dec_four
; Five blocks: run the 6-block core with a zero sixth block.
1935	movups	xmm6,[64+esi]
1936	movaps	[esp],xmm7
1937	movups	xmm2,[esi]
1938	xorps	xmm7,xmm7
1939	call	__aesni_decrypt6
1940	movups	xmm1,[esi]
1941	movups	xmm0,[16+esi]
1942	xorps	xmm2,[esp]
1943	xorps	xmm3,xmm1
1944	movups	xmm1,[32+esi]
1945	xorps	xmm4,xmm0
1946	movups	xmm0,[48+esi]
1947	xorps	xmm5,xmm1
1948	movups	xmm7,[64+esi]	; new chaining value
1949	xorps	xmm6,xmm0
1950	movups	[edi],xmm2
1951	movups	[16+edi],xmm3
1952	pxor	xmm3,xmm3
1953	movups	[32+edi],xmm4
1954	pxor	xmm4,xmm4
1955	movups	[48+edi],xmm5
1956	pxor	xmm5,xmm5
1957	lea	edi,[64+edi]
1958	movaps	xmm2,xmm6	; pending last plaintext block
1959	pxor	xmm6,xmm6
1960	sub	eax,80
1961	jmp	NEAR L$090cbc_dec_tail_collected
1962align	16
1963L$086cbc_dec_one:
1964	movups	xmm0,[edx]
1965	movups	xmm1,[16+edx]
1966	lea	edx,[32+edx]
1967	xorps	xmm2,xmm0
1968L$091dec1_loop_16:
1969db	102,15,56,222,209	; aesdec xmm2,xmm1
1970	dec	ecx
1971	movups	xmm1,[edx]
1972	lea	edx,[16+edx]
1973	jnz	NEAR L$091dec1_loop_16
1974db	102,15,56,223,209	; aesdeclast xmm2,xmm1
1975	xorps	xmm2,xmm7
1976	movaps	xmm7,xmm6	; new chaining value = ciphertext block 0
1977	sub	eax,16
1978	jmp	NEAR L$090cbc_dec_tail_collected
1979align	16
1980L$087cbc_dec_two:
1981	call	__aesni_decrypt2
1982	xorps	xmm2,xmm7
1983	xorps	xmm3,xmm6
1984	movups	[edi],xmm2
1985	movaps	xmm2,xmm3	; pending last plaintext block
1986	pxor	xmm3,xmm3
1987	lea	edi,[16+edi]
1988	movaps	xmm7,xmm5	; new chaining value = ciphertext block 1
1989	sub	eax,32
1990	jmp	NEAR L$090cbc_dec_tail_collected
1991align	16
1992L$088cbc_dec_three:
1993	call	__aesni_decrypt3
1994	xorps	xmm2,xmm7
1995	xorps	xmm3,xmm6
1996	xorps	xmm4,xmm5
1997	movups	[edi],xmm2
1998	movaps	xmm2,xmm4	; pending last plaintext block
1999	pxor	xmm4,xmm4
2000	movups	[16+edi],xmm3
2001	pxor	xmm3,xmm3
2002	lea	edi,[32+edi]
2003	movups	xmm7,[32+esi]	; new chaining value = ciphertext block 2
2004	sub	eax,48
2005	jmp	NEAR L$090cbc_dec_tail_collected
2006align	16
2007L$089cbc_dec_four:
2008	call	__aesni_decrypt4
2009	movups	xmm1,[16+esi]
2010	movups	xmm0,[32+esi]
2011	xorps	xmm2,xmm7
2012	movups	xmm7,[48+esi]	; new chaining value = ciphertext block 3
2013	xorps	xmm3,xmm6
2014	movups	[edi],xmm2
2015	xorps	xmm4,xmm1
2016	movups	[16+edi],xmm3
2017	pxor	xmm3,xmm3
2018	xorps	xmm5,xmm0
2019	movups	[32+edi],xmm4
2020	pxor	xmm4,xmm4
2021	lea	edi,[48+edi]
2022	movaps	xmm2,xmm5	; pending last plaintext block
2023	pxor	xmm5,xmm5
2024	sub	eax,64
2025	jmp	NEAR L$090cbc_dec_tail_collected
2026align	16
2027L$085cbc_dec_clear_tail_collected:
2028	pxor	xmm3,xmm3
2029	pxor	xmm4,xmm4
2030	pxor	xmm5,xmm5
2031	pxor	xmm6,xmm6
; xmm2 = last plaintext block, not yet stored; xmm7 = new IV.
2032L$090cbc_dec_tail_collected:
2033	and	eax,15
2034	jnz	NEAR L$092cbc_dec_tail_partial
2035	movups	[edi],xmm2	; whole block: store it directly
2036	pxor	xmm0,xmm0
2037	jmp	NEAR L$081cbc_ret
2038align	16
; Length was not a multiple of 16: stage the block on the stack and copy
; ecx = 16 - eax bytes of it to the output.
; NOTE(review): the byte count is 16 - (len mod 16) as written, not
; len mod 16 -- confirm whether callers ever pass a partial length here.
2039L$092cbc_dec_tail_partial:
2040	movaps	[esp],xmm2
2041	pxor	xmm0,xmm0	; xmm0 = 0, used below to wipe the staging block
2042	mov	ecx,16
2043	mov	esi,esp
2044	sub	ecx,eax
2045dd	2767451785	; bytes 89 F6 F3 A4: mov esi,esi / rep movsb
2046	movdqa	[esp],xmm0	; wipe the staged plaintext (was xmm2, which re-stored it)
; Restore the caller's stack, write the chaining value back to the IV and
; clear the registers that held data.
2047L$081cbc_ret:
2048	mov	esp,DWORD [16+esp]
2049	mov	ebp,DWORD [36+esp]	; reload the IV pointer argument
2050	pxor	xmm2,xmm2
2051	pxor	xmm1,xmm1
2052	movups	[ebp],xmm7
2053	pxor	xmm7,xmm7
2054L$076cbc_abort:
2055	pop	edi
2056	pop	esi
2057	pop	ebx
2058	pop	ebp
2059	ret
; ---------------------------------------------------------------------------
; _aes_hw_set_encrypt_key_base -- AES encryption key expansion built on the
; AESKEYGENASSIST instruction (32-bit code, arguments on the stack).
;
;   [4+esp]  -> eax   pointer to the user key bytes
;   [8+esp]  -> ecx   key size in bits: 128, 192 or 256
;   [12+esp] -> edx   output key schedule
;
; Returns eax = 0 on success, eax = -2 for any other key size.
; Round keys are written consecutively from the start of the schedule and the
; value 9 / 11 / 13 (for 128 / 192 / 256 bits) is stored at byte offset 240.
; ebx is saved and restored; xmm0-xmm5 are zeroed on the success path.
;
; The L$...key_128 / key_192a / key_192b / key_256a / key_256b labels are
; internal subroutines reached with call/ret. They expect xmm1 to hold the
; AESKEYGENASSIST result and edx to point at the next round-key slot.
; ---------------------------------------------------------------------------
2060global	_aes_hw_set_encrypt_key_base
2061align	16
2062_aes_hw_set_encrypt_key_base:
2063L$_aes_hw_set_encrypt_key_base_begin:
; Test hook: when built with BORINGSSL_DISPATCH_TEST, set byte 3 of
; _BORINGSSL_function_hit to 1 (address computed PC-relative via call/pop).
2064%ifdef BORINGSSL_DISPATCH_TEST
2065	push	ebx
2066	push	edx
2067	call	L$093pic_for_function_hit
2068L$093pic_for_function_hit:
2069	pop	ebx
2070	lea	ebx,[(_BORINGSSL_function_hit+3-L$093pic_for_function_hit)+ebx]
2071	mov	edx,1
2072	mov	BYTE [ebx],dl
2073	pop	edx
2074	pop	ebx
2075%endif
2076	mov	eax,DWORD [4+esp]	; user key
2077	mov	ecx,DWORD [8+esp]	; bits
2078	mov	edx,DWORD [12+esp]	; key schedule
2079	push	ebx
; ebx = address of L$key_const, computed position-independently. It is not
; read again in this routine (only restored by the pop ebx on exit).
2080	call	L$094pic
2081L$094pic:
2082	pop	ebx
2083	lea	ebx,[(L$key_const-L$094pic)+ebx]
2084	movups	xmm0,[eax]	; first 16 key bytes
2085	xorps	xmm4,xmm4	; xmm4 = 0, scratch for the shufps/xorps folding
2086	lea	edx,[16+edx]
2087	cmp	ecx,256
2088	je	NEAR L$09514rounds
2089	cmp	ecx,192
2090	je	NEAR L$09612rounds
2091	cmp	ecx,128
2092	jne	NEAR L$097bad_keybits
2093align	16
; ---- 128-bit key ----------------------------------------------------------
; Each "db 102,15,58,223,200,N" is aeskeygenassist xmm1,xmm0,N with the round
; constants 1,2,4,...,128,27,54.
2094L$09810rounds:
2095	mov	ecx,9
2096	movups	[edx-16],xmm0	; round key 0 = user key
2097db	102,15,58,223,200,1
2098	call	L$099key_128_cold
2099db	102,15,58,223,200,2
2100	call	L$100key_128
2101db	102,15,58,223,200,4
2102	call	L$100key_128
2103db	102,15,58,223,200,8
2104	call	L$100key_128
2105db	102,15,58,223,200,16
2106	call	L$100key_128
2107db	102,15,58,223,200,32
2108	call	L$100key_128
2109db	102,15,58,223,200,64
2110	call	L$100key_128
2111db	102,15,58,223,200,128
2112	call	L$100key_128
2113db	102,15,58,223,200,27
2114	call	L$100key_128
2115db	102,15,58,223,200,54
2116	call	L$100key_128
2117	movups	[edx],xmm0	; last round key
2118	mov	DWORD [80+edx],ecx	; schedule offset 240 <- 9
2119	jmp	NEAR L$101good_key
2120align	16
; key_128: store the previous round key and advance edx, then derive the next
; one. key_128_cold skips the store (first call: key 0 is already stored).
2121L$100key_128:
2122	movups	[edx],xmm0
2123	lea	edx,[16+edx]
2124L$099key_128_cold:
2125	shufps	xmm4,xmm0,16
2126	xorps	xmm0,xmm4
2127	shufps	xmm4,xmm0,140
2128	xorps	xmm0,xmm4
2129	shufps	xmm1,xmm1,255	; broadcast dword 3 of the keygenassist result
2130	xorps	xmm0,xmm1
2131	ret
2132align	16
; ---- 192-bit key ----------------------------------------------------------
; Each "db 102,15,58,223,202,N" is aeskeygenassist xmm1,xmm2,N.
; xmm0 holds key bytes 0..15, the low half of xmm2 key bytes 16..23.
2133L$09612rounds:
2134	movq	xmm2,[16+eax]
2135	mov	ecx,11
2136	movups	[edx-16],xmm0
2137db	102,15,58,223,202,1
2138	call	L$102key_192a_cold
2139db	102,15,58,223,202,2
2140	call	L$103key_192b
2141db	102,15,58,223,202,4
2142	call	L$104key_192a
2143db	102,15,58,223,202,8
2144	call	L$103key_192b
2145db	102,15,58,223,202,16
2146	call	L$104key_192a
2147db	102,15,58,223,202,32
2148	call	L$103key_192b
2149db	102,15,58,223,202,64
2150	call	L$104key_192a
2151db	102,15,58,223,202,128
2152	call	L$103key_192b
2153	movups	[edx],xmm0	; last round key
2154	mov	DWORD [48+edx],ecx	; schedule offset 240 <- 11
2155	jmp	NEAR L$101good_key
2156align	16
; key_192a: store xmm0 (16 bytes), advance edx by 16, remember xmm2 in xmm5,
; then fall into the common 192-bit step. key_192a_cold skips the store.
2157L$104key_192a:
2158	movups	[edx],xmm0
2159	lea	edx,[16+edx]
2160align	16
2161L$102key_192a_cold:
2162	movaps	xmm5,xmm2
; Common 192-bit step: updates xmm0 and xmm2 from xmm1 (keygenassist result).
2163L$105key_192b_warm:
2164	shufps	xmm4,xmm0,16
2165	movdqa	xmm3,xmm2
2166	xorps	xmm0,xmm4
2167	shufps	xmm4,xmm0,140
2168	pslldq	xmm3,4
2169	xorps	xmm0,xmm4
2170	pshufd	xmm1,xmm1,85	; broadcast dword 1 of the keygenassist result
2171	pxor	xmm2,xmm3
2172	pxor	xmm0,xmm1
2173	pshufd	xmm3,xmm0,255
2174	pxor	xmm2,xmm3
2175	ret
2176align	16
; key_192b: combine the saved xmm5 with xmm0 and xmm2 into two 16-byte round
; keys, store both (32 bytes), then continue with the common step.
2177L$103key_192b:
2178	movaps	xmm3,xmm0
2179	shufps	xmm5,xmm0,68
2180	movups	[edx],xmm5
2181	shufps	xmm3,xmm2,78
2182	movups	[16+edx],xmm3
2183	lea	edx,[32+edx]
2184	jmp	NEAR L$105key_192b_warm
2185align	16
; ---- 256-bit key ----------------------------------------------------------
; xmm0 = key bytes 0..15, xmm2 = key bytes 16..31.
; "...,202,N" is aeskeygenassist xmm1,xmm2,N; "...,200,N" uses xmm0 as source.
2186L$09514rounds:
2187	movups	xmm2,[16+eax]
2188	lea	edx,[16+edx]
2189	mov	ecx,13
2190	movups	[edx-32],xmm0	; round key 0
2191	movups	[edx-16],xmm2	; round key 1
2192db	102,15,58,223,202,1
2193	call	L$106key_256a_cold
2194db	102,15,58,223,200,1
2195	call	L$107key_256b
2196db	102,15,58,223,202,2
2197	call	L$108key_256a
2198db	102,15,58,223,200,2
2199	call	L$107key_256b
2200db	102,15,58,223,202,4
2201	call	L$108key_256a
2202db	102,15,58,223,200,4
2203	call	L$107key_256b
2204db	102,15,58,223,202,8
2205	call	L$108key_256a
2206db	102,15,58,223,200,8
2207	call	L$107key_256b
2208db	102,15,58,223,202,16
2209	call	L$108key_256a
2210db	102,15,58,223,200,16
2211	call	L$107key_256b
2212db	102,15,58,223,202,32
2213	call	L$108key_256a
2214db	102,15,58,223,200,32
2215	call	L$107key_256b
2216db	102,15,58,223,202,64
2217	call	L$108key_256a
2218	movups	[edx],xmm0	; last round key
2219	mov	DWORD [16+edx],ecx	; schedule offset 240 <- 13
2220	xor	eax,eax	; redundant with the xor at L$101good_key
2221	jmp	NEAR L$101good_key
2222align	16
; key_256a: store xmm2, advance, then derive the next even round key in xmm0.
; key_256a_cold skips the store.
2223L$108key_256a:
2224	movups	[edx],xmm2
2225	lea	edx,[16+edx]
2226L$106key_256a_cold:
2227	shufps	xmm4,xmm0,16
2228	xorps	xmm0,xmm4
2229	shufps	xmm4,xmm0,140
2230	xorps	xmm0,xmm4
2231	shufps	xmm1,xmm1,255	; broadcast dword 3
2232	xorps	xmm0,xmm1
2233	ret
2234align	16
; key_256b: store xmm0, advance, then derive the next odd round key in xmm2.
2235L$107key_256b:
2236	movups	[edx],xmm0
2237	lea	edx,[16+edx]
2238	shufps	xmm4,xmm2,16
2239	xorps	xmm2,xmm4
2240	shufps	xmm4,xmm2,140
2241	xorps	xmm2,xmm4
2242	shufps	xmm1,xmm1,170	; broadcast dword 2
2243	xorps	xmm2,xmm1
2244	ret
; Success exit: clear the registers that held key material, return 0.
2245L$101good_key:
2246	pxor	xmm0,xmm0
2247	pxor	xmm1,xmm1
2248	pxor	xmm2,xmm2
2249	pxor	xmm3,xmm3
2250	pxor	xmm4,xmm4
2251	pxor	xmm5,xmm5
2252	xor	eax,eax
2253	pop	ebx
2254	ret
2255align	4
; Unsupported key size: clear xmm0 (already loaded with key bytes), return -2.
2256L$097bad_keybits:
2257	pxor	xmm0,xmm0
2258	mov	eax,-2
2259	pop	ebx
2260	ret
; ---------------------------------------------------------------------------
; _aes_hw_set_encrypt_key_alt -- AES encryption key expansion that uses
; PSHUFB + AESENCLAST with constants from L$key_const instead of
; AESKEYGENASSIST (32-bit code, arguments on the stack).
;
;   [4+esp]  -> eax   pointer to the user key bytes
;   [8+esp]  -> ecx   key size in bits: 128, 192 or 256
;   [12+esp] -> edx   output key schedule
;
; Returns eax = 0 on success, eax = -2 for any other key size.
; Round keys are written consecutively from the start of the schedule and the
; value 9 / 11 / 13 (for 128 / 192 / 256 bits) is stored at byte offset 240.
; ebx is saved and restored; xmm0-xmm5 are zeroed on the success path.
;
; L$key_const layout used here (ebx points at it):
;   [ebx]     byte-shuffle mask for the 128- and 256-bit paths
;   [16+ebx]  byte-shuffle mask for the 192-bit path
;   [32+ebx]  {1,1,1,1}   initial round constant, doubled with pslld each step
;   [48+ebx]  {27,27,27,27} round constant loaded after eight 128-bit steps
; ---------------------------------------------------------------------------
2261global	_aes_hw_set_encrypt_key_alt
2262align	16
2263_aes_hw_set_encrypt_key_alt:
2264L$_aes_hw_set_encrypt_key_alt_begin:
; Test hook: when built with BORINGSSL_DISPATCH_TEST, set byte 3 of
; _BORINGSSL_function_hit to 1 (address computed PC-relative via call/pop).
2265%ifdef BORINGSSL_DISPATCH_TEST
2266	push	ebx
2267	push	edx
2268	call	L$109pic_for_function_hit
2269L$109pic_for_function_hit:
2270	pop	ebx
2271	lea	ebx,[(_BORINGSSL_function_hit+3-L$109pic_for_function_hit)+ebx]
2272	mov	edx,1
2273	mov	BYTE [ebx],dl
2274	pop	edx
2275	pop	ebx
2276%endif
2277	mov	eax,DWORD [4+esp]	; user key
2278	mov	ecx,DWORD [8+esp]	; bits
2279	mov	edx,DWORD [12+esp]	; key schedule
2280	push	ebx
; ebx = address of L$key_const, computed position-independently.
2281	call	L$110pic
2282L$110pic:
2283	pop	ebx
2284	lea	ebx,[(L$key_const-L$110pic)+ebx]
2285	movups	xmm0,[eax]	; first 16 key bytes
2286	xorps	xmm4,xmm4
2287	lea	edx,[16+edx]
2288	cmp	ecx,256
2289	je	NEAR L$11114rounds_alt
2290	cmp	ecx,192
2291	je	NEAR L$11212rounds_alt
2292	cmp	ecx,128
2293	jne	NEAR L$113bad_keybits
2294align	16
; ---- 128-bit key ----------------------------------------------------------
; xmm2 = previous round key, xmm5 = shuffle mask, xmm4 = round constant.
2295L$11410rounds_alt:
2296	movdqa	xmm5,[ebx]
2297	mov	ecx,8
2298	movdqa	xmm4,[32+ebx]
2299	movdqa	xmm2,xmm0
2300	movdqu	[edx-16],xmm0	; round key 0 = user key
; Eight iterations, one round key each.
2301L$115loop_key128:
2302db	102,15,56,0,197	; pshufb xmm0,xmm5
2303db	102,15,56,221,196	; aesenclast xmm0,xmm4
2304	pslld	xmm4,1	; next round constant
2305	lea	edx,[16+edx]
; xmm2 ^= (xmm2 << 32) ^ (xmm2 << 64) ^ (xmm2 << 96), then folded into xmm0.
2306	movdqa	xmm3,xmm2
2307	pslldq	xmm2,4
2308	pxor	xmm3,xmm2
2309	pslldq	xmm2,4
2310	pxor	xmm3,xmm2
2311	pslldq	xmm2,4
2312	pxor	xmm2,xmm3
2313	pxor	xmm0,xmm2
2314	movdqu	[edx-16],xmm0
2315	movdqa	xmm2,xmm0
2316	dec	ecx
2317	jnz	NEAR L$115loop_key128
; Two more round keys with the round constant restarted at 27 (then 54).
2318	movdqa	xmm4,[48+ebx]
2319db	102,15,56,0,197	; pshufb xmm0,xmm5
2320db	102,15,56,221,196	; aesenclast xmm0,xmm4
2321	pslld	xmm4,1
2322	movdqa	xmm3,xmm2
2323	pslldq	xmm2,4
2324	pxor	xmm3,xmm2
2325	pslldq	xmm2,4
2326	pxor	xmm3,xmm2
2327	pslldq	xmm2,4
2328	pxor	xmm2,xmm3
2329	pxor	xmm0,xmm2
2330	movdqu	[edx],xmm0
2331	movdqa	xmm2,xmm0
2332db	102,15,56,0,197	; pshufb xmm0,xmm5
2333db	102,15,56,221,196	; aesenclast xmm0,xmm4
2334	movdqa	xmm3,xmm2
2335	pslldq	xmm2,4
2336	pxor	xmm3,xmm2
2337	pslldq	xmm2,4
2338	pxor	xmm3,xmm2
2339	pslldq	xmm2,4
2340	pxor	xmm2,xmm3
2341	pxor	xmm0,xmm2
2342	movdqu	[16+edx],xmm0	; last round key
2343	mov	ecx,9
2344	mov	DWORD [96+edx],ecx	; schedule offset 240 <- 9
2345	jmp	NEAR L$116good_key
2346align	16
; ---- 192-bit key ----------------------------------------------------------
; xmm0 = key bytes 0..15, low half of xmm2 = key bytes 16..23.
; Eight iterations, each emitting 24 bytes of key schedule.
2347L$11212rounds_alt:
2348	movq	xmm2,[16+eax]
2349	movdqa	xmm5,[16+ebx]
2350	movdqa	xmm4,[32+ebx]
2351	mov	ecx,8
2352	movdqu	[edx-16],xmm0
2353L$117loop_key192:
2354	movq	[edx],xmm2	; store the 8-byte part
2355	movdqa	xmm1,xmm2
2356db	102,15,56,0,213	; pshufb xmm2,xmm5
2357db	102,15,56,221,212	; aesenclast xmm2,xmm4
2358	pslld	xmm4,1	; next round constant
2359	lea	edx,[24+edx]
2360	movdqa	xmm3,xmm0
2361	pslldq	xmm0,4
2362	pxor	xmm3,xmm0
2363	pslldq	xmm0,4
2364	pxor	xmm3,xmm0
2365	pslldq	xmm0,4
2366	pxor	xmm0,xmm3
2367	pshufd	xmm3,xmm0,255
2368	pxor	xmm3,xmm1
2369	pslldq	xmm1,4
2370	pxor	xmm3,xmm1
2371	pxor	xmm0,xmm2
2372	pxor	xmm2,xmm3
2373	movdqu	[edx-16],xmm0	; store the 16-byte part
2374	dec	ecx
2375	jnz	NEAR L$117loop_key192
2376	mov	ecx,11
2377	mov	DWORD [32+edx],ecx	; schedule offset 240 <- 11
2378	jmp	NEAR L$116good_key
2379align	16
; ---- 256-bit key ----------------------------------------------------------
; xmm0 = key bytes 0..15, xmm2 = xmm1 = key bytes 16..31.
; Seven passes; each writes one round key at [edx], and every pass except the
; last also writes a second one at [16+edx].
2380L$11114rounds_alt:
2381	movups	xmm2,[16+eax]
2382	lea	edx,[16+edx]
2383	movdqa	xmm5,[ebx]
2384	movdqa	xmm4,[32+ebx]
2385	mov	ecx,7
2386	movdqu	[edx-32],xmm0	; round key 0
2387	movdqa	xmm1,xmm2
2388	movdqu	[edx-16],xmm2	; round key 1
2389L$118loop_key256:
2390db	102,15,56,0,213	; pshufb xmm2,xmm5
2391db	102,15,56,221,212	; aesenclast xmm2,xmm4
2392	movdqa	xmm3,xmm0
2393	pslldq	xmm0,4
2394	pxor	xmm3,xmm0
2395	pslldq	xmm0,4
2396	pxor	xmm3,xmm0
2397	pslldq	xmm0,4
2398	pxor	xmm0,xmm3
2399	pslld	xmm4,1	; next round constant
2400	pxor	xmm0,xmm2
2401	movdqu	[edx],xmm0
2402	dec	ecx
2403	jz	NEAR L$119done_key256
2404	pshufd	xmm2,xmm0,255
2405	pxor	xmm3,xmm3
2406db	102,15,56,221,211	; aesenclast xmm2,xmm3 (xmm3 = 0: no round constant)
2407	movdqa	xmm3,xmm1
2408	pslldq	xmm1,4
2409	pxor	xmm3,xmm1
2410	pslldq	xmm1,4
2411	pxor	xmm3,xmm1
2412	pslldq	xmm1,4
2413	pxor	xmm1,xmm3
2414	pxor	xmm2,xmm1
2415	movdqu	[16+edx],xmm2
2416	lea	edx,[32+edx]
2417	movdqa	xmm1,xmm2
2418	jmp	NEAR L$118loop_key256
2419L$119done_key256:
2420	mov	ecx,13
2421	mov	DWORD [16+edx],ecx	; schedule offset 240 <- 13
; Success exit: clear the registers that held key material, return 0.
2422L$116good_key:
2423	pxor	xmm0,xmm0
2424	pxor	xmm1,xmm1
2425	pxor	xmm2,xmm2
2426	pxor	xmm3,xmm3
2427	pxor	xmm4,xmm4
2428	pxor	xmm5,xmm5
2429	xor	eax,eax
2430	pop	ebx
2431	ret
2432align	4
; Unsupported key size: clear xmm0 (already loaded with key bytes), return -2.
2433L$113bad_keybits:
2434	pxor	xmm0,xmm0
2435	mov	eax,-2
2436	pop	ebx
2437	ret
; ---------------------------------------------------------------------------
; _aes_hw_encrypt_key_to_decrypt_key -- convert an expanded key schedule in
; place for use with the AESDEC instructions.
;
;   [4+esp] -> edx   key schedule; the value stored at byte offset 240 (n)
;                    locates the last round key at schedule + 16 + 16*n
;
; The order of the round keys is reversed. The first and last round keys are
; only swapped; every key in between is additionally passed through AESIMC.
; Uses eax, ecx, edx; xmm0 and xmm1 are zeroed before returning. No explicit
; return value is set (eax is left holding an internal pointer).
; ---------------------------------------------------------------------------
2438global	_aes_hw_encrypt_key_to_decrypt_key
2439align	16
2440_aes_hw_encrypt_key_to_decrypt_key:
2441L$_aes_hw_encrypt_key_to_decrypt_key_begin:
2442	mov	edx,DWORD [4+esp]
2443	mov	ecx,DWORD [240+edx]
2444	shl	ecx,4
2445	lea	eax,[16+ecx*1+edx]	; eax -> last round key
; Swap the outermost pair unchanged.
2446	movups	xmm0,[edx]
2447	movups	xmm1,[eax]
2448	movups	[eax],xmm0
2449	movups	[edx],xmm1
2450	lea	edx,[16+edx]
2451	lea	eax,[eax-16]
; Walk inward from both ends, swapping pairs and applying AESIMC to both.
2452L$120dec_key_inverse:
2453	movups	xmm0,[edx]
2454	movups	xmm1,[eax]
2455db	102,15,56,219,192	; aesimc xmm0,xmm0
2456db	102,15,56,219,201	; aesimc xmm1,xmm1
2457	lea	edx,[16+edx]
2458	lea	eax,[eax-16]
2459	movups	[16+eax],xmm0	; low-address key goes to the high slot
2460	movups	[edx-16],xmm1	; high-address key goes to the low slot
2461	cmp	eax,edx
2462	ja	NEAR L$120dec_key_inverse
; Pointers have met: transform the middle round key in place.
2463	movups	xmm0,[edx]
2464db	102,15,56,219,192	; aesimc xmm0,xmm0
2465	movups	[edx],xmm0
2466	pxor	xmm0,xmm0
2467	pxor	xmm1,xmm1
2468	ret
; Constant table used by _aes_hw_set_encrypt_key_alt, which addresses it
; through ebx (64-byte aligned, so the movdqa loads from it are aligned):
;   +0   4 x 0x0C0F0E0D  byte-shuffle mask loaded by the 128/256-bit paths
;   +16  4 x 0x04070605  byte-shuffle mask loaded by the 192-bit path
;   +32  4 x 1           initial round constant
;   +48  4 x 27          round constant loaded after eight 128-bit steps
; followed by a NUL-terminated ASCII banner:
;   "AES for Intel AES-NI, CRYPTOGAMS by" plus an e-mail address in angle
;   brackets.
2469align	64
2470L$key_const:
2471dd	202313229,202313229,202313229,202313229
2472dd	67569157,67569157,67569157,67569157
2473dd	1,1,1,1
2474dd	27,27,27,27
2475db	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
2476db	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
2477db	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
2478db	115,108,46,111,114,103,62,0
; Fallback for the "%ifidn __OUTPUT_FORMAT__, win32" opened at the top of the
; file: for any other output format none of the code above is assembled and
; only the single instruction below is emitted.
2479%else
2480; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
2481ret
2482%endif
2483