xref: /aosp_15_r20/external/boringssl/src/gen/bcm/ghash-x86_64-win.asm (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Everything up to the matching %else is assembled only when NASM's output
; format is win64, so the code below follows the Microsoft x64 convention.
4%ifidn __OUTPUT_FORMAT__, win64
; Memory operands that name a symbol are RIP-relative unless stated otherwise.
5default	rel
; The operand-size keywords written in the memory operands below are defined
; to expand to nothing.
6%define XMMWORD
7%define YMMWORD
8%define ZMMWORD
; _CET_ENDBR is defined empty here, so the uses at function entry emit no bytes.
9%define _CET_ENDBR
10
; NOTE(review): presumably this include renames the exported symbols when a
; symbol prefix is configured — confirm against the included file.
11%ifdef BORINGSSL_PREFIX
12%include "boringssl_prefix_symbols_nasm.inc"
13%endif
14section	.text code align=64
15
15
;-----------------------------------------------------------------------
; gcm_init_clmul — build the key table used by the PCLMULQDQ GHASH code.
; ABI:   Microsoft x64.
; In:    rdx -> 16-byte hash key (one unaligned 16-byte load)
;        rcx -> output table; 96 bytes are written at offsets 0..95
;        NOTE(review): presumably these are the key and Htable arguments
;        of the C prototype — confirm against the caller.
; Out:   [rcx+0]  = H'   (key with qword halves swapped, shifted left one
;                         bit, conditionally XORed with $L$0x1c2_polynomial)
;        [rcx+16] = H'^2  [rcx+48] = H'^3  [rcx+64] = H'^4
;                   (each obtained with the multiply+reduce sequence below)
;        [rcx+32] = {lo^hi of H', lo^hi of H'^2} packed as two qwords
;        [rcx+80] = {lo^hi of H'^3, lo^hi of H'^4} packed as two qwords
; Stack: 24 bytes, used to preserve xmm6 (callee-saved on Win64).
; Clobb: xmm0-xmm5, flags.
;-----------------------------------------------------------------------
16global	gcm_init_clmul
17
18ALIGN	16
19gcm_init_clmul:
20
21$L$SEH_begin_gcm_init_clmul_1:
22_CET_ENDBR
23$L$_init_clmul:
; Prologue: the SEH_prologue labels mark offsets referenced by the unwind
; codes in .xdata.
24	sub	rsp,0x18
25$L$SEH_prologue_gcm_init_clmul_2:
26	movaps	XMMWORD[rsp],xmm6
27$L$SEH_prologue_gcm_init_clmul_3:
28$L$SEH_endprologue_gcm_init_clmul_4:
; Load the key and swap its two 64-bit halves (shuffle control 78 = 0x4E).
29	movdqu	xmm2,XMMWORD[rdx]
30	pshufd	xmm2,xmm2,78
31
32
; 128-bit shift left by one: each qword is shifted, and bit 63 of the low
; qword is carried into the high qword. xmm4 holds the original top dword in
; every lane, so pcmpgtd against zero yields an all-ones mask exactly when
; the original top bit was set.
33	pshufd	xmm4,xmm2,255
34	movdqa	xmm3,xmm2
35	psllq	xmm2,1
36	pxor	xmm5,xmm5
37	psrlq	xmm3,63
38	pcmpgtd	xmm5,xmm4
39	pslldq	xmm3,8
40	por	xmm2,xmm3
41
42
; Conditionally fold in the 0x1c2 constant (mask is 0 or all-ones).
43	pand	xmm5,XMMWORD[$L$0x1c2_polynomial]
44	pxor	xmm2,xmm5
45
46
; xmm6 = lo^hi of H' (in both halves): the fixed Karatsuba operand.
; xmm0 = H'; the sequence below squares it.
47	pshufd	xmm6,xmm2,78
48	movdqa	xmm0,xmm2
49	pxor	xmm6,xmm2
50	movdqa	xmm1,xmm0
51	pshufd	xmm3,xmm0,78
52	pxor	xmm3,xmm0
; Three carry-less multiplies (hand-encoded): lo*lo, hi*hi, (lo^hi)*(lo^hi).
53DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
54DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
55DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
; Karatsuba recombination: middle term = xmm3 ^ lo ^ hi, split across the
; 256-bit product held in xmm1:xmm0.
56	pxor	xmm3,xmm0
57	pxor	xmm3,xmm1
58
59	movdqa	xmm4,xmm3
60	psrldq	xmm3,8
61	pslldq	xmm4,8
62	pxor	xmm1,xmm3
63	pxor	xmm0,xmm4
64
; Reduction, first phase (left shifts by 57, 62 and 63 combined, folded
; across the two halves).
65	movdqa	xmm4,xmm0
66	movdqa	xmm3,xmm0
67	psllq	xmm0,5
68	pxor	xmm3,xmm0
69	psllq	xmm0,1
70	pxor	xmm0,xmm3
71	psllq	xmm0,57
72	movdqa	xmm3,xmm0
73	pslldq	xmm0,8
74	psrldq	xmm3,8
75	pxor	xmm0,xmm4
76	pxor	xmm1,xmm3
77
78
; Reduction, second phase (right shifts by 1, 2 and 7 combined); the result
; in xmm0 is H'^2.
79	movdqa	xmm4,xmm0
80	psrlq	xmm0,1
81	pxor	xmm1,xmm4
82	pxor	xmm4,xmm0
83	psrlq	xmm0,5
84	pxor	xmm0,xmm4
85	psrlq	xmm0,1
86	pxor	xmm0,xmm1
; Store H' and H'^2, then pack their lo^hi values into one 16-byte entry.
87	pshufd	xmm3,xmm2,78
88	pshufd	xmm4,xmm0,78
89	pxor	xmm3,xmm2
90	movdqu	XMMWORD[rcx],xmm2
91	pxor	xmm4,xmm0
92	movdqu	XMMWORD[16+rcx],xmm0
93DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8
94	movdqu	XMMWORD[32+rcx],xmm4
; Multiply xmm0 (H'^2) by H' with the same multiply+reduce sequence -> H'^3.
95	movdqa	xmm1,xmm0
96	pshufd	xmm3,xmm0,78
97	pxor	xmm3,xmm0
98DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
99DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
100DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
101	pxor	xmm3,xmm0
102	pxor	xmm3,xmm1
103
104	movdqa	xmm4,xmm3
105	psrldq	xmm3,8
106	pslldq	xmm4,8
107	pxor	xmm1,xmm3
108	pxor	xmm0,xmm4
109
110	movdqa	xmm4,xmm0
111	movdqa	xmm3,xmm0
112	psllq	xmm0,5
113	pxor	xmm3,xmm0
114	psllq	xmm0,1
115	pxor	xmm0,xmm3
116	psllq	xmm0,57
117	movdqa	xmm3,xmm0
118	pslldq	xmm0,8
119	psrldq	xmm3,8
120	pxor	xmm0,xmm4
121	pxor	xmm1,xmm3
122
123
124	movdqa	xmm4,xmm0
125	psrlq	xmm0,1
126	pxor	xmm1,xmm4
127	pxor	xmm4,xmm0
128	psrlq	xmm0,5
129	pxor	xmm0,xmm4
130	psrlq	xmm0,1
131	pxor	xmm0,xmm1
; Keep H'^3 in xmm5 and multiply once more by H' -> H'^4 in xmm0.
132	movdqa	xmm5,xmm0
133	movdqa	xmm1,xmm0
134	pshufd	xmm3,xmm0,78
135	pxor	xmm3,xmm0
136DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
137DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
138DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
139	pxor	xmm3,xmm0
140	pxor	xmm3,xmm1
141
142	movdqa	xmm4,xmm3
143	psrldq	xmm3,8
144	pslldq	xmm4,8
145	pxor	xmm1,xmm3
146	pxor	xmm0,xmm4
147
148	movdqa	xmm4,xmm0
149	movdqa	xmm3,xmm0
150	psllq	xmm0,5
151	pxor	xmm3,xmm0
152	psllq	xmm0,1
153	pxor	xmm0,xmm3
154	psllq	xmm0,57
155	movdqa	xmm3,xmm0
156	pslldq	xmm0,8
157	psrldq	xmm3,8
158	pxor	xmm0,xmm4
159	pxor	xmm1,xmm3
160
161
162	movdqa	xmm4,xmm0
163	psrlq	xmm0,1
164	pxor	xmm1,xmm4
165	pxor	xmm4,xmm0
166	psrlq	xmm0,5
167	pxor	xmm0,xmm4
168	psrlq	xmm0,1
169	pxor	xmm0,xmm1
; Store H'^3, H'^4 and their packed lo^hi entry.
170	pshufd	xmm3,xmm5,78
171	pshufd	xmm4,xmm0,78
172	pxor	xmm3,xmm5
173	movdqu	XMMWORD[48+rcx],xmm5
174	pxor	xmm4,xmm0
175	movdqu	XMMWORD[64+rcx],xmm0
176DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8
177	movdqu	XMMWORD[80+rcx],xmm4
; Epilogue: restore xmm6 and release the 24-byte frame.
178	movaps	xmm6,XMMWORD[rsp]
179	lea	rsp,[24+rsp]
180	ret
181
182$L$SEH_end_gcm_init_clmul_5:
183
;-----------------------------------------------------------------------
; gcm_gmult_clmul — multiply the 16-byte value at [rcx] by the first table
; entry and write the reduced product back to [rcx].
; ABI:   Microsoft x64.
; In:    rcx -> 16-byte value, read and then overwritten
;        rdx -> key table; entries at offsets 0 and 32 are read
;        NOTE(review): presumably the table is the one gcm_init_clmul
;        produces — confirm against the caller.
; Stack: none. Only xmm0-xmm5 are used, so nothing callee-saved is touched
;        and the function has no unwind entry in .pdata.
; Clobb: xmm0-xmm5.
;-----------------------------------------------------------------------
184global	gcm_gmult_clmul
185
186ALIGN	16
187gcm_gmult_clmul:
188
189_CET_ENDBR
; gcm_gmult_avx jumps to this label.
190$L$_gmult_clmul:
191	movdqu	xmm0,XMMWORD[rcx]
192	movdqa	xmm5,XMMWORD[$L$bswap_mask]
193	movdqu	xmm2,XMMWORD[rdx]
194	movdqu	xmm4,XMMWORD[32+rdx]
; Reverse the byte order of the input value.
195DB	102,15,56,0,197	; pshufb xmm0,xmm5
; Karatsuba multiply of xmm0 by xmm2; xmm4 supplies the precomputed lo^hi.
196	movdqa	xmm1,xmm0
197	pshufd	xmm3,xmm0,78
198	pxor	xmm3,xmm0
199DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
200DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
201DB	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00
202	pxor	xmm3,xmm0
203	pxor	xmm3,xmm1
204
205	movdqa	xmm4,xmm3
206	psrldq	xmm3,8
207	pslldq	xmm4,8
208	pxor	xmm1,xmm3
209	pxor	xmm0,xmm4
210
; Reduction, first phase.
211	movdqa	xmm4,xmm0
212	movdqa	xmm3,xmm0
213	psllq	xmm0,5
214	pxor	xmm3,xmm0
215	psllq	xmm0,1
216	pxor	xmm0,xmm3
217	psllq	xmm0,57
218	movdqa	xmm3,xmm0
219	pslldq	xmm0,8
220	psrldq	xmm3,8
221	pxor	xmm0,xmm4
222	pxor	xmm1,xmm3
223
224
; Reduction, second phase.
225	movdqa	xmm4,xmm0
226	psrlq	xmm0,1
227	pxor	xmm1,xmm4
228	pxor	xmm4,xmm0
229	psrlq	xmm0,5
230	pxor	xmm0,xmm4
231	psrlq	xmm0,1
232	pxor	xmm0,xmm1
; Restore the original byte order and store the result.
233DB	102,15,56,0,197	; pshufb xmm0,xmm5
234	movdqu	XMMWORD[rcx],xmm0
235	ret
236
237
;-----------------------------------------------------------------------
; gcm_ghash_clmul — fold a buffer of 16-byte blocks into the 16-byte hash
; state using PCLMULQDQ.
; ABI:   Microsoft x64.
; In:    rcx -> 16-byte hash state, read on entry and rewritten on exit
;        rdx -> key table; offsets 0, 16, 32, 48, 64 and 80 are read
;        r8  -> input bytes
;        r9  =  input length in bytes
;        NOTE(review): the counters below only terminate correctly for a
;        non-zero multiple of 16 — confirm that callers guarantee this.
; Flow:  len == 16           -> $L$odd_tail (single block)
;        len - 16 <  0x30    -> $L$skip4x   (two blocks per iteration)
;        otherwise           -> four blocks per iteration, then any
;                               remainder goes through $L$skip4x/$L$odd_tail
; Stack: 168 bytes holding xmm6-xmm15 (callee-saved on Win64).
; Clobb: rax, r8, r9, xmm0-xmm5, flags.
;-----------------------------------------------------------------------
238global	gcm_ghash_clmul
239
240ALIGN	32
241gcm_ghash_clmul:
242
243$L$SEH_begin_gcm_ghash_clmul_1:
244_CET_ENDBR
245$L$_ghash_clmul:
; Prologue: rsp drops by 168 in total; rax = new rsp + 32, so the saves
; below land at [rsp+0] .. [rsp+144].
246	lea	rax,[((-136))+rsp]
247	lea	rsp,[((-32))+rax]
248$L$SEH_prologue_gcm_ghash_clmul_2:
249	movaps	XMMWORD[(-32)+rax],xmm6
250$L$SEH_prologue_gcm_ghash_clmul_3:
251	movaps	XMMWORD[(-16)+rax],xmm7
252$L$SEH_prologue_gcm_ghash_clmul_4:
253	movaps	XMMWORD[rax],xmm8
254$L$SEH_prologue_gcm_ghash_clmul_5:
255	movaps	XMMWORD[16+rax],xmm9
256$L$SEH_prologue_gcm_ghash_clmul_6:
257	movaps	XMMWORD[32+rax],xmm10
258$L$SEH_prologue_gcm_ghash_clmul_7:
259	movaps	XMMWORD[48+rax],xmm11
260$L$SEH_prologue_gcm_ghash_clmul_8:
261	movaps	XMMWORD[64+rax],xmm12
262$L$SEH_prologue_gcm_ghash_clmul_9:
263	movaps	XMMWORD[80+rax],xmm13
264$L$SEH_prologue_gcm_ghash_clmul_10:
265	movaps	XMMWORD[96+rax],xmm14
266$L$SEH_prologue_gcm_ghash_clmul_11:
267	movaps	XMMWORD[112+rax],xmm15
268$L$SEH_prologue_gcm_ghash_clmul_12:
269$L$SEH_endprologue_gcm_ghash_clmul_13:
; xmm10 = byte-reversal mask for the whole function.
270	movdqa	xmm10,XMMWORD[$L$bswap_mask]
271
; xmm0 = hash state (byte-reversed), xmm2 = table[0], xmm7 = table[32].
272	movdqu	xmm0,XMMWORD[rcx]
273	movdqu	xmm2,XMMWORD[rdx]
274	movdqu	xmm7,XMMWORD[32+rdx]
275DB	102,65,15,56,0,194	; pshufb xmm0,xmm10
276
; r9 = len - 16; exactly one block goes straight to the single-block tail.
277	sub	r9,0x10
278	jz	NEAR $L$odd_tail
279
; xmm6 = table[16]. Fewer than four blocks in total: use the 2x path.
280	movdqu	xmm6,XMMWORD[16+rdx]
281	cmp	r9,0x30
282	jb	NEAR $L$skip4x
283
; r9 = len - 64. rax holds the 8-byte lookup table consumed by the pshufb
; inside $L$mod4_loop. xmm14 = table[48], xmm15 = table[64].
284	sub	r9,0x30
285	mov	rax,0xA040608020C0E000
286	movdqu	xmm14,XMMWORD[48+rdx]
287	movdqu	xmm15,XMMWORD[64+rdx]
288
289
290
291
; Start the first group of four: blocks at offsets 48 and 32 are multiplied
; by table[0] and table[16]; partial products accumulate in xmm3 (lo),
; xmm5 (hi) and xmm4 (Karatsuba middle).
292	movdqu	xmm3,XMMWORD[48+r8]
293	movdqu	xmm11,XMMWORD[32+r8]
294DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
295DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
296	movdqa	xmm5,xmm3
297	pshufd	xmm4,xmm3,78
298	pxor	xmm4,xmm3
299DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
300DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
301DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
302
303	movdqa	xmm13,xmm11
304	pshufd	xmm12,xmm11,78
305	pxor	xmm12,xmm11
306DB	102,68,15,58,68,222,0	; pclmulqdq xmm11,xmm6,0x00
307DB	102,68,15,58,68,238,17	; pclmulqdq xmm13,xmm6,0x11
308DB	102,68,15,58,68,231,16	; pclmulqdq xmm12,xmm7,0x10
309	xorps	xmm3,xmm11
310	xorps	xmm5,xmm13
; Switch xmm7 to table[80] for the table[48]/table[64] multiplies.
311	movups	xmm7,XMMWORD[80+rdx]
312	xorps	xmm4,xmm12
313
; Block at offset 16 times table[48]; block at offset 0 is XORed into the
; running state xmm0 (its multiply by table[64] happens at the top of the
; loop or in $L$tail4x).
314	movdqu	xmm11,XMMWORD[16+r8]
315	movdqu	xmm8,XMMWORD[r8]
316DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
317DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
318	movdqa	xmm13,xmm11
319	pshufd	xmm12,xmm11,78
320	pxor	xmm0,xmm8
321	pxor	xmm12,xmm11
322DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
323	movdqa	xmm1,xmm0
324	pshufd	xmm8,xmm0,78
325	pxor	xmm8,xmm0
326DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
327DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
328	xorps	xmm3,xmm11
329	xorps	xmm5,xmm13
330
; Advance past the four blocks; leave the loop setup if fewer than four
; further blocks remain.
331	lea	r8,[64+r8]
332	sub	r9,0x40
333	jc	NEAR $L$tail4x
334
335	jmp	NEAR $L$mod4_loop
336ALIGN	32
; Main 4x loop: finishes the multiply and reduction for the previous group
; while starting the multiplies for the next four blocks. The instruction
; order is interleaved on purpose; do not reorder.
337$L$mod4_loop:
338DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
339	xorps	xmm4,xmm12
340	movdqu	xmm11,XMMWORD[48+r8]
341DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
342DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
343	xorps	xmm0,xmm3
344	movdqu	xmm3,XMMWORD[32+r8]
345	movdqa	xmm13,xmm11
346DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
347	pshufd	xmm12,xmm11,78
348	xorps	xmm1,xmm5
349	pxor	xmm12,xmm11
350DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
351	movups	xmm7,XMMWORD[32+rdx]
352	xorps	xmm8,xmm4
353DB	102,68,15,58,68,218,0	; pclmulqdq xmm11,xmm2,0x00
354	pshufd	xmm4,xmm3,78
355
356	pxor	xmm8,xmm0
357	movdqa	xmm5,xmm3
358	pxor	xmm8,xmm1
359	pxor	xmm4,xmm3
360	movdqa	xmm9,xmm8
361DB	102,68,15,58,68,234,17	; pclmulqdq xmm13,xmm2,0x11
362	pslldq	xmm8,8
363	psrldq	xmm9,8
364	pxor	xmm0,xmm8
; The first reduction phase here uses a byte-table lookup: the low 3 bits of
; each qword of xmm0 (via $L$7_mask) index the table loaded from rax.
365	movdqa	xmm8,XMMWORD[$L$7_mask]
366	pxor	xmm1,xmm9
367DB	102,76,15,110,200	; movq xmm9,rax
368
369	pand	xmm8,xmm0
370DB	102,69,15,56,0,200	; pshufb xmm9,xmm8
371	pxor	xmm9,xmm0
372DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
373	psllq	xmm9,57
374	movdqa	xmm8,xmm9
375	pslldq	xmm9,8
376DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
377	psrldq	xmm8,8
378	pxor	xmm0,xmm9
379	pxor	xmm1,xmm8
380	movdqu	xmm8,XMMWORD[r8]
381
382	movdqa	xmm9,xmm0
383	psrlq	xmm0,1
384DB	102,15,58,68,238,17	; pclmulqdq xmm5,xmm6,0x11
385	xorps	xmm3,xmm11
386	movdqu	xmm11,XMMWORD[16+r8]
387DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
388DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
389	xorps	xmm5,xmm13
390	movups	xmm7,XMMWORD[80+rdx]
391DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
392	pxor	xmm1,xmm9
393	pxor	xmm9,xmm0
394	psrlq	xmm0,5
395
396	movdqa	xmm13,xmm11
397	pxor	xmm4,xmm12
398	pshufd	xmm12,xmm11,78
399	pxor	xmm0,xmm9
400	pxor	xmm1,xmm8
401	pxor	xmm12,xmm11
402DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
403	psrlq	xmm0,1
404	pxor	xmm0,xmm1
405	movdqa	xmm1,xmm0
406DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
407	xorps	xmm3,xmm11
408	pshufd	xmm8,xmm0,78
409	pxor	xmm8,xmm0
410
411DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
412	xorps	xmm5,xmm13
413
414	lea	r8,[64+r8]
415	sub	r9,0x40
416	jnc	NEAR $L$mod4_loop
417
; Finish the last group of four: final multiply by table[64], sum the
; partial products, then the two-phase shift reduction.
418$L$tail4x:
419DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
420DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
421DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
422	xorps	xmm4,xmm12
423	xorps	xmm0,xmm3
424	xorps	xmm1,xmm5
425	pxor	xmm1,xmm0
426	pxor	xmm8,xmm4
427
428	pxor	xmm8,xmm1
429	pxor	xmm1,xmm0
430
431	movdqa	xmm9,xmm8
432	psrldq	xmm8,8
433	pslldq	xmm9,8
434	pxor	xmm1,xmm8
435	pxor	xmm0,xmm9
436
437	movdqa	xmm4,xmm0
438	movdqa	xmm3,xmm0
439	psllq	xmm0,5
440	pxor	xmm3,xmm0
441	psllq	xmm0,1
442	pxor	xmm0,xmm3
443	psllq	xmm0,57
444	movdqa	xmm3,xmm0
445	pslldq	xmm0,8
446	psrldq	xmm3,8
447	pxor	xmm0,xmm4
448	pxor	xmm1,xmm3
449
450
451	movdqa	xmm4,xmm0
452	psrlq	xmm0,1
453	pxor	xmm1,xmm4
454	pxor	xmm4,xmm0
455	psrlq	xmm0,5
456	pxor	xmm0,xmm4
457	psrlq	xmm0,1
458	pxor	xmm0,xmm1
; Undo the last subtraction: r9 = bytes still unprocessed (0, 16, 32 or 48).
; Reload xmm7 = table[32] for the 1x/2x paths.
459	add	r9,0x40
460	jz	NEAR $L$done
461	movdqu	xmm7,XMMWORD[32+rdx]
462	sub	r9,0x10
463	jz	NEAR $L$odd_tail
; 2x path: entered with r9 = remaining bytes - 16.
464$L$skip4x:
465
466
467
468
469
; First pair: block 0 is XORed into the state; block 1 is multiplied by
; table[0] (products kept in xmm3/xmm5/xmm4).
470	movdqu	xmm8,XMMWORD[r8]
471	movdqu	xmm3,XMMWORD[16+r8]
472DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
473DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
474	pxor	xmm0,xmm8
475
476	movdqa	xmm5,xmm3
477	pshufd	xmm4,xmm3,78
478	pxor	xmm4,xmm3
479DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
480DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
481DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
482
; jbe is taken when at most one block remains after this pair.
483	lea	r8,[32+r8]
484	nop
485	sub	r9,0x20
486	jbe	NEAR $L$even_tail
487	nop
488	jmp	NEAR $L$mod_loop
489
490ALIGN	32
; 2x loop: multiplies the state by table[16], reduces, and starts the next
; pair, interleaved.
491$L$mod_loop:
492	movdqa	xmm1,xmm0
493	movdqa	xmm8,xmm4
494	pshufd	xmm4,xmm0,78
495	pxor	xmm4,xmm0
496
497DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
498DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
499DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
500
501	pxor	xmm0,xmm3
502	pxor	xmm1,xmm5
503	movdqu	xmm9,XMMWORD[r8]
504	pxor	xmm8,xmm0
505DB	102,69,15,56,0,202	; pshufb xmm9,xmm10
506	movdqu	xmm3,XMMWORD[16+r8]
507
508	pxor	xmm8,xmm1
509	pxor	xmm1,xmm9
510	pxor	xmm4,xmm8
511DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
512	movdqa	xmm8,xmm4
513	psrldq	xmm8,8
514	pslldq	xmm4,8
515	pxor	xmm1,xmm8
516	pxor	xmm0,xmm4
517
518	movdqa	xmm5,xmm3
519
520	movdqa	xmm9,xmm0
521	movdqa	xmm8,xmm0
522	psllq	xmm0,5
523	pxor	xmm8,xmm0
524DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
525	psllq	xmm0,1
526	pxor	xmm0,xmm8
527	psllq	xmm0,57
528	movdqa	xmm8,xmm0
529	pslldq	xmm0,8
530	psrldq	xmm8,8
531	pxor	xmm0,xmm9
532	pshufd	xmm4,xmm5,78
533	pxor	xmm1,xmm8
534	pxor	xmm4,xmm5
535
536	movdqa	xmm9,xmm0
537	psrlq	xmm0,1
538DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
539	pxor	xmm1,xmm9
540	pxor	xmm9,xmm0
541	psrlq	xmm0,5
542	pxor	xmm0,xmm9
543	lea	r8,[32+r8]
544	psrlq	xmm0,1
545DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
546	pxor	xmm0,xmm1
547
548	sub	r9,0x20
549	ja	NEAR $L$mod_loop
550
; Finish the last pair: state * table[16] plus the pending block product,
; then the two-phase reduction.
551$L$even_tail:
552	movdqa	xmm1,xmm0
553	movdqa	xmm8,xmm4
554	pshufd	xmm4,xmm0,78
555	pxor	xmm4,xmm0
556
557DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
558DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
559DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
560
561	pxor	xmm0,xmm3
562	pxor	xmm1,xmm5
563	pxor	xmm8,xmm0
564	pxor	xmm8,xmm1
565	pxor	xmm4,xmm8
566	movdqa	xmm8,xmm4
567	psrldq	xmm8,8
568	pslldq	xmm4,8
569	pxor	xmm1,xmm8
570	pxor	xmm0,xmm4
571
572	movdqa	xmm4,xmm0
573	movdqa	xmm3,xmm0
574	psllq	xmm0,5
575	pxor	xmm3,xmm0
576	psllq	xmm0,1
577	pxor	xmm0,xmm3
578	psllq	xmm0,57
579	movdqa	xmm3,xmm0
580	pslldq	xmm0,8
581	psrldq	xmm3,8
582	pxor	xmm0,xmm4
583	pxor	xmm1,xmm3
584
585
586	movdqa	xmm4,xmm0
587	psrlq	xmm0,1
588	pxor	xmm1,xmm4
589	pxor	xmm4,xmm0
590	psrlq	xmm0,5
591	pxor	xmm0,xmm4
592	psrlq	xmm0,1
593	pxor	xmm0,xmm1
; r9 == 0 here means one block is still pending (the counter runs 16 bytes
; behind); a non-zero r9 means the input is fully consumed.
594	test	r9,r9
595	jnz	NEAR $L$done
596
; Single remaining block: XOR into the state and multiply by table[0].
597$L$odd_tail:
598	movdqu	xmm8,XMMWORD[r8]
599DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
600	pxor	xmm0,xmm8
601	movdqa	xmm1,xmm0
602	pshufd	xmm3,xmm0,78
603	pxor	xmm3,xmm0
604DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
605DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
606DB	102,15,58,68,223,0	; pclmulqdq xmm3,xmm7,0x00
607	pxor	xmm3,xmm0
608	pxor	xmm3,xmm1
609
610	movdqa	xmm4,xmm3
611	psrldq	xmm3,8
612	pslldq	xmm4,8
613	pxor	xmm1,xmm3
614	pxor	xmm0,xmm4
615
616	movdqa	xmm4,xmm0
617	movdqa	xmm3,xmm0
618	psllq	xmm0,5
619	pxor	xmm3,xmm0
620	psllq	xmm0,1
621	pxor	xmm0,xmm3
622	psllq	xmm0,57
623	movdqa	xmm3,xmm0
624	pslldq	xmm0,8
625	psrldq	xmm3,8
626	pxor	xmm0,xmm4
627	pxor	xmm1,xmm3
628
629
630	movdqa	xmm4,xmm0
631	psrlq	xmm0,1
632	pxor	xmm1,xmm4
633	pxor	xmm4,xmm0
634	psrlq	xmm0,5
635	pxor	xmm0,xmm4
636	psrlq	xmm0,1
637	pxor	xmm0,xmm1
; Byte-reverse the state back, store it, and restore xmm6-xmm15.
638$L$done:
639DB	102,65,15,56,0,194	; pshufb xmm0,xmm10
640	movdqu	XMMWORD[rcx],xmm0
641	movaps	xmm6,XMMWORD[rsp]
642	movaps	xmm7,XMMWORD[16+rsp]
643	movaps	xmm8,XMMWORD[32+rsp]
644	movaps	xmm9,XMMWORD[48+rsp]
645	movaps	xmm10,XMMWORD[64+rsp]
646	movaps	xmm11,XMMWORD[80+rsp]
647	movaps	xmm12,XMMWORD[96+rsp]
648	movaps	xmm13,XMMWORD[112+rsp]
649	movaps	xmm14,XMMWORD[128+rsp]
650	movaps	xmm15,XMMWORD[144+rsp]
651	lea	rsp,[168+rsp]
652	ret
653
654$L$SEH_end_gcm_ghash_clmul_14:
655
;-----------------------------------------------------------------------
; gcm_init_avx — build the key table used by gcm_ghash_avx.
; ABI:   Microsoft x64.
; In:    rdx -> 16-byte hash key (one unaligned 16-byte load)
;        rcx -> output table; 192 bytes are written (4 iterations x 48)
;        NOTE(review): presumably these are the key and Htable arguments
;        of the C prototype — confirm against the caller.
; Out:   each 48-byte group k (k = 0..3) holds
;          +0  : H'^(2k+1)      +16 : H'^(2k+2)
;          +32 : the lo^hi halves of those two powers, packed
;        where H' is the key with halves swapped, shifted left one bit and
;        conditionally XORed with $L$0x1c2_polynomial.
; Stack: 24 bytes, used to preserve xmm6 (callee-saved on Win64).
; Clobb: rcx (advanced by 192), r10, xmm0-xmm5, flags.
;-----------------------------------------------------------------------
656global	gcm_init_avx
657
658ALIGN	32
659gcm_init_avx:
660
661$L$SEH_begin_gcm_init_avx_1:
662_CET_ENDBR
663	sub	rsp,0x18
664$L$SEH_prologue_gcm_init_avx_2:
665	movaps	XMMWORD[rsp],xmm6
666$L$SEH_prologue_gcm_init_avx_3:
667$L$SEH_endprologue_gcm_init_avx_4:
668	vzeroupper
669
; Load the key and swap its 64-bit halves.
670	vmovdqu	xmm2,XMMWORD[rdx]
671	vpshufd	xmm2,xmm2,78
672
673
; 128-bit shift left by one, with a mask in xmm5 that is all-ones when the
; original top bit was set.
674	vpshufd	xmm4,xmm2,255
675	vpsrlq	xmm3,xmm2,63
676	vpsllq	xmm2,xmm2,1
677	vpxor	xmm5,xmm5,xmm5
678	vpcmpgtd	xmm5,xmm5,xmm4
679	vpslldq	xmm3,xmm3,8
680	vpor	xmm2,xmm2,xmm3
681
682
683	vpand	xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
684	vpxor	xmm2,xmm2,xmm5
685
; xmm6 = lo^hi of H' (Karatsuba operand); xmm0 = running power, starts at H'.
; r10 counts the four output groups.
686	vpunpckhqdq	xmm6,xmm2,xmm2
687	vmovdqa	xmm0,xmm2
688	vpxor	xmm6,xmm6,xmm2
689	mov	r10,4
690	jmp	NEAR $L$init_start_avx
691ALIGN	32
; Loop top (entered from the bottom): first store the packed lo^hi entry
; for the group written in the previous pass, then multiply the running
; power by H' to get the next odd power.
692$L$init_loop_avx:
693	vpalignr	xmm5,xmm4,xmm3,8
694	vmovdqu	XMMWORD[(-16)+rcx],xmm5
695	vpunpckhqdq	xmm3,xmm0,xmm0
696	vpxor	xmm3,xmm3,xmm0
697	vpclmulqdq	xmm1,xmm0,xmm2,0x11
698	vpclmulqdq	xmm0,xmm0,xmm2,0x00
699	vpclmulqdq	xmm3,xmm3,xmm6,0x00
700	vpxor	xmm4,xmm1,xmm0
701	vpxor	xmm3,xmm3,xmm4
702
703	vpslldq	xmm4,xmm3,8
704	vpsrldq	xmm3,xmm3,8
705	vpxor	xmm0,xmm0,xmm4
706	vpxor	xmm1,xmm1,xmm3
; Reduction, first phase (left shifts by 57, 62, 63).
707	vpsllq	xmm3,xmm0,57
708	vpsllq	xmm4,xmm0,62
709	vpxor	xmm4,xmm4,xmm3
710	vpsllq	xmm3,xmm0,63
711	vpxor	xmm4,xmm4,xmm3
712	vpslldq	xmm3,xmm4,8
713	vpsrldq	xmm4,xmm4,8
714	vpxor	xmm0,xmm0,xmm3
715	vpxor	xmm1,xmm1,xmm4
716
; Reduction, second phase (right shifts).
717	vpsrlq	xmm4,xmm0,1
718	vpxor	xmm1,xmm1,xmm0
719	vpxor	xmm0,xmm0,xmm4
720	vpsrlq	xmm4,xmm4,5
721	vpxor	xmm0,xmm0,xmm4
722	vpsrlq	xmm0,xmm0,1
723	vpxor	xmm0,xmm0,xmm1
; Keep the odd power in xmm5 and multiply again by H' for the even power.
724$L$init_start_avx:
725	vmovdqa	xmm5,xmm0
726	vpunpckhqdq	xmm3,xmm0,xmm0
727	vpxor	xmm3,xmm3,xmm0
728	vpclmulqdq	xmm1,xmm0,xmm2,0x11
729	vpclmulqdq	xmm0,xmm0,xmm2,0x00
730	vpclmulqdq	xmm3,xmm3,xmm6,0x00
731	vpxor	xmm4,xmm1,xmm0
732	vpxor	xmm3,xmm3,xmm4
733
734	vpslldq	xmm4,xmm3,8
735	vpsrldq	xmm3,xmm3,8
736	vpxor	xmm0,xmm0,xmm4
737	vpxor	xmm1,xmm1,xmm3
738	vpsllq	xmm3,xmm0,57
739	vpsllq	xmm4,xmm0,62
740	vpxor	xmm4,xmm4,xmm3
741	vpsllq	xmm3,xmm0,63
742	vpxor	xmm4,xmm4,xmm3
743	vpslldq	xmm3,xmm4,8
744	vpsrldq	xmm4,xmm4,8
745	vpxor	xmm0,xmm0,xmm3
746	vpxor	xmm1,xmm1,xmm4
747
748	vpsrlq	xmm4,xmm0,1
749	vpxor	xmm1,xmm1,xmm0
750	vpxor	xmm0,xmm0,xmm4
751	vpsrlq	xmm4,xmm4,5
752	vpxor	xmm0,xmm0,xmm4
753	vpsrlq	xmm0,xmm0,1
754	vpxor	xmm0,xmm0,xmm1
; Store the odd and even powers; xmm3/xmm4 keep their lo^hi values for the
; packed entry that is written after rcx has been advanced.
755	vpshufd	xmm3,xmm5,78
756	vpshufd	xmm4,xmm0,78
757	vpxor	xmm3,xmm3,xmm5
758	vmovdqu	XMMWORD[rcx],xmm5
759	vpxor	xmm4,xmm4,xmm0
760	vmovdqu	XMMWORD[16+rcx],xmm0
761	lea	rcx,[48+rcx]
762	sub	r10,1
763	jnz	NEAR $L$init_loop_avx
764
; Final packed entry. Note the operand order is xmm3,xmm4 here but
; xmm4,xmm3 inside the loop, so the two halves of this last entry are
; stored in the opposite order.
765	vpalignr	xmm5,xmm3,xmm4,8
766	vmovdqu	XMMWORD[(-16)+rcx],xmm5
767
768	vzeroupper
769	movaps	xmm6,XMMWORD[rsp]
770	lea	rsp,[24+rsp]
771	ret
772$L$SEH_end_gcm_init_avx_5:
773
774
;-----------------------------------------------------------------------
; gcm_gmult_avx — single-block multiply; there is no separate AVX body.
; The entry point tail-jumps to $L$_gmult_clmul inside gcm_gmult_clmul with
; rcx and rdx untouched, so inputs, outputs and clobbers are those of
; gcm_gmult_clmul.
;-----------------------------------------------------------------------
775global	gcm_gmult_avx
776
777ALIGN	32
778gcm_gmult_avx:
779
780_CET_ENDBR
781	jmp	NEAR $L$_gmult_clmul
782
783
;-----------------------------------------------------------------------
; gcm_ghash_avx — fold a buffer of 16-byte blocks into the 16-byte hash
; state using VPCLMULQDQ, eight blocks per main-loop iteration.
; ABI:   Microsoft x64.
; In:    rcx -> 16-byte hash state, read on entry and rewritten on exit
;        rdx -> key table; original offsets 0..191 are read (rdx is biased
;               by +64 below, hence the "(k-64)" displacements)
;        r8  -> input bytes
;        r9  =  input length in bytes
;        NOTE(review): the counters below only terminate correctly for a
;        non-zero multiple of 16 — confirm that callers guarantee this.
; Regs:  xmm10 = running hash state, xmm13 = byte-reversal mask,
;        r10 -> $L$0x1c2_polynomial, xmm6 = current table power,
;        xmm7 = current packed lo^hi entry,
;        xmm0/xmm1/xmm2 = newest lo/hi/middle products,
;        xmm3/xmm4/xmm5 = accumulated lo/hi/middle sums.
; Stack: 168 bytes holding xmm6-xmm15 (callee-saved on Win64).
; Clobb: rax, rdx, r8, r9, r10, xmm0-xmm5, flags.
;-----------------------------------------------------------------------
784global	gcm_ghash_avx
785
786ALIGN	32
787gcm_ghash_avx:
788
789$L$SEH_begin_gcm_ghash_avx_1:
790_CET_ENDBR
; Prologue: rsp drops by 168 in total; rax = new rsp + 32, so the saves
; below land at [rsp+0] .. [rsp+144].
791	lea	rax,[((-136))+rsp]
792	lea	rsp,[((-32))+rax]
793$L$SEH_prologue_gcm_ghash_avx_2:
794	movaps	XMMWORD[(-32)+rax],xmm6
795$L$SEH_prologue_gcm_ghash_avx_3:
796	movaps	XMMWORD[(-16)+rax],xmm7
797$L$SEH_prologue_gcm_ghash_avx_4:
798	movaps	XMMWORD[rax],xmm8
799$L$SEH_prologue_gcm_ghash_avx_5:
800	movaps	XMMWORD[16+rax],xmm9
801$L$SEH_prologue_gcm_ghash_avx_6:
802	movaps	XMMWORD[32+rax],xmm10
803$L$SEH_prologue_gcm_ghash_avx_7:
804	movaps	XMMWORD[48+rax],xmm11
805$L$SEH_prologue_gcm_ghash_avx_8:
806	movaps	XMMWORD[64+rax],xmm12
807$L$SEH_prologue_gcm_ghash_avx_9:
808	movaps	XMMWORD[80+rax],xmm13
809$L$SEH_prologue_gcm_ghash_avx_10:
810	movaps	XMMWORD[96+rax],xmm14
811$L$SEH_prologue_gcm_ghash_avx_11:
812	movaps	XMMWORD[112+rax],xmm15
813$L$SEH_prologue_gcm_ghash_avx_12:
814$L$SEH_endprologue_gcm_ghash_avx_13:
815	vzeroupper
816
; Load and byte-reverse the state; bias rdx so table offsets fit the
; displacements used below; fewer than 128 bytes goes to the short path.
817	vmovdqu	xmm10,XMMWORD[rcx]
818	lea	r10,[$L$0x1c2_polynomial]
819	lea	rdx,[64+rdx]
820	vmovdqu	xmm13,XMMWORD[$L$bswap_mask]
821	vpshufb	xmm10,xmm10,xmm13
822	cmp	r9,0x80
823	jb	NEAR $L$short_avx
824	sub	r9,0x80
825
; First group of eight: blocks are consumed from offset 112 down to 0, each
; multiplied by the next table power (last block by the first power).
; The products for the block at offset 0 are deferred to the loop or tail.
826	vmovdqu	xmm14,XMMWORD[112+r8]
827	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
828	vpshufb	xmm14,xmm14,xmm13
829	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
830
831	vpunpckhqdq	xmm9,xmm14,xmm14
832	vmovdqu	xmm15,XMMWORD[96+r8]
833	vpclmulqdq	xmm0,xmm14,xmm6,0x00
834	vpxor	xmm9,xmm9,xmm14
835	vpshufb	xmm15,xmm15,xmm13
836	vpclmulqdq	xmm1,xmm14,xmm6,0x11
837	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
838	vpunpckhqdq	xmm8,xmm15,xmm15
839	vmovdqu	xmm14,XMMWORD[80+r8]
840	vpclmulqdq	xmm2,xmm9,xmm7,0x00
841	vpxor	xmm8,xmm8,xmm15
842
843	vpshufb	xmm14,xmm14,xmm13
844	vpclmulqdq	xmm3,xmm15,xmm6,0x00
845	vpunpckhqdq	xmm9,xmm14,xmm14
846	vpclmulqdq	xmm4,xmm15,xmm6,0x11
847	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
848	vpxor	xmm9,xmm9,xmm14
849	vmovdqu	xmm15,XMMWORD[64+r8]
850	vpclmulqdq	xmm5,xmm8,xmm7,0x10
851	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
852
853	vpshufb	xmm15,xmm15,xmm13
854	vpxor	xmm3,xmm3,xmm0
855	vpclmulqdq	xmm0,xmm14,xmm6,0x00
856	vpxor	xmm4,xmm4,xmm1
857	vpunpckhqdq	xmm8,xmm15,xmm15
858	vpclmulqdq	xmm1,xmm14,xmm6,0x11
859	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
860	vpxor	xmm5,xmm5,xmm2
861	vpclmulqdq	xmm2,xmm9,xmm7,0x00
862	vpxor	xmm8,xmm8,xmm15
863
864	vmovdqu	xmm14,XMMWORD[48+r8]
865	vpxor	xmm0,xmm0,xmm3
866	vpclmulqdq	xmm3,xmm15,xmm6,0x00
867	vpxor	xmm1,xmm1,xmm4
868	vpshufb	xmm14,xmm14,xmm13
869	vpclmulqdq	xmm4,xmm15,xmm6,0x11
870	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
871	vpxor	xmm2,xmm2,xmm5
872	vpunpckhqdq	xmm9,xmm14,xmm14
873	vpclmulqdq	xmm5,xmm8,xmm7,0x10
874	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
875	vpxor	xmm9,xmm9,xmm14
876
877	vmovdqu	xmm15,XMMWORD[32+r8]
878	vpxor	xmm3,xmm3,xmm0
879	vpclmulqdq	xmm0,xmm14,xmm6,0x00
880	vpxor	xmm4,xmm4,xmm1
881	vpshufb	xmm15,xmm15,xmm13
882	vpclmulqdq	xmm1,xmm14,xmm6,0x11
883	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
884	vpxor	xmm5,xmm5,xmm2
885	vpunpckhqdq	xmm8,xmm15,xmm15
886	vpclmulqdq	xmm2,xmm9,xmm7,0x00
887	vpxor	xmm8,xmm8,xmm15
888
889	vmovdqu	xmm14,XMMWORD[16+r8]
890	vpxor	xmm0,xmm0,xmm3
891	vpclmulqdq	xmm3,xmm15,xmm6,0x00
892	vpxor	xmm1,xmm1,xmm4
893	vpshufb	xmm14,xmm14,xmm13
894	vpclmulqdq	xmm4,xmm15,xmm6,0x11
895	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
896	vpxor	xmm2,xmm2,xmm5
897	vpunpckhqdq	xmm9,xmm14,xmm14
898	vpclmulqdq	xmm5,xmm8,xmm7,0x10
899	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
900	vpxor	xmm9,xmm9,xmm14
901
902	vmovdqu	xmm15,XMMWORD[r8]
903	vpxor	xmm3,xmm3,xmm0
904	vpclmulqdq	xmm0,xmm14,xmm6,0x00
905	vpxor	xmm4,xmm4,xmm1
906	vpshufb	xmm15,xmm15,xmm13
907	vpclmulqdq	xmm1,xmm14,xmm6,0x11
908	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
909	vpxor	xmm5,xmm5,xmm2
910	vpclmulqdq	xmm2,xmm9,xmm7,0x10
911
; Advance past the eight blocks. If fewer than eight more remain, finish in
; $L$tail_avx (which XORs the state into the first block itself).
912	lea	r8,[128+r8]
913	cmp	r9,0x80
914	jb	NEAR $L$tail_avx
915
916	vpxor	xmm15,xmm15,xmm10
917	sub	r9,0x80
918	jmp	NEAR $L$oop8x_avx
919
920ALIGN	32
; Main 8x loop: finishes the previous group (products in xmm10/xmm11/xmm12,
; reduced with two VPCLMULQDQ by the constant at [r10]) while multiplying
; the next eight blocks. Interleaved on purpose; do not reorder.
921$L$oop8x_avx:
922	vpunpckhqdq	xmm8,xmm15,xmm15
923	vmovdqu	xmm14,XMMWORD[112+r8]
924	vpxor	xmm3,xmm3,xmm0
925	vpxor	xmm8,xmm8,xmm15
926	vpclmulqdq	xmm10,xmm15,xmm6,0x00
927	vpshufb	xmm14,xmm14,xmm13
928	vpxor	xmm4,xmm4,xmm1
929	vpclmulqdq	xmm11,xmm15,xmm6,0x11
930	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
931	vpunpckhqdq	xmm9,xmm14,xmm14
932	vpxor	xmm5,xmm5,xmm2
933	vpclmulqdq	xmm12,xmm8,xmm7,0x00
934	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
935	vpxor	xmm9,xmm9,xmm14
936
937	vmovdqu	xmm15,XMMWORD[96+r8]
938	vpclmulqdq	xmm0,xmm14,xmm6,0x00
939	vpxor	xmm10,xmm10,xmm3
940	vpshufb	xmm15,xmm15,xmm13
941	vpclmulqdq	xmm1,xmm14,xmm6,0x11
942	vxorps	xmm11,xmm11,xmm4
943	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
944	vpunpckhqdq	xmm8,xmm15,xmm15
945	vpclmulqdq	xmm2,xmm9,xmm7,0x00
946	vpxor	xmm12,xmm12,xmm5
947	vxorps	xmm8,xmm8,xmm15
948
949	vmovdqu	xmm14,XMMWORD[80+r8]
950	vpxor	xmm12,xmm12,xmm10
951	vpclmulqdq	xmm3,xmm15,xmm6,0x00
952	vpxor	xmm12,xmm12,xmm11
953	vpslldq	xmm9,xmm12,8
954	vpxor	xmm3,xmm3,xmm0
955	vpclmulqdq	xmm4,xmm15,xmm6,0x11
956	vpsrldq	xmm12,xmm12,8
957	vpxor	xmm10,xmm10,xmm9
958	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
959	vpshufb	xmm14,xmm14,xmm13
960	vxorps	xmm11,xmm11,xmm12
961	vpxor	xmm4,xmm4,xmm1
962	vpunpckhqdq	xmm9,xmm14,xmm14
963	vpclmulqdq	xmm5,xmm8,xmm7,0x10
964	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
965	vpxor	xmm9,xmm9,xmm14
966	vpxor	xmm5,xmm5,xmm2
967
968	vmovdqu	xmm15,XMMWORD[64+r8]
969	vpalignr	xmm12,xmm10,xmm10,8
970	vpclmulqdq	xmm0,xmm14,xmm6,0x00
971	vpshufb	xmm15,xmm15,xmm13
972	vpxor	xmm0,xmm0,xmm3
973	vpclmulqdq	xmm1,xmm14,xmm6,0x11
974	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
975	vpunpckhqdq	xmm8,xmm15,xmm15
976	vpxor	xmm1,xmm1,xmm4
977	vpclmulqdq	xmm2,xmm9,xmm7,0x00
978	vxorps	xmm8,xmm8,xmm15
979	vpxor	xmm2,xmm2,xmm5
980
981	vmovdqu	xmm14,XMMWORD[48+r8]
982	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
983	vpclmulqdq	xmm3,xmm15,xmm6,0x00
984	vpshufb	xmm14,xmm14,xmm13
985	vpxor	xmm3,xmm3,xmm0
986	vpclmulqdq	xmm4,xmm15,xmm6,0x11
987	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
988	vpunpckhqdq	xmm9,xmm14,xmm14
989	vpxor	xmm4,xmm4,xmm1
990	vpclmulqdq	xmm5,xmm8,xmm7,0x10
991	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
992	vpxor	xmm9,xmm9,xmm14
993	vpxor	xmm5,xmm5,xmm2
994
995	vmovdqu	xmm15,XMMWORD[32+r8]
996	vpclmulqdq	xmm0,xmm14,xmm6,0x00
997	vpshufb	xmm15,xmm15,xmm13
998	vpxor	xmm0,xmm0,xmm3
999	vpclmulqdq	xmm1,xmm14,xmm6,0x11
1000	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
1001	vpunpckhqdq	xmm8,xmm15,xmm15
1002	vpxor	xmm1,xmm1,xmm4
1003	vpclmulqdq	xmm2,xmm9,xmm7,0x00
1004	vpxor	xmm8,xmm8,xmm15
1005	vpxor	xmm2,xmm2,xmm5
1006	vxorps	xmm10,xmm10,xmm12
1007
1008	vmovdqu	xmm14,XMMWORD[16+r8]
1009	vpalignr	xmm12,xmm10,xmm10,8
1010	vpclmulqdq	xmm3,xmm15,xmm6,0x00
1011	vpshufb	xmm14,xmm14,xmm13
1012	vpxor	xmm3,xmm3,xmm0
1013	vpclmulqdq	xmm4,xmm15,xmm6,0x11
1014	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
1015	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
1016	vxorps	xmm12,xmm12,xmm11
1017	vpunpckhqdq	xmm9,xmm14,xmm14
1018	vpxor	xmm4,xmm4,xmm1
1019	vpclmulqdq	xmm5,xmm8,xmm7,0x10
1020	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
1021	vpxor	xmm9,xmm9,xmm14
1022	vpxor	xmm5,xmm5,xmm2
1023
; The reduced state (xmm12 ^ xmm10) is XORed into the first block of this
; group, whose multiply happens at the top of the next pass or in the tail.
1024	vmovdqu	xmm15,XMMWORD[r8]
1025	vpclmulqdq	xmm0,xmm14,xmm6,0x00
1026	vpshufb	xmm15,xmm15,xmm13
1027	vpclmulqdq	xmm1,xmm14,xmm6,0x11
1028	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
1029	vpxor	xmm15,xmm15,xmm12
1030	vpclmulqdq	xmm2,xmm9,xmm7,0x10
1031	vpxor	xmm15,xmm15,xmm10
1032
1033	lea	r8,[128+r8]
1034	sub	r9,0x80
1035	jnc	NEAR $L$oop8x_avx
1036
; Restore r9 to the count of leftover bytes (< 128); the state is already
; folded into xmm15, so skip the XOR at $L$tail_avx.
1037	add	r9,0x80
1038	jmp	NEAR $L$tail_no_xor_avx
1039
1040ALIGN	32
; Short path: 1..7 blocks. Blocks are read from the END of the remaining
; input backwards (r8 is moved to the end), pairing the last block with the
; first table power, the one before it with the second power, and so on.
1041$L$short_avx:
1042	vmovdqu	xmm14,XMMWORD[((-16))+r9*1+r8]
1043	lea	r8,[r9*1+r8]
1044	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
1045	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
1046	vpshufb	xmm15,xmm14,xmm13
1047
; Copy xmm0/1/2 into the accumulators so that the next
; "vpxor xmm3,xmm3,xmm0" (etc.) yields zero whatever the registers held.
1048	vmovdqa	xmm3,xmm0
1049	vmovdqa	xmm4,xmm1
1050	vmovdqa	xmm5,xmm2
1051	sub	r9,0x10
1052	jz	NEAR $L$tail_avx
1053
1054	vpunpckhqdq	xmm8,xmm15,xmm15
1055	vpxor	xmm3,xmm3,xmm0
1056	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1057	vpxor	xmm8,xmm8,xmm15
1058	vmovdqu	xmm14,XMMWORD[((-32))+r8]
1059	vpxor	xmm4,xmm4,xmm1
1060	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1061	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
1062	vpshufb	xmm15,xmm14,xmm13
1063	vpxor	xmm5,xmm5,xmm2
1064	vpclmulqdq	xmm2,xmm8,xmm7,0x00
; Move the high qword of the packed lo^hi entry into the low position for
; the next (even) power.
1065	vpsrldq	xmm7,xmm7,8
1066	sub	r9,0x10
1067	jz	NEAR $L$tail_avx
1068
1069	vpunpckhqdq	xmm8,xmm15,xmm15
1070	vpxor	xmm3,xmm3,xmm0
1071	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1072	vpxor	xmm8,xmm8,xmm15
1073	vmovdqu	xmm14,XMMWORD[((-48))+r8]
1074	vpxor	xmm4,xmm4,xmm1
1075	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1076	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
1077	vpshufb	xmm15,xmm14,xmm13
1078	vpxor	xmm5,xmm5,xmm2
1079	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1080	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
1081	sub	r9,0x10
1082	jz	NEAR $L$tail_avx
1083
1084	vpunpckhqdq	xmm8,xmm15,xmm15
1085	vpxor	xmm3,xmm3,xmm0
1086	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1087	vpxor	xmm8,xmm8,xmm15
1088	vmovdqu	xmm14,XMMWORD[((-64))+r8]
1089	vpxor	xmm4,xmm4,xmm1
1090	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1091	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
1092	vpshufb	xmm15,xmm14,xmm13
1093	vpxor	xmm5,xmm5,xmm2
1094	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1095	vpsrldq	xmm7,xmm7,8
1096	sub	r9,0x10
1097	jz	NEAR $L$tail_avx
1098
1099	vpunpckhqdq	xmm8,xmm15,xmm15
1100	vpxor	xmm3,xmm3,xmm0
1101	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1102	vpxor	xmm8,xmm8,xmm15
1103	vmovdqu	xmm14,XMMWORD[((-80))+r8]
1104	vpxor	xmm4,xmm4,xmm1
1105	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1106	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
1107	vpshufb	xmm15,xmm14,xmm13
1108	vpxor	xmm5,xmm5,xmm2
1109	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1110	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
1111	sub	r9,0x10
1112	jz	NEAR $L$tail_avx
1113
1114	vpunpckhqdq	xmm8,xmm15,xmm15
1115	vpxor	xmm3,xmm3,xmm0
1116	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1117	vpxor	xmm8,xmm8,xmm15
1118	vmovdqu	xmm14,XMMWORD[((-96))+r8]
1119	vpxor	xmm4,xmm4,xmm1
1120	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1121	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
1122	vpshufb	xmm15,xmm14,xmm13
1123	vpxor	xmm5,xmm5,xmm2
1124	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1125	vpsrldq	xmm7,xmm7,8
1126	sub	r9,0x10
1127	jz	NEAR $L$tail_avx
1128
1129	vpunpckhqdq	xmm8,xmm15,xmm15
1130	vpxor	xmm3,xmm3,xmm0
1131	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1132	vpxor	xmm8,xmm8,xmm15
1133	vmovdqu	xmm14,XMMWORD[((-112))+r8]
1134	vpxor	xmm4,xmm4,xmm1
1135	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1136	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
1137	vpshufb	xmm15,xmm14,xmm13
1138	vpxor	xmm5,xmm5,xmm2
1139	vpclmulqdq	xmm2,xmm8,xmm7,0x00
; Loads only the high qword of the packed entry at original offset 176
; (into the low half of xmm7) for the seventh block.
1140	vmovq	xmm7,QWORD[((184-64))+rdx]
1141	sub	r9,0x10
1142	jmp	NEAR $L$tail_avx
1143
1144ALIGN	32
; Tail: fold the running state into the pending block, do its multiply,
; sum all partial products and reduce.
1145$L$tail_avx:
1146	vpxor	xmm15,xmm15,xmm10
1147$L$tail_no_xor_avx:
1148	vpunpckhqdq	xmm8,xmm15,xmm15
1149	vpxor	xmm3,xmm3,xmm0
1150	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1151	vpxor	xmm8,xmm8,xmm15
1152	vpxor	xmm4,xmm4,xmm1
1153	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1154	vpxor	xmm5,xmm5,xmm2
1155	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1156
1157	vmovdqu	xmm12,XMMWORD[r10]
1158
; xmm10 = low 128 bits, xmm11 = high 128 bits, xmm5 = Karatsuba middle.
1159	vpxor	xmm10,xmm3,xmm0
1160	vpxor	xmm11,xmm4,xmm1
1161	vpxor	xmm5,xmm5,xmm2
1162
1163	vpxor	xmm5,xmm5,xmm10
1164	vpxor	xmm5,xmm5,xmm11
1165	vpslldq	xmm9,xmm5,8
1166	vpsrldq	xmm5,xmm5,8
1167	vpxor	xmm10,xmm10,xmm9
1168	vpxor	xmm11,xmm11,xmm5
1169
; Two-step reduction using carry-less multiplies by the constant in xmm12.
1170	vpclmulqdq	xmm9,xmm10,xmm12,0x10
1171	vpalignr	xmm10,xmm10,xmm10,8
1172	vpxor	xmm10,xmm10,xmm9
1173
1174	vpclmulqdq	xmm9,xmm10,xmm12,0x10
1175	vpalignr	xmm10,xmm10,xmm10,8
1176	vpxor	xmm10,xmm10,xmm11
1177	vpxor	xmm10,xmm10,xmm9
1178
; Non-zero r9 here means leftover blocks after the 8x groups; handle them
; through the short path.
1179	cmp	r9,0
1180	jne	NEAR $L$short_avx
1181
; Byte-reverse the state back, store it, and restore xmm6-xmm15.
1182	vpshufb	xmm10,xmm10,xmm13
1183	vmovdqu	XMMWORD[rcx],xmm10
1184	vzeroupper
1185	movaps	xmm6,XMMWORD[rsp]
1186	movaps	xmm7,XMMWORD[16+rsp]
1187	movaps	xmm8,XMMWORD[32+rsp]
1188	movaps	xmm9,XMMWORD[48+rsp]
1189	movaps	xmm10,XMMWORD[64+rsp]
1190	movaps	xmm11,XMMWORD[80+rsp]
1191	movaps	xmm12,XMMWORD[96+rsp]
1192	movaps	xmm13,XMMWORD[112+rsp]
1193	movaps	xmm14,XMMWORD[128+rsp]
1194	movaps	xmm15,XMMWORD[144+rsp]
1195	lea	rsp,[168+rsp]
1196	ret
1197
1198$L$SEH_end_gcm_ghash_avx_14:
1199
; Read-only constants shared by the functions above. The block is aligned
; to 64 bytes, so the aligned loads (movdqa) of $L$bswap_mask and
; $L$7_mask are valid.
1200section	.rdata rdata align=8
1201ALIGN	64
; pshufb control that reverses the 16 bytes of a vector.
1202$L$bswap_mask:
1203	DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; 128-bit constant with byte 0 = 0x01 and byte 15 = 0xc2; used for the
; conditional XOR during key setup and as the multiplier in the AVX
; reduction steps.
1204$L$0x1c2_polynomial:
1205	DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
; The value 7 in each 64-bit lane; isolates the low 3 bits in $L$mod4_loop.
1206$L$7_mask:
1207	DD	7,0,7,0
1208ALIGN	64
1209
; ASCII, NUL-terminated:
; "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1210	DB	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
1211	DB	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
1212	DB	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
1213	DB	114,103,62,0
1214ALIGN	64
1215section	.text
1216
; Win64 function table. Each entry is three image-relative addresses:
; function start, function end, and the matching unwind-info record in
; .xdata. Only the four functions that adjust rsp and save xmm registers
; have entries; gcm_gmult_clmul and gcm_gmult_avx do not.
1217section	.pdata rdata align=4
1218ALIGN	4
1219	DD	$L$SEH_begin_gcm_init_clmul_1 wrt ..imagebase
1220	DD	$L$SEH_end_gcm_init_clmul_5 wrt ..imagebase
1221	DD	$L$SEH_info_gcm_init_clmul_0 wrt ..imagebase
1222
1223	DD	$L$SEH_begin_gcm_ghash_clmul_1 wrt ..imagebase
1224	DD	$L$SEH_end_gcm_ghash_clmul_14 wrt ..imagebase
1225	DD	$L$SEH_info_gcm_ghash_clmul_0 wrt ..imagebase
1226
1227	DD	$L$SEH_begin_gcm_init_avx_1 wrt ..imagebase
1228	DD	$L$SEH_end_gcm_init_avx_5 wrt ..imagebase
1229	DD	$L$SEH_info_gcm_init_avx_0 wrt ..imagebase
1230
1231	DD	$L$SEH_begin_gcm_ghash_avx_1 wrt ..imagebase
1232	DD	$L$SEH_end_gcm_ghash_avx_14 wrt ..imagebase
1233	DD	$L$SEH_info_gcm_ghash_avx_0 wrt ..imagebase
1234
1234
1235
; Win64 unwind-info records, one per .pdata entry.
; Record header: version/flags byte (1), prologue size, number of unwind
; code slots, frame register byte (0 = none).
; Unwind codes follow in reverse prologue order. Each starts with the
; prologue offset of the instruction it describes and an op/info byte:
;   low nibble 8  = save of a 128-bit xmm register, high nibble = register
;                   number, followed by a word holding offset/16 from rsp
;   low nibble 2  = small stack allocation, size = high nibble * 8 + 8
;   low nibble 1  = large stack allocation, followed by a word = size/8
; The values below match the prologues in .text: 24 bytes + xmm6 for the
; init functions, 168 bytes + xmm6..xmm15 for the ghash functions.
1236section	.xdata rdata align=8
1237ALIGN	4
1238$L$SEH_info_gcm_init_clmul_0:
1239	DB	1
1240	DB	$L$SEH_endprologue_gcm_init_clmul_4-$L$SEH_begin_gcm_init_clmul_1
1241	DB	3
1242	DB	0
; xmm6 saved at [rsp+0]
1243	DB	$L$SEH_prologue_gcm_init_clmul_3-$L$SEH_begin_gcm_init_clmul_1
1244	DB	104
1245	DW	0
; 24-byte allocation (0x22: info 2 -> 2*8+8)
1246	DB	$L$SEH_prologue_gcm_init_clmul_2-$L$SEH_begin_gcm_init_clmul_1
1247	DB	34
1248
; Padding word: the slot count (3) is odd.
1249	DW	0
1250$L$SEH_info_gcm_ghash_clmul_0:
1251	DB	1
1252	DB	$L$SEH_endprologue_gcm_ghash_clmul_13-$L$SEH_begin_gcm_ghash_clmul_1
1253	DB	22
1254	DB	0
; xmm15 at [rsp+144] down to xmm6 at [rsp+0]
1255	DB	$L$SEH_prologue_gcm_ghash_clmul_12-$L$SEH_begin_gcm_ghash_clmul_1
1256	DB	248
1257	DW	9
1258	DB	$L$SEH_prologue_gcm_ghash_clmul_11-$L$SEH_begin_gcm_ghash_clmul_1
1259	DB	232
1260	DW	8
1261	DB	$L$SEH_prologue_gcm_ghash_clmul_10-$L$SEH_begin_gcm_ghash_clmul_1
1262	DB	216
1263	DW	7
1264	DB	$L$SEH_prologue_gcm_ghash_clmul_9-$L$SEH_begin_gcm_ghash_clmul_1
1265	DB	200
1266	DW	6
1267	DB	$L$SEH_prologue_gcm_ghash_clmul_8-$L$SEH_begin_gcm_ghash_clmul_1
1268	DB	184
1269	DW	5
1270	DB	$L$SEH_prologue_gcm_ghash_clmul_7-$L$SEH_begin_gcm_ghash_clmul_1
1271	DB	168
1272	DW	4
1273	DB	$L$SEH_prologue_gcm_ghash_clmul_6-$L$SEH_begin_gcm_ghash_clmul_1
1274	DB	152
1275	DW	3
1276	DB	$L$SEH_prologue_gcm_ghash_clmul_5-$L$SEH_begin_gcm_ghash_clmul_1
1277	DB	136
1278	DW	2
1279	DB	$L$SEH_prologue_gcm_ghash_clmul_4-$L$SEH_begin_gcm_ghash_clmul_1
1280	DB	120
1281	DW	1
1282	DB	$L$SEH_prologue_gcm_ghash_clmul_3-$L$SEH_begin_gcm_ghash_clmul_1
1283	DB	104
1284	DW	0
; 168-byte allocation (21 * 8)
1285	DB	$L$SEH_prologue_gcm_ghash_clmul_2-$L$SEH_begin_gcm_ghash_clmul_1
1286	DB	1
1287	DW	21
1288
1289$L$SEH_info_gcm_init_avx_0:
1290	DB	1
1291	DB	$L$SEH_endprologue_gcm_init_avx_4-$L$SEH_begin_gcm_init_avx_1
1292	DB	3
1293	DB	0
; xmm6 saved at [rsp+0]
1294	DB	$L$SEH_prologue_gcm_init_avx_3-$L$SEH_begin_gcm_init_avx_1
1295	DB	104
1296	DW	0
; 24-byte allocation
1297	DB	$L$SEH_prologue_gcm_init_avx_2-$L$SEH_begin_gcm_init_avx_1
1298	DB	34
1299
; Padding word: the slot count (3) is odd.
1300	DW	0
1301$L$SEH_info_gcm_ghash_avx_0:
1302	DB	1
1303	DB	$L$SEH_endprologue_gcm_ghash_avx_13-$L$SEH_begin_gcm_ghash_avx_1
1304	DB	22
1305	DB	0
; xmm15 at [rsp+144] down to xmm6 at [rsp+0]
1306	DB	$L$SEH_prologue_gcm_ghash_avx_12-$L$SEH_begin_gcm_ghash_avx_1
1307	DB	248
1308	DW	9
1309	DB	$L$SEH_prologue_gcm_ghash_avx_11-$L$SEH_begin_gcm_ghash_avx_1
1310	DB	232
1311	DW	8
1312	DB	$L$SEH_prologue_gcm_ghash_avx_10-$L$SEH_begin_gcm_ghash_avx_1
1313	DB	216
1314	DW	7
1315	DB	$L$SEH_prologue_gcm_ghash_avx_9-$L$SEH_begin_gcm_ghash_avx_1
1316	DB	200
1317	DW	6
1318	DB	$L$SEH_prologue_gcm_ghash_avx_8-$L$SEH_begin_gcm_ghash_avx_1
1319	DB	184
1320	DW	5
1321	DB	$L$SEH_prologue_gcm_ghash_avx_7-$L$SEH_begin_gcm_ghash_avx_1
1322	DB	168
1323	DW	4
1324	DB	$L$SEH_prologue_gcm_ghash_avx_6-$L$SEH_begin_gcm_ghash_avx_1
1325	DB	152
1326	DW	3
1327	DB	$L$SEH_prologue_gcm_ghash_avx_5-$L$SEH_begin_gcm_ghash_avx_1
1328	DB	136
1329	DW	2
1330	DB	$L$SEH_prologue_gcm_ghash_avx_4-$L$SEH_begin_gcm_ghash_avx_1
1331	DB	120
1332	DW	1
1333	DB	$L$SEH_prologue_gcm_ghash_avx_3-$L$SEH_begin_gcm_ghash_avx_1
1334	DB	104
1335	DW	0
; 168-byte allocation (21 * 8)
1336	DB	$L$SEH_prologue_gcm_ghash_avx_2-$L$SEH_begin_gcm_ghash_avx_1
1337	DB	1
1338	DW	21
; Any output format other than win64: emit a single instruction so the
; object is not empty (see the bug link below).
1339%else
1340; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
1341ret
1342%endif
1343