// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
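// gcm_init_clmul: precompute the GHASH key table from the hash key H.
// The register usage below implies the SysV arguments %rdi = Htable (output)
// and %rsi = H (input, two 64-bit halves).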
.globl	gcm_init_clmul
.hidden gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.cfi_startproc

_CET_ENDBR
.L_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2


	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2


	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2


	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm2,%xmm0
	pxor	%xmm2,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,0(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%rdi)
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm5,%xmm3
	movdqu	%xmm5,48(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,64(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,80(%rdi)
	ret
.cfi_endproc

.size	gcm_init_clmul,.-gcm_init_clmul
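// gcm_gmult_clmul: one GF(2^128) multiplication of the current digest by H.
// %rdi = Xi (16 bytes, in/out), %rsi = Htable produced by gcm_init_clmul.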
.globl	gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.cfi_startproc
_CET_ENDBR
.L_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm4
.byte	102,15,56,0,197
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%rdi)
	ret
.cfi_endproc
.size	gcm_gmult_clmul,.-gcm_gmult_clmul
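// gcm_ghash_clmul: fold a whole buffer into the digest, four blocks at a time
// when PCLMULQDQ+MOVBE conditions allow. %rdi = Xi (in/out), %rsi = Htable,
// %rdx = input, %rcx = length in bytes (a multiple of 16).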
.globl	gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	32
gcm_ghash_clmul:
.cfi_startproc

_CET_ENDBR
.L_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm10

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm7
.byte	102,65,15,56,0,194

	subq	$0x10,%rcx
	jz	.Lodd_tail

	movdqu	16(%rsi),%xmm6
	leaq	OPENSSL_ia32cap_P(%rip),%rax
	movl	4(%rax),%eax
	cmpq	$0x30,%rcx
	jb	.Lskip4x

	andl	$71303168,%eax
	cmpl	$4194304,%eax
	je	.Lskip4x

	subq	$0x30,%rcx
	movq	$0xA040608020C0E000,%rax
	movdqu	48(%rsi),%xmm14
	movdqu	64(%rsi),%xmm15




	movdqu	48(%rdx),%xmm3
	movdqu	32(%rdx),%xmm11
.byte	102,65,15,56,0,218
.byte	102,69,15,56,0,218
	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm11,%xmm12
.byte	102,68,15,58,68,222,0
.byte	102,68,15,58,68,238,17
.byte	102,68,15,58,68,231,16
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
	xorps	%xmm12,%xmm4

	movdqu	16(%rdx),%xmm11
	movdqu	0(%rdx),%xmm8
.byte	102,69,15,56,0,218
.byte	102,69,15,56,0,194
	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm8,%xmm0
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8
.byte	102,69,15,58,68,238,17
.byte	102,68,15,58,68,231,0
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jc	.Ltail4x

	jmp	.Lmod4_loop
.align	32
.Lmod4_loop:
.byte	102,65,15,58,68,199,0
	xorps	%xmm12,%xmm4
	movdqu	48(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,65,15,58,68,207,17
	xorps	%xmm3,%xmm0
	movdqu	32(%rdx),%xmm3
	movdqa	%xmm11,%xmm13
.byte	102,68,15,58,68,199,16
	pshufd	$78,%xmm11,%xmm12
	xorps	%xmm5,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,65,15,56,0,218
	movups	32(%rsi),%xmm7
	xorps	%xmm4,%xmm8
.byte	102,68,15,58,68,218,0
	pshufd	$78,%xmm3,%xmm4

	pxor	%xmm0,%xmm8
	movdqa	%xmm3,%xmm5
	pxor	%xmm1,%xmm8
	pxor	%xmm3,%xmm4
	movdqa	%xmm8,%xmm9
.byte	102,68,15,58,68,234,17
	pslldq	$8,%xmm8
	psrldq	$8,%xmm9
	pxor	%xmm8,%xmm0
	movdqa	.L7_mask(%rip),%xmm8
	pxor	%xmm9,%xmm1
.byte	102,76,15,110,200

	pand	%xmm0,%xmm8
.byte	102,69,15,56,0,200
	pxor	%xmm0,%xmm9
.byte	102,68,15,58,68,231,0
	psllq	$57,%xmm9
	movdqa	%xmm9,%xmm8
	pslldq	$8,%xmm9
.byte	102,15,58,68,222,0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	movdqu	0(%rdx),%xmm8

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,238,17
	xorps	%xmm11,%xmm3
	movdqu	16(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,15,58,68,231,16
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
.byte	102,69,15,56,0,194
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0

	movdqa	%xmm11,%xmm13
	pxor	%xmm12,%xmm4
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm1
.byte	102,69,15,58,68,238,17
	xorps	%xmm11,%xmm3
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8

.byte	102,68,15,58,68,231,0
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jnc	.Lmod4_loop

.Ltail4x:
.byte	102,65,15,58,68,199,0
.byte	102,65,15,58,68,207,17
.byte	102,68,15,58,68,199,16
	xorps	%xmm12,%xmm4
	xorps	%xmm3,%xmm0
	xorps	%xmm5,%xmm1
	pxor	%xmm0,%xmm1
	pxor	%xmm4,%xmm8

	pxor	%xmm1,%xmm8
	pxor	%xmm0,%xmm1

	movdqa	%xmm8,%xmm9
	psrldq	$8,%xmm8
	pslldq	$8,%xmm9
	pxor	%xmm8,%xmm1
	pxor	%xmm9,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	addq	$0x40,%rcx
	jz	.Ldone
	movdqu	32(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Lodd_tail
.Lskip4x:





	movdqu	(%rdx),%xmm8
	movdqu	16(%rdx),%xmm3
.byte	102,69,15,56,0,194
.byte	102,65,15,56,0,218
	pxor	%xmm8,%xmm0

	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	leaq	32(%rdx),%rdx
	nop
	subq	$0x20,%rcx
	jbe	.Leven_tail
	nop
	jmp	.Lmod_loop

.align	32
.Lmod_loop:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	movdqu	(%rdx),%xmm9
	pxor	%xmm0,%xmm8
.byte	102,69,15,56,0,202
	movdqu	16(%rdx),%xmm3

	pxor	%xmm1,%xmm8
	pxor	%xmm9,%xmm1
	pxor	%xmm8,%xmm4
.byte	102,65,15,56,0,218
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm3,%xmm5

	movdqa	%xmm0,%xmm9
	movdqa	%xmm0,%xmm8
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm8
.byte	102,15,58,68,218,0
	psllq	$1,%xmm0
	pxor	%xmm8,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm8
	pslldq	$8,%xmm0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pshufd	$78,%xmm5,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm5,%xmm4

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,234,17
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0
	pxor	%xmm9,%xmm0
	leaq	32(%rdx),%rdx
	psrlq	$1,%xmm0
.byte	102,15,58,68,231,0
	pxor	%xmm1,%xmm0

	subq	$0x20,%rcx
	ja	.Lmod_loop

.Leven_tail:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm1,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone

.Lodd_tail:
	movdqu	(%rdx),%xmm8
.byte	102,69,15,56,0,194
	pxor	%xmm8,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,223,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.Ldone:
.byte	102,65,15,56,0,194
	movdqu	%xmm0,(%rdi)
	ret
.cfi_endproc

.size	gcm_ghash_clmul,.-gcm_ghash_clmul
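// gcm_init_avx: AVX variant of gcm_init_clmul; same arguments. It precomputes
// a larger table of powers of H for the eight-blocks-at-a-time loop used by
// gcm_ghash_avx below.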
.globl	gcm_init_avx
.hidden gcm_init_avx
.type	gcm_init_avx,@function
.align	32
gcm_init_avx:
.cfi_startproc
_CET_ENDBR
	vzeroupper

	vmovdqu	(%rsi),%xmm2
	vpshufd	$78,%xmm2,%xmm2


	vpshufd	$255,%xmm2,%xmm4
	vpsrlq	$63,%xmm2,%xmm3
	vpsllq	$1,%xmm2,%xmm2
	vpxor	%xmm5,%xmm5,%xmm5
	vpcmpgtd	%xmm4,%xmm5,%xmm5
	vpslldq	$8,%xmm3,%xmm3
	vpor	%xmm3,%xmm2,%xmm2


	vpand	.L0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor	%xmm5,%xmm2,%xmm2

	vpunpckhqdq	%xmm2,%xmm2,%xmm6
	vmovdqa	%xmm2,%xmm0
	vpxor	%xmm2,%xmm6,%xmm6
	movq	$4,%r10
	jmp	.Linit_start_avx
.align	32
.Linit_loop_avx:
	vpalignr	$8,%xmm3,%xmm4,%xmm5
	vmovdqu	%xmm5,-16(%rdi)
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
.Linit_start_avx:
	vmovdqa	%xmm0,%xmm5
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
	vpshufd	$78,%xmm5,%xmm3
	vpshufd	$78,%xmm0,%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqu	%xmm5,0(%rdi)
	vpxor	%xmm0,%xmm4,%xmm4
	vmovdqu	%xmm0,16(%rdi)
	leaq	48(%rdi),%rdi
	subq	$1,%r10
	jnz	.Linit_loop_avx

	vpalignr	$8,%xmm4,%xmm3,%xmm5
	vmovdqu	%xmm5,-16(%rdi)

	vzeroupper
	ret

.cfi_endproc
.size	gcm_init_avx,.-gcm_init_avx
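// gcm_ghash_avx: AVX variant of gcm_ghash_clmul, processing up to eight
// 16-byte blocks per iteration. Same arguments: %rdi = Xi (in/out),
// %rsi = Htable (from gcm_init_avx), %rdx = input, %rcx = length in bytes.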
.globl	gcm_ghash_avx
.hidden gcm_ghash_avx
.type	gcm_ghash_avx,@function
.align	32
gcm_ghash_avx:
.cfi_startproc
_CET_ENDBR
	vzeroupper

	vmovdqu	(%rdi),%xmm10
	leaq	.L0x1c2_polynomial(%rip),%r10
	leaq	64(%rsi),%rsi
	vmovdqu	.Lbswap_mask(%rip),%xmm13
	vpshufb	%xmm13,%xmm10,%xmm10
	cmpq	$0x80,%rcx
	jb	.Lshort_avx
	subq	$0x80,%rcx

	vmovdqu	112(%rdx),%xmm14
	vmovdqu	0-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vmovdqu	32-64(%rsi),%xmm7

	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm14,%xmm9,%xmm9
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	80(%rdx),%xmm14
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	48-64(%rsi),%xmm6
	vpxor	%xmm14,%xmm9,%xmm9
	vmovdqu	64(%rdx),%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7

	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	48(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	32(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	16(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2

	leaq	128(%rdx),%rdx
	cmpq	$0x80,%rcx
	jb	.Ltail_avx

	vpxor	%xmm10,%xmm15,%xmm15
	subq	$0x80,%rcx
	jmp	.Loop8x_avx

.align	32
.Loop8x_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	112(%rdx),%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpxor	%xmm15,%xmm8,%xmm8
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm10
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm11
	vmovdqu	0-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm12
	vmovdqu	32-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm3,%xmm10,%xmm10
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vxorps	%xmm4,%xmm11,%xmm11
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm5,%xmm12,%xmm12
	vxorps	%xmm15,%xmm8,%xmm8

	vmovdqu	80(%rdx),%xmm14
	vpxor	%xmm10,%xmm12,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm11,%xmm12,%xmm12
	vpslldq	$8,%xmm12,%xmm9
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vpsrldq	$8,%xmm12,%xmm12
	vpxor	%xmm9,%xmm10,%xmm10
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vxorps	%xmm12,%xmm11,%xmm11
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	64(%rdx),%xmm15
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vxorps	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2

	vmovdqu	48(%rdx),%xmm14
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	32(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2
	vxorps	%xmm12,%xmm10,%xmm10

	vmovdqu	16(%rdx),%xmm14
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vxorps	%xmm11,%xmm12,%xmm12
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm12,%xmm15,%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
	vpxor	%xmm10,%xmm15,%xmm15

	leaq	128(%rdx),%rdx
	subq	$0x80,%rcx
	jnc	.Loop8x_avx

	addq	$0x80,%rcx
	jmp	.Ltail_no_xor_avx

.align	32
.Lshort_avx:
	vmovdqu	-16(%rdx,%rcx,1),%xmm14
	leaq	(%rdx,%rcx,1),%rdx
	vmovdqu	0-64(%rsi),%xmm6
	vmovdqu	32-64(%rsi),%xmm7
	vpshufb	%xmm13,%xmm14,%xmm15

	vmovdqa	%xmm0,%xmm3
	vmovdqa	%xmm1,%xmm4
	vmovdqa	%xmm2,%xmm5
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-32(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-48(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	80-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-64(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-80(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	96-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	128-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-96(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-112(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	144-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovq	184-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jmp	.Ltail_avx

.align	32
.Ltail_avx:
	vpxor	%xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2

	vmovdqu	(%r10),%xmm12

	vpxor	%xmm0,%xmm3,%xmm10
	vpxor	%xmm1,%xmm4,%xmm11
	vpxor	%xmm2,%xmm5,%xmm5

	vpxor	%xmm10,%xmm5,%xmm5
	vpxor	%xmm11,%xmm5,%xmm5
	vpslldq	$8,%xmm5,%xmm9
	vpsrldq	$8,%xmm5,%xmm5
	vpxor	%xmm9,%xmm10,%xmm10
	vpxor	%xmm5,%xmm11,%xmm11

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm11,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	cmpq	$0,%rcx
	jne	.Lshort_avx

	vpshufb	%xmm13,%xmm10,%xmm10
	vmovdqu	%xmm10,(%rdi)
	vzeroupper
	ret
.cfi_endproc

.size	gcm_ghash_avx,.-gcm_ghash_avx
.section	.rodata
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long	7,0,7,0
.align	64

.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
.text
#endif